class Acrobot_GN(Acrobot):
    '''
    '''
    STATE_THETA_1, STATE_THETA_2, STATE_V_1, STATE_V_2 = 0, 1, 2, 3
    MIN_V_1, MAX_V_1 = -6., 6.
    MIN_V_2, MAX_V_2 = -6., 6.
    MIN_TORQUE, MAX_TORQUE = -4., 4.

    MIN_ANGLE, MAX_ANGLE = -np.pi, np.pi

    LENGTH = 20.
    m = 1.0
    lc = 0.5
    lc2 = 0.25
    l2 = 1.
    I1 = 0.2
    I2 = 1.0
    g = 9.81

    def __init__(self):
        super(Acrobot, self).__init__()
        self.G1 = nx.path_graph(2).to_directed()
        self.node_feat_size = 2
        self.edge_feat_size = 3
        self.graph_feat_size = 10
        self.gn = FFGN(self.graph_feat_size, self.node_feat_size,
                       self.edge_feat_size).cuda()
        self.gn.load_state_dict(torch.load('model0.05.pth'))
        normalizers = torch.load('normalized/acrobot0.05.pth')
        self.in_normalizer = normalizers['in_normalizer']
        self.out_normalizer = normalizers['out_normalizer']

    def propagate(self, start_state, control, num_steps, integration_step):
        '''
        Integrate system dynamics
        :param start_state: numpy array with the start state for the integration
        :param control: numpy array with constant controls to be applied during integration
        :param num_steps: number of steps to integrate
        :param integration_step: dt of integration
        :return: new state of the system
        '''
        action = control
        delta_state = torch.zeros_like(start_state)
        last_state = start_state.clone()
        for i in range(num_steps):
            init_graph_features(self.G1,
                                self.graph_feat_size,
                                self.node_feat_size,
                                self.edge_feat_size,
                                cuda=True,
                                bs=1)
            load_graph_features(self.G1,
                                action,
                                last_state.clone(),
                                None,
                                bs=1,
                                noise=0,
                                std=None)
            self.gn.eval()
            # with torch.no_grad():
            G_out = self.gn(self.in_normalizer.normalize(self.G1))
            for node in G_out.nodes():
                delta_state[0, [node, node +
                                2]] = G_out.nodes[node]['feat'][0].cpu()
            last_state += delta_state
        return last_state

    def visualize_point(self, state):
        x2 = self.LENGTH * np.cos(state[self.STATE_THETA_1] -
                                  np.pi / 2) + self.LENGTH * np.cos(
                                      state[self.STATE_THETA_1] +
                                      state[self.STATE_THETA_2] - np.pi / 2)
        y2 = self.LENGTH * np.sin(state[self.STATE_THETA_1] -
                                  np.pi / 2) + self.LENGTH * np.sin(
                                      state[self.STATE_THETA_1] +
                                      state[self.STATE_THETA_2] - np.pi / 2)
        x1 = self.LENGTH * np.cos(state[self.STATE_THETA_1] - np.pi / 2)
        y1 = self.LENGTH * np.sin(state[self.STATE_THETA_1] - np.pi / 2)
        # x2 = (x + 2 * self.LENGTH) / (4 * self.LENGTH)
        # y2 = (y + 2 * self.LENGTH) / (4 * self.LENGTH)
        return x1, y1, x2, y2

    def get_state_bounds(self):
        return [(self.MIN_ANGLE, self.MAX_ANGLE),
                (self.MIN_ANGLE, self.MAX_ANGLE), (self.MIN_V_1, self.MAX_V_1),
                (self.MIN_V_2, self.MAX_V_2)]

    def get_control_bounds(self):
        return [(self.MIN_TORQUE, self.MAX_TORQUE)]
Ejemplo n.º 2
0
        # evaluation loop, done every epoch

        for data in tqdm(dl_eval):
            action, delta_state, last_state = data
            action, delta_state, last_state = action.float(), \
                delta_state.float(), last_state.float()
            if use_cuda:
                action, delta_state, last_state = action.cuda(),\
                    delta_state.cuda(), last_state.cuda()

            init_graph_features(G1, graph_feat_size, node_feat_size,
                                edge_feat_size, cuda=True, bs=200)
            load_graph_features(G1, action, last_state, delta_state,
                                bs=200, noise=0)
            gn.eval()
            G_out = gn(G1)#gn(in_normalizer.normalize(G1))

            init_graph_features(G_target, graph_feat_size, node_feat_size,
                                edge_feat_size, cuda=True, bs=200)
            load_graph_features(G_target, action, delta_state, None, bs=200,
                                norm=False, noise=0)
            #  G_target_normalized = out_normalizer.normalize(G_target, False)

            #  loss = get_graph_loss(out_normalizer.normalize(G_out, False), G_target_normalized)
            loss = get_graph_loss(G_out, G_target, opt.tstep)
            sum_loss += loss.data.item()
            itr += 1

        writer.add_scalar('loss_eval', sum_loss / float(itr), step)