Example #1
    def load_models(self, filename, path='models'):
        path = os.path.join(path, filename)
        if not os.path.isfile(path):
            raise FileNotFoundError('No such path: ' + path)
        # map_location belongs to torch.load, not load_state_dict; remap the
        # checkpoint onto the agent's device as it is read from disk.
        checkpoint = torch.load(path, map_location=self.device)
        self.actor_critic.load_state_dict(checkpoint['model'])

        # Synchronize the target network with the freshly loaded weights.
        utls.copy_parameters(self.target_actor_critic, self.actor_critic)
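load_models expects a dictionary checkpoint with a 'model' key. The matching writer is not part of this excerpt; a minimal sketch under that assumption (the name save_models is hypothetical):

    def save_models(self, filename, path='models'):
        # Hypothetical counterpart to load_models above: writes a checkpoint
        # dict whose 'model' key holds the online network's weights.
        os.makedirs(path, exist_ok=True)
        torch.save({'model': self.actor_critic.state_dict()},
                   os.path.join(path, filename))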
Example #2
    def gradient_update_sarsa(self, batch_size=100):
        # Sample a batch of (s, a, r, s', a') transitions; ano is returned by
        # sample_ but is unused here. Each state is a two-component tuple, so
        # the components are converted to tensors separately.
        s, a, r, s1, a1, ano = self.replay.sample_(batch_size)
        sa = [np.array(x[0], dtype=np.float32) for x in s]
        sb = [np.array(x[1], dtype=np.float32) for x in s]
        # Prepend a batch dimension of 1 to each per-sample array and move it
        # to the agent's device. torch.autograd.Variable is deprecated since
        # PyTorch 0.4, so plain tensors are used instead.
        sa = [torch.from_numpy(x.reshape((1,) + x.shape)).to(self.device) for x in sa]
        sb = [torch.from_numpy(x.reshape((1,) + x.shape)).to(self.device) for x in sb]
        a1 = [np.array(x, dtype=np.float32) for x in a1]
        a1 = [torch.from_numpy(x.reshape((1,) + x.shape)).to(self.device) for x in a1]
        a = torch.from_numpy(np.array(a, dtype=np.float32)).to(self.device)
        r = torch.from_numpy(np.array(r, dtype=np.float32)).to(self.device)
        s1a = [np.array(x[0], dtype=np.float32) for x in s1]
        s1b = [np.array(x[1], dtype=np.float32) for x in s1]
        s1a = [torch.from_numpy(x.reshape((1,) + x.shape)).to(self.device) for x in s1a]
        s1b = [torch.from_numpy(x.reshape((1,) + x.shape)).to(self.device) for x in s1b]

        # Clipped double Q target (as in TD3): evaluate both target critics on
        # (s', a') and take the elementwise minimum to curb overestimation.
        # Since this is SARSA, a' is the action actually taken in s'.
        self.q_next1 = torch.squeeze(torch.cat(
            [self.target_actor_critic.critic_forward1(x0, x1, x2,
                                                      adjust_dim=False,
                                                      convert_torch=False,
                                                      node_labels=False).detach()
             for x0, x1, x2 in zip(s1a, s1b, a1)], dim=0))
        self.q_next2 = torch.squeeze(torch.cat(
            [self.target_actor_critic.critic_forward2(x0, x1, x2,
                                                      adjust_dim=False,
                                                      convert_torch=False,
                                                      node_labels=False).detach()
             for x0, x1, x2 in zip(s1a, s1b, a1)], dim=0))
        self.q_next = torch.min(self.q_next1, self.q_next2).detach()
        # One-step bootstrapped target, plus a copy of the rewards for logging.
        self.q_expected = r + self.gamma * self.q_next
        self.r = r
        # Current Q estimates from both online critics for the stored (s, a)
        # pairs. a is one batched tensor, so each row x2 regains a leading
        # batch dimension via view; forward returns the two critic heads at
        # indices 1 and 2 of its output tuple.
        self.q_predicted1 = torch.squeeze(torch.cat(
            [self.actor_critic.forward(x0, x1, x2.view((1,) + x2.shape),
                                       adjust_dim=False, convert_torch=False,
                                       node_labels=False)[1]
             for x0, x1, x2 in zip(sa, sb, a)], dim=0))
        self.q_predicted2 = torch.squeeze(torch.cat(
            [self.actor_critic.forward(x0, x1, x2.view((1,) + x2.shape),
                                       adjust_dim=False, convert_torch=False,
                                       node_labels=False)[2]
             for x0, x1, x2 in zip(sa, sb, a)], dim=0))

        # Critic loss: sum of the MSE losses of both critics against the
        # shared target (a smooth L1 loss is a common alternative here).
        self.loss_critic = (F.mse_loss(self.q_predicted1, self.q_expected)
                            + F.mse_loss(self.q_predicted2, self.q_expected))
        self.critic_opt.zero_grad()
        self.loss_critic.backward()
        self.critic_opt.step()

        # Soft-update the target network toward the online network.
        utls.copy_parameters(self.target_actor_critic, self.actor_critic, self.eta)
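Both examples lean on utls.copy_parameters for hard and soft target updates, but that helper is not shown. A minimal sketch consistent with how it is called here, assuming eta is a Polyak step size that defaults to a hard copy:

import torch

def copy_parameters(target, source, eta=1.0):
    # Hypothetical stand-in for utls.copy_parameters: blends the source
    # network's weights into the target's. eta=1.0 reduces to a hard copy,
    # matching the two-argument calls in load_models and __init__.
    with torch.no_grad():
        for t, s in zip(target.parameters(), source.parameters()):
            t.mul_(1.0 - eta).add_(eta * s)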
Example #3
    def __init__(self, input_dim, state_dim, action_dim, replayBuff, lr=1e-3, gamma=0.99, eta=1e-3,
                 gcn_num_layers=2, num_pooling=1, assign_hidden_dim=40, assign_dim=40, num_aggs=1, use_cuda=True):
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.input_dim = input_dim

        self.lr = lr

        self.it = 0  # update-step counter
        self.replay = replayBuff
        if use_cuda:
            self.device = utls.get_torch_device()
        else:
            self.device = torch.device("cpu")

        # Online network and its target copy; the target is moved toward the
        # online weights after every update (see eta below).
        self.actor_critic = GraphTD3(input_dim=input_dim, node_embedding_dim=action_dim, graph_embedding_dim=state_dim,
                                     use_cuda=use_cuda,
                                     gcn_num_layers=gcn_num_layers, num_pooling=num_pooling, assign_dim=assign_dim,
                                     num_aggs=num_aggs, max_num_nodes=assign_hidden_dim).to(self.device)

        self.target_actor_critic = GraphTD3(input_dim=input_dim, node_embedding_dim=action_dim,
                                            graph_embedding_dim=state_dim, use_cuda=use_cuda,
                                            gcn_num_layers=gcn_num_layers, num_pooling=num_pooling,
                                            assign_dim=assign_dim, num_aggs=num_aggs,
                                            max_num_nodes=assign_hidden_dim).to(self.device)

        # Only the critics and the shared graph embedder are trained here; the
        # actor would need its own optimizer if it were being updated.
        self.critic_opt = torch.optim.Adam(
            list(self.actor_critic.critic.parameters()) + list(self.actor_critic.graph_embedder.parameters()), self.lr,
            weight_decay=0.001)

        self.gamma = gamma  # discount factor
        self.eta = eta      # Polyak rate for target-network soft updates

        # Initialize the target network as an exact copy of the online one.
        utls.copy_parameters(self.target_actor_critic, self.actor_critic)
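A hypothetical usage sketch: the enclosing agent class and the replay buffer are not part of this excerpt, so the names GraphTD3Agent and ReplayBuffer (and their constructor arguments) are assumptions for illustration only:

# Names below (GraphTD3Agent, ReplayBuffer, capacity) are assumed, not taken
# from the excerpt.
replay = ReplayBuffer(capacity=100000)
agent = GraphTD3Agent(input_dim=16, state_dim=64, action_dim=8,
                      replayBuff=replay, gamma=0.99, eta=1e-3, use_cuda=False)
# ... collect transitions into replay, then:
agent.gradient_update_sarsa(batch_size=64)  # one critic update from the buffer
agent.load_models('checkpoint.pt')          # restore previously saved weights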