# Module-level imports assumed elsewhere in this file: os, numpy as np,
# torch, torch.nn.functional as F, torch.autograd.Variable, utls, GraphTD3.

def load_models(self, filename, path='models'):
    path = os.path.join(path, filename)
    if not os.path.isfile(path):
        raise Exception('No such path: ' + path)
    # map_location is an argument of torch.load, not of load_state_dict.
    temp = torch.load(path, map_location=self.device)
    self.actor_critic.load_state_dict(temp['model'])
    utls.copy_parameters(self.target_actor_critic, self.actor_critic)
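# A matching save routine is not part of this section, but load_models
# expects a checkpoint dict keyed by 'model'. A minimal sketch under that
# assumption (the method name and checkpoint layout are illustrative):
def save_models(self, filename, path='models'):
    # Persist the online network under the 'model' key read by load_models.
    os.makedirs(path, exist_ok=True)
    torch.save({'model': self.actor_critic.state_dict()},
               os.path.join(path, filename))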
def gradient_update_sarsa(self, batch_size=100):
    # ano (presumably the done flag) is sampled but unused in the target below.
    s, a, r, s1, a1, ano = self.replay.sample_(batch_size)

    # States are (node_features, adjacency) pairs; add a leading batch dim
    # and move everything to the device. Variable is a legacy no-op wrapper
    # in PyTorch >= 0.4; plain tensors would suffice.
    sa = [np.array(x[0], dtype=np.float32) for x in s]
    sb = [np.array(x[1], dtype=np.float32) for x in s]
    sa = [Variable(torch.from_numpy(x.reshape((1,) + x.shape)).to(self.device)) for x in sa]
    sb = [Variable(torch.from_numpy(x.reshape((1,) + x.shape)).to(self.device)) for x in sb]

    a1 = [np.array(x, dtype=np.float32) for x in a1]
    a1 = [Variable(torch.from_numpy(x.reshape((1,) + x.shape)).to(self.device)) for x in a1]
    a = Variable(torch.from_numpy(np.array(a, dtype=np.float32)).to(self.device))
    r = Variable(torch.from_numpy(np.array(r, dtype=np.float32)).to(self.device))

    s1a = [np.array(x[0], dtype=np.float32) for x in s1]
    s1b = [np.array(x[1], dtype=np.float32) for x in s1]
    s1a = [Variable(torch.from_numpy(x.reshape((1,) + x.shape)).to(self.device)) for x in s1a]
    s1b = [Variable(torch.from_numpy(x.reshape((1,) + x.shape)).to(self.device)) for x in s1b]
    # ac = Variable(torch.from_numpy(np.array(a1, dtype=np.float32))).to(self.device)

    # Clipped double-Q target (TD3-style): evaluate both target critics
    # on (s', a') and take the element-wise minimum.
    self.q_next1 = torch.squeeze(torch.cat(
        [self.target_actor_critic.critic_forward1(
            x[0], x[1], x[2],
            adjust_dim=False, convert_torch=False, node_labels=False).detach()
         for x in zip(s1a, s1b, a1)], dim=0))
    self.q_next2 = torch.squeeze(torch.cat(
        [self.target_actor_critic.critic_forward2(
            x[0], x[1], x[2],
            adjust_dim=False, convert_torch=False, node_labels=False).detach()
         for x in zip(s1a, s1b, a1)], dim=0))
    self.q_next = torch.min(self.q_next1, self.q_next2).detach()

    # SARSA target: r + gamma * min(Q1', Q2')(s', a').
    self.q_expected = r + self.gamma * self.q_next
    self.r = r

    # Current estimates from the two online critic heads (forward outputs 1 and 2).
    # Iterating the batched action tensor yields rows, so restore the batch dim.
    self.q_predicted1 = torch.squeeze(torch.cat(
        [self.actor_critic.forward(
            x[0], x[1], x[2].view((1,) + x[2].shape),
            adjust_dim=False, convert_torch=False, node_labels=False)[1]
         for x in zip(sa, sb, a)], dim=0))
    self.q_predicted2 = torch.squeeze(torch.cat(
        [self.actor_critic.forward(
            x[0], x[1], x[2].view((1,) + x[2].shape),
            adjust_dim=False, convert_torch=False, node_labels=False)[2]
         for x in zip(sa, sb, a)], dim=0))

    # self.loss_critic = F.smooth_l1_loss(q_predicted, q_expected)
    self.loss_critic = (F.mse_loss(self.q_predicted1, self.q_expected)
                        + F.mse_loss(self.q_predicted2, self.q_expected))

    # self.actor_critic_opt.zero_grad()
    self.critic_opt.zero_grad()
    self.loss_critic.backward()
    # self.actor_critic_opt.step()
    self.critic_opt.step()

    # Soft-update the target network towards the online network.
    utls.copy_parameters(self.target_actor_critic, self.actor_critic, self.eta)
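# utls.copy_parameters is defined outside this section. Given how it is
# called (no third argument for the hard copies in __init__/load_models,
# eta after each gradient step), it presumably implements a Polyak update.
# A minimal sketch of that convention, not the actual utls implementation:
def copy_parameters(target, source, eta=1.0):
    # theta_target <- eta * theta_source + (1 - eta) * theta_target;
    # eta=1.0 degenerates to a hard copy of the source parameters.
    with torch.no_grad():
        for t, s in zip(target.parameters(), source.parameters()):
            t.copy_(eta * s + (1.0 - eta) * t)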
def __init__(self, input_dim, state_dim, action_dim, replayBuff, lr=1e-3,
             gamma=0.99, eta=1e-3, gcn_num_layers=2, num_pooling=1,
             assign_hidden_dim=40, assign_dim=40, num_aggs=1, use_cuda=True):
    self.state_dim = state_dim
    self.action_dim = action_dim
    self.input_dim = input_dim
    self.lr = lr
    self.it = 0
    self.replay = replayBuff
    if use_cuda:
        self.device = utls.get_torch_device()
    else:
        self.device = torch.device("cpu")
    self.actor_critic = GraphTD3(
        input_dim=input_dim, node_embedding_dim=action_dim,
        graph_embedding_dim=state_dim, use_cuda=use_cuda,
        gcn_num_layers=gcn_num_layers, num_pooling=num_pooling,
        assign_dim=assign_dim, num_aggs=num_aggs,
        max_num_nodes=assign_hidden_dim).to(self.device)
    self.target_actor_critic = GraphTD3(
        input_dim=input_dim, node_embedding_dim=action_dim,
        graph_embedding_dim=state_dim, use_cuda=use_cuda,
        gcn_num_layers=gcn_num_layers, num_pooling=num_pooling,
        assign_dim=assign_dim, num_aggs=num_aggs,
        max_num_nodes=assign_hidden_dim).to(self.device)
    # self.actor_critic_opt = torch.optim.Adam(self.actor_critic.parameters(), self.lr, weight_decay=0.001)
    self.critic_opt = torch.optim.Adam(
        list(self.actor_critic.critic.parameters())
        + list(self.actor_critic.graph_embedder.parameters()),
        self.lr, weight_decay=0.001)
    # self.actor_opt = torch.optim.Adam(self.actor_critic.actor.parameters(), self.lr, weight_decay=0.001)
    self.gamma = gamma
    self.eta = eta
    utls.copy_parameters(self.target_actor_critic, self.actor_critic)
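# Rough usage sketch: the agent class name, buffer construction, and
# dimensions below are assumptions for illustration; only sample_() is
# implied by gradient_update_sarsa above.
# replay = ReplayBuffer(capacity=100000)          # must expose sample_(batch_size)
# agent = GraphSarsaAgent(input_dim=8, state_dim=64, action_dim=16,
#                         replayBuff=replay, lr=1e-3, gamma=0.99, eta=1e-3)
# ... fill replay with (s, a, r, s1, a1, done)-style transitions ...
# for step in range(1000):
#     agent.gradient_update_sarsa(batch_size=100)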