def test_greedy(self, graph, path=None):
    # Greedy baseline: at every step, score each legal action on a copy of the
    # current state and apply the one with the largest immediate reward.
    self.problem.g = to_cuda(graph)
    res = [self.problem.calc_S().item()]
    pb = self.problem
    if path is not None:
        path = os.path.abspath(os.path.join(os.getcwd())) + path
        vis_g(pb, name=path + str(0), topo='cut')
    R = []
    Reward = []
    for j in range(100):
        M = []
        actions = pb.get_legal_actions()
        # evaluate each legal action on a deep copy so the actual state is not mutated
        for k in range(actions.shape[0]):
            _, r = pb.step(actions[k], state=dc(pb.g))
            M.append(r)
        # stop once no action yields a positive reward
        if max(M) <= 0:
            break
        if path is not None:
            vis_g(pb, name=path + str(j + 1), topo='cut')
        posi = [x for x in M if x > 0]
        nega = [x for x in M if x <= 0]
        # print('posi reward ratio:', len(posi) / len(M))
        # print('posi reward avg:', sum(posi) / len(posi))
        # print('nega reward avg:', sum(nega) / len(nega))
        # apply the best-scoring action to the actual state
        max_idx = torch.tensor(M).argmax().item()
        _, r = pb.step((actions[max_idx, 0].item(), actions[max_idx, 1].item()))
        R.append((actions[max_idx, 0].item(), actions[max_idx, 1].item(), r.item()))
        Reward.append(r.item())
        res.append(res[-1] - r.item())
    return QtableKey2state(
        state2QtableKey(
            pb.g.ndata['label'].argmax(dim=1).cpu().numpy())), R, res
def cmpt_optimal(self, graph, path=None):
    # Brute-force baseline: evaluate the objective S for every enumerated labelling
    # in self.all_states and return the best one.
    self.problem.g = to_cuda(graph)
    res = [self.problem.calc_S().item()]
    pb = self.problem
    S = []
    for j in range(280):  # 280 = number of precomputed candidate states in self.all_states
        pb.reset_label(QtableKey2state(self.all_states[j]))
        S.append(pb.calc_S())
    s1 = torch.tensor(S).argmin()
    res.append(S[s1].item())
    if path is not None:
        path = os.path.abspath(os.path.join(os.getcwd())) + path
        pb.reset_label(QtableKey2state(self.all_states[s1]))
        vis_g(pb, name=path, topo='cut')
    return QtableKey2state(self.all_states[s1]), res
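# Note on the hard-coded range(280) above: assuming the benchmark instance is a
# balanced 3-cut of 9 nodes (an assumption, not stated in this file), the number of
# distinct labellings up to permutation of the three groups is
# 9! / ((3!)**3 * 3!) = 362880 / (216 * 6) = 280, i.e. self.all_states is expected
# to enumerate each partition exactly once.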
def test_dqn(self, alg, g_i, t, init_label=None, path=None):
    # Roll out the trained DQN for t steps on episode g_i, always taking the
    # action with the highest predicted Q-value.
    init_state = dc(self.episodes[g_i].init_state)
    label_history = init_state.ndata['label'][self.episodes[g_i].label_perm]
    self.problem.g = dc(init_state)
    if init_label is None:
        self.problem.reset_label(label=label_history[0].argmax(dim=1))
    else:
        self.problem.reset_label(label=init_label)
    S = self.problem.calc_S()
    print('init S:', S)
    if path is not None:
        path = os.path.abspath(os.path.join(os.getcwd())) + path
        vis_g(self.problem, name=path + str(0), topo='cut')
    for i in range(t):
        actions = self.problem.get_legal_actions(
            state=self.problem.g, action_type=self.action_type)
        S_a_encoding, h1, h2, Q_sa = alg.forward(
            to_cuda(self.problem.g), actions.cuda(),
            action_type=self.action_type, gnn_step=3)
        # apply the greedy (argmax-Q) action
        _, r = self.problem.step(state=self.problem.g,
                                 action=actions[torch.argmax(Q_sa)],
                                 action_type=self.action_type)
        print(Q_sa.detach().cpu().numpy())
        print('action index:', torch.argmax(Q_sa).detach().cpu().item())
        print('action:', actions[torch.argmax(Q_sa)].detach().cpu().numpy())
        print('reward:', r.item())
        S -= r.item()
        print('kcut S:', S)
        if path is not None:
            vis_g(self.problem, name=path + str(i + 1), topo='cut')
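# A hedged usage sketch for test_dqn: the rollout can also be started from an
# explicit labelling via init_label instead of the recorded episode history.
# The label tensor below is purely illustrative and assumes a 9-node instance
# cut into 3 groups (one group id per node); `test` is the evaluator and `alg`
# the trained agent used in the evaluation script below.
custom_init_label = torch.tensor([0, 0, 0, 1, 1, 1, 2, 2, 2])
test.test_dqn(alg, g_i=0, t=10, init_label=custom_init_label,
              path='/Analysis/eval_model/dqn_custom_')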
torch.tensor(subopt_q_mean_win).mean()
torch.tensor(subopt_q_mean_sway).mean()
torch.tensor(subopt_q_mean_zero).mean()
torch.tensor(subopt_q_mean_fail).mean()
torch.tensor(subopt_q_mean).mean()
torch.tensor(opt_q_mean).mean()

# visualization
gi = 0
test.episodes[gi].action_seq
test.episodes[gi].reward_seq
test.cmpt_optimal(test.episodes[gi].init_state, '/Analysis/eval_model/opt')
test.test_greedy(test.episodes[gi].init_state, '/Analysis/eval_model/grd_')
# pass path as a keyword so it does not bind to the init_label parameter
test.test_dqn(alg, gi, 10, path='/Analysis/eval_model/dqn_')

# visualization
path = os.path.abspath(os.path.join(os.getcwd())) + '/Analysis/eval_model/test1'
vis_g(problem, name=path, topo='cut')

g_i = 1
test.episodes[g_i].reward_seq[:10]
test.episodes[g_i]
validation_problem1[g_i][1]

init_state = dc(test.episodes[g_i].init_state)
label_history = init_state.ndata['label'][test.episodes[g_i].label_perm]
label_history[0].argmax(dim=1)
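# A possible way to compare the evaluation routines on a single instance is to plot
# the objective trajectories they return. This is only a sketch: it assumes
# matplotlib is available and reuses `test` from above; the names opt_res and
# grd_res are introduced here for illustration.
import matplotlib.pyplot as plt

gi = 0
_, opt_res = test.cmpt_optimal(test.episodes[gi].init_state)     # [initial S, optimal S]
_, _, grd_res = test.test_greedy(test.episodes[gi].init_state)   # S after each greedy move

plt.plot(range(len(grd_res)), grd_res, label='greedy S')
plt.hlines(opt_res[-1], 0, len(grd_res) - 1, linestyles='dashed', label='optimal S')
plt.xlabel('step')
plt.ylabel('k-cut objective S')
plt.legend()
plt.savefig('Analysis/eval_model/greedy_vs_opt.png')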