Example #1
    def test_greedy(self, graph, path=None):
        # Greedy baseline: at every step, score all legal actions on a copy of
        # the current state and execute the one with the largest immediate
        # reward; stop as soon as no action yields a positive reward.
        self.problem.g = to_cuda(graph)
        res = [self.problem.calc_S().item()]
        pb = self.problem
        if path is not None:
            path = os.path.abspath(os.path.join(os.getcwd())) + path
            vis_g(pb, name=path + str(0), topo='cut')
        R = []
        Reward = []
        for j in range(100):
            M = []
            actions = pb.get_legal_actions()
            # evaluate each candidate action on a deep copy, so the real
            # state is not modified during the scan
            for k in range(actions.shape[0]):
                _, r = pb.step(actions[k], state=dc(pb.g))
                M.append(r)
            if max(M) <= 0:
                break
            if path is not None:
                vis_g(pb, name=path + str(j + 1), topo='cut')
            posi = [x for x in M if x > 0]
            nega = [x for x in M if x <= 0]
            # print('posi reward ratio:', len(posi) / len(M))
            # print('posi reward avg:', sum(posi) / len(posi))
            # print('nega reward avg:', sum(nega) / len(nega))
            max_idx = torch.tensor(M).argmax().item()
            _, r = pb.step((actions[max_idx, 0].item(), actions[max_idx, 1].item()))
            R.append((actions[max_idx, 0].item(), actions[max_idx, 1].item(), r.item()))
            Reward.append(r.item())
            res.append(res[-1] - r.item())
        return QtableKey2state(
            state2QtableKey(pb.g.ndata['label'].argmax(dim=1).cpu().numpy())), R, res
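
The methods in these examples lean on several project helpers that are not defined in the snippets (to_cuda, dc, vis_g, QtableKey2state, state2QtableKey). Below is a minimal sketch of what dc and to_cuda are assumed to do, written for a DGL graph and an optional CUDA device; the repository's own definitions may differ.

from copy import deepcopy as dc  # `dc` is used above as a deep-copy alias

import torch


def to_cuda(g):
    # assumed helper: move the graph (and its features) to the GPU when one is available
    if torch.cuda.is_available():
        return g.to(torch.device('cuda'))
    return g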
Example #2
    def cmpt_optimal(self, graph, path=None):
        # Brute-force optimum: evaluate the objective S for every candidate
        # labeling enumerated in self.all_states and keep the best one.
        self.problem.g = to_cuda(graph)
        res = [self.problem.calc_S().item()]
        pb = self.problem

        S = []
        for j in range(280):  # hard-coded number of candidate labelings in self.all_states
            pb.reset_label(QtableKey2state(self.all_states[j]))
            S.append(pb.calc_S())

        s1 = torch.tensor(S).argmin().item()
        res.append(S[s1].item())

        if path is not None:
            path = os.path.abspath(os.path.join(os.getcwd())) + path
            pb.reset_label(QtableKey2state(self.all_states[s1]))
            vis_g(pb, name=path, topo='cut')
        return QtableKey2state(self.all_states[s1]), res
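
state2QtableKey and QtableKey2state convert between a per-node label assignment and the hashable key used to index the Q-table. A minimal sketch consistent with how they are called above; the real versions (including any canonicalization of equivalent cluster permutations) may differ.

def state2QtableKey(labels):
    # assumed: serialize an iterable of integer node labels into a string key
    return ','.join(str(int(x)) for x in labels)


def QtableKey2state(key):
    # assumed inverse: parse the key back into a list of integer labels
    return [int(x) for x in key.split(',')]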
Example #3
    def test_dqn(self, alg, g_i, t, init_label=None, path=None):
        # Greedy DQN rollout: start from episode g_i's recorded initial state
        # and, for t steps, execute the action with the highest predicted Q-value.
        init_state = dc(self.episodes[g_i].init_state)
        label_history = init_state.ndata['label'][
            self.episodes[g_i].label_perm]

        self.problem.g = dc(init_state)
        if init_label is None:
            self.problem.reset_label(label=label_history[0].argmax(dim=1))
        else:
            self.problem.reset_label(label=init_label)
        S = self.problem.calc_S()
        print('init S:', S)
        if path is not None:
            path = os.path.abspath(os.path.join(os.getcwd())) + path
            vis_g(self.problem, name=path + str(0), topo='cut')

        for i in range(t):
            actions = self.problem.get_legal_actions(
                state=self.problem.g, action_type=self.action_type)
            S_a_encoding, h1, h2, Q_sa = alg.forward(
                to_cuda(self.problem.g),
                actions.cuda(),
                action_type=self.action_type,
                gnn_step=3)
            _, r = self.problem.step(state=self.problem.g,
                                     action=actions[torch.argmax(Q_sa)],
                                     action_type=self.action_type)

            print(Q_sa.detach().cpu().numpy())
            print('action index:', torch.argmax(Q_sa).detach().cpu().item())
            print('action:',
                  actions[torch.argmax(Q_sa)].detach().cpu().numpy())
            print('reward:', r.item())
            S -= r.item()
            print('kcut S:', S)
            if path is not None:
                vis_g(self.problem, name=path + str(i + 1), topo='cut')
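
Action selection in test_dqn is a plain greedy argmax over the per-action Q-values returned by the network. A small self-contained illustration with dummy values (the numbers and action pairs below are made up, not taken from the repo):

import torch

Q_sa = torch.tensor([0.12, -0.30, 0.45, 0.07])            # one Q-value per candidate action
actions = torch.tensor([[0, 1], [0, 2], [1, 2], [2, 3]])  # matching candidate actions

best = torch.argmax(Q_sa)                    # index of the highest-valued action
print(best.item(), actions[best].tolist())   # 2 [1, 2]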
Example #4
# means of the collected Q-value statistics (these lists are assumed to have
# been filled earlier in the evaluation run)
torch.tensor(subopt_q_mean_win).mean()
torch.tensor(subopt_q_mean_sway).mean()
torch.tensor(subopt_q_mean_zero).mean()
torch.tensor(subopt_q_mean_fail).mean()
torch.tensor(subopt_q_mean).mean()
torch.tensor(opt_q_mean).mean()
# visualization
gi = 0
test.episodes[gi].action_seq
test.episodes[gi].reward_seq
test.cmpt_optimal(test.episodes[gi].init_state, '/Analysis/eval_model/opt')
test.test_greedy(test.episodes[gi].init_state, '/Analysis/eval_model/grd_')
test.test_dqn(alg, gi, 10, path='/Analysis/eval_model/dqn_')
# visualization
path = os.path.abspath(os.path.join(os.getcwd())) + '/Analysis/eval_model/test1'
vis_g(problem, name=path, topo='cut')
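
The path arguments above are appended to the current working directory, so the Analysis/eval_model folder must exist before any of the visualization calls write to it; a small guard one can run first:

import os

os.makedirs(os.path.join(os.getcwd(), 'Analysis', 'eval_model'), exist_ok=True)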




g_i = 1
test.episodes[g_i].reward_seq[:10]
test.episodes[g_i].
validation_problem1[g_i][1]



init_state = dc(test.episodes[g_i].init_state)
label_history = init_state.ndata['label'][test.episodes[g_i].label_perm]
label_history[0].argmax(dim=1)
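
The last three lines recover the initial labeling of episode g_i from its permuted label history. Assuming the objects above, that labeling could be fed back into test_dqn through the init_label argument (the output path here is purely illustrative):

init_label = label_history[0].argmax(dim=1)
test.test_dqn(alg, g_i, 10, init_label=init_label, path='/Analysis/eval_model/dqn_replay_')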