#### GCN Policy
# Roll out one episode with the network `NN`, starting from a copy of
# `init_state` (made with `dc`), sampling every action from the masked
# policy output and accumulating the reward in `sum_r`.
# NOTE(review): `done` is not assigned in this section before the loop;
# it must already be falsy on entry or the rollout is skipped -- confirm.
state = dc(init_state)
if cuda_flag:
    # Put the node features on the GPU before feeding the graph to NN.
    state.g.ndata['x'] = state.g.ndata['x'].cuda()
sum_r = 0
T1 = time.time()
idx1, idx2 = mvc.get_ilegal_actions(state)
while not done:
    G = state.g
    pi, val = NN(G)
    pi = pi.squeeze()
    # Entries indexed by idx1 are set to -inf so that the softmax below
    # assigns them zero probability and they can never be sampled.
    pi[idx1] = float('-inf')
    pi = F.softmax(pi, dim=0)
    dist = torch.distributions.Categorical(probs=pi)
    action = dist.sample()
    new_state, reward, done = mvc.step(state, action)
    sum_r += reward
    state = new_state
    # Refresh the mask for the state the next iteration will act on.
    idx1, idx2 = mvc.get_ilegal_actions(state)
T2 = time.time()

# Draw the final graph, colouring each node by column 0 of its 'x' feature.
node_tag = (
    state.g.ndata['x'][:, 0]
    .cpu()
    .squeeze()
    .numpy()
    .tolist()
)
nx.draw(
    state.g.to_networkx(),
    pos,
    node_color=node_tag,
    with_labels=True,
)
plt.show()

### Heuristic Policy
# Reset the episode bookkeeping for a second rollout that starts from a
# fresh copy of the same `init_state`.
state = dc(init_state)
done = False
# Reward accumulator for this rollout, kept separate from `sum_r` above.
sum_r2 = 0
# NOTE(review): T1 is reused here, overwriting the start time recorded for
# the GCN rollout -- confirm that earlier timing is not needed afterwards.
T1 = time.time()
# Query the environment's illegal-action indices for the starting state.
[idx1, idx2] = mvc.get_ilegal_actions(state)