    def get_val_result(self, validation_graph):
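        """Roll out the policy on each validation graph (is_validation=True,
        i.e. greedy node selection) and return the average cover size."""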

        objective_vals = []
        for g in validation_graph:
            env = MVC_environement(g)
            Xv, graph = env.reset_env()
            graph = torch.unsqueeze(graph, 0)
            Xv = Xv.clone().to(self.device)
            graph = graph.to(self.device)
            done = False
            self.non_selected = list(np.arange(env.num_nodes))
            self.selected = []
            while not done:
                Xv = Xv.to(self.device)
                # action selection is delegated to take_action (greedy over
                # unselected nodes in validation mode)
                action = self.take_action(graph, Xv, is_validation=True)
                Xv_next, reward, done = env.take_action(action)
                Xv = Xv_next
            objective_vals.append(len(self.selected))
        return sum(objective_vals) / len(objective_vals)
    def train(self, g, num_eps=20):
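        """Train self.agent on graph g for num_eps episodes, using
        epsilon-greedy exploration and n-step (N_STEP) fitted-Q targets."""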

        N_STEP = 2
        fitted_q_exp = namedtuple("fitted_exp",
                                  ['graph', 'Xv', 'action', 'reward'])
        experience = namedtuple(
            "experience",
            ['graph', 'Xv', 'action', 'reward', 'next_Xv', 'is_done'])
        EPS_START = 1.00
        EPS_END = 0.05
        EPS_DECAY = 500
        steps_done = 0
        for e in range(num_eps):

            env = MVC_environement(g)
            Xv, graph = env.reset_env()
            Xv = Xv.clone()
            graph = torch.unsqueeze(graph, 0)
            done = False
            non_selected = list(np.arange(env.num_nodes))
            selected = []
            N = 0
            fitted_experience_list = []
            reward_list = []
            self.agent.new_epsiode()
            while not done:
                eps_threshold = EPS_END + (EPS_START - EPS_END) * math.exp(
                    -1. * steps_done / EPS_DECAY)
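                # epsilon decays exponentially from EPS_START towards EPS_END
                # as the total number of steps grows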
                if np.random.uniform() > eps_threshold:
                    val = self.agent(graph, Xv)[0]
                    val[selected] = -float('inf')
                    action = int(torch.argmax(val).item())
                else:
                    action = int(np.random.choice(non_selected))
                Xv_next, reward, done = env.take_action(action)
                Xv_next = Xv_next.clone()
                fit_ex = fitted_q_exp(graph, Xv, action, reward)
                fitted_experience_list.append(fit_ex)
                non_selected.remove(action)
                selected.append(action)
                N += 1
                reward_list.append(reward)

                if N >= N_STEP:
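                    # enough transitions buffered: build an n-step experience
                    # from the oldest (graph, Xv, action), the summed rewards,
                    # and the current next state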
                    n_reward = sum(reward_list)
                    n_prev_ex = fitted_experience_list[0]
                    n_graph = n_prev_ex.graph
                    n_Xv = n_prev_ex.Xv
                    n_action = n_prev_ex.action
                    ex = experience(n_graph, n_Xv, torch.tensor([n_action]),
                                    torch.tensor([n_reward]), Xv_next, done)
                    self.agent.store_transition(ex)
                    fitted_experience_list.pop(0)
                    reward_list.pop(0)

                Xv = Xv_next
                steps_done += 1
                self.agent.train(batch_size=8, fitted_Q=True)
    def get_val_result_batch(self, validation_graph, return_list=False):
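        """Evaluate every validation graph in one batched greedy rollout and
        return the average cover size (or the per-graph list)."""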
        N = len(validation_graph)
        all_graphs = []
        all_Xv = []
        all_envs = []
        for g in validation_graph:
            env = MVC_environement(g)
            all_envs.append(env)
            Xv, graph = env.reset_env()
            graph = torch.unsqueeze(graph, 0)
            all_graphs.append(graph)
            all_Xv.append(Xv)
        all_graphs = torch.cat(all_graphs, 0).to(self.device)
        all_Xv = torch.cat(all_Xv, 0).to(self.device)
        all_selected = [[] for _ in range(N)]
        all_dones = [False for _ in range(N)]
        all_done = False
        done_count = 0
        while not all_done:
            q_val = self.dqn(all_graphs, all_Xv)
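            # one batched forward pass scores all graphs; finished graphs are
            # skipped and already-selected nodes are masked to -inf below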
            for i in range(N):

                if all_dones[i]:
                    continue

                q_val[i][all_selected[i]] = -float('inf')
                action = torch.argmax(q_val[i]).item()
                all_selected[i].append(action)
                _, _, done = all_envs[i].take_action(action)
                all_Xv[i][action] = 1
                if done:
                    all_dones[i] = True
                    done_count += 1
            if done_count == N:
                break
        objective_vals = [len(s) for s in all_selected]

        if return_list:
            return objective_vals

        return sum(objective_vals) / len(objective_vals)
Example #4
    def train_with_graph(self, g):
        '''
        Run one n-step DQN training episode on a single graph.

        g: networkx graph
        '''
        N_STEP = 2
        env = MVC_environement(g)
        Xv, graph = env.reset_env()
        Xv = Xv.clone()
        graph = torch.unsqueeze(graph, 0)
        done = False
        fitted_experience_list = []
        reward_list = []
        self.non_selected = list(np.arange(env.num_nodes))
        self.selected = []
        self.N = 0
        while not done:

            action = self.take_action(graph, Xv)
            Xv_next, reward, done = env.take_action(action)
            Xv_next = Xv_next.clone()
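            # fitted_q_exp and experience are assumed to be the same
            # (graph, Xv, action, reward[, next_Xv, is_done]) namedtuples used
            # in the train() example above, defined elsewhere in the module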
            fit_ex = fitted_q_exp(graph, Xv, action, reward)
            fitted_experience_list.append(fit_ex)
            self.N += 1
            reward_list.append(reward)
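            # once N_STEP transitions are buffered, emit an n-step experience
            # built from the oldest transition and the current next state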
            if self.N >= N_STEP:
                n_reward = sum(reward_list)
                n_prev_ex = fitted_experience_list[0]
                n_graph = n_prev_ex.graph
                n_Xv = n_prev_ex.Xv
                n_action = n_prev_ex.action

                ex = experience(n_graph, n_Xv, torch.tensor([n_action]),
                                torch.tensor([n_reward]), Xv_next, done)

                self.store_transition(ex)
                fitted_experience_list.pop(0)
                reward_list.pop(0)
            Xv = Xv_next
            self.train()
        self.episode_done += 1
        if self.episode_done > 0 and self.episode_done % 8 == 0:
            #print(self.steps_done)
            self.update_target_network()
Example #5
    def get_val_result(self, validation_graph, run_sparse=False):
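        """Greedy rollout on each validation graph; with run_sparse=True a
        single graph is evaluated with a sparse adjacency tensor."""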

        if type(validation_graph) is not list:
            validation_graph = [validation_graph]

        objective_vals = []
        for g in validation_graph:
            env = MVC_environement(g)
            Xv, graph = env.reset_env()
            if run_sparse:
                assert len(validation_graph) == 1
                #graph = torch.unsqueeze(graph,  0)
                graph = to_sparse_tensor(graph)
            else:
                graph = torch.unsqueeze(graph, 0)
            Xv = Xv.clone().to(self.device)
            graph = graph.to(self.device)
            done = False
            self.non_selected = list(np.arange(env.num_nodes))
            self.selected = []
            while not done:
                Xv = Xv.to(self.device)
                action = self.take_action(graph, Xv, is_validation=True)

                Xv_next, reward, done = env.take_action(action)
                Xv = Xv_next
            objective_vals.append(len(self.selected))
        return sum(objective_vals) / len(objective_vals)
Example #6
    def get_val_result_batch(self,
                             validation_graph,
                             return_list=False,
                             during_adaption=False):
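        """Batched evaluation of validation_graph. With during_adaption=True,
        actions are sampled from a softmax over the Q-values and n-step
        transitions are pushed to the adaptation buffer so the network can be
        fine-tuned at test time."""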
        N = len(validation_graph)
        N_STEP = 2
        all_graphs = []
        all_Xv = []
        all_envs = []

        fitted_experience_list = [[] for _ in range(N)]

        for g in validation_graph:
            env = MVC_environement(g)
            all_envs.append(env)
            Xv, graph = env.reset_env()
            graph = torch.unsqueeze(graph, 0)
            all_graphs.append(graph)
            all_Xv.append(Xv)
        all_graphs = torch.cat(all_graphs, 0).to(self.device)
        all_Xv = torch.cat(all_Xv, 0).to(self.device)
        all_selected = [[] for _ in range(N)]
        all_dones = [False for _ in range(N)]
        all_done = False
        done_count = 0
        cur_step = 0
        while not all_done:
            q_val = self.dqn(all_graphs, all_Xv)
            for i in range(N):

                if all_dones[i]:
                    continue

                q_val[i][all_selected[i]] = -float('inf')
                action = torch.argmax(q_val[i]).item()
                if during_adaption:
                    # during adaptation, sample the action from a softmax over
                    # the Q-values (already-selected nodes are masked to -inf,
                    # so they get zero probability); the rand_val > 0 test is
                    # effectively always true
                    rand_val = random.uniform(0, 1)
                    if rand_val > 0:
                        probs = torch.nn.functional.softmax(q_val[i], dim=-1)
                        m = torch.distributions.categorical.Categorical(
                            probs=probs)
                        action = m.sample().item()
                all_selected[i].append(action)
                Xv_next, rew, done = all_envs[i].take_action(action)

                if during_adaption:
                    copy_xv = deepcopy(all_Xv[i:i + 1])
                    fit_ex = fitted_q_exp(all_graphs[i:i + 1], copy_xv, action,
                                          rew)
                    fitted_experience_list[i].append(fit_ex)
                    if len(fitted_experience_list[i]) >= N_STEP:
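                        # n-step return; the environment appears to give a
                        # reward of -1 per selected node, so the sum over
                        # N_STEP steps is simply -N_STEP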

                        n_reward = -N_STEP
                        n_prev_ex = fitted_experience_list[i][0]
                        n_graph = n_prev_ex.graph
                        n_Xv = n_prev_ex.Xv
                        n_action = n_prev_ex.action
                        ex = experience(n_graph, n_Xv,
                                        torch.tensor([n_action]),
                                        torch.tensor([n_reward]), Xv_next,
                                        done)
                        self.adaption_buffer.push(ex)
                        fitted_experience_list[i].pop(0)

                all_Xv[i][action] = 1  # mark the chosen node as selected
                if done:
                    all_dones[i] = True
                    done_count += 1
                self.adaption_steps += 1
            cur_step += 1
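            # once enough steps have passed to form n-step targets, run a
            # burst of adaptation updates and refresh the target network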
            if during_adaption and cur_step >= N_STEP + 1:
                for ep in range(20):
                    self.train(during_adaption=True,
                               fitted_Q=False,
                               batch_size=128)
                self.update_target_network()

            if done_count == N:
                break

        objective_vals = [len(s) for s in all_selected]

        if return_list:
            return objective_vals

        return sum(objective_vals) / len(objective_vals)