Example #1
                    # Accumulate the loss between the computed target value and Q(s, a)
                    loss = self.loss_func(target_v, Qsa)
                    losses += loss
                # One gradient step over the accumulated batch loss
                self.optimizer.zero_grad()
                losses.backward()
                self.optimizer.step()
                self.writer.add_scalar('loss/value_loss', losses / self.batch_size, self.update_count)
                self.update_count += 1
                # Every 500 updates: hard-sync the target network and checkpoint the acting network
                if self.update_count % 500 == 0:
                    self.target_net.load_state_dict(self.act_net.state_dict())
                    torch.save(self.act_net.state_dict(), config.act_net_model_dir + str(self.update_count) + ".model")
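The fragment above accumulates the per-sample value loss into one batch loss, takes a single optimizer step, and every 500 updates copies the acting network's weights into the target network before checkpointing. A minimal, self-contained sketch of that hard target-network update pattern, using toy names (TinyQNet, dummy batches) that are not part of the project's actual DQN class, might look like:

import copy
import torch
import torch.nn as nn

class TinyQNet(nn.Module):
    # Hypothetical toy network standing in for act_net / target_net
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(4, 2)

    def forward(self, x):
        return self.fc(x)

act_net = TinyQNet()
target_net = copy.deepcopy(act_net)              # start from identical weights
optimizer = torch.optim.Adam(act_net.parameters(), lr=1e-3)
loss_func = nn.MSELoss()

for update_count in range(1, 1001):
    state = torch.randn(8, 4)                    # dummy batch of states
    target_v = torch.randn(8, 2)                 # dummy target values
    loss = loss_func(act_net(state), target_v)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if update_count % 500 == 0:
        # hard update: copy the acting network's weights into the target network
        target_net.load_state_dict(act_net.state_dict())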


from BiYeSheJi.Module.environment import Environment

# Graph and config are assumed to be defined elsewhere in the project
env = Environment(Graph)
env.init_TreeList(config.dataSet + "/train.txt")


def main():
    agentP = DQN()
    find_target = 0
    for i_ep in range(num_episodes):
        root = env.reset()
        if render: env.render()
        # node_t is the best next node selected by UCT after one round of Monte Carlo tree search
        path = [root.state.state_tup, ]
        node_t = root
        reward = 0
        while node_t.state.current_node != root.state.target_node:
            node_t = Policy_MCTS(node_t, agentP.act_net)
            if node_t is not None:
Example #2

def main():
    from multiprocessing import Pool
    from multiprocessing import Manager
    pool = Pool(5)
    isright = 0
    precision_queue = Manager().Queue()
    # Evaluate each tree in a worker process; results are collected via the shared queue
    for i_ep, root in enumerate(env.TreeList):
        pool.apply_async(func=sub_main, args=(root, precision_queue))
    pool.close()
    pool.join()

    # Capture the result count before draining, since get_nowait() empties the queue
    ql = precision_queue.qsize()
    for i in range(ql):
        isright += precision_queue.get_nowait()
    print(isright / ql)

    # agentP.writer.add_scalar('find_target/step', find_target, global_step=i_ep)

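Example #2 fans the per-tree evaluation out over a process pool and collects the results through a Manager queue. A minimal sketch of that pattern, with a hypothetical worker standing in for the project's sub_main, could look like this:

from multiprocessing import Pool, Manager

def worker(item, result_queue):
    # Hypothetical stand-in for sub_main: put 1 for a "correct" item, 0 otherwise
    result_queue.put(1 if item % 2 == 0 else 0)

if __name__ == '__main__':
    queue = Manager().Queue()
    pool = Pool(5)
    for item in range(100):
        pool.apply_async(func=worker, args=(item, queue))
    pool.close()
    pool.join()

    total = queue.qsize()            # capture the count before draining the queue
    correct = sum(queue.get_nowait() for _ in range(total))
    print(correct / total)
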

agentP = DQN()
Policy_net = agentP.policy_net
if __name__ == '__main__':
    from BiYeSheJi.Module.environment import Environment

    env = Environment(Graph)
    env.init_TreeList(config.dataSet + "/test.txt")

    main()