Example 1
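Both excerpts are `__init__` methods lifted from larger policy classes, so they rely on module-level imports and network definitions that the snippets do not show. A minimal sketch of those prerequisites follows; the module paths for the network classes are assumptions for illustration, not taken from the excerpts:

import os
import torch

# Assumed locations of the network classes used below (paths are illustrative):
# from network.base_net import RNN
# from network.commnet import CommNet
# from network.g2anet import G2ANet
# from network.coma_critic import ComaCritic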
    def __init__(self, args):
        self.n_actions = args.n_actions
        self.n_agents = args.n_agents
        self.state_shape = args.state_shape
        self.obs_shape = args.obs_shape
        actor_input_shape = self.obs_shape  # actor input dimension; same as the RNN input in VDN/QMIX, so the same network structure is reused
        # the RNN input dimension depends on the arguments below
        if args.last_action:
            actor_input_shape += self.n_actions
        if args.reuse_network:
            actor_input_shape += self.n_agents
        self.args = args

        # Neural networks
        # The network each agent uses to pick actions. It outputs a value for every action of the current agent, and a softmax still has to be applied to those outputs when selecting an action.
        if self.args.alg == 'reinforce':
            print('Init alg reinforce')
            self.eval_rnn = RNN(actor_input_shape, args)
        elif self.args.alg == 'reinforce+commnet':
            print('Init alg reinforce+commnet')
            self.eval_rnn = CommNet(actor_input_shape, args)
        elif self.args.alg == 'reinforce+g2anet':
            print('Init alg reinforce+g2anet')
            self.eval_rnn = G2ANet(actor_input_shape, args)
        else:
            raise Exception("No such algorithm")

        if self.args.cuda:
            self.eval_rnn.cuda()

        self.model_dir = args.model_dir + '/' + args.alg + '/' + args.map
        # load a saved model if one exists
        if self.args.load_model:
            if os.path.exists(self.model_dir + '/rnn_params.pkl'):
                path_rnn = self.model_dir + '/rnn_params.pkl'
                map_location = 'cuda:0' if self.args.cuda else 'cpu'
                self.eval_rnn.load_state_dict(
                    torch.load(path_rnn, map_location=map_location))
                print('Successfully load the model: {}'.format(path_rnn))
            else:
                raise Exception("No model!")

        self.rnn_parameters = list(self.eval_rnn.parameters())
        if args.optimizer == "RMS":
            self.rnn_optimizer = torch.optim.RMSprop(self.rnn_parameters,
                                                     lr=args.lr_actor)

        # During execution, one eval_hidden has to be maintained for each agent.
        # During learning, one eval_hidden has to be maintained for each agent in each episode.
        self.eval_hidden = None
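The trailing `self.eval_hidden = None` anticipates a hidden-state initializer that the excerpt does not show. A minimal sketch of what it could look like, assuming the class exposes an `init_hidden` helper and that `args.rnn_hidden_dim` holds the RNN hidden size (both names are assumptions):

    def init_hidden(self, episode_num):
        # Hypothetical helper: one zeroed hidden state per agent per episode,
        # matching the bookkeeping described in the comments above.
        self.eval_hidden = torch.zeros((episode_num, self.n_agents,
                                        self.args.rnn_hidden_dim))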
Example 2
    def __init__(self, args):
        self.n_actions = args.n_actions
        self.n_agents = args.n_agents
        self.state_shape = args.state_shape
        self.obs_shape = args.obs_shape
        actor_input_shape = self.obs_shape  # actor input dimension; same as the RNN input in VDN/QMIX, so the same network structure is reused
        critic_input_shape = self._get_critic_input_shape()  # critic input dimension
        # the RNN input dimension depends on the arguments below
        if args.last_action:
            actor_input_shape += self.n_actions
        if args.reuse_network:
            actor_input_shape += self.n_agents
        self.args = args

        # Neural networks
        # The network each agent uses to pick actions. It outputs a value for every action of the current agent, and a softmax still has to be applied to those outputs when selecting an action.
        if self.args.alg == 'coma':
            print('Init alg coma')
            self.eval_rnn = RNN(actor_input_shape, args)
        elif self.args.alg == 'coma+commnet':
            print('Init alg coma+commnet')
            self.eval_rnn = CommNet(actor_input_shape, args)
        elif self.args.alg == 'coma+g2anet':
            print('Init alg coma+g2anet')
            self.eval_rnn = G2ANet(actor_input_shape, args)
        else:
            raise Exception("No such algorithm")

        # Produces the joint Q-value for every action the current agent can take; these Q-values are then combined with the probabilities output by the actor network to compute the advantage.
        self.eval_critic = ComaCritic(critic_input_shape, self.args)
        self.target_critic = ComaCritic(critic_input_shape, self.args)

        if self.args.cuda:
            self.eval_rnn.cuda()
            self.eval_critic.cuda()
            self.target_critic.cuda()

        self.model_dir = args.model_dir + '/' + args.alg + '/' + args.map
        # load a saved model if one exists
        # if os.path.exists(self.model_dir + '/rnn_params.pkl'):
        #     path_rnn = self.model_dir + '/rnn_params.pkl'
        #     path_coma = self.model_dir + '/critic_params.pkl'
        #     self.eval_rnn.load_state_dict(torch.load(path_rnn))
        #     self.eval_critic.load_state_dict(torch.load(path_coma))
        #     print('Successfully load the model: {} and {}'.format(path_rnn, path_coma))

        # initialize target_net with the same parameters as eval_net
        self.target_critic.load_state_dict(self.eval_critic.state_dict())

        self.rnn_parameters = list(self.eval_rnn.parameters())
        self.critic_parameters = list(self.eval_critic.parameters())

        if args.optimizer == "RMS":
            self.critic_optimizer = torch.optim.RMSprop(self.critic_parameters,
                                                        lr=args.lr_critic)
            self.rnn_optimizer = torch.optim.RMSprop(self.rnn_parameters,
                                                     lr=args.lr_actor)

        # During execution, one eval_hidden has to be maintained for each agent.
        # During learning, one eval_hidden has to be maintained for each agent in each episode.
        self.eval_hidden = None
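Example 2 calls `self._get_critic_input_shape()` without showing it. A plausible sketch, assuming the COMA critic conditions on the global state, the agent's own observation, a one-hot agent id, and the current and last one-hot actions of all agents (this composition is an assumption, not part of the excerpt):

    def _get_critic_input_shape(self):
        # Assumed critic input: global state + own observation + one-hot agent id
        # + current and last one-hot actions of every agent.
        input_shape = self.state_shape
        input_shape += self.obs_shape
        input_shape += self.n_agents
        input_shape += self.n_actions * self.n_agents * 2
        return input_shape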