import os

import torch

# Project-local network modules; import paths are assumed from the usual repo layout.
from network.base_net import RNN
from network.commnet import CommNet
from network.g2anet import G2ANet
from network.coma_critic import ComaCritic


class Reinforce:  # class name assumed; the excerpt shows only the __init__ body
    def __init__(self, args):
        self.n_actions = args.n_actions
        self.n_agents = args.n_agents
        self.state_shape = args.state_shape
        self.obs_shape = args.obs_shape
        # Input dimension of the actor network; it matches the RNN input dimension
        # used by VDN/QMIX, so the same network structure is reused.
        actor_input_shape = self.obs_shape
        # Adjust the RNN input dimension according to the arguments.
        if args.last_action:
            actor_input_shape += self.n_actions
        if args.reuse_network:
            actor_input_shape += self.n_agents
        self.args = args

        # Neural network: the network each agent uses to select actions. It outputs
        # a score for every action of the current agent; a softmax is still applied
        # to these outputs when an action is actually selected.
        if self.args.alg == 'reinforce':
            print('Init alg reinforce')
            self.eval_rnn = RNN(actor_input_shape, args)
        elif self.args.alg == 'reinforce+commnet':
            print('Init alg reinforce+commnet')
            self.eval_rnn = CommNet(actor_input_shape, args)
        elif self.args.alg == 'reinforce+g2anet':
            print('Init alg reinforce+g2anet')
            self.eval_rnn = G2ANet(actor_input_shape, args)
        else:
            raise Exception("No such algorithm")
        if self.args.cuda:
            self.eval_rnn.cuda()

        self.model_dir = args.model_dir + '/' + args.alg + '/' + args.map
        # Load the model if a saved one exists.
        if self.args.load_model:
            if os.path.exists(self.model_dir + '/rnn_params.pkl'):
                path_rnn = self.model_dir + '/rnn_params.pkl'
                map_location = 'cuda:0' if self.args.cuda else 'cpu'
                self.eval_rnn.load_state_dict(torch.load(path_rnn, map_location=map_location))
                print('Successfully load the model: {}'.format(path_rnn))
            else:
                raise Exception("No model!")

        self.rnn_parameters = list(self.eval_rnn.parameters())
        if args.optimizer == "RMS":  # only RMSprop is wired up here
            self.rnn_optimizer = torch.optim.RMSprop(self.rnn_parameters, lr=args.lr_actor)

        # During execution, maintain one eval_hidden per agent.
        # During training, maintain one eval_hidden per agent in every episode.
        self.eval_hidden = None
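    # eval_hidden is declared above but never initialized in this excerpt. A minimal
    # sketch (an assumption, not code shown in the source) of how it might be set up:
    # one zeroed RNN hidden state per agent per episode, with args.rnn_hidden_dim
    # (a hypothetical parameter name here) giving the hidden size.
    def init_hidden(self, episode_num):
        # Shape: (episode_num, n_agents, rnn_hidden_dim).
        self.eval_hidden = torch.zeros((episode_num, self.n_agents, self.args.rnn_hidden_dim))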
class COMA:  # class name assumed; the excerpt shows only the __init__ body
    def __init__(self, args):
        self.n_actions = args.n_actions
        self.n_agents = args.n_agents
        self.state_shape = args.state_shape
        self.obs_shape = args.obs_shape
        # Input dimension of the actor network; it matches the RNN input dimension
        # used by VDN/QMIX, so the same network structure is reused.
        actor_input_shape = self.obs_shape
        # Input dimension of the critic network.
        critic_input_shape = self._get_critic_input_shape()
        # Adjust the RNN input dimension according to the arguments.
        if args.last_action:
            actor_input_shape += self.n_actions
        if args.reuse_network:
            actor_input_shape += self.n_agents
        self.args = args

        # Neural networks.
        # The network each agent uses to select actions: it outputs a score for every
        # action of the current agent; a softmax is still applied to these outputs
        # when an action is actually selected.
        if self.args.alg == 'coma':
            print('Init alg coma')
            self.eval_rnn = RNN(actor_input_shape, args)
        elif self.args.alg == 'coma+commnet':
            print('Init alg coma+commnet')
            self.eval_rnn = CommNet(actor_input_shape, args)
        elif self.args.alg == 'coma+g2anet':
            print('Init alg coma+g2anet')
            self.eval_rnn = G2ANet(actor_input_shape, args)
        else:
            raise Exception("No such algorithm")

        # The critic produces the joint Q-value for every action the current agent
        # can execute; these Q-values are then combined with the actor's output
        # probabilities to compute the advantage.
        self.eval_critic = ComaCritic(critic_input_shape, self.args)
        self.target_critic = ComaCritic(critic_input_shape, self.args)
        if self.args.cuda:
            self.eval_rnn.cuda()
            self.eval_critic.cuda()
            self.target_critic.cuda()

        self.model_dir = args.model_dir + '/' + args.alg + '/' + args.map
        # Load the model if a saved one exists.
        # if os.path.exists(self.model_dir + '/rnn_params.pkl'):
        #     path_rnn = self.model_dir + '/rnn_params.pkl'
        #     path_coma = self.model_dir + '/critic_params.pkl'
        #     self.eval_rnn.load_state_dict(torch.load(path_rnn))
        #     self.eval_critic.load_state_dict(torch.load(path_coma))
        #     print('Successfully load the model: {} and {}'.format(path_rnn, path_coma))

        # Make the target network's parameters identical to the eval network's.
        self.target_critic.load_state_dict(self.eval_critic.state_dict())

        self.rnn_parameters = list(self.eval_rnn.parameters())
        self.critic_parameters = list(self.eval_critic.parameters())

        if args.optimizer == "RMS":  # only RMSprop is wired up here
            self.critic_optimizer = torch.optim.RMSprop(self.critic_parameters, lr=args.lr_critic)
            self.rnn_optimizer = torch.optim.RMSprop(self.rnn_parameters, lr=args.lr_actor)

        # During execution, maintain one eval_hidden per agent.
        # During training, maintain one eval_hidden per agent in every episode.
        self.eval_hidden = None
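    # _get_critic_input_shape is called above but not defined in this excerpt.
    # A minimal sketch, assuming the COMA critic conditions on the global state,
    # the agent's own observation, a one-hot agent id, and the one-hot current and
    # last actions of all agents; the exact composition is an assumption.
    def _get_critic_input_shape(self):
        input_shape = self.state_shape                       # global state
        input_shape += self.obs_shape                        # the agent's own observation
        input_shape += self.n_agents                         # one-hot agent id
        input_shape += self.n_actions * self.n_agents * 2    # all agents' current + last actions
        return input_shape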