def __init__(self, env, args):
    """Build the agents, rollout workers, replay buffer and result directory.

    Args:
        env: Training environment; handed to the training rollout worker.
        args: Run configuration. ``args.alg``, ``args.map`` and
            ``args.result_dir`` are read here, and the whole object is
            forwarded to the agents, the workers and the replay buffer.
    """
    self.env = env

    # Separate environment used only for evaluation. The original intent was
    # a sparse-reward environment (win = 1, loss = -1, any other step = 0),
    # built with the call kept below for reference:
    #
    #   self.env_evaluate = StarCraft2Env(map_name=args.map,
    #                                     step_mul=args.step_mul,
    #                                     difficulty=args.difficulty,
    #                                     game_version=args.game_version,
    #                                     seed=args.seed,
    #                                     replay_dir=args.replay_dir,
    #                                     reward_sparse=True,
    #                                     reward_scale=False)
    #
    # NOTE(review): MeetEnv now stands in for it; confirm that it follows the
    # same reward scheme.
    self.env_evaluate = MeetEnv()

    if 'commnet' in args.alg or 'g2anet' in args.alg:
        # Communication agents.
        self.agents = CommAgents(args)
        self.rolloutWorker = CommRolloutWorker(env, self.agents, args)
        self.evaluateWorker = CommRolloutWorker(self.env_evaluate, self.agents, args)
    else:
        # Agents without communication.
        self.agents = Agents(args)
        self.rolloutWorker = RolloutWorker(env, self.agents, args)
        self.evaluateWorker = RolloutWorker(self.env_evaluate, self.agents, args)

    # coma, central_v and reinforce are on-policy, so they get no replay
    # buffer; ``self.buffer`` is only defined for the remaining algorithms.
    on_policy = any(name in args.alg for name in ('coma', 'central_v', 'reinforce'))
    if not on_policy:
        self.buffer = ReplayBuffer(args)

    self.args = args

    # Directory where the plots (plt) and pickles (pkl) are saved.
    self.save_path = self.args.result_dir + '/' + args.alg + '/' + args.map
    # exist_ok=True tolerates a directory that already exists, including one
    # created concurrently by another run (a non-directory at that path now
    # raises instead of being silently accepted).
    os.makedirs(self.save_path, exist_ok=True)
def __init__(self, env, args):
    """Build the agents, rollout worker, replay buffer and output locations.

    Args:
        env: Environment handed to the rollout worker.
        args: Run configuration. This method reads ``alg``, ``learn``,
            ``use_per``, ``result_dir``, ``map``, ``env_name``, ``n_agents``,
            ``map_size`` and ``name_time``; the whole object is also
            forwarded to the agents, the worker and the replay buffer.
    """
    self.env = env

    if 'commnet' in args.alg or 'g2anet' in args.alg:
        # Communication agents.
        self.agents = CommAgents(args)
        self.rolloutWorker = CommRolloutWorker(env, self.agents, args)
    else:
        # Agents without communication.
        self.agents = Agents(args)
        self.rolloutWorker = RolloutWorker(env, self.agents, args)

    # coma, central_v and reinforce are on-policy, so they get no replay
    # buffer. ``self.buffer`` is only defined when learning with one of the
    # other algorithms.
    on_policy = any(name in args.alg for name in ('coma', 'central_v', 'reinforce'))
    if args.learn and not on_policy:
        if args.use_per:
            self.buffer = PrioritizedReplayBuffer(args)
        else:
            self.buffer = ReplayBuffer(args)

    self.args = args
    self.win_rates = []
    self.episode_rewards = []

    # Directory where the plots (plt) and pickles (pkl) are saved; the
    # trailing '/' is needed because file_name is built by concatenation.
    self.save_path = self.args.result_dir + '/' + args.map + '/'
    # exist_ok=True tolerates a directory that already exists, including one
    # created concurrently by another run (a non-directory at that path now
    # raises instead of being silently accepted).
    os.makedirs(self.save_path, exist_ok=True)

    # Common file-name prefix: <env_name>_<n_agents>_<map_size>_<name_time>.
    self.file_name = (self.save_path + str(args.env_name) + '_'
                      + str(args.n_agents) + '_' + str(args.map_size) + '_'
                      + args.name_time)
def __init__(self, curriculum, args, target_env):
    """Build the agents and rollout worker and reset the bookkeeping state.

    Args:
        curriculum: Stored as ``self.curriculum``; not otherwise used here.
        args: Run configuration. This method reads ``alg``, ``evaluate`` and
            ``save_path``; the whole object is also forwarded to the agents
            and the rollout worker.
        target_env: Stored as ``self.target_env``; not otherwise used here.
    """
    self.target_env = target_env
    self.curriculum = curriculum

    # The rollout worker is created without an environment (None is passed).
    if 'commnet' in args.alg or 'g2anet' in args.alg:
        # Communication agents.
        self.agents = CommAgents(args)
        self.rolloutWorker = CommRolloutWorker(None, self.agents, args)
    else:
        # Agents without communication.
        self.agents = Agents(args)
        self.rolloutWorker = RolloutWorker(None, self.agents, args)

    # coma, central_v and reinforce are on-policy and use no replay buffer.
    # NOTE(review): ``self.buffer`` is only bound (to None) when training an
    # off-policy algorithm; in every other case the attribute does not exist.
    # Presumably the real buffer is attached later - confirm against callers.
    on_policy = any(name in args.alg for name in ('coma', 'central_v', 'reinforce'))
    if not args.evaluate and not on_policy:
        self.buffer = None

    self.args = args
    self.win_rates = []
    self.eval_episode_rewards = []

    # Directory where the plots (plt) and pickles (pkl) are saved.
    self.save_path = args.save_path
    # exist_ok=True tolerates a directory that already exists, including one
    # created concurrently by another run (a non-directory at that path now
    # raises instead of being silently accepted).
    os.makedirs(self.save_path, exist_ok=True)

    self.train_rewards = []
    self.ratios = []
    self.historical_params = {}
    self.switch = True  # we will be switching to some task
    self.patience = 20
    # Left unset here; nothing in this method assigns a writer or eval envs.
    self.writer: SummaryWriter = None
    self.eval_envs = None
    self.debug = False
def __init__(self, env, args):
    """Build the agents, learner, rollout worker, buffer and result directory.

    Args:
        env: Environment handed to the rollout worker.
        args: Run configuration. This method reads ``alg``, ``learn``,
            ``result_dir``, ``map`` and the hyper-parameters that make up the
            result directory name (``buffer_size``, ``actor_buffer_size``,
            ``critic_buffer_size``, ``actor_train_steps``,
            ``critic_train_steps``, ``actor_update_delay``, ``critic_lr``,
            ``n_epoch``, ``temp``); the whole object is also forwarded to the
            agents, the learner, the worker and the replay buffer.
    """
    self.env = env
    self.args = args

    if 'commnet' in args.alg or 'g2anet' in args.alg:
        # Communication agents.
        # NOTE(review): no ``qmix_pg_learner`` is created on this branch -
        # confirm that communication algorithms never need it.
        self.agents = CommAgents(args)
        self.rolloutWorker = CommRolloutWorker(env, self.agents, args)
    else:
        # Agents without communication.
        self.agents = Agents(args)
        self.qmix_pg_learner = QMIX_PG(self.agents, args)
        self.rolloutWorker = RolloutWorker(env, self.agents, args)

    # coma, central_v and reinforce are on-policy, so they get no replay
    # buffer. ``self.actor_critic_buffer`` is only defined when learning with
    # one of the other algorithms.
    on_policy = any(name in args.alg for name in ('coma', 'central_v', 'reinforce'))
    if args.learn and not on_policy:
        self.actor_critic_buffer = ReplayBuffer(args, args.buffer_size)
        # A separate actor buffer is currently disabled:
        # self.actor_buffer = ReplayBuffer(args, args.actor_buffer_size)

    self.win_rates = []
    self.episode_rewards = []

    # Experiment tag encoding the hyper-parameters of this run. The prefix is
    # a fixed label (alternatives noted in the original: 'clamp2-5_' without
    # 'rewardscale10_', and a '_gradclip0.5' suffix).
    tmp = ('clamp2-5_rewardscale10_'
           f'{args.buffer_size}_{args.actor_buffer_size}_{args.critic_buffer_size}_'
           f'{args.actor_train_steps}_{args.critic_train_steps}_'
           f'{args.actor_update_delay}_{args.critic_lr}_{args.n_epoch}_{args.temp}')

    # Directory where the plots (plt) and pickles (pkl) are saved.
    self.save_path = (self.args.result_dir + '/linear_mix/' + 'mcsac' + '/'
                      + tmp + '/' + args.map)
    # exist_ok=True tolerates a directory that already exists, including one
    # created concurrently by another run (a non-directory at that path now
    # raises instead of being silently accepted).
    os.makedirs(self.save_path, exist_ok=True)
def __init__(self, env, args):
    """Build the agents, rollout worker, replay buffer and result directory.

    Args:
        env: Environment handed to the rollout worker.
        args: Run configuration. This method reads ``alg``, ``learn``,
            ``result_dir`` and ``env_name``; the whole object is also
            forwarded to the agents, the worker and the replay buffer.
    """
    self.env = env

    if 'commnet' in args.alg or 'g2anet' in args.alg:
        # Communication agents.
        self.agents = CommAgents(args)
        self.rolloutWorker = CommRolloutWorker(env, self.agents, args)
    else:
        # Agents without communication.
        self.agents = Agents(args)
        self.rolloutWorker = RolloutWorker(env, self.agents, args)

    # coma, central_v and reinforce are on-policy, so they get no replay
    # buffer. ``self.buffer`` is only defined when learning with one of the
    # other algorithms.
    on_policy = any(name in args.alg for name in ('coma', 'central_v', 'reinforce'))
    if args.learn and not on_policy:
        self.buffer = ReplayBuffer(args)

    self.args = args
    self.plt_success = []
    self.episode_rewards = []

    # Directory where the plots (plt) and pickles (pkl) are saved.
    self.save_path = self.args.result_dir + '/' + args.alg + '/' + args.env_name
    # exist_ok=True tolerates a directory that already exists, including one
    # created concurrently by another run (a non-directory at that path now
    # raises instead of being silently accepted).
    os.makedirs(self.save_path, exist_ok=True)