def main(args):
    """Entry point for training on Hanabi.

    Parses ``args``, validates that the recurrent-policy flags match the
    chosen algorithm, selects the torch device, prepares the results
    directory (and optionally a wandb run), seeds the RNGs, builds the
    environments and hands everything to ``HanabiRunner.run()``.

    Args:
        args: Command-line style argument list forwarded to ``parse_args``.

    Raises:
        NotImplementedError: If ``algorithm_name`` is neither ``"rmappo"``
            nor ``"mappo"``.
        AssertionError: If the recurrent-policy flags do not match the
            selected algorithm.
    """
    all_args = parse_args(args, get_config())

    # The recurrent flags must be consistent with the algorithm name.
    algo = all_args.algorithm_name
    if algo == "rmappo":
        assert (all_args.use_recurrent_policy
                or all_args.use_naive_recurrent_policy), ("check recurrent policy!")
    elif algo == "mappo":
        # `== False` is kept on purpose so the check behaves exactly as before.
        assert (all_args.use_recurrent_policy == False
                and all_args.use_naive_recurrent_policy == False), ("check recurrent policy!")
    else:
        raise NotImplementedError

    # cuda
    use_gpu = all_args.cuda and torch.cuda.is_available()
    if use_gpu:
        print("choose to use gpu...")
        device = torch.device("cuda:0")
    else:
        print("choose to use cpu...")
        device = torch.device("cpu")
    torch.set_num_threads(all_args.n_training_threads)
    if use_gpu and all_args.cuda_deterministic:
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True

    # run dir: <parent of this file's directory>/results/<env>/<hanabi>/<algo>/<experiment>
    package_root = os.path.split(os.path.dirname(os.path.abspath(__file__)))[0]
    run_dir = Path(package_root + "/results")
    for part in (all_args.env_name, all_args.hanabi_name,
                 all_args.algorithm_name, all_args.experiment_name):
        run_dir = run_dir / part
    if not run_dir.exists():
        os.makedirs(str(run_dir))

    # wandb
    if all_args.use_wandb:
        run_name = f"{all_args.algorithm_name!s}_{all_args.experiment_name!s}_seed{all_args.seed!s}"
        run = wandb.init(config=all_args,
                         project=all_args.env_name,
                         entity=all_args.user_name,
                         notes=socket.gethostname(),
                         name=run_name,
                         group=all_args.hanabi_name,
                         dir=str(run_dir),
                         job_type="training",
                         reinit=True)
    else:
        # Without wandb, each launch gets its own numbered "runN" subfolder.
        taken_numbers = []
        if run_dir.exists():
            for folder in run_dir.iterdir():
                folder_name = str(folder.name)
                if folder_name.startswith('run'):
                    taken_numbers.append(int(folder_name.split('run')[1]))
        if taken_numbers:
            curr_run = 'run%i' % (max(taken_numbers) + 1)
        else:
            curr_run = 'run1'
        run_dir = run_dir / curr_run
        if not run_dir.exists():
            os.makedirs(str(run_dir))

    title_prefix = "-".join([str(all_args.algorithm_name),
                             str(all_args.env_name),
                             str(all_args.experiment_name)])
    setproctitle.setproctitle(title_prefix + "@" + str(all_args.user_name))

    # seed
    torch.manual_seed(all_args.seed)
    torch.cuda.manual_seed_all(all_args.seed)
    np.random.seed(all_args.seed)

    # env init
    envs = make_train_env(all_args)
    if all_args.use_eval:
        eval_envs = make_eval_env(all_args)
    else:
        eval_envs = None

    config = {
        "all_args": all_args,
        "envs": envs,
        "eval_envs": eval_envs,
        "num_agents": all_args.num_agents,
        "device": device,
        "run_dir": run_dir
    }

    # run experiments
    if all_args.share_policy:
        from onpolicy.runner.shared.hanabi_runner_forward import HanabiRunner as Runner
    else:
        from onpolicy.runner.separated.hanabi_runner_forward import HanabiRunner as Runner

    runner = Runner(config)
    runner.run()

    # post process
    envs.close()
    if all_args.use_eval and eval_envs is not envs:
        eval_envs.close()

    if all_args.use_wandb:
        run.finish()
    else:
        summary_path = str(runner.log_dir + '/summary.json')
        runner.writter.export_scalars_to_json(summary_path)
        runner.writter.close()
def main(args):
    """Entry point for rendering a trained policy in GridWorld.

    Parses ``args``, validates the configuration, selects the torch device,
    creates a fresh numbered ``runN`` results directory, seeds the RNGs,
    builds a single render environment and calls ``GridWorldRunner.render()``.

    Args:
        args: Command-line style argument list forwarded to ``parse_args``.

    Raises:
        NotImplementedError: If ``algorithm_name`` is not one of
            ``rmappo``, ``rmappg``, ``mappo`` or ``mappg``.
        AssertionError: If the recurrent-policy flags do not match the
            algorithm, ``use_render`` is off, ``model_dir`` is unset, or
            ``n_rollout_threads`` is not 1.
    """
    parser = get_config()
    all_args = parse_args(args, parser)

    if all_args.algorithm_name in ("rmappo", "rmappg"):
        assert (all_args.use_recurrent_policy
                or all_args.use_naive_recurrent_policy), ("check recurrent policy!")
    elif all_args.algorithm_name in ("mappo", "mappg"):
        # Both recurrent flags must be off. The previous form,
        # `(a and b) == False`, also passed when exactly one flag was set.
        assert not (all_args.use_recurrent_policy
                    or all_args.use_naive_recurrent_policy), ("check recurrent policy!")
    else:
        raise NotImplementedError

    assert all_args.use_render, ("u need to set use_render be True")
    assert not (all_args.model_dir is None or all_args.model_dir == ""), ("set model_dir first")
    assert all_args.n_rollout_threads == 1, ("only support to use 1 env to render.")

    # cuda
    if all_args.cuda and torch.cuda.is_available():
        print("choose to use gpu...")
        device = torch.device("cuda:0")
        torch.set_num_threads(all_args.n_training_threads)
        if all_args.cuda_deterministic:
            torch.backends.cudnn.benchmark = False
            torch.backends.cudnn.deterministic = True
    else:
        print("choose to use cpu...")
        device = torch.device("cpu")
        torch.set_num_threads(all_args.n_training_threads)

    # run dir: <parent of this file's directory>/results/<env>/<scenario>/<algo>/<experiment>
    run_dir = Path(
        os.path.split(os.path.dirname(os.path.abspath(__file__)))[0] + "/results"
    ) / all_args.env_name / all_args.scenario_name / all_args.algorithm_name / all_args.experiment_name
    os.makedirs(str(run_dir), exist_ok=True)

    # The directory is guaranteed to exist here, so simply pick the next
    # free "runN" index (or "run1" when there are no earlier runs).
    exst_run_nums = [
        int(str(folder.name).split('run')[1])
        for folder in run_dir.iterdir()
        if str(folder.name).startswith('run')
    ]
    if exst_run_nums:
        curr_run = 'run%i' % (max(exst_run_nums) + 1)
    else:
        curr_run = 'run1'
    run_dir = run_dir / curr_run
    os.makedirs(str(run_dir), exist_ok=True)

    setproctitle.setproctitle(
        str(all_args.algorithm_name) + "-" + str(all_args.env_name) + "-"
        + str(all_args.experiment_name) + "@" + str(all_args.user_name))

    # seed
    torch.manual_seed(all_args.seed)
    torch.cuda.manual_seed_all(all_args.seed)
    np.random.seed(all_args.seed)

    # env init
    envs = make_render_env(all_args)
    eval_envs = None
    num_agents = all_args.num_agents
    # The episode length is taken from the environment rather than the CLI.
    all_args.episode_length = envs.get_max_step()[0]

    config = {
        "all_args": all_args,
        "envs": envs,
        "eval_envs": eval_envs,
        "num_agents": num_agents,
        "device": device,
        "run_dir": run_dir
    }

    # run experiments
    if all_args.share_policy:
        from onpolicy.runner.shared.gridworld_runner import GridWorldRunner as Runner
    else:
        from onpolicy.runner.separated.gridworld_runner import GridWorldRunner as Runner

    runner = Runner(config)
    runner.render()

    # post process
    envs.close()
def main(args):
    '''
    examine.py is used to display environments and run policies.

    For an example environment jsonnet, see
        mujoco-worldgen/examples/example_env_examine.jsonnet
    You can find saved policies in the 'examples' directory, together with the
        environment they were trained in and the hyperparameters used. The naming
        used is 'examples/<env_name>.jsonnet' for the environment jsonnet file and
        'examples/<env_name>.npz' for the policy weights file.
    Example uses:
        bin/examine.py hide_and_seek
        bin/examine.py mae_envs/envs/base.py
        bin/examine.py base n_boxes=6 n_ramps=2 n_agents=3
        bin/examine.py my_env_jsonnet.jsonnet
        bin/examine.py my_env_jsonnet.jsonnet my_policy.npz
        bin/examine.py hide_and_seek my_policy.npz n_hiders=3 n_seekers=2 n_boxes=8 n_ramps=1
        bin/examine.py examples/hide_and_seek_quadrant.jsonnet examples/hide_and_seek_quadrant.npz

    With ``use_render`` set, the environment named by ``env_name`` is loaded,
    one saved model per agent is read from ``model_dir`` and the policies are
    played in ``PolicyViewer_hs``. Otherwise the environment is only opened
    for inspection through ``examine_env``.

    Args:
        args: Command-line style argument list forwarded to ``parse_args``.

    Raises:
        Exception: If no environment matching ``env_name`` could be loaded.
        AssertionError: If some keyword arguments were consumed by none of
            the environment, the policies or the viewer.
    '''
    parser = get_config()
    args = parse_args(args, parser)
    kwargs = {'args': args}

    env_name = args.env_name
    num_agents = args.num_hiders + args.num_seekers

    core_dir = abspath(join(dirname(__file__)))
    envs_dir = 'envs/hns/envs'  # where hide_and_seek.py is.
    xmls_dir = 'xmls'

    if args.use_render:  # run policies on the environment
        # importing PolicyViewer and load_policy here because they depend on several
        # packages which are only needed for playing policies, not for any of the
        # environments code.
        from onpolicy.envs.hns.viewer.policy_viewer import PolicyViewer_hs
        from onpolicy.envs.hns.ma_policy.load_policy import load_policy

        env, args_remaining_env = load_env(env_name, core_dir=core_dir,
                                           envs_dir=envs_dir, xmls_dir=xmls_dir,
                                           return_args_remaining=True, **kwargs)

        # Check for a failed lookup before touching any attribute of `env`;
        # otherwise a missing environment surfaces as an AttributeError.
        if env is None:
            raise Exception(f'Could not find environment based on pattern {env_name}')

        if isinstance(env.action_space, Tuple):
            env = JoinMultiAgentActions(env)

        env.reset()  # generate action and observation spaces

        # NOTE(review): torch.load unpickles the checkpoint, which can run
        # arbitrary code; only point model_dir at trusted files.
        policies = []
        for agent_id in range(num_agents):
            if args.share_policy:
                model_path = str(args.model_dir) + "/agent_model.pt"
            else:
                model_path = str(args.model_dir) + "/agent" + str(agent_id) + "_model.pt"
            policies.append(torch.load(model_path)['model'])

        # Policies consume no keyword arguments of their own here, so their
        # "remaining" set is the same as the environment's.
        args_remaining_policy = args_remaining_env

        args_to_pass, args_remaining_viewer = extract_matching_arguments(PolicyViewer_hs, kwargs)
        # An argument is unused only if every consumer left it over.
        args_remaining = set(args_remaining_env)
        args_remaining = args_remaining.intersection(set(args_remaining_policy))
        args_remaining = args_remaining.intersection(set(args_remaining_viewer))
        assert len(args_remaining) == 0, (
            f"There left unused arguments: {args_remaining}. There shouldn't be any.")

        viewer = PolicyViewer_hs(env, policies, **args_to_pass)
        viewer.run()
    else:  # examine the environment
        examine_env(env_name, kwargs,
                    core_dir=core_dir, envs_dir=envs_dir, xmls_dir=xmls_dir,
                    env_viewer=EnvViewer)
# [n_thread, balala*n] share_obs = share_obs.reshape(*share_obs.shape[:-2], -1) # [n_thread,n_agent,balala*n] share_obs = np.expand_dims(share_obs, 1).repeat(self.num_agents, axis=1) return share_obs if __name__ == "__main__": from onpolicy.config import get_config from onpolicy.envs.gfootball.gfootball_env import GoogleFootballEnv from onpolicy.envs.env_wrappers import SubprocVecEnv import gym gym.logger.set_level(gym.logger.ERROR) args = get_config().parse_known_args()[0] config = { 'all_args': args, 'envs': SubprocVecEnv([ lambda: GoogleFootballEnv(num_of_left_agents=3, env_name='test_example_multiagent', representation="simple115v2", channel_dimensions=(48, 36)) for i in range(args.n_rollout_threads) ]), 'eval_envs': SubprocVecEnv([ lambda: GoogleFootballEnv(num_of_left_agents=3, env_name='test_example_multiagent',
def main(args):
    """Entry point for rendering a trained policy in the football environment.

    Parses ``args``, validates the configuration, selects the torch device,
    derives the run (and, if needed, video) directory from ``model_dir``,
    seeds the RNGs, builds the environment and calls
    ``FootballRunner.render()``.

    Args:
        args: Command-line style argument list forwarded to ``parse_args``.

    Raises:
        NotImplementedError: If ``algorithm_name`` is not one of
            ``rmappo``, ``rmappg``, ``mappo`` or ``mappg``.
        AssertionError: If the recurrent-policy flags do not match the
            algorithm, ``use_render`` is off, ``model_dir`` is unset, or
            ``n_rollout_threads`` is not 1.
    """
    all_args = parse_args(args, get_config())

    # The recurrent flags must be consistent with the algorithm name.
    algo = all_args.algorithm_name
    if algo == "rmappo" or algo == "rmappg":
        assert (all_args.use_recurrent_policy
                or all_args.use_naive_recurrent_policy), ("check recurrent policy!")
    elif algo == "mappo" or algo == "mappg":
        # `== False` is kept on purpose so the check behaves exactly as before.
        assert (all_args.use_recurrent_policy == False
                and all_args.use_naive_recurrent_policy == False), ("check recurrent policy!")
    else:
        raise NotImplementedError

    assert all_args.use_render, ("u need to set use_render be True")
    assert not (all_args.model_dir == None or all_args.model_dir == ""), ("set model_dir first")
    assert all_args.n_rollout_threads == 1, ("only support to use 1 env to render.")

    # cuda
    use_gpu = all_args.cuda and torch.cuda.is_available()
    if use_gpu:
        print("choose to use gpu...")
        device = torch.device("cuda:0")
    else:
        print("choose to use cpu...")
        device = torch.device("cpu")
    torch.set_num_threads(all_args.n_training_threads)
    if use_gpu and all_args.cuda_deterministic:
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True

    # run dir and video dir: both live next to the model directory
    run_dir = Path(all_args.model_dir).parent.absolute()
    if all_args.save_videos and all_args.video_dir == "":
        # No explicit video directory was given: default to <run_dir>/videos.
        video_dir = run_dir / "videos"
        all_args.video_dir = str(video_dir)
        if not video_dir.exists():
            os.makedirs(str(video_dir))

    title_parts = [
        all_args.env_name,
        all_args.scenario_name,
        all_args.algorithm_name,
        all_args.experiment_name,
    ]
    process_title = "-".join(title_parts) + "@" + all_args.user_name
    setproctitle.setproctitle(process_title)

    # seed
    torch.manual_seed(all_args.seed)
    torch.cuda.manual_seed_all(all_args.seed)
    np.random.seed(all_args.seed)

    # env init
    envs = make_train_env(all_args)

    config = {
        "all_args": all_args,
        "envs": envs,
        "eval_envs": None,
        "num_agents": all_args.num_agents,
        "device": device,
        "run_dir": run_dir
    }

    # run experiments
    if all_args.share_policy:
        from onpolicy.runner.shared.football_runner import FootballRunner as Runner
    else:
        from onpolicy.runner.separated.football_runner import FootballRunner as Runner

    runner = Runner(config)
    runner.render()

    # post process
    envs.close()