import json
import os
import random
import string

from torch.utils.tensorboard import SummaryWriter


def randomString(stringLength=10):
    """Generate a random lowercase string used to name the experiment."""
    letters = string.ascii_lowercase
    return ''.join(random.choice(letters) for _ in range(stringLength))


# Create logging and checkpointing folders.
random_experiment_name = randomString(10)
writer = SummaryWriter(log_dir="runs/" + random_experiment_name)
directory = os.path.join(args['save_dir'], random_experiment_name)
if not os.path.exists(directory):
    os.makedirs(directory)

# Store the experiment configuration next to the checkpoints.
# (args, MRFAgent, and env_eval are defined elsewhere in the script.)
args["device"] = "cpu"
with open(os.path.join(directory, 'params.json'), 'w') as json_file:
    json.dump(args, json_file)

# Initialize the agent and checkpoint its initial parameters.
agent = MRFAgent(args=args, writer=writer, added_u_dim=0, temp=args["init_temp"])
save_dirs = os.path.join(directory, 'params_0')
agent.save_parameters(save_dirs)

# Evaluate the initial parameters in the training environment.
avgs = []
num_hits, num_shoots = 0, 0
num_dones = [0] * args['num_envs']
per_worker_rew = [0] * args['num_envs']
agent.reset()
env_eval.seed([args['eval_init_seed'] + 1000 * rank for rank in range(args['num_envs'])])
obs = env_eval.reset()

# Run until every parallel worker has finished args['eval_eps'] episodes.
while all(k < args['eval_eps'] for k in num_dones):
    acts = agent.step(obs, eval=True)
    num_shoots += sum(act == 7 for act in acts)  # action 7 is the shooting action
    n_obs, rewards, dones, info = env_eval.step(acts)
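    # The excerpt ends mid-loop; what follows is a minimal sketch of the
    # remaining bookkeeping, assuming returns are tallied per worker and an
    # episode's return is recorded when its done flag is set. The exact
    # accounting in the original script may differ.
    per_worker_rew = [r + rew for r, rew in zip(per_worker_rew, rewards)]
    obs = n_obs
    for worker, done in enumerate(dones):
        if done:
            if num_dones[worker] < args['eval_eps']:
                avgs.append(per_worker_rew[worker])
            num_dones[worker] += 1
            per_worker_rew[worker] = 0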

# Training script for the GPL-Q agent: the same experiment-folder setup,
# with the configuration also mirrored into the TensorBoard run directory.
random_experiment_name = randomString(10)
writer = SummaryWriter(log_dir="runs/" + random_experiment_name)
directory = os.path.join(args['save_dir'], random_experiment_name)
if not os.path.exists(directory):
    os.makedirs(directory)

with open(os.path.join(directory, 'params.json'), 'w') as json_file:
    json.dump(args, json_file)
with open(os.path.join('runs', random_experiment_name, 'params.json'), 'w') as json_file:
    json.dump(args, json_file)

# Initialize the GPL-Q Agent.
agent = MRFAgent(args=args, writer=writer, added_u_dim=9)

# Define the training environment.
num_players_train = args['num_players_train']
num_players_test = args['num_players_test']

import gym


def make_env(env_id, rank, seed=1285, effective_max_num_players=3,
             with_shuffle=False, gnn_input=True):
    """Return a thunk that builds one environment instance, seeded per worker."""
    def _init():
        env = gym.make(
            env_id,
            seed=seed + rank,  # each parallel worker gets a distinct seed
            effective_max_num_players=effective_max_num_players,
            init_num_players=effective_max_num_players,
            with_shuffle=with_shuffle,
            gnn_input=gnn_input
        )
        return env

    # Return the thunk so vectorized environment wrappers can build each
    # worker's environment lazily in its own process.
    return _init
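# The excerpt cuts off before the factory is used. A minimal usage sketch,
# assuming the same AsyncVectorEnv pattern as the analysis script below;
# args['env_name'] is an assumed configuration key.
from gym.vector import AsyncVectorEnv

env = AsyncVectorEnv([
    make_env(args['env_name'], rank, seed=args['seed'],
             effective_max_num_players=num_players_train)
    for rank in range(args['num_envs'])
])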

# Analysis script: roll out every saved checkpoint and store the agent's
# estimated utility values for later inspection. It calls a make_env variant
# with a different signature; a sketch of that factory follows the script.
num_players_train = args['num_players_train']
num_players_test = args['num_players_test']

env = AsyncVectorEnv([
    make_env(args, rank, active_agents=num_players_train,
             seed=args['seed'], reward_scheme="sparse")
    for rank in range(8)
])

args["device"] = "cpu"
writer = None  # disable TensorBoard logging during analysis

for idx in range(101):
    # Load the idx-th checkpoint saved during training.
    agent = MRFAgent(args=args, writer=writer, added_u_dim=0)
    load_dir = args['loading_dir'] + str(idx)
    agent.load_parameters(load_dir)

    obs_list = []
    agent.reset()
    obs = env.reset()
    for step in range(3000):
        print(idx, step)  # progress logging
        obs_list.append(obs)
        acts = agent.step(obs, eval=True)
        n_obs, reward, done, info = env.step(acts)
        obs = n_obs

    # Persist the utility values collected during this rollout.
    agent.save_util_storage("utils_" + str(idx))
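# A minimal sketch of the make_env variant assumed by the analysis script
# above. The keyword names active_agents and reward_scheme come from the call
# site; how they map onto the environment's gym.make kwargs is an assumption
# to verify against the environment implementation.
def make_env(args, rank, active_agents=3, seed=1285, reward_scheme="sparse"):
    def _init():
        env = gym.make(
            args['env_name'],  # assumed configuration key
            seed=seed + rank,  # distinct seed per parallel worker
            effective_max_num_players=active_agents,
            init_num_players=active_agents,
            reward_scheme=reward_scheme
        )
        return env

    return _init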