Example 1
    # Helper for naming the run; assumes the enclosing script imports
    # `string` and `random` at module level.
    def randomString(stringLength=10):
        letters = string.ascii_lowercase
        return ''.join(random.choice(letters) for i in range(stringLength))

    # Create logging and checkpointing folders.
    random_experiment_name = randomString(10)
    writer = SummaryWriter(log_dir="runs/" + random_experiment_name)
    directory = os.path.join(args['save_dir'], random_experiment_name)
    if not os.path.exists(directory):
        os.makedirs(directory)

    args["device"] = "cpu"
    with open(os.path.join(directory, 'params.json'), 'w') as json_file:
        json.dump(args, json_file)

    agent = MRFAgent(args=args, writer=writer, added_u_dim=0, temp=args["init_temp"])
    # Save the initial (untrained) parameters as checkpoint 0.
    save_dirs = os.path.join(directory, 'params_0')
    agent.save_parameters(save_dirs)

    # Evaluate the initial parameters in the training environment
    # (env_eval is a vectorized environment created elsewhere in the script).
    avgs = []
    num_hits, num_shoots = 0, 0
    num_dones, per_worker_rew = [0] * args['num_envs'], [0] * args['num_envs']
    agent.reset()
    env_eval.seed([args['eval_init_seed'] + 1000 * rank for rank in range(args['num_envs'])])

    obs = env_eval.reset()
    # Run greedy evaluation until every worker has finished args['eval_eps'] episodes.
    while all(k < args['eval_eps'] for k in num_dones):
        acts = agent.step(obs, eval=True)
        num_shoots += sum(act == 7 for act in acts)  # count shooting actions (action id 7)
        n_obs, rewards, dones, info = env_eval.step(acts)
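The excerpt ends right after env_eval.step, before the bookkeeping that advances num_dones and eventually ends the while loop. A minimal sketch of what that bookkeeping could look like inside the loop, assuming dones and rewards are per-worker arrays as returned by a standard vectorized env (illustrative only, not the project's exact code):

        for worker, (rew, done) in enumerate(zip(rewards, dones)):
            per_worker_rew[worker] += rew
            if done:
                # Record the finished episode's return until the quota is met.
                if num_dones[worker] < args['eval_eps']:
                    avgs.append(per_worker_rew[worker])
                num_dones[worker] += 1
                per_worker_rew[worker] = 0
        obs = n_obs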
Example 2
    # Same randomString helper as in Example 1 (this excerpt starts mid-function).
    def randomString(stringLength=10):
        letters = string.ascii_lowercase
        return ''.join(random.choice(letters) for i in range(stringLength))

    # Create logging and checkpointing folders.
    random_experiment_name = randomString(10)
    writer = SummaryWriter(log_dir="runs/" + random_experiment_name)
    directory = os.path.join(args['save_dir'], random_experiment_name)
    if not os.path.exists(directory):
        os.makedirs(directory)

    # Persist the run configuration next to both the checkpoints and the
    # TensorBoard logs so either directory is self-describing.
    with open(os.path.join(directory, 'params.json'), 'w') as json_file:
        json.dump(args, json_file)

    with open(os.path.join('runs', random_experiment_name, 'params.json'), 'w') as json_file:
        json.dump(args, json_file)

    # Initialize the GPL-Q Agent
    agent = MRFAgent(args=args, writer=writer, added_u_dim=9)

    # Define the training environment
    num_players_train = args['num_players_train']
    num_players_test = args['num_players_test']

    # Factory returning a thunk that constructs one environment instance;
    # rank offsets the seed so parallel workers get distinct RNG streams.
    def make_env(env_id, rank, seed=1285, effective_max_num_players=3, with_shuffle=False, gnn_input=True):
        def _init():
            env = gym.make(
                env_id, seed=seed + rank,
                effective_max_num_players=effective_max_num_players,
                init_num_players=effective_max_num_players,
                with_shuffle=with_shuffle,
                gnn_input=gnn_input
            )
            return env
        return _init  # reconstructed: the thunk is handed to the vectorized env (cf. Example 3)
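make_env returns a thunk instead of a built environment so that each worker of a vectorized env can construct its own instance lazily. A hypothetical usage with gym's AsyncVectorEnv (the env id string here is a placeholder; Example 3 shows the project's actual call, with a slightly different make_env signature):

    from gym.vector import AsyncVectorEnv

    env = AsyncVectorEnv([
        make_env("SomeEnv-v0", rank, seed=1285)  # "SomeEnv-v0" is a placeholder id
        for rank in range(args['num_envs'])
    ])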
Example 3
    num_players_train = args['num_players_train']
    num_players_test = args['num_players_test']

    # Eight parallel sparse-reward environments for evaluation.
    env = AsyncVectorEnv([
        make_env(args,
                 i,
                 active_agents=num_players_train,
                 seed=args['seed'],
                 reward_scheme="sparse") for i in range(8)
    ])

    args["device"] = "cpu"
    writer = None

    # Evaluate each of the 101 saved checkpoints in turn.
    for idx in range(101):
        agent = MRFAgent(args=args, writer=writer, added_u_dim=0)
        load_dir = args['loading_dir'] + str(idx)
        agent.load_parameters(load_dir)

        obs_list = []

        agent.reset()
        obs = env.reset()
        # Roll out 3000 vectorized steps, printing progress to stdout.
        for i in range(3000):
            print(idx, i)
            obs_list.append(obs)
            acts = agent.step(obs, eval=True)
            n_obs, reward, done, info = env.step(acts)
            obs = n_obs

        agent.save_util_storage("utils_" + str(idx))
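Example 3 assumes an args dict is already in scope. Since Examples 1 and 2 dump the training configuration to params.json, one natural way to obtain it for evaluation is to load that file back; experiment_name below is a hypothetical variable holding the run's random name:

    import json
    import os

    with open(os.path.join('runs', experiment_name, 'params.json'), 'r') as json_file:
        args = json.load(json_file)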