def initialize_experiment(experiment_config, agents_config, self_play_configs):
    """Build the task, self-play schemes and agents for an experiment.

    Args:
        experiment_config: Mapping whose 'environment' entry unpacks into
            exactly two values: the environment name and a value that is
            passed to ``EnvType``.
        agents_config: Agent configuration, forwarded to
            ``initialize_agents``.
        self_play_configs: Self-play configuration, forwarded to
            ``initialize_training_schemes``.

    Returns:
        Tuple ``(task, sp_schemes, agents)``.
    """
    name, type_value = experiment_config['environment']
    env_type = EnvType(type_value)
    task = generate_task(name, env_type)

    # Both the schemes and the agents are derived from the same task;
    # the schemes are created first.
    sp_schemes = initialize_training_schemes(self_play_configs, task)
    agents = initialize_agents(task, agents_config)
    return task, sp_schemes, agents
예제 #2
0
def test_can_initialize_delta_uniform_self_plays():
    """Check that delta-uniform self-play schemes are built from a YAML config.

    The config declares two schemes, with ``delta`` set to 0. and 0.5.
    Every scheme returned by ``initialize_training_schemes`` must be a
    ``DeltaDistributionalSelfPlay`` whose ``delta`` is one of those values.
    """
    sp_config = '''
    deltauniform-fullhistory:
        delta: 0.
    deltauniform-halfhistory:
        delta: 0.5
    '''
    # safe_load instead of yaml.load: calling yaml.load without an explicit
    # Loader raises TypeError on PyYAML >= 6, and this config only contains
    # plain mappings and floats, which the safe loader handles.
    parsed_config = yaml.safe_load(sp_config)
    sp_schemes = initialize_training_schemes(parsed_config, task=None)
    assert all(
        isinstance(sp, DeltaDistributionalSelfPlay) for sp in sp_schemes)
    assert all(sp.delta in (0., 0.5) for sp in sp_schemes)
예제 #3
0
def initialize_experiment(experiment_config, agents_config, self_play_configs):
    """Build the task, self-play schemes, agents and seeds for an experiment.

    Args:
        experiment_config: Mapping with the entries
            'environment' (unpacks into an environment name and a value
            passed to ``EnvType``), 'seeds' (iterable of values convertible
            with ``int``) and 'number_of_runs'.
        agents_config: Agent configuration, forwarded to
            ``initialize_agents``.
        self_play_configs: Self-play configuration, forwarded to
            ``initialize_training_schemes``.

    Returns:
        Tuple ``(task, sp_schemes, agents, seeds)`` where ``seeds`` is a
        list of ints. When fewer seeds than 'number_of_runs' are configured,
        the configured seeds are discarded and 'number_of_runs' new ones are
        drawn from ``np.random.randint(0, 10000, ...)``. Otherwise the
        configured seeds are returned as given (converted to int).
    """
    env, env_type = experiment_config['environment']
    task = generate_task(env, EnvType(env_type))
    sp_schemes = initialize_training_schemes(self_play_configs, task)
    agents = initialize_agents(task, agents_config)

    seeds = [int(seed) for seed in experiment_config['seeds']]

    number_of_runs = experiment_config['number_of_runs']
    if len(seeds) < number_of_runs:
        # Not enough seeds for every run: replace them all with fresh ones.
        print('Number of random seeds does not match "number of runs" config value. Generating new seeds')
        seeds = np.random.randint(0, 10000, number_of_runs).tolist()

    return task, sp_schemes, agents, seeds
예제 #4
0
def initialize_experiment(experiment_config, agents_config, self_play_configs):
    """Build the experiment's task, schemes and agents, restoring saved state.

    The directory named by ``experiment_config['experiment_id']`` is
    inspected. If it exists and its listing is anything other than exactly
    ``['menagerie']``, a previously saved agent is loaded through
    ``load_existing_agent_and_update_task`` and the menagerie is loaded from
    ``<experiment_id>/menagerie/<first scheme name>-<first algorithm>``,
    sorted by each member's ``finished_episodes``.

    Args:
        experiment_config: Mapping with the entries 'environment',
            'experiment_id' and 'algorithms' (indexable; element 0 is used).
        agents_config: Agent configuration, forwarded to
            ``initialize_agents``.
        self_play_configs: Self-play configuration, forwarded to
            ``initialize_training_schemes``.

    Returns:
        Tuple ``(task, sp_schemes, agents, initial_menagerie)``;
        ``initial_menagerie`` is an empty list when nothing was loaded.

    Raises:
        AssertionError: If saved state is detected but the menagerie path
            does not exist.
    """
    task = create_task_from_config(experiment_config['environment'])
    sp_schemes = initialize_training_schemes(self_play_configs, task)
    agents = initialize_agents(task, agents_config)

    base_path = experiment_config['experiment_id']
    menagerie_path = f"{base_path}/menagerie/{sp_schemes[0].name}-{experiment_config['algorithms'][0]}"

    # Nothing to restore when the experiment directory is missing, or when
    # it holds only a menagerie and no trained agent.
    if not os.path.exists(base_path) or os.listdir(base_path) == ['menagerie']:
        return task, sp_schemes, agents, []

    logger = logging.getLogger('LOADING AGENT AND MENAGERIE')
    logger.info(f"Attempting to load agent from: {base_path}/")
    agent = load_existing_agent_and_update_task(base_path, task)

    assert os.path.exists(menagerie_path), f'Menagerie should be present at {menagerie_path}'
    initial_menagerie = load_population_from_path(menagerie_path, show_progress=True)
    # Order the loaded population by finished episode count, ascending.
    initial_menagerie.sort(key=lambda member: member.finished_episodes)

    logger.info(f'Loaded agent, with {agent.finished_episodes} episodes under its belt')
    logger.info(f'Loaded menagerie containing {len(initial_menagerie)} agents')
    return task, sp_schemes, agents, initial_menagerie