def initialize_experiment(experiment_config, agents_config, self_play_configs):
    """Create the task, self-play training schemes and agents for an experiment.

    Args:
        experiment_config: Mapping whose ``'environment'`` entry unpacks into
            exactly two items: the environment name and a value accepted by
            ``EnvType``.
        agents_config: Configuration forwarded to ``initialize_agents``.
        self_play_configs: Configuration forwarded to
            ``initialize_training_schemes``.

    Returns:
        A ``(task, sp_schemes, agents)`` tuple.
    """
    name, raw_env_type = experiment_config['environment']
    env_type = EnvType(raw_env_type)
    task = generate_task(name, env_type)

    # The training schemes are created before the agents; both receive the task.
    training_schemes = initialize_training_schemes(self_play_configs, task)
    created_agents = initialize_agents(task, agents_config)
    return task, training_schemes, created_agents
def test_can_initialize_delta_uniform_self_plays():
    """Check that delta-uniform self-play configs produce the expected schemes.

    Two delta-uniform entries (delta 0. and delta 0.5) are parsed from YAML and
    passed to ``initialize_training_schemes`` with ``task=None``. Every
    resulting scheme must be a ``DeltaDistributionalSelfPlay`` whose ``delta``
    is one of the two configured values.
    """
    sp_config = '''
    deltauniform-fullhistory:
        delta: 0.
    deltauniform-halfhistory:
        delta: 0.5
    '''
    # safe_load: plain yaml.load() without an explicit Loader is deprecated
    # since PyYAML 5.1 and raises TypeError on PyYAML >= 6.
    sp_schemes = initialize_training_schemes(yaml.safe_load(sp_config), task=None)
    # NOTE(review): all() is vacuously true for an empty result; consider also
    # asserting the number of schemes once the return type is confirmed.
    assert all(isinstance(sp, DeltaDistributionalSelfPlay) for sp in sp_schemes)
    assert all(sp.delta == 0. or sp.delta == 0.5 for sp in sp_schemes)
def initialize_experiment(experiment_config, agents_config, self_play_configs):
    """Create the task, training schemes, agents and random seeds for an experiment.

    Args:
        experiment_config: Mapping with the entries
            ``'environment'`` (unpacks into an environment name and a value
            accepted by ``EnvType``), ``'seeds'`` (iterable of values
            convertible to ``int``) and ``'number_of_runs'``.
        agents_config: Configuration forwarded to ``initialize_agents``.
        self_play_configs: Configuration forwarded to
            ``initialize_training_schemes``.

    Returns:
        A ``(task, sp_schemes, agents, seeds)`` tuple. ``seeds`` is the
        configured list of ints, unless fewer seeds than ``number_of_runs``
        were configured; in that case the configured seeds are discarded and
        ``number_of_runs`` fresh seeds drawn from ``[0, 10000)`` are returned.
    """
    env, env_type = experiment_config['environment']
    task = generate_task(env, EnvType(env_type))
    sp_schemes = initialize_training_schemes(self_play_configs, task)
    agents = initialize_agents(task, agents_config)

    seeds = [int(seed) for seed in experiment_config['seeds']]
    number_of_runs = experiment_config['number_of_runs']
    if len(seeds) < number_of_runs:
        # Too few seeds to give every run its own: replace them all with
        # freshly drawn ones rather than reusing or padding the given list.
        print(f'Only {len(seeds)} random seed(s) configured for '
              f'{number_of_runs} run(s). Generating new seeds')
        seeds = np.random.randint(0, 10000, number_of_runs).tolist()
    return task, sp_schemes, agents, seeds
def initialize_experiment(experiment_config, agents_config, self_play_configs):
    """Create the experiment components and reload a saved menagerie if present.

    Args:
        experiment_config: Mapping with the entries ``'environment'``
            (forwarded to ``create_task_from_config``), ``'experiment_id'``
            (used as the base directory of the experiment) and
            ``'algorithms'`` (its first element is part of the menagerie path).
        agents_config: Configuration forwarded to ``initialize_agents``.
        self_play_configs: Configuration forwarded to
            ``initialize_training_schemes``.

    Returns:
        A ``(task, sp_schemes, agents, initial_menagerie)`` tuple.
        ``initial_menagerie`` is empty unless the experiment directory exists
        and contains something other than just a ``menagerie`` entry; in that
        case it is the population loaded from the menagerie path, sorted by
        ``finished_episodes``. The agent loaded from disk is only used for
        logging here and is not part of the returned ``agents``.

    Raises:
        AssertionError: If previous results exist but the menagerie path
            does not.
    """
    task = create_task_from_config(experiment_config['environment'])
    sp_schemes = initialize_training_schemes(self_play_configs, task)
    agents = initialize_agents(task, agents_config)

    experiment_dir = experiment_config['experiment_id']
    scheme_name = sp_schemes[0].name
    algorithm_name = experiment_config['algorithms'][0]
    menagerie_path = f"{experiment_dir}/menagerie/{scheme_name}-{algorithm_name}"

    # Nothing to load when the experiment directory is missing, or when it
    # holds only a menagerie (a menagerie may exist without a trained agent).
    if not os.path.exists(experiment_dir) or os.listdir(experiment_dir) == ['menagerie']:
        return task, sp_schemes, agents, []

    logger = logging.getLogger('LOADING AGENT AND MENAGERIE')
    logger.info(f"Attempting to load agent from: {experiment_dir}/")
    loaded_agent = load_existing_agent_and_update_task(experiment_dir, task)

    assert os.path.exists(menagerie_path), f'Menagerie should be present at {menagerie_path}'
    menagerie = load_population_from_path(menagerie_path, show_progress=True)
    menagerie.sort(key=lambda member: member.finished_episodes)

    logger.info(f'Loaded agent, with {loaded_agent.finished_episodes} episodes under its belt')
    logger.info(f'Loaded menagerie containing {len(menagerie)} agents')
    return task, sp_schemes, agents, menagerie