def run(config, seed, device, logdir):
    """Launch distributed training: one learner, N actors, one evaluator.

    Parameters
    ----------
    config : dict
        Experiment configuration; reads ``'agent.num_actors'``.
    seed : int
        Global random seed, also forwarded to actors/evaluator.
    device : torch.device
        Device the shared agent lives on.
    logdir : Path
        Directory the learner/evaluator write logs to.

    Returns
    -------
    None
    """
    set_global_seeds(seed)
    # Bounded queue: back-pressures the actors if the learner falls behind.
    queue = mp.Queue(maxsize=100)
    env = make_env(config, seed, 'train')
    agent = Agent(config, env, device)
    # Put the agent's parameters in shared memory so the actor and evaluator
    # processes can read the learner's latest weights.
    agent.share_memory()
    runner = EpisodeRunner(reset_on_call=False)
    engine = Engine(config, agent=agent, env=env, runner=runner)
    learner_process = mp.Process(
        target=learner, args=(config, logdir, agent, engine, queue))
    actor_processes = [
        mp.Process(target=actor,
                   args=(config, seed, make_env, agent, runner, queue))
        for _ in range(config['agent.num_actors'])
    ]
    evaluator_process = mp.Process(
        target=evaluator, args=(config, logdir, seed, make_env, agent))
    learner_process.start()
    print('Learner started !')
    # Plain for-loops (not list comprehensions): start()/join() are called
    # purely for their side effects, so building a throwaway list is noise.
    for p in actor_processes:
        p.start()
    print('Actors started !')
    evaluator_process.start()
    print('Evaluator started !')
    # Join in reverse dependency order: evaluator exits first, then actors,
    # then the learner.
    evaluator_process.join()
    for p in actor_processes:
        p.join()
    learner_process.join()
    return None
def run(config, seed, device, logdir):
    """Train a DDPG or TD3 agent and pickle the train/eval logs to ``logdir``.

    The algorithm variant is selected by the ``'agent.use_td3'`` config flag.
    Returns ``None``; results are persisted as ``train_logs.pkl`` and
    ``eval_logs.pkl``.
    """
    set_global_seeds(seed)
    train_env = make_env(config, seed, 'train')
    evaluation_env = make_env(config, seed, 'eval')
    # Random agent is used by the engine for initial exploration.
    exploration_agent = RandomAgent(config, train_env, device)
    # Choose the agent class from the config flag, then instantiate once.
    agent_cls = TD3Agent if config['agent.use_td3'] else DDPGAgent
    agent = agent_cls(config, train_env, device)
    episode_runner = EpisodeRunner()
    buffer = ReplayBuffer(train_env, config['replay.capacity'], device)
    engine = Engine(config,
                    agent=agent,
                    random_agent=exploration_agent,
                    env=train_env,
                    eval_env=evaluation_env,
                    runner=episode_runner,
                    replay=buffer,
                    logdir=logdir)
    logs_train, logs_eval = engine.train()
    pickle_dump(obj=logs_train, f=logdir / 'train_logs', ext='.pkl')
    pickle_dump(obj=logs_eval, f=logdir / 'eval_logs', ext='.pkl')
    return None
def fitness(data):
    """Evaluate one flat parameter vector over 10 episodes.

    ``data`` is a ``(config, seed, device, param)`` tuple. Returns a pair
    ``(mean_return, mean_horizon)`` averaged across the rolled-out episodes.
    """
    torch.set_num_threads(1)  # VERY IMPORTANT TO AVOID GETTING STUCK
    config, seed, device, param = data
    env = make_env(config, seed, 'train')
    agent = Agent(config, env, device)
    # Load the candidate parameters into the agent (ES-style evaluation).
    agent.from_vec(tensorify(param, 'cpu'))
    runner = EpisodeRunner()
    with torch.no_grad():
        trajectories = runner(agent, env, 10)
    episode_returns = [sum(traj.rewards) for traj in trajectories]
    episode_horizons = [traj.T for traj in trajectories]
    return np.mean(episode_returns), np.mean(episode_horizons)
def test_episode_runner(env_id, N):
    """EpisodeRunner must return N finished trajectories whose timesteps are
    flagged first / mid / last in order."""
    env = TimeStepEnv(gym.make(env_id))
    agent = RandomAgent(None, env, None)
    runner = EpisodeRunner()
    D = runner(agent, env, N)
    assert len(D) == N
    # Validate each trajectory in one pass instead of separate sweeps.
    for traj in D:
        assert isinstance(traj, Trajectory)
        assert traj.finished
        assert traj[0].first()
        assert traj[-1].last()
        for step in traj[1:-1]:
            assert step.mid()
def evaluator(config, logdir, seed, make_env, learner_agent): torch.set_num_threads(1) # VERY IMPORTANT TO AVOID GETTING STUCK eval_logs = [] env = make_env(config, seed, 'train') agent = Agent(config, env, torch.device('cpu')) runner = EpisodeRunner(reset_on_call=True) evaluated_steps = config['eval.freq'] while learner_agent.total_timestep < config['train.timestep']: if learner_agent.total_timestep < evaluated_steps: time.sleep(1.0) else: t0 = time.perf_counter() agent.load_state_dict( learner_agent.state_dict()) # copy to CPU by default with torch.no_grad(): D = [] for _ in range(config['eval.num_episode']): D += runner(agent, env, env.spec.max_episode_steps) logger = Logger() logger('num_seconds', round(time.perf_counter() - t0, 1)) logger('num_trajectories', len(D)) logger('num_timesteps', sum([len(traj) for traj in D])) logger('accumulated_trained_timesteps', learner_agent.total_timestep) infos = [ info for info in chain.from_iterable([traj.infos for traj in D]) if 'episode' in info ] online_returns = [info['episode']['return'] for info in infos] online_horizons = [info['episode']['horizon'] for info in infos] logger( 'online_return', describe(online_returns, axis=-1, repr_indent=1, repr_prefix='\n')) logger( 'online_horizon', describe(online_horizons, axis=-1, repr_indent=1, repr_prefix='\n')) monitor_env = get_wrapper(env, 'VecMonitor') logger( 'running_return', describe(monitor_env.return_queue, axis=-1, repr_indent=1, repr_prefix='\n')) logger( 'running_horizon', describe(monitor_env.horizon_queue, axis=-1, repr_indent=1, repr_prefix='\n')) logger.dump(keys=None, index=0, indent=0, border=color_str('+' * 50, color='green')) eval_logs.append(logger.logs) evaluated_steps += config['eval.freq'] pickle_dump(obj=eval_logs, f=logdir / 'eval_logs', ext='.pkl')