def test_run_experiment():
    """End-to-end check of the worker/master run_experiment API: launch a small
    experiment, verify the on-disk logging layout, then remove it."""
    run_experiment(worker_class=ExperimentWorker,
                   master_class=ExperimentMaster,
                   num_worker=10)
    p = Path('./some path')
    assert p.exists()
    assert (p / 'configs.pkl').exists()
    # Check all configuration folders with their IDs and subfolders for all random seeds
    # NOTE(review): 18 config IDs is assumed to match ExperimentMaster's grid — confirm.
    for i in range(18):
        config_p = p / str(i)
        assert config_p.exists()
        for seed in [123, 345, 567, 789, 901]:
            # BUG FIX: original asserted the bound method `.exists` (always
            # truthy); it must be CALLED for the check to mean anything.
            assert (config_p / str(seed)).exists()
    # Clean the logging directory
    rmtree(p)
    # Test remove
    assert not p.exists()
def test_run_experiment(num_sample, max_workers, chunksize):
    """End-to-end check of run_experiment: run a dummy experiment over a small
    config grid and verify the logging-directory layout, then clean up."""
    def run(config, seed, device, logdir):
        # Dummy experiment body: just echoes its arguments back.
        return config['ID'], seed, device, logdir

    config = Config(
        {'network.lr': Grid([1e-3, 5e-3]),
         'network.size': [32, 16],
         'env.id': Grid(['CartPole-v1', 'Ant-v2'])},
        num_sample=num_sample,
        keep_dict_order=True)
    seeds = [1, 2, 3]
    log_dir = './some_path'
    run_experiment(run, config, seeds, log_dir, max_workers, chunksize,
                   use_gpu=False, gpu_ids=None)

    root = Path('./some_path')
    assert root.exists()
    assert (root / 'configs.pkl').exists()
    source_dir = root / 'source_files'
    assert source_dir.exists() and source_dir.is_dir()
    # Two Grid axes (2 lr values x 2 env ids) yield 4 config IDs; each ID
    # folder carries its config.yml plus one subfolder per random seed.
    for config_id in range(4):
        config_dir = root / str(config_id)
        assert config_dir.exists()
        assert (config_dir / 'config.yml').exists()
        assert all((config_dir / str(seed)).exists() for seed in seeds)
    # Clean the logging directory and verify removal.
    rmtree(root)
    assert not root.exists()
# NOTE(review): fragment — these statements sit inside an (unseen) per-generation
# loop of a CMA-ES training function; indentation reconstructed from context.
solutions = es.ask()  # sample a population of parameter vectors
# Evaluate fitness for all candidates in parallel
out = list(executor.map(fitness, solutions, chunksize=4))
Rs, Hs = zip(*out)  # per-candidate returns and horizons
# CMA-ES minimizes, so feed back negated returns
es.tell(solutions, [-R for R in Rs])
logger = Logger()
logger('generation', generation+1)
logger('num_seconds', round(time.perf_counter() - start_time, 1))
logger('Returns', describe(Rs, axis=-1, repr_indent=1, repr_prefix='\n'))
logger('Horizons', describe(Hs, axis=-1, repr_indent=1, repr_prefix='\n'))
logger('fbest', es.result.fbest)
train_logs.append(logger.logs)
if generation == 0 or (generation+1)%config['log.freq'] == 0:
    logger.dump(keys=None, index=0, indent=0, border='-'*50)
# Evenly spaced checkpoints: save best-so-far parameters once the elapsed
# fraction of generations crosses checkpoint_count/(checkpoint.num - 1)
if (generation+1) >= int(config['train.generations']*(checkpoint_count/(config['checkpoint.num'] - 1))):
    agent.from_vec(tensorify(es.result.xbest, 'cpu'))
    agent.checkpoint(logdir, generation+1)
    checkpoint_count += 1
# presumably dedented to after the generation loop — confirm against the original file
pickle_dump(obj=train_logs, f=logdir/'train_logs', ext='.pkl')
return None

if __name__ == '__main__':
    run_experiment(run=run,
                   config=config,
                   seeds=[1770966829, 1500925526, 2054191100],
                   log_dir='logs/default',
                   max_workers=None,  # no parallelization
                   chunksize=1,
                   use_gpu=False,
                   gpu_ids=None)
# NOTE(review): fragment — the enclosing `def run(config, seed, device, logdir)`
# begins before this chunk; indentation reconstructed from context.
runner = StepRunner(reset_on_call=False)
engine = Engine(config, agent=agent, env=env, runner=runner)
train_logs = []
checkpoint_count = 0
for i in count():  # iterate until the timestep budget is exhausted
    if agent.total_timestep >= config['train.timestep']:
        break
    train_logger = engine.train(i)
    train_logs.append(train_logger.logs)
    if i == 0 or (i + 1) % config['log.freq'] == 0:
        train_logger.dump(keys=None, index=0, indent=0, border='-' * 50)
    # Evenly spaced checkpoints over the timestep budget
    if agent.total_timestep >= int(config['train.timestep'] * (checkpoint_count / (config['checkpoint.num'] - 1))):
        agent.checkpoint(logdir, i + 1)
        checkpoint_count += 1
pickle_dump(obj=train_logs, f=logdir / 'train_logs', ext='.pkl')
return None

if __name__ == '__main__':
    run_experiment(
        run=run,
        config=config,
        seeds=[1770966829, 1500925526, 2054191100],
        log_dir='logs/default',
        max_workers=os.cpu_count(),
        chunksize=1,
        use_gpu=False,  # CPU a bit faster
        gpu_ids=None)
def run(config, seed, device, logdir):
    """Single (config, seed) experiment: build envs, agent, replay and engine,
    train, then dump the training/evaluation logs under `logdir`."""
    set_global_seeds(seed)

    # Training env gets per-step info wrapping; eval env only episode monitoring.
    env = VecStepInfo(VecMonitor(make_env(config, seed)))
    eval_env = VecMonitor(make_env(config, seed))

    agent = Agent(config, env, device)
    replay = ReplayBuffer(env, config['replay.capacity'], device)
    engine = Engine(config, agent=agent, env=env, eval_env=eval_env,
                    replay=replay, logdir=logdir)

    train_logs, eval_logs = engine.train()
    for fname, logs in (('train_logs', train_logs), ('eval_logs', eval_logs)):
        pickle_dump(obj=logs, f=logdir/fname, ext='.pkl')
    return None


if __name__ == '__main__':
    experiment_seeds = [4153361530, 3503522377, 2876994566,
                        172236777, 3949341511, 849059707]
    run_experiment(run=run,
                   config=config,
                   seeds=experiment_seeds,
                   log_dir='logs/default',
                   max_workers=os.cpu_count(),
                   chunksize=1,
                   use_gpu=True,  # GPU much faster; note performance differs between CPU/GPU
                   gpu_ids=None)
# NOTE(review): fragment — the enclosing `def run(...)` begins before this chunk;
# indentation reconstructed from context.
set_global_seeds(seed)
logdir = Path(config['log.dir']) / str(config['ID']) / str(seed)
train_loader, test_loader = make_dataset(config)
# NOTE(review): no `else` branch — `model` is unbound (NameError on the next
# line) if config['nn.type'] is neither 'VAE' nor 'ConvVAE'; confirm the
# configurator only emits these two values.
if config['nn.type'] == 'VAE':
    model = VAE(config, device)
elif config['nn.type'] == 'ConvVAE':
    model = ConvVAE(config, device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
engine = Engine(config, model=model, optimizer=optimizer,
                train_loader=train_loader, test_loader=test_loader)
train_logs = []
eval_logs = []
for epoch in range(config['train.num_epoch']):
    train_logger = engine.train(epoch, logdir=logdir)
    train_logs.append(train_logger.logs)
    eval_logger = engine.eval(epoch, logdir=logdir)
    eval_logs.append(eval_logger.logs)
pickle_dump(obj=train_logs, f=logdir / 'train_logs', ext='.pkl')
pickle_dump(obj=eval_logs, f=logdir / 'eval_logs', ext='.pkl')
return None

if __name__ == '__main__':
    # NOTE(review): `num_worker` is the older lagom launcher API; sibling
    # scripts here use log_dir/max_workers/chunksize — verify this matches the
    # installed lagom version.
    run_experiment(run=run, config=config, seeds=[1770966829], num_worker=100)
# NOTE(review): fragment — the enclosing `def run(...)` begins before this chunk;
# indentation reconstructed from context.
env = make_env(config, seed)
env = VecMonitor(env)
if config['env.standardize_obs']:
    env = VecStandardizeObservation(env, clip=5.)
if config['env.standardize_reward']:
    env = VecStandardizeReward(env, clip=10., gamma=config['agent.gamma'])
agent = Agent(config, env, device)
runner = EpisodeRunner(reset_on_call=False)
engine = Engine(config, agent=agent, env=env, runner=runner)
train_logs = []
for i in count():  # iterate until the timestep budget is exhausted
    if agent.total_timestep >= config['train.timestep']:
        break
    train_logger = engine.train(i)
    train_logs.append(train_logger.logs)
    if i == 0 or (i+1) % config['log.freq'] == 0:
        train_logger.dump(keys=None, index=0, indent=0, border='-'*50)
    if i == 0 or (i+1) % config['checkpoint.freq'] == 0:
        agent.checkpoint(logdir, i + 1)
# NOTE(review): the source had `agent.checkpoint(logdir, i + 1)` twice in a
# row with original indentation lost; reconstructed as a final post-loop
# checkpoint (common pattern) — confirm against the original file.
agent.checkpoint(logdir, i + 1)
pickle_dump(obj=train_logs, f=logdir/'train_logs', ext='.pkl')
return None

if __name__ == '__main__':
    # NOTE(review): older `num_worker` launcher API — verify lagom version.
    run_experiment(run=run,
                   config=config,
                   seeds=[1770966829, 1500925526, 2054191100],
                   num_worker=os.cpu_count())
# NOTE(review): fragment — the enclosing `def run(...)` begins before this chunk;
# indentation reconstructed from context.
env = make_env(config, seed)
env = VecMonitor(env)
eval_env = make_env(config, seed)
eval_env = VecMonitor(eval_env)
agent = Agent(config, env, device)
replay = ReplayBuffer(env, config['replay.capacity'], device)
engine = Engine(config, agent=agent, env=env, eval_env=eval_env,
                replay=replay, logdir=logdir)
train_logs, eval_logs = engine.train()
pickle_dump(obj=train_logs, f=logdir / 'train_logs', ext='.pkl')
pickle_dump(obj=eval_logs, f=logdir / 'eval_logs', ext='.pkl')
return None

if __name__ == '__main__':
    # NOTE(review): older `num_worker` launcher API — verify lagom version.
    run_experiment(run=run,
                   config=config,
                   seeds=[4153361530, 3503522377, 2876994566,
                          172236777, 3949341511, 849059707],
                   num_worker=os.cpu_count())
# NOTE(review): fragment — begins INSIDE a `configurator.fixed(...)` call whose
# opening line (presumably `configurator.fixed('train.N',`) is outside this
# chunk, and the surrounding class definition is also not visible.
2)  # number of trajectories per training iteration
configurator.fixed('train.ratio_T', 1.0)  # percentage of max allowed horizon
configurator.fixed('eval.independent', False)
configurator.fixed('eval.N', 10)  # number of episodes to evaluate, do not specify T for complete episode
configurator.fixed('train.batch_size', 256)
configurator.fixed('train.num_epochs', 80)
configurator.fixed('log.interval', 10)  # logging interval
configurator.fixed('log.dir', 'logs/default')  # logging directory
list_config = configurator.make_configs()
return list_config

def make_seeds(self):
    # Fixed seeds so every configuration is run reproducibly.
    list_seed = [1770966829, 1500925526, 2054191100]
    return list_seed

def process_results(self, results):
    # Workers return None; sanity-check nothing unexpected came back.
    assert all([result is None for result in results])

if __name__ == '__main__':
    run_experiment(worker_class=ExperimentWorker,
                   master_class=ExperimentMaster,
                   num_worker=100)
from lagom.experiment import run_experiment

from experiment import ExperimentWorker
from experiment import ExperimentMaster


# IMPROVEMENT: guard the launch so merely importing this module does not kick
# off the experiment (standard script-entry idiom; also required for safe use
# with multiprocessing start methods that re-import the main module).
if __name__ == '__main__':
    run_experiment(worker_class=ExperimentWorker,
                   master_class=ExperimentMaster,
                   num_worker=2,
                   daemonic_worker=None)
# NOTE(review): fragment — the enclosing `def run(...)` (and the training-env
# construction) begins before this chunk; indentation reconstructed from context.
eval_env = make_env(config, seed)
eval_env = VecMonitor(eval_env)
agent = Agent(config, env, device)
replay = ReplayBuffer(env, config['replay.capacity'], device)
engine = Engine(config, agent=agent, env=env, eval_env=eval_env,
                replay=replay, logdir=logdir)
train_logs, eval_logs = engine.train()
pickle_dump(obj=train_logs, f=logdir / 'train_logs', ext='.pkl')
pickle_dump(obj=eval_logs, f=logdir / 'eval_logs', ext='.pkl')
return None

if __name__ == '__main__':
    run_experiment(run=run,
                   config=config,
                   seeds=[4153361530, 3503522377, 2876994566,
                          172236777, 3949341511, 849059707],
                   log_dir='logs/default',
                   max_workers=os.cpu_count(),
                   chunksize=1,
                   use_gpu=True,
                   gpu_ids=None)
# NOTE(review): fragment — the enclosing `def run(...)` begins before this chunk;
# the first line is presumably the body of an unseen `elif` branch selecting
# the model type. Indentation reconstructed from context.
model = ConvVAE(config, device)
optimizer = optim.Adam(model.parameters(), lr=config['lr'])
engine = Engine(config, model=model, optimizer=optimizer,
                train_loader=train_loader, test_loader=test_loader)
train_logs = []
eval_logs = []
for epoch in range(config['train.num_epoch']):
    train_logger = engine.train(epoch, logdir=logdir)
    train_logs.append(train_logger.logs)
    eval_logger = engine.eval(epoch, logdir=logdir)
    eval_logs.append(eval_logger.logs)
pickle_dump(obj=train_logs, f=logdir/'train_logs', ext='.pkl')
pickle_dump(obj=eval_logs, f=logdir/'eval_logs', ext='.pkl')
return None

if __name__ == '__main__':
    run_experiment(run=run,
                   config=config,
                   seeds=[1770966829],
                   log_dir='logs/default',
                   max_workers=os.cpu_count(),
                   chunksize=1,
                   use_gpu=True,  # GPU much faster
                   gpu_ids=None)
# NOTE(review): fragment — these statements sit inside an (unseen) per-generation
# loop of a CMA-ES training function using a multiprocessing pool; indentation
# reconstructed from context.
data = [(config, seed, device, solution) for solution in solutions]
# CloudpickleWrapper lets the (otherwise unpicklable) fitness closure cross
# process boundaries — presumably; confirm against the wrapper's definition.
out = pool.map(CloudpickleWrapper(fitness), data,
               chunksize=config['train.worker_chunksize'])
Rs, Hs = zip(*out)  # per-candidate returns and horizons
# CMA-ES minimizes, so feed back negated returns
es.tell(solutions, [-R for R in Rs])
logger = Logger()
logger('generation', generation+1)
logger('num_seconds', round(time.perf_counter() - t0, 1))
logger('Returns', describe(Rs, axis=-1, repr_indent=1, repr_prefix='\n'))
logger('Horizons', describe(Hs, axis=-1, repr_indent=1, repr_prefix='\n'))
logger('fbest', es.result.fbest)
train_logs.append(logger.logs)
if generation == 0 or (generation+1) % config['log.freq'] == 0:
    logger.dump(keys=None, index=0, indent=0, border='-'*50)
# Evenly spaced checkpoints of the best-so-far parameters
if (generation+1) >= int(config['train.generations']*(checkpoint_count/(config['checkpoint.num'] - 1))):
    agent.from_vec(tensorify(es.result.xbest, 'cpu'))
    agent.checkpoint(logdir, generation+1)
    checkpoint_count += 1
# presumably dedented to after the generation loop — confirm against the original file
pickle_dump(obj=train_logs, f=logdir/'train_logs', ext='.pkl')
return None

if __name__ == '__main__':
    run_experiment(run=run,
                   config=config,
                   seeds=[1770966829, 1500925526, 2054191100],
                   log_dir='logs/default',
                   max_workers=7,  # tune to fulfill computation power
                   chunksize=1,
                   use_gpu=False,
                   gpu_ids=None)
# NOTE(review): fragment — begins INSIDE a list comprehension (presumably
# `actor_processes = [mp.Process(target=actor, ...`) whose opening is outside
# this chunk; the enclosing `def run(...)` is also not visible.
        args=(config, seed, make_env, agent, runner, queue))
    for _ in range(config['agent.num_actors'])
]
evaluator_process = mp.Process(target=evaluator,
                               args=(config, logdir, seed, make_env, agent))
# Start learner first, then actors, then the evaluator
learner_process.start()
print('Learner started !')
[p.start() for p in actor_processes]
print('Actors started !')
evaluator_process.start()
print('Evaluator started !')
# Join in reverse-dependency order: evaluator, actors, learner
evaluator_process.join()
[p.join() for p in actor_processes]
learner_process.join()
return None

if __name__ == '__main__':
    mp.set_start_method('spawn')  # IMPORTANT for agent.share_memory()
    torch.set_num_threads(1)  # VERY IMPORTANT TO AVOID GETTING STUCK
    run_experiment(
        run=run,
        config=config,
        seeds=[1770966829],  ###[1770966829, 1500925526, 2054191100],
        log_dir='logs/default',
        max_workers=None,  ########os.cpu_count(),
        chunksize=1,
        use_gpu=True,  # IMPALA benefits from GPU
        gpu_ids=None)