# Train script for async-GPU R2D1 on Atari, invoked by the rlpyt experiment
# launcher. Imports assume the standard rlpyt package layout.
from rlpyt.utils.launching.affinity import affinity_from_code
from rlpyt.utils.launching.variant import load_variant, update_config
from rlpyt.samplers.async_.gpu_sampler import AsyncGpuSampler
from rlpyt.samplers.async_.collectors import DbGpuResetCollector
from rlpyt.envs.atari.atari_env import AtariEnv, AtariTrajInfo
from rlpyt.algos.dqn.r2d1 import R2D1
from rlpyt.agents.dqn.atari.atari_r2d1_agent import AtariR2d1Agent
from rlpyt.runners.async_rl import AsyncRlEval
from rlpyt.utils.logging.context import logger_context
from rlpyt.experiments.configs.atari.dqn.atari_r2d1 import configs


def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    config["eval_env"]["game"] = config["env"]["game"]  # Evaluate on the training game.
    sampler = AsyncGpuSampler(
        EnvCls=AtariEnv,
        env_kwargs=config["env"],
        CollectorCls=DbGpuResetCollector,
        TrajInfoCls=AtariTrajInfo,
        eval_env_kwargs=config["eval_env"],
        **config["sampler"],
    )
    algo = R2D1(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariR2d1Agent(model_kwargs=config["model"], **config["agent"])
    runner = AsyncRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"],
    )
    name = "async_gpu_" + config["env"]["game"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
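
# Usage sketch (an addition, assuming the usual rlpyt launcher convention):
# launchers run train scripts as subprocesses and pass the arguments
# positionally on the command line, so the customary entry point is:
if __name__ == "__main__":
    import sys
    build_and_train(*sys.argv[1:])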
def build_and_train(game="TowerBuilding", run_ID=0, cuda_idx=None): # Either manually set the resources for the experiment: affinity_code = encode_affinity( n_cpu_core=2, n_gpu=1, # hyperthread_offset=8, # if auto-detect doesn't work, number of CPU cores # n_socket=1, # if auto-detect doesn't work, can force (or force to 1) run_slot=0, cpu_per_run=1, set_affinity=True, # it can help to restrict workers to individual CPUs ) affinity = affinity_from_code(affinity_code) config = configs["r2d1"] config["env"]["game"] = game config["eval_env"]["game"] = config["env"]["game"] sampler = AsyncGpuSampler(EnvCls=voxel_make, env_kwargs=config["env"], CollectorCls=DbGpuResetCollector, TrajInfoCls=AtariTrajInfo, eval_env_kwargs=config["eval_env"], **config["sampler"]) algo = R2D1(optim_kwargs=config["optim"], **config["algo"]) agent = AtariR2d1Agent(model_kwargs=config["model"], **config["agent"]) runner = AsyncRlEval(algo=algo, agent=agent, sampler=sampler, affinity=affinity, **config["runner"]) config = dict(game=game) name = "r2d1_" + game log_dir = "tower_building" with logger_context(log_dir, run_ID, name, config, snapshot_mode="last"): runner.train()

# Train script for async DDPG with a serial (single-process) sampler on gym
# continuous-control envs. The `configs` import path is an assumption based on
# rlpyt's experiment layout.
from rlpyt.utils.launching.affinity import affinity_from_code
from rlpyt.utils.launching.variant import load_variant, update_config
from rlpyt.samplers.async_.serial_sampler import AsyncSerialSampler
from rlpyt.samplers.async_.collectors import DbCpuResetCollector
from rlpyt.envs.gym import gym_make
from rlpyt.algos.qpg.ddpg import DDPG
from rlpyt.agents.qpg.ddpg_agent import DdpgAgent
from rlpyt.runners.async_rl import AsyncRlEval
from rlpyt.utils.logging.context import logger_context
from rlpyt.experiments.configs.mujoco.qpg.mujoco_ddpg import configs


def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    sampler = AsyncSerialSampler(
        EnvCls=gym_make,
        env_kwargs=config["env"],
        eval_env_kwargs=config["env"],  # Evaluate in the same env configuration.
        CollectorCls=DbCpuResetCollector,
        **config["sampler"],
    )
    algo = DDPG(optim_kwargs=config["optim"], **config["algo"])
    agent = DdpgAgent(**config["agent"])
    runner = AsyncRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"],
    )
    name = "async_ddpg_" + config["env"]["id"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
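
# Usage sketch (an addition, same launcher convention as above):
if __name__ == "__main__":
    import sys
    build_and_train(*sys.argv[1:])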
def build_and_train(game="pong", run_ID=0): # Change these inputs to match local machine and desired parallelism. affinity = make_affinity( run_slot=0, n_cpu_core=8, # Use 16 cores across all experiments. n_gpu=2, # Use 8 gpus across all experiments. gpu_per_run=1, sample_gpu_per_run=1, async_sample=True, optim_sample_share_gpu=False, # hyperthread_offset=24, # If machine has 24 cores. # n_socket=2, # Presume CPU socket affinity to lower/upper half GPUs. # gpu_per_run=2, # How many GPUs to parallelize one run across. # cpu_per_run=1, ) sampler = AsyncGpuSampler( EnvCls=AtariEnv, TrajInfoCls=AtariTrajInfo, env_kwargs=dict(game=game), batch_T=5, batch_B=36, max_decorrelation_steps=100, eval_env_kwargs=dict(game=game), eval_n_envs=2, eval_max_steps=int(10e3), eval_max_trajectories=4, ) algo = DQN( replay_ratio=8, min_steps_learn=1e4, replay_size=int(1e5) ) agent = AtariDqnAgent() runner = AsyncRlEval( algo=algo, agent=agent, sampler=sampler, n_steps=2e6, log_interval_steps=1e4, affinity=affinity, ) config = dict(game=game) name = "async_dqn_" + game log_dir = "async_dqn" with logger_context(log_dir, run_ID, name, config): runner.train()
def build_and_train(game="aaai_multi", run_ID=0): # Change these inputs to match local machine and desired parallelism. affinity = make_affinity( run_slot=0, n_cpu_core=8, # Use 16 cores across all experiments. n_gpu=1, # Use 8 gpus across all experiments. sample_gpu_per_run=1, async_sample=True, optim_sample_share_gpu=True # hyperthread_offset=24, # If machine has 24 cores. # n_socket=2, # Presume CPU socket affinity to lower/upper half GPUs. # gpu_per_run=2, # How many GPUs to parallelize one run across. # cpu_per_run=1, ) train_conf = PytConfig([ Path(JSONS_FOLDER, 'configs', '2v2', 'all_equal.json'), Path(JSONS_FOLDER, 'configs', '2v2', 'more_horizontally.json'), Path(JSONS_FOLDER, 'configs', '2v2', 'more_vertically.json'), Path(JSONS_FOLDER, 'configs', '2v2', 'more_from_west.json'), Path(JSONS_FOLDER, 'configs', '2v2', 'more_from_east.json'), Path(JSONS_FOLDER, 'configs', '2v2', 'more_from_north.json'), Path(JSONS_FOLDER, 'configs', '2v2', 'more_from_south.json'), ]) eval_conf = PytConfig({ 'all_equal': Path(JSONS_FOLDER, 'configs', '2v2', 'all_equal.json'), 'more_horizontally': Path(JSONS_FOLDER, 'configs', '2v2', 'more_horizontally.json'), 'more_vertically': Path(JSONS_FOLDER, 'configs', '2v2', 'more_vertically.json'), 'more_south': Path(JSONS_FOLDER, 'configs', '2v2', 'more_from_south.json'), 'more_east': Path(JSONS_FOLDER, 'configs', '2v2', 'more_from_east.json') }) sampler = AsyncGpuSampler( EnvCls=Rlpyt_env, TrajInfoCls=AaaiTrajInfo, env_kwargs={ 'pyt_conf': train_conf, 'max_steps': 3000 }, batch_T=8, batch_B=8, max_decorrelation_steps=100, eval_env_kwargs={ 'pyt_conf': eval_conf, 'max_steps': 3000 }, eval_max_steps=24100, eval_n_envs=2, ) algo = DQN( replay_ratio=1024, double_dqn=True, prioritized_replay=True, min_steps_learn=5000, learning_rate=0.0001, target_update_tau=1.0, target_update_interval=1000, eps_steps=5e4, batch_size=512, pri_alpha=0.6, pri_beta_init=0.4, pri_beta_final=1., pri_beta_steps=int(7e4), replay_size=int(1e6), clip_grad_norm=1.0, updates_per_sync=6 ) agent = DqnAgent(ModelCls=Frap) runner = AsyncRlEval( algo=algo, agent=agent, sampler=sampler, log_interval_steps=1000, affinity=affinity, n_steps=6e5 ) config = dict(game=game) name = "frap_" + game log_dir = Path(PROJECT_ROOT, "saved", "rlpyt", "multi", "frap") save_path = Path(log_dir, 'run_{}'.format(run_ID)) for f in save_path.glob('**/*'): print(f) f.unlink() with logger_context(str(log_dir), run_ID, name, config, snapshot_mode='last', use_summary_writer=True, override_prefix=True): runner.train()