def build_and_train(game="pong", run_ID=0, cuda_idx=None): sampler = SerialSampler( EnvCls=AtariEnv, env_kwargs=dict(game=game), eval_env_kwargs=dict(game=game), batch_T= 4, # Four time-steps per sampler iteration. 在collector中采样数据的时候每个循环走多少个step batch_B=1, # 有多少个并行的environment实例 max_decorrelation_steps=0, eval_n_envs=10, eval_max_steps=int(10e3), eval_max_trajectories=5, ) algo = DQN(min_steps_learn=1e3) # Run with defaults. agent = AtariDqnAgent() # 在sampler中initialize runner = MinibatchRlEval( algo=algo, agent=agent, sampler=sampler, n_steps=50e6, # 总共多少个step log_interval_steps=1e3, # 每多少个step记录一次日志 affinity=dict(cuda_idx=cuda_idx), ) config = dict(game=game) name = "dqn_" + game log_dir = "example_1" with logger_context(log_dir, run_ID, name, config, snapshot_mode="last"): runner.train()
def build_and_train(game="pong", run_ID=0, cuda_idx=None): sampler = SerialSampler( EnvCls=AtariEnv, env_kwargs=dict(game=game), CollectorCls=ResetCollector, eval_env_kwargs=dict(game=game), batch_T=4, # Four time-steps per sampler iteration. batch_B=1, max_decorrelation_steps=0, eval_n_envs=10, eval_max_steps=int(10e3), eval_max_trajectories=5, ) algo = DQN(min_steps_learn=1e3) # Run with defaults. agent = AtariDqnAgent() runner = MinibatchRlEval( algo=algo, agent=agent, sampler=sampler, n_steps=50e6, log_interval_steps=1e3, affinity=dict(cuda_idx=cuda_idx), ) config = dict(game=game) name = "dqn_" + game log_dir = "example_1" with logger_context(log_dir, run_ID, name, config): runner.train()
def test_rlpyt_simple():
    """Partially copied from example 1."""
    game = "pong"
    run_ID = 0
    cuda_idx = None
    n_steps = 1
    sampler = SerialSampler(
        EnvCls=AtariEnv,
        TrajInfoCls=AtariTrajInfo,  # default traj info + GameScore
        env_kwargs=dict(game=game),
        eval_env_kwargs=dict(game=game),
        batch_T=4,  # Four time-steps per sampler iteration.
        batch_B=1,
        max_decorrelation_steps=0,
        eval_n_envs=10,
        eval_max_steps=int(10e3),
        eval_max_trajectories=5,
    )
    algo = DQN(min_steps_learn=1e3, replay_size=int(1e3))  # Small replay buffer to avoid memory issues.
    agent = AtariDqnAgent()
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        n_steps=n_steps,
        log_interval_steps=1e3,
        affinity=dict(cuda_idx=cuda_idx),
    )
    config = dict(game=game)
    name = "dqn_" + game
    log_dir = "test_example_1"
    with logger_context(log_dir, run_ID, name, config, snapshot_mode="last"):
        runner.train()
def build_and_train(game="academy_empty_goal_close", run_ID=0, cuda_idx=None): sampler = SerialSampler( EnvCls=create_single_football_env, env_kwargs=dict(game=game), eval_env_kwargs=dict(game=game), batch_T=4, # Four time-steps per sampler iteration. batch_B=1, max_decorrelation_steps=0, eval_n_envs=10, eval_max_steps=int(10e3), eval_max_trajectories=5, ) algo = DQN(min_steps_learn=1e3) # Run with defaults. agent = AtariDqnAgent() runner = MinibatchRlEval( algo=algo, agent=agent, sampler=sampler, n_steps=50e6, log_interval_steps=1e3, affinity=dict(cuda_idx=cuda_idx), ) config = dict(game=game) name = "dqn_" + game log_dir = "example_1" with logger_context(log_dir, run_ID, name, config, snapshot_mode="last"): runner.train()
def build_and_train(game="pong", run_ID=0, cuda_idx=None): sampler = SerialSampler( EnvCls=AtariEnv, TrajInfoCls=AtariTrajInfo, # default traj info + GameScore env_kwargs=dict(game=game), eval_env_kwargs=dict(game=game), batch_T=4, # Four time-steps per sampler iteration. batch_B=1, max_decorrelation_steps=0, eval_n_envs=10, eval_max_steps=int(10e3), eval_max_trajectories=5, ) algo = DQN(min_steps_learn=1e3) # Run with defaults. agent = AtariDqnAgent() runner = MinibatchRlEval( algo=algo, agent=agent, sampler=sampler, n_steps=50e6, log_interval_steps=1e3, affinity=dict(cuda_idx=cuda_idx), ) config = dict(game=game) name = "dqn_" + game #log_dir = "example_1" log_dir = get_outputs_path() with logger_context(log_dir, run_ID, name, config, snapshot_mode="last"): runner.train()
def build_and_train(game="pong", run_ID=0, cuda_idx=None, n_parallel=2): config = dict( env=dict(game=game), algo=dict(batch_size=128), sampler=dict(batch_T=2, batch_B=32), ) sampler = GpuSampler( EnvCls=AtariEnv, env_kwargs=dict(game=game), CollectorCls=GpuWaitResetCollector, eval_env_kwargs=dict(game=game), max_decorrelation_steps=0, eval_n_envs=10, eval_max_steps=int(10e3), eval_max_trajectories=5, # batch_T=4, # Get from config. # batch_B=1, **config[ "sampler"] # More parallel environments for batched forward-pass. ) algo = DQN(**config["algo"]) # Run with defaults. agent = AtariDqnAgent() runner = MinibatchRlEval( algo=algo, agent=agent, sampler=sampler, n_steps=50e6, log_interval_steps=1e3, affinity=dict(cuda_idx=cuda_idx, workers_cpus=list(range(n_parallel))), ) name = "dqn_" + game log_dir = "example_5" with logger_context(log_dir, run_ID, name, config): runner.train()
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    config["eval_env"]["game"] = config["env"]["game"]
    sampler = GpuSampler(
        EnvCls=AtariEnv,
        env_kwargs=config["env"],
        CollectorCls=WaitResetCollector,
        TrajInfoCls=AtariTrajInfo,
        eval_env_kwargs=config["eval_env"],
        **config["sampler"]
    )
    algo = DQN(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariDqnAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = config["env"]["game"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
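# This signature follows rlpyt's experiment-launcher convention: the launcher
# invokes the train script with (slot_affinity_code, log_dir, run_ID,
# config_key) as positional command-line arguments. A minimal sketch of the
# matching imports and entry point (configs comes from the experiment's own
# configs module):
import sys

from rlpyt.utils.launching.affinity import affinity_from_code
from rlpyt.utils.launching.variant import load_variant, update_config


if __name__ == "__main__":
    build_and_train(*sys.argv[1:])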
def build_and_train(cfg, game="ftwc", run_ID=0): #GVS NOTE: for ftwc/qait ?use CpuWaitResetCollector (or CpuResetCollector) sampler = SerialSampler( EnvCls=AtariEnv, TrajInfoCls=AtariTrajInfo, # default traj info + GameScore env_kwargs=dict(game=game), eval_env_kwargs=dict(game=game), batch_T=4, # Four time-steps per sampler iteration. batch_B=1, max_decorrelation_steps=0, eval_n_envs=10, eval_max_steps=int(10e2), eval_max_trajectories=5, ) algo = DQN(min_steps_learn=1e2) # Run with defaults. agent = AtariDqnAgent() runner = MinibatchRlEval( algo=algo, agent=agent, sampler=sampler, n_steps=50e6, log_interval_steps=1e3, affinity=dict(cuda_idx=cfg.cuda_idx), ) config = dict(game=game) name = "dqn_" + game log_dir = "ftwc" with logger_context(log_dir, run_ID, name, config, snapshot_mode="last"): runner.train()
def build_and_train(level="nav_maze_random_goal_01", run_ID=0, cuda_idx=None): sampler = SerialSampler( EnvCls=DeepmindLabEnv, env_kwargs=dict(level=level), eval_env_kwargs=dict(level=level), batch_T=4, # Four time-steps per sampler iteration. batch_B=1, max_decorrelation_steps=0, eval_n_envs=5, eval_max_steps=int(10e3), eval_max_trajectories=5, ) algo = DQN(min_steps_learn=1e3) # Run with defaults. agent = AtariDqnAgent() runner = MinibatchRlEval( algo=algo, agent=agent, sampler=sampler, n_steps=50e6, log_interval_steps=1e5, affinity=dict(cuda_idx=cuda_idx), ) config = dict(level=level) name = "lab_dqn" log_dir = "lab_example_1" with logger_context(log_dir, run_ID, name, config, snapshot_mode="last"): runner.train()
def build_and_train(run_ID=0, cuda_idx=None, n_parallel=2, serial_sampling=False):
    affinity = dict(cuda_idx=cuda_idx, workers_cpus=list(range(n_parallel)))
    device = "cpu" if cuda_idx is None else f"gpu {cuda_idx}"
    if serial_sampling:
        Sampler = SerialSampler  # ignores workers_cpus
        print(f"Using serial sampler w/ {device} for action sampling and optimization")
    else:
        Sampler = CpuSampler if cuda_idx is None else GpuSampler
        print(f"Using parallel sampler w/ {device} for action sampling and optimization")
    game = "pong"
    sampler = Sampler(
        EnvCls=AtariEnv,
        TrajInfoCls=AtariTrajInfo,  # default traj info + GameScore
        env_kwargs=dict(game=game),
        eval_env_kwargs=dict(game=game),
        batch_T=1,
        batch_B=8,  # number of games running in parallel
        max_decorrelation_steps=0,
    )
    # Load target observations along with respective thresholds and target actions.
    target_obs = []
    target_info = {}
    for idx, (tpath, tthresh, ttarg) in enumerate(TARGET_META):
        with open(tpath, "rb") as f:
            tob = pickle.load(f)
        target_obs.append(tob)
        target_info[idx] = (tthresh, ttarg)
    target_obs = np.asarray(target_obs).transpose(0, 3, 1, 2)  # N, H, W, C --> N, C, H, W
    # Adversary algorithm (subsumes agent DQN algorithm).
    algo = FixedAttackerDQN(
        target_obs,
        target_info,
        contrast_sd_path,
        dqn_oracle_sd_path,
        delta_bound=1.0,
        first_poison_itr=1,
        min_steps_learn=1e3,
    )
    agent = AtariDqnAgent()
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        n_steps=50e6,
        log_interval_steps=1e3,
        affinity=affinity,
    )
    config = dict(game=game)
    name = "rp_fixed_attack_dqn_" + game
    log_dir = "rp_fixed_attack"
    with logger_context(log_dir, run_ID, name, config, snapshot_mode="last"):
        runner.train()
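# TARGET_META is defined elsewhere; from the unpacking above it is an iterable
# of (pickle_path, threshold, target_action) triples, one per pickled target
# observation. A purely hypothetical illustration of the assumed shape (paths
# and values invented):
TARGET_META = [
    ("targets/obs_0.pkl", 0.95, 3),  # hypothetical: pickle path, threshold, action index
    ("targets/obs_1.pkl", 0.90, 1),
]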
def build_and_train(game="pong", run_ID=0): # Change these inputs to match local machine and desired parallelism. affinity = make_affinity( run_slot=0, n_cpu_core=8, # Use 16 cores across all experiments. n_gpu=2, # Use 8 gpus across all experiments. gpu_per_run=1, sample_gpu_per_run=1, async_sample=True, optim_sample_share_gpu=False, # hyperthread_offset=24, # If machine has 24 cores. # n_socket=2, # Presume CPU socket affinity to lower/upper half GPUs. # gpu_per_run=2, # How many GPUs to parallelize one run across. # cpu_per_run=1, ) sampler = AsyncGpuSampler( EnvCls=AtariEnv, TrajInfoCls=AtariTrajInfo, env_kwargs=dict(game=game), batch_T=5, batch_B=36, max_decorrelation_steps=100, eval_env_kwargs=dict(game=game), eval_n_envs=2, eval_max_steps=int(10e3), eval_max_trajectories=4, ) algo = DQN( replay_ratio=8, min_steps_learn=1e4, replay_size=int(1e5) ) agent = AtariDqnAgent() runner = AsyncRlEval( algo=algo, agent=agent, sampler=sampler, n_steps=2e6, log_interval_steps=1e4, affinity=affinity, ) config = dict(game=game) name = "async_dqn_" + game log_dir = "async_dqn" with logger_context(log_dir, run_ID, name, config): runner.train()
def build_and_train(run_ID=0, cuda_idx=None, n_parallel=2, serial_sampling=False):
    affinity = dict(cuda_idx=cuda_idx, workers_cpus=list(range(n_parallel)))
    device = "CPU" if cuda_idx is None else f"GPU {cuda_idx}"
    if serial_sampling:
        Sampler = SerialSampler  # ignores workers_cpus
        print(f"Using serial sampler w/ {device} for action sampling and optimization")
    else:
        Sampler = CpuSampler if cuda_idx is None else GpuSampler
        print(f"Using parallel sampler w/ {device} for action sampling and optimization")
    game = "pong"
    sampler = Sampler(
        EnvCls=AtariEnv,
        TrajInfoCls=AtariTrajInfo,  # default traj info + GameScore
        env_kwargs=dict(game=game),
        eval_env_kwargs=dict(game=game),
        batch_T=1,
        batch_B=8,  # number of games running in parallel
        max_decorrelation_steps=0,
    )
    ### ALGO GOES HERE
    algo = None
    agent = AtariDqnAgent()
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        n_steps=50e6,
        log_interval_steps=1e3,
        affinity=affinity,
    )
    config = dict(game=game)
    name = "rp_attack_dqn_" + game
    log_dir = "rp_attack"
    with logger_context(log_dir, run_ID, name, config, snapshot_mode="last"):
        runner.train()
def build_and_train(id="SurfaceCode-v0", name='run', log_dir='./logs'): # Change these inputs to match local machine and desired parallelism. # affinity = make_affinity( # n_cpu_core=24, # Use 16 cores across all experiments. # n_gpu=1, # Use 8 gpus across all experiments. # async_sample=True, # set_affinity=True # ) # affinity['optimizer'][0]['cuda_idx'] = 1 num_cpus = multiprocessing.cpu_count() affinity = make_affinity(n_cpu_core=num_cpus//2, cpu_per_run=num_cpus//2, n_gpu=0, async_sample=False, set_affinity=True) affinity['workers_cpus'] = tuple(range(num_cpus)) affinity['master_torch_threads'] = 28 # env_kwargs = dict(id='SurfaceCode-v0', error_model='X', volume_depth=5) state_dict = None # torch.load('./logs/run_29/params.pkl', map_location='cpu') agent_state_dict = None #state_dict['agent_state_dict']['model'] optim_state_dict = None #state_dict['optimizer_state_dict'] # sampler = AsyncCpuSampler( sampler = CpuSampler( # sampler=SerialSampler( EnvCls=make_qec_env, # TrajInfoCls=AtariTrajInfo, env_kwargs=dict(error_rate=0.005, error_model='DP'), batch_T=10, batch_B=num_cpus * 10, max_decorrelation_steps=100, eval_env_kwargs=dict(error_rate=0.005, error_model='DP', fixed_episode_length=5000), eval_n_envs=num_cpus, eval_max_steps=int(1e6), eval_max_trajectories=num_cpus, TrajInfoCls=EnvInfoTrajInfo ) algo = DQN( replay_ratio=8, learning_rate=1e-5, min_steps_learn=1e4, replay_size=int(5e4), batch_size=32, double_dqn=True, # target_update_tau=0.002, target_update_interval=5000, ReplayBufferCls=UniformReplayBuffer, initial_optim_state_dict=optim_state_dict, eps_steps=2e6, ) agent = AtariDqnAgent(model_kwargs=dict(channels=[32, 64, 64], kernel_sizes=[3, 2, 2], strides=[2, 1, 1], paddings=[0, 0, 0], fc_sizes=[512, ], dueling=True), ModelCls=QECModel, eps_init=1, eps_final=0.02, eps_itr_max=int(5e6), eps_eval=0, initial_model_state_dict=agent_state_dict) # agent = DqnAgent(ModelCls=FfModel) runner = QECSynchronousRunner( # runner = MinibatchRlEval( algo=algo, agent=agent, sampler=sampler, n_steps=1e9, log_interval_steps=3e5, affinity=affinity, ) config = dict(game=id) config_logger(log_dir, name=name, snapshot_mode='last', log_params=config) # with logger_context(log_dir, run_ID, name, config): runner.train()