# Launch configuration for the "atari_ff_a2c_basic" experiment: the training
# script below is run once per game variant, `runs_per_setting` times each.
script = "rlpyt/experiments/scripts/atari/pg/train/atari_ff_a2c_gpu.py"

# Hardware description handed to the launcher.
affinity_code = encode_affinity(
    n_cpu_core=16,
    n_gpu=4,
    hyperthread_offset=20,
    n_socket=2,
    # cpu_per_run=2,
)
runs_per_setting = 2
experiment_title = "atari_ff_a2c_basic"

# One variant level, keyed by ("env", "game"), with one 1-tuple value and one
# log sub-directory name per game.
games = ["pong", "seaquest", "qbert", "chopper_command"]
values = [(game,) for game in games]
dir_names = ["{}".format(*value) for value in values]
keys = [("env", "game")]
variant_levels = [VariantLevel(keys, values, dir_names)]

variants, log_dirs = make_variants(*variant_levels)

# Passed to every run as the single common argument.
default_config_key = "0"

run_experiments(
    script=script,
    affinity_code=affinity_code,
    experiment_title=experiment_title,
    runs_per_setting=runs_per_setting,
    variants=variants,
    log_dirs=log_dirs,
    common_args=(default_config_key,),
)
# Launch settings for the "dmlab_ppo_with_ul_prioritized_3" experiment.
affinity_code = quick_affinity_code(contexts_per_gpu=2, alternating=True)
runs_per_setting = 2
experiment_title = "dmlab_ppo_with_ul_prioritized_3"

# Three separate groups of variant levels; only groups 1 and 2 are filled in
# within this section.
variant_levels_1 = []
variant_levels_2 = []
variant_levels_3 = []

# Group 1: one combined level over
# (stop_conv_grad, ul_update_schedule, min_steps_rl).
stop_conv_grads = [False]
ul_update_schedules = ["constant_0"]
min_steps_rl = [0]
values = list(zip(stop_conv_grads, ul_update_schedules, min_steps_rl))
dir_names = ["{}stpcnv_{}_minrl{}".format(*combo) for combo in values]
keys = [
    ("model", "stop_conv_grad"),
    ("algo", "ul_update_schedule"),
    ("algo", "min_steps_rl"),
]
variant_levels_1.append(VariantLevel(keys, values, dir_names))

# Group 2, first level: (stop_conv_grad, min_steps_ul, min_steps_rl).
stop_conv_grads = [False]
min_steps_ul = [2e4]
min_steps_rl = [1e5]
values = list(zip(stop_conv_grads, min_steps_ul, min_steps_rl))
dir_names = ["{}stpcnvgrd_{}ulminstepsrl{}".format(*combo) for combo in values]
keys = [
    ("model", "stop_conv_grad"),
    ("algo", "min_steps_ul"),
    ("algo", "min_steps_rl"),
]
variant_levels_2.append(VariantLevel(keys, values, dir_names))

# Group 2, second level: the UL update schedule.
ul_update_schedules = ["constant_2"]
values = [(schedule,) for schedule in ul_update_schedules]
dir_names = ["{}".format(*combo) for combo in values]
keys = [("algo", "ul_update_schedule")]
variant_levels_2.append(VariantLevel(keys, values, dir_names))
# Hyperparameter grids, stored as lists of 1-tuples (one value per entry of
# the single-key lists defined below).
OC_DELIB = [(0.,), (0.01,), (1.,)]
OC_SIZES = [(2,), (4,)]
tasks = [('Ant',)]

# Variant keys
lr_key = [("algo", "learning_rate")]
delib_key = [("algo", "delib_cost")]
oc_size_key = [("model", "option_size")]
interest_key = [("model", "use_interest")]
game_key = [("env", "task")]

# Directory name for each task
task_names = ["{}".format(*task) for task in tasks]

# PPO: one variant per task, launched immediately.
experiment_title = "PPO_Isaac"
variant_levels = [VariantLevel(game_key, tasks, task_names)]  # Games
variants, log_dirs = make_variants(*variant_levels)
run_experiments(
    script=path_ppo,
    affinity_code=affinity_code,
    experiment_title=experiment_title,
    runs_per_setting=runs_per_setting,
    variants=variants,
    log_dirs=log_dirs,
    common_args=(default_key,),
)

# PPOC: same task level; the variants are only built here, not launched.
experiment_title = "PPOC_Isaac"
variant_levels = [VariantLevel(game_key, tasks, task_names)]  # Games
variants, log_dirs = make_variants(*variant_levels)
# Variant keys
lr_key = [("algo", "learning_rate")]
delib_key = [("algo", "delib_cost")]
fc_key = [("model", "fc_sizes")]
interest_key = [("model", "use_interest")]
game_key = [("env", "game")]

# Common directory names, one per value tuple of the corresponding grid
lr_names_a2c = ["LR_{}".format(*lr) for lr in A2C_LRS]
lr_names_ppo = ["LR_{}".format(*lr) for lr in PPO_LRS]
delib_names = ["D_{}".format(*cost) for cost in OC_DELIB]
game_names = ["{}".format(*game) for game in games]
fc_names = ["MFC_{}".format(*sizes) for sizes in NOC_FC_SIZES]

# A2C: variant levels over game, fully-connected sizes and learning rate.
experiment_title = "A2C_Atari"
variant_levels = [
    VariantLevel(game_key, games, game_names),  # Games
    VariantLevel(fc_key, NOC_FC_SIZES, fc_names),  # Smaller or larger model
    VariantLevel(lr_key, A2C_LRS, lr_names_a2c),  # Learning rates
]
variants, log_dirs = make_variants(*variant_levels)
run_experiments(
    script=path_a2c,
    affinity_code=affinity_code,
    experiment_title=experiment_title,
    runs_per_setting=runs_per_setting,
    variants=variants,
    log_dirs=log_dirs,
    common_args=(default_key,),
)
# Hyperparameter grids, stored as lists of 1-tuples.
OC_DELIB = [(0.,), (0.02,)]
games = [('fruitbot',), ('coinrun',), ('caveflyer',)]

# Variant keys
lr_key = [("algo", "learning_rate")]
delib_key = [("algo", "delib_cost")]
fc_key = [("model", "fc_sizes")]
interest_key = [("model", "use_interest")]
game_key = [("env", "game")]

# Directory names, one per value tuple
game_names = ["{}".format(*game) for game in games]
delib_names = ["D_{}".format(*cost) for cost in OC_DELIB]

# PPO: one variant per game, launched immediately.
experiment_title = "PPO_Procgen"
variant_levels = [VariantLevel(game_key, games, game_names)]  # Games
variants, log_dirs = make_variants(*variant_levels)
run_experiments(
    script=path_ppo,
    affinity_code=affinity_code,
    experiment_title=experiment_title,
    runs_per_setting=runs_per_setting,
    variants=variants,
    log_dirs=log_dirs,
    common_args=(default_key,),
)

# PPOC: starts from the same game level.
experiment_title = "PPOC_Procgen"
variant_levels = [VariantLevel(game_key, games, game_names)]  # Games
# Launch-script fragment for the "A2CRnn_Pomdp" experiment. The plain A2C launch at the start is commented out. The active statements append five variant levels (envs_plus_params_key, batch_T_key, single_rnn_key, pa_key, pr_key), build the variants, and open a run_experiments(...) call with script=path_a2c_rnn. That call's remaining arguments lie beyond the end of this line.
# variants, log_dirs = make_variants(*variant_levels) # run_experiments( # script=path_a2c, # affinity_code=affinity_code, # experiment_title=experiment_title, # runs_per_setting=runs_per_setting, # variants=variants, # log_dirs=log_dirs, # common_args=(default_key,), # ) # A2C RNN experiment_title = "A2CRnn_Pomdp" variant_levels = list() # variant_levels.append(VariantLevel(B_T_env_key, ENVS_PLUS_B_T, env_names)) # pomdps variant_levels.append(VariantLevel(envs_plus_params_key, ENVS_PLUS_PARAMS, env_names)) # pomdps variant_levels.append(VariantLevel(batch_T_key, B_T, ["{}".format(*v) for v in B_T])) # pomdps # variant_levels.append(VariantLevel(fomdp_key, FOMDP, obs_names)) # full or partial observability # variant_levels.append(VariantLevel(rnn_type_key, RNN, rnn_names)) # Types of recurrency variant_levels.append(VariantLevel(single_rnn_key, [single_rnn[-1]], [single_rnn_names[-1]])) # Shared processor or unshared 1 rnn variant_levels.append(VariantLevel(pa_key, pas_s, pa_names_s)) # Rnn Placement variant_levels.append(VariantLevel(pr_key, prs_s, pr_names_s)) # Rnn Placement # variant_levels.append(VariantLevel(rnn_size_key, RNN_SIZE, rnn_size_names)) # Sizes of recurrency # variant_levels.append(VariantLevel(layer_norm_key, LAYER_NORM, ['rnn_norm', 'no_norm'])) # Sizes of recurrency # variant_levels.append(VariantLevel(lr_key, lrs, [str(*v) for v in lrs])) # Learning rates variants, log_dirs = make_variants(*variant_levels) run_experiments( script=path_a2c_rnn, affinity_code=affinity_code, experiment_title=experiment_title, runs_per_setting=runs_per_setting,
# Launch-script fragment for the "PPORnn_Pomdp" experiment. It defines composite variant keys by concatenating key lists, builds directory-name lists, and appends two active variant levels (envs_plus_step_key and lr_schedules_key). It then opens a run_experiments(...) call with script=path_ppo_rnn, whose remaining arguments lie beyond the end of this line.
# NOTE(review): env_names is built from ENVS, while the first level's values come from ENVS_PLUS_STEPS - confirm the two lists line up.
envs_plus_params_key = batch_B_key + batch_T_key + game_key + nstep_key envs_plus_step_key = game_key + nstep_key ENVS_PLUS_STEPS = list(zip(['POMDP-hallway-episodic-v0', 'POMDP-hallway2-episodic-v0'], [int(2e6), int(5e6)])) # Common directory names env_names = ["{}".format(*v) for v in ENVS] env_name = ["{}".format(*v) for v in ENV] # rnn_names = ["{}".format(*v) for v in RNN] rnn_size_names = ["RNNH_{}".format(*v) for v in RNN_SIZE] rnn_place_names = ["{}".format(v) for v in ['Before', 'After']] shared_proc_names = ["{}".format(v) for v in ['Unshared', 'Shared']] # PPO RNN experiment_title = "PPORnn_Pomdp" variant_levels = list() # variant_levels.append(VariantLevel(B_T_env_key, ENVS_PLUS_B_T, env_names)) # pomdps variant_levels.append(VariantLevel(envs_plus_step_key, ENVS_PLUS_STEPS, env_names)) # pomdps # variant_levels.append(VariantLevel(batch_T_key, B_T, ["{}".format(*v) for v in B_T])) # pomdps # variant_levels.append(VariantLevel(fomdp_key, FOMDP, obs_names)) # full or partial observability # variant_levels.append(VariantLevel(rnn_type_key, RNN, rnn_names)) # Types of recurrency # variant_levels.append(VariantLevel(shared_proc_key, SHARED_PROC, shared_proc_names)) # Sizes of recurrency # variant_levels.append(VariantLevel(rnn_place_key, RNN_PLACE, rnn_place_names)) # Sizes of recurrency # variant_levels.append(VariantLevel(rnn_size_key, RNN_SIZE, rnn_size_names)) # Sizes of recurrency # variant_levels.append(VariantLevel(lr_key, lrs, [str(*v) for v in lrs])) # Learning rates variant_levels.append(VariantLevel(lr_schedules_key, lr_schedules, ['Constant', 'Linear'])) # Learning rate schedules variants, log_dirs = make_variants(*variant_levels) run_experiments( script=path_ppo_rnn, affinity_code=affinity_code, experiment_title=experiment_title, runs_per_setting=runs_per_setting, variants=variants,
# Common directory names, one per value tuple of the corresponding grid
env_names = ["{}".format(*env) for env in ENVS]
env_name = ["{}".format(*env) for env in ENV]
rnn_names = ["{}".format(*rnn) for rnn in RNN]
rnn_size_names = ["RNNH_{}".format(*size) for size in RNN_SIZE]
delib_names = ["DELIB_{}".format(*cost) for cost in OC_DELIB]
obs_names = ["OMNI_{}".format(*flag) for flag in FOMDP]
int_names = ["INT_{}".format(*flag) for flag in INTEREST]
nopt_names = ["NOPT_{}".format(*count) for count in NUM_OPTIONS]

# A2C: variant levels over environment parameters and observability.
experiment_title = "A2C_Pomdp"
variant_levels = [
    # VariantLevel(B_T_env_key, ENVS_PLUS_B_T, env_names),  # pomdps (disabled)
    VariantLevel(envs_plus_params_key, ENVS_PLUS_PARAMS, env_names),  # pomdps
    VariantLevel(fomdp_key, FOMDP, obs_names),  # full or partial observability
]
variants, log_dirs = make_variants(*variant_levels)
run_experiments(
    script=path_a2c,
    affinity_code=affinity_code,
    experiment_title=experiment_title,
    runs_per_setting=runs_per_setting,
    variants=variants,
    log_dirs=log_dirs,
    common_args=(default_key,),
)

# A2C RNN
# experiment_title = "A2CRnn_Pomdp"