コード例 #1
0
# Launch configuration for the "atari_ff_a2c_basic" experiment: one variant
# level over four Atari games, run through the A2C GPU training script.
script = "rlpyt/experiments/scripts/atari/pg/train/atari_ff_a2c_gpu.py"
experiment_title = "atari_ff_a2c_basic"
runs_per_setting = 2
default_config_key = "0"

# Affinity code built from the CPU/GPU counts below; forwarded unchanged to
# run_experiments().
affinity_code = encode_affinity(
    n_cpu_core=16,
    n_gpu=4,
    hyperthread_offset=20,
    n_socket=2,
    # cpu_per_run=2,
)

# Single variant level keyed by ("env", "game"). Each value is a 1-tuple so
# that it lines up with the single key; the directory name is the game name.
games = ["pong", "seaquest", "qbert", "chopper_command"]
keys = [("env", "game")]
values = [(game,) for game in games]
dir_names = ["{}".format(game) for (game,) in values]
variant_levels = [VariantLevel(keys, values, dir_names)]

variants, log_dirs = make_variants(*variant_levels)

run_experiments(
    script=script,
    affinity_code=affinity_code,
    experiment_title=experiment_title,
    runs_per_setting=runs_per_setting,
    variants=variants,
    log_dirs=log_dirs,
    common_args=(default_config_key, ),
)
コード例 #2
0
# Variant definitions for the "dmlab_ppo_with_ul_prioritized_3" experiment.
# Three independent lists of variant levels are prepared; only the first two
# are populated in this section.
affinity_code = quick_affinity_code(contexts_per_gpu=2, alternating=True)
runs_per_setting = 2
experiment_title = "dmlab_ppo_with_ul_prioritized_3"

variant_levels_1 = []
variant_levels_2 = []
variant_levels_3 = []

# --- Group 1: stop_conv_grad x ul_update_schedule x min_steps_rl ---------
stop_conv_grads = [False]
ul_update_schedules = ["constant_0"]
min_steps_rl = [0]
keys = [
    ("model", "stop_conv_grad"),
    ("algo", "ul_update_schedule"),
    ("algo", "min_steps_rl"),
]
values = list(zip(stop_conv_grads, ul_update_schedules, min_steps_rl))
dir_names = [
    "{}stpcnv_{}_minrl{}".format(stop_grad, schedule, rl_steps)
    for stop_grad, schedule, rl_steps in values
]
variant_levels_1.append(VariantLevel(keys, values, dir_names))

# --- Group 2, level 1: stop_conv_grad x min_steps_ul x min_steps_rl ------
stop_conv_grads = [False]
min_steps_ul = [2e4]
min_steps_rl = [1e5]
keys = [
    ("model", "stop_conv_grad"),
    ("algo", "min_steps_ul"),
    ("algo", "min_steps_rl"),
]
values = list(zip(stop_conv_grads, min_steps_ul, min_steps_rl))
dir_names = [
    "{}stpcnvgrd_{}ulminstepsrl{}".format(stop_grad, ul_steps, rl_steps)
    for stop_grad, ul_steps, rl_steps in values
]
variant_levels_2.append(VariantLevel(keys, values, dir_names))

# --- Group 2, level 2: ul_update_schedule ---------------------------------
ul_update_schedules = ["constant_2"]
keys = [("algo", "ul_update_schedule")]
values = list(zip(ul_update_schedules))
dir_names = ["{}".format(schedule) for (schedule,) in values]
variant_levels_2.append(VariantLevel(keys, values, dir_names))
コード例 #3
0
# Value lists, written as lists of 1-tuples (one tuple per variant value).
OC_DELIB = [(0.,), (0.01,), (1.,)]
OC_SIZES = [(2,), (4,)]
tasks = [('Ant',)]

# Variant keys
lr_key = [("algo", "learning_rate")]
delib_key = [("algo", "delib_cost")]
oc_size_key = [("model", "option_size")]
interest_key = [("model", "use_interest")]
game_key = [("env", "task")]

# Directory names: one per task, equal to the task name itself.
task_names = ["{}".format(task) for (task,) in tasks]

# PPO: a single variant level over the tasks.
experiment_title = "PPO_Isaac"
variant_levels = [VariantLevel(game_key, tasks, task_names)]  # Tasks
variants, log_dirs = make_variants(*variant_levels)
run_experiments(
    script=path_ppo,
    affinity_code=affinity_code,
    experiment_title=experiment_title,
    runs_per_setting=runs_per_setting,
    variants=variants,
    log_dirs=log_dirs,
    common_args=(default_key,),
)

# PPOC: starts from the same task level.
experiment_title = "PPOC_Isaac"
variant_levels = [VariantLevel(game_key, tasks, task_names)]  # Tasks
variants, log_dirs = make_variants(*variant_levels)
コード例 #4
0
# Variant keys
lr_key = [("algo", "learning_rate")]
delib_key = [("algo", "delib_cost")]
fc_key = [("model", "fc_sizes")]
interest_key = [("model", "use_interest")]
game_key = [("env", "game")]

# Common directory names. Each is built from the first element of the
# corresponding value entry, with a short prefix identifying the setting.
game_names = ["{}".format(*entry) for entry in games]
fc_names = ["MFC_{}".format(*entry) for entry in NOC_FC_SIZES]
lr_names_a2c = ["LR_{}".format(*entry) for entry in A2C_LRS]
lr_names_ppo = ["LR_{}".format(*entry) for entry in PPO_LRS]
delib_names = ["D_{}".format(*entry) for entry in OC_DELIB]

# A2C: games x model FC sizes x learning rates.
experiment_title = "A2C_Atari"
variant_levels = [
    VariantLevel(game_key, games, game_names),  # Games
    VariantLevel(fc_key, NOC_FC_SIZES, fc_names),  # Smaller or larger model
    VariantLevel(lr_key, A2C_LRS, lr_names_a2c),  # Learning rates
]
variants, log_dirs = make_variants(*variant_levels)
run_experiments(
    script=path_a2c,
    affinity_code=affinity_code,
    experiment_title=experiment_title,
    runs_per_setting=runs_per_setting,
    variants=variants,
    log_dirs=log_dirs,
    common_args=(default_key, ),
)
コード例 #5
0
# Value lists, written as lists of 1-tuples (one tuple per variant value).
OC_DELIB = [(0.,), (0.02,)]
games = [('fruitbot',), ('coinrun',), ('caveflyer',)]

# Variant keys
lr_key = [("algo", "learning_rate")]
delib_key = [("algo", "delib_cost")]
fc_key = [("model", "fc_sizes")]
interest_key = [("model", "use_interest")]
game_key = [("env", "game")]

# Directory names: the game name, and "D_<cost>" for deliberation costs.
game_names = ["{}".format(game) for (game,) in games]
delib_names = ["D_{}".format(cost) for (cost,) in OC_DELIB]

# PPO: a single variant level over the games.
experiment_title = "PPO_Procgen"
variant_levels = [VariantLevel(game_key, games, game_names)]  # Games
variants, log_dirs = make_variants(*variant_levels)
run_experiments(
    script=path_ppo,
    affinity_code=affinity_code,
    experiment_title=experiment_title,
    runs_per_setting=runs_per_setting,
    variants=variants,
    log_dirs=log_dirs,
    common_args=(default_key, ),
)

# PPOC: starts from the same game level.
experiment_title = "PPOC_Procgen"
variant_levels = [VariantLevel(game_key, games, game_names)]  # Games
コード例 #6
0
# variants, log_dirs = make_variants(*variant_levels)
# run_experiments(
#     script=path_a2c,
#     affinity_code=affinity_code,
#     experiment_title=experiment_title,
#     runs_per_setting=runs_per_setting,
#     variants=variants,
#     log_dirs=log_dirs,
#     common_args=(default_key,),
# )

# A2C RNN
# The variant levels are listed in one literal, in the order they are
# crossed by make_variants(). Disabled levels are kept as comments in the
# position where they would be inserted.
experiment_title = "A2CRnn_Pomdp"
variant_levels = [
    # VariantLevel(B_T_env_key, ENVS_PLUS_B_T, env_names),  # pomdps
    VariantLevel(envs_plus_params_key, ENVS_PLUS_PARAMS, env_names),  # pomdps
    VariantLevel(batch_T_key, B_T,
                 ["{}".format(*entry) for entry in B_T]),  # level keyed by batch_T_key
    # VariantLevel(fomdp_key, FOMDP, obs_names),  # full or partial observability
    # VariantLevel(rnn_type_key, RNN, rnn_names),  # Types of recurrency
    # Only the last entry of single_rnn / single_rnn_names is used.
    VariantLevel(single_rnn_key, [single_rnn[-1]],
                 [single_rnn_names[-1]]),  # Shared processor or unshared 1 rnn
    VariantLevel(pa_key, pas_s, pa_names_s),  # Rnn Placement
    VariantLevel(pr_key, prs_s, pr_names_s),  # Rnn Placement
    # VariantLevel(rnn_size_key, RNN_SIZE, rnn_size_names),  # Sizes of recurrency
    # VariantLevel(layer_norm_key, LAYER_NORM, ['rnn_norm', 'no_norm']),  # layer norm on/off
    # VariantLevel(lr_key, lrs, [str(*v) for v in lrs]),  # Learning rates
]
variants, log_dirs = make_variants(*variant_levels)
run_experiments(
    script=path_a2c_rnn,
    affinity_code=affinity_code,
    experiment_title=experiment_title,
    runs_per_setting=runs_per_setting,
コード例 #7
0
# Composite keys: several key lists concatenated so that one variant level
# sets all of them at once.
envs_plus_params_key = batch_B_key + batch_T_key + game_key + nstep_key
envs_plus_step_key = game_key + nstep_key
# (environment id, step count) pairs, matching envs_plus_step_key.
ENVS_PLUS_STEPS = [
    ('POMDP-hallway-episodic-v0', int(2e6)),
    ('POMDP-hallway2-episodic-v0', int(5e6)),
]

# Common directory names
env_names = ["{}".format(*entry) for entry in ENVS]
env_name = ["{}".format(*entry) for entry in ENV]
# rnn_names = ["{}".format(*v) for v in RNN]
rnn_size_names = ["RNNH_{}".format(*entry) for entry in RNN_SIZE]
rnn_place_names = ['Before', 'After']
shared_proc_names = ['Unshared', 'Shared']

# PPO RNN
# NOTE(review): env_names is derived from ENVS, while the values of the first
# level come from ENVS_PLUS_STEPS -- confirm the two lists line up.
experiment_title = "PPORnn_Pomdp"
variant_levels = [
    # VariantLevel(B_T_env_key, ENVS_PLUS_B_T, env_names),  # pomdps
    VariantLevel(envs_plus_step_key, ENVS_PLUS_STEPS, env_names),  # pomdps
    # VariantLevel(batch_T_key, B_T, ["{}".format(*v) for v in B_T]),  # level keyed by batch_T_key
    # VariantLevel(fomdp_key, FOMDP, obs_names),  # full or partial observability
    # VariantLevel(rnn_type_key, RNN, rnn_names),  # Types of recurrency
    # VariantLevel(shared_proc_key, SHARED_PROC, shared_proc_names),  # Shared or unshared processor
    # VariantLevel(rnn_place_key, RNN_PLACE, rnn_place_names),  # Rnn placement (before / after)
    # VariantLevel(rnn_size_key, RNN_SIZE, rnn_size_names),  # Sizes of recurrency
    # VariantLevel(lr_key, lrs, [str(*v) for v in lrs]),  # Learning rates
    VariantLevel(lr_schedules_key, lr_schedules,
                 ['Constant', 'Linear']),  # Learning rate schedules
]
variants, log_dirs = make_variants(*variant_levels)
run_experiments(
    script=path_ppo_rnn,
    affinity_code=affinity_code,
    experiment_title=experiment_title,
    runs_per_setting=runs_per_setting,
    variants=variants,
コード例 #8
0
# Common directory names. Each is built from the first element of the
# corresponding value entry, with a short prefix identifying the setting.
env_names = ["{}".format(*entry) for entry in ENVS]
env_name = ["{}".format(*entry) for entry in ENV]
rnn_names = ["{}".format(*entry) for entry in RNN]
rnn_size_names = ["RNNH_{}".format(*entry) for entry in RNN_SIZE]
delib_names = ["DELIB_{}".format(*entry) for entry in OC_DELIB]
obs_names = ["OMNI_{}".format(*entry) for entry in FOMDP]
int_names = ["INT_{}".format(*entry) for entry in INTEREST]
nopt_names = ["NOPT_{}".format(*entry) for entry in NUM_OPTIONS]

# A2C: environments (with their parameters) x full/partial observability.
experiment_title = "A2C_Pomdp"
variant_levels = [
    # VariantLevel(B_T_env_key, ENVS_PLUS_B_T, env_names),  # pomdps
    VariantLevel(envs_plus_params_key, ENVS_PLUS_PARAMS, env_names),  # pomdps
    VariantLevel(fomdp_key, FOMDP, obs_names),  # full or partial observability
]
variants, log_dirs = make_variants(*variant_levels)
run_experiments(
    script=path_a2c,
    affinity_code=affinity_code,
    experiment_title=experiment_title,
    runs_per_setting=runs_per_setting,
    variants=variants,
    log_dirs=log_dirs,
    common_args=(default_key, ),
)

# A2C RNN
# experiment_title = "A2CRnn_Pomdp"