# Launch settings: the training script to run, the experiment label, how many
# repeats of each variant, and the key selecting the script's default config.
script = "rlpyt/experiments/scripts/atari/pg/train/atari_ff_a2c_gpu.py"
experiment_title = "atari_ff_a2c_basic"
runs_per_setting = 2
default_config_key = "0"

# Resource settings (CPU cores, GPUs, sockets, hyperthread offset) encoded for
# the launcher; the result is handed to run_experiments() as affinity_code.
affinity_code = encode_affinity(
    n_cpu_core=16,
    n_gpu=4,
    hyperthread_offset=20,
    n_socket=2,
    # cpu_per_run=2,
)

# A single variant level: one variant per game. Each value is a one-element
# tuple matching the single entry in `keys`; the game name is also used as the
# variant's dir_name.
games = ["pong", "seaquest", "qbert", "chopper_command"]
keys = [("env", "game")]
values = [(game,) for game in games]
dir_names = list(games)
variant_levels = [VariantLevel(keys, values, dir_names)]

variants, log_dirs = make_variants(*variant_levels)

# Launch every variant; the default config key is passed to each run as a
# common argument.
run_experiments(
    script=script,
    affinity_code=affinity_code,
    experiment_title=experiment_title,
    runs_per_setting=runs_per_setting,
    variants=variants,
    log_dirs=log_dirs,
    common_args=(default_config_key,),
)
    "explore_goal_locations_large",
]
# Entropy-loss coefficients, matched position-by-position with `levels`.
# Alternatives that were tried and switched off: 0.0003 (listed twice; the
# author was not sure it was the right entropy) and 0.001.
entropies = [0.01]

# NOTE(review): zip() stops at the shorter input, so if `levels` holds more
# entries than `entropies`, the extra levels get no (level, entropy) pair while
# still appearing in dir_names -- confirm the two lists have equal length.
keys = [("env", "level"), ("algo", "entropy_loss_coeff")]
values = [*zip(levels, entropies)]
dir_names = levels
variant_levels_1.append(VariantLevel(keys, values, dir_names))
# variant_levels_2.append(VariantLevel(keys, values, dir_names))
# variant_levels_3.append(VariantLevel(keys, values, dir_names))

variants_1, log_dirs_1 = make_variants(*variant_levels_1)
# variants_2, log_dirs_2 = make_variants(*variant_levels_2)
# variants_3, log_dirs_3 = make_variants(*variant_levels_3)

# Only the first group of variant levels is active in this configuration.
variants = variants_1  # + variants_2 + variants_3
log_dirs = log_dirs_1  # + log_dirs_2 + log_dirs_3

# Give each computer an equal-sized, contiguous slice of the variants.
num_variants = len(variants)
variants_per = num_variants // num_computers

my_start = my_computer * variants_per
my_end = (
    num_variants  # the last computer also picks up the division remainder
    if my_computer == num_computers - 1
    else (my_computer + 1) * variants_per
)
my_variants = variants[my_start:my_end]
]
# Entropy-loss coefficients, matched position-by-position with `levels`.
# Alternatives that are switched off: 0.0003 (listed twice; the author was not
# sure it was the right entropy), 0.001 and 0.01.
entropies = [0.0002]

# NOTE(review): zip() stops at the shorter input, so if `levels` holds more
# entries than `entropies`, the extra levels get no (level, entropy) pair while
# still appearing in dir_names -- confirm the two lists have equal length.
keys = [("env", "level"), ("algo", "entropy_loss_coeff")]
values = [*zip(levels, entropies)]
dir_names = levels

# The same (keys, values, dir_names) level is added to all three groups.
variant_levels_1.append(VariantLevel(keys, values, dir_names))
variant_levels_2.append(VariantLevel(keys, values, dir_names))
variant_levels_3.append(VariantLevel(keys, values, dir_names))

# Expand each group into its variants and log directories, in group order.
(variants_1, log_dirs_1), (variants_2, log_dirs_2), (variants_3, log_dirs_3) = (
    make_variants(*level_group)
    for level_group in (variant_levels_1, variant_levels_2, variant_levels_3)
)

variants = variants_1 + variants_2 + variants_3
log_dirs = log_dirs_1 + log_dirs_2 + log_dirs_3

# Give each computer an equal-sized, contiguous slice of the variants.
num_variants = len(variants)
variants_per = num_variants // num_computers

my_start = my_computer * variants_per
my_end = (
    num_variants  # the last computer also picks up the division remainder
    if my_computer == num_computers - 1
    else (my_computer + 1) * variants_per
)
my_variants = variants[my_start:my_end]
Ejemplo n.º 4
0
# Disabled 1M group (Hopper-v3 / Walker2d-v3), kept for easy re-enabling:
# env_ids = ["Hopper-v3", "Walker2d-v3"]
# values = list(zip(env_ids))
# dir_names = ["{}".format(*v) for v in values]
# keys = [("env", "id")]
# variant_levels_1M.append(VariantLevel(keys, values, dir_names))

# Active 3M group. "Ant-v3" was previously included alongside HalfCheetah-v3:
# env_ids = ["Ant-v3", "HalfCheetah-v3"]
env_ids = ["HalfCheetah-v3"]
keys = [("env", "id")]
# One single-element tuple per environment id, matching the single key above;
# the id itself is reused as the variant's dir_name.
values = [(env_id,) for env_id in env_ids]
dir_names = list(env_ids)
variant_levels_3M.append(VariantLevel(keys, values, dir_names))


# Only the 3M group contributes variants while the 1M group is disabled.
# variants_1M, log_dirs_1M = make_variants(*variant_levels_1M)
variants_3M, log_dirs_3M = make_variants(*variant_levels_3M)
variants = variants_3M  # + variants_1M
log_dirs = log_dirs_3M  # + log_dirs_1M

# Launch every variant; the default config key is passed to each run as a
# common argument.
run_experiments(
    script=script,
    affinity_code=affinity_code,
    experiment_title=experiment_title,
    runs_per_setting=runs_per_setting,
    variants=variants,
    log_dirs=log_dirs,
    common_args=(default_config_key, ),
)

Ejemplo n.º 5
0
# variant_levels_3M.append(VariantLevel(keys, values, dir_names))

# Active 1M group. Other environments that were listed but are switched off:
# "HalfCheetah-v2", "Humanoid-v2", "Swimmer-v2", "Hopper-v2".
env_ids = ["Walker2d-v2"]
keys = [("env", "id")]
# One single-element tuple per environment id, matching the single key above;
# the id itself is reused as the variant's dir_name.
values = [(env_id,) for env_id in env_ids]
dir_names = list(env_ids)
variant_levels_1M.append(VariantLevel(keys, values, dir_names))

# Disabled 3M group (Ant-v3 / HalfCheetah-v3), kept for easy re-enabling:
# env_ids = ["Ant-v3", "HalfCheetah-v3"]
# values = list(zip(env_ids))
# dir_names = ["{}".format(*v) for v in values]
# keys = [("env", "id")]
# variant_levels_3M.append(VariantLevel(keys, values, dir_names))

# Only the 1M group contributes variants while the 3M group is disabled.
variants_1M, log_dirs_1M = make_variants(*variant_levels_1M)
# variants_3M, log_dirs_3M = make_variants(*variant_levels_3M)
variants = variants_1M  # + variants_3M
log_dirs = log_dirs_1M  # + log_dirs_3M

# Launch settings for this experiment: training script, label, and the key
# selecting the script's default config.
script = "rlpyt/experiments/scripts/mujoco/pg/train/mujoco_ff_ppo_serial.py"
experiment_title = "ppo_no_clip"
default_config_key = "ppo_1M_serial"

run_experiments(
    script=script,
    affinity_code=affinity_code,
    experiment_title=experiment_title,
    runs_per_setting=runs_per_setting,
    variants=variants,
    log_dirs=log_dirs,