Example #1
def build_and_train(game="TowerBuilding", run_ID=0, cuda_idx=None):
    # Either manually set the resources for the experiment:
    affinity_code = encode_affinity(
        n_cpu_core=2,
        n_gpu=1,
        async_sample=True,  # async affinity layout is needed by AsyncGpuSampler / AsyncRlEval below
        optim_sample_share_gpu=True,  # with a single GPU, let the sampler share it with the optimizer
        # hyperthread_offset=8,  # if auto-detect doesn't work, number of CPU cores
        # n_socket=1,  # if auto-detect doesn't work, can force (or force to 1)
        run_slot=0,
        cpu_per_run=1,
        set_affinity=True,  # it can help to restrict workers to individual CPUs
    )
    affinity = affinity_from_code(affinity_code)
    config = configs["r2d1"]
    config["env"]["game"] = game
    config["eval_env"]["game"] = config["env"]["game"]

    sampler = AsyncGpuSampler(EnvCls=voxel_make,
                              env_kwargs=config["env"],
                              CollectorCls=DbGpuResetCollector,
                              TrajInfoCls=AtariTrajInfo,
                              eval_env_kwargs=config["eval_env"],
                              **config["sampler"])
    algo = R2D1(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariR2d1Agent(model_kwargs=config["model"], **config["agent"])
    runner = AsyncRlEval(algo=algo,
                         agent=agent,
                         sampler=sampler,
                         affinity=affinity,
                         **config["runner"])
    config = dict(game=game)
    name = "r2d1_" + game
    log_dir = "tower_building"
    with logger_context(log_dir, run_ID, name, config, snapshot_mode="last"):
        runner.train()
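The "Either manually set the resources" comment above refers to the automatic alternative shown later in Example #14 (quick_affinity_code). A minimal sketch of that route, assuming a single run slot and the same rlpyt helpers; results may vary by machine:

from rlpyt.utils.launching.affinity import (quick_affinity_code, prepend_run_slot,
                                            affinity_from_code)

affinity_code = quick_affinity_code(n_parallel=None, use_gpu=True)  # auto-detect CPU/GPU resources
slot_affinity_code = prepend_run_slot(run_slot=0, affinity_code=affinity_code)
affinity = affinity_from_code(slot_affinity_code)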
Example #2
def run_task(vv, log_dir, exp_name):
    vv = update_env_kwargs(vv)
    run_ID = vv['seed']
    config_key = vv['config_key']
    slot_affinity_code = encode_affinity(
        n_cpu_core=20,
        n_gpu=2,
        n_socket=2,
        run_slot=0,
        set_affinity=True,  # it can help to restrict workers to individual CPUs
    )
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    config.update(**vv)
    # config["env"] = env_arg_dict[config['env_name']]
    vv['env_kwargs']['headless'] = True

    sac_module = 'rlpyt.algos.qpg.{}'.format(config['sac_module'])
    sac_agent_module = 'rlpyt.agents.qpg.{}'.format(config['sac_agent_module'])

    sac_module = importlib.import_module(sac_module)
    sac_agent_module = importlib.import_module(sac_agent_module)

    SAC = sac_module.SAC
    SacAgent = sac_agent_module.SacAgent
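    # For reference, the resolved names typically look like those in Example #25 below,
    # e.g. config['sac_module'] = 'sac_v2' and config['sac_agent_module'] = 'sac_agent_v2',
    # giving rlpyt.algos.qpg.sac_v2 and rlpyt.agents.qpg.sac_agent_v2 (modules provided by
    # the project's rlpyt fork rather than stock rlpyt).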

    if 'pixel_wrapper_kwargs' in config['env']:
        info_keys = config.get('info_keys', None)
        state_keys = config.get('state_keys', None)
        init_namedtuples(info_keys=info_keys, state_keys=state_keys)

    sampler = CpuSampler(
        EnvCls=SOFTGYM_ENVS[vv['env_name']],
        env_kwargs=vv['env_kwargs'],
        CollectorCls=CpuResetCollector,
        eval_env_kwargs=vv['env_kwargs'],
        **config["sampler"]
    )
    algo = SAC(optim_kwargs=config["optim"], **config["algo"])
    agent = SacAgent(**config["agent"])
    runner = MinibatchRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = "sac_{}".format(vv['env_name'])

    with logger_context(log_dir, run_ID, name, log_params=config, snapshot_mode='last'):
        runner.train()
Example #3
def build_and_train(game="pong", run_ID=0):
    # It seems like we should be able to skip the intermediate affinity-code step,
    # but so far we have always run it this way.
    # Change these inputs to match the local machine and desired parallelism.
    affinity_code = encode_affinity(
        n_cpu_core=16,  # Use 16 cores across all experiments.
        n_gpu=8,  # Use 8 gpus across all experiments.
        hyperthread_offset=24,  # If machine has 24 cores.
        n_socket=2,  # Presume CPU socket affinity to lower/upper half GPUs.
        gpu_per_run=2,  # How many GPUs to parallelize one run across.
        # cpu_per_run=1,
    )
    slot_affinity_code = prepend_run_slot(run_slot=0, affinity_code=affinity_code)
    affinity = get_affinity(slot_affinity_code)

    sampler = GpuParallelSampler(
        EnvCls=AtariEnv,
        env_kwargs=dict(game=game),
        CollectorCls=WaitResetCollector,
        batch_T=5,
        batch_B=16,
        max_decorrelation_steps=400,
    )
    algo = A2C()  # Run with defaults.
    agent = AtariFfAgent()
    runner = MultiGpuRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        n_steps=50e6,
        log_interval_steps=1e5,
        affinity=affinity,
    )
    config = dict(game=game)
    name = "a2c_" + game
    log_dir = "example_7"
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
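A minimal command-line entry point for this script, in the style of the other rlpyt examples; the flag names are illustrative rather than taken from the original:

if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--game", help="Atari game", default="pong")
    parser.add_argument("--run_ID", help="run identifier (logging)", type=int, default=0)
    args = parser.parse_args()
    build_and_train(game=args.game, run_ID=args.run_ID)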
Example #4
from rlpyt.utils.launching.affinity import encode_affinity
from rlpyt.utils.launching.exp_launcher import run_experiments
from rlpyt.utils.launching.variant import make_variants, VariantLevel
import pathlib

path = pathlib.Path(
    __file__).resolve().parent.parent / 'train' / "mujoco_ff_ppoc_gpu.py"
script = path.as_posix()
# script = "rlpyt/experiments/scripts/mujoco/pg/train/mujoco_ff_ppo_gpu.py"
affinity_code = encode_affinity(n_cpu_core=6,
                                n_gpu=1,
                                contexts_per_gpu=6,
                                n_socket=1,
                                alternating=True)
runs_per_setting = 6
default_config_key = "ppoc_1M_halfcheetahtransfer"
experiment_title = "PPOC_Transfer"
variant_levels = list()

variants, log_dirs = make_variants(*variant_levels)
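# With variant_levels left empty, make_variants() yields a single empty variant and log_dir "",
# so run_experiments below launches runs_per_setting runs of the default config only.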

run_experiments(
    script=script,
    affinity_code=affinity_code,
    experiment_title=experiment_title,
    runs_per_setting=runs_per_setting,
    variants=variants,
    log_dirs=log_dirs,
    common_args=(default_config_key, ),
)
Example #5
import multiprocessing
import GPUtil
from rlpyt.utils.launching.affinity import encode_affinity, quick_affinity_code
from rlpyt.utils.launching.exp_launcher import run_experiments
from rlpyt.utils.launching.variant import make_variants, VariantLevel


# script to launch hyperparameter search

num_cpus = multiprocessing.cpu_count()
num_gpus = len(GPUtil.getAvailable())
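# Note: GPUtil.getAvailable() returns at most `limit` GPUs (default limit=1);
# GPUtil.getGPUs() lists every installed GPU if that is the intent here.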

affinity_code = encode_affinity(
    n_cpu_core=num_cpus,
    n_gpu=num_gpus,
    set_affinity=True,  # it can help to restrict workers to individual CPUs
)

runs_per_setting = 1
experiment_title = "parkour_challenge"

variants = [
    {
        'algo': 'ppo',
        'ppo_kwargs': dict(minibatches=4),
        'sampler_kwargs': dict(batch_B=32)
    },
    {
        'algo': 'ppo',
        'ppo_kwargs': dict(minibatches=32),
        'sampler_kwargs': dict(batch_B=32)
Example #6
from rlpyt.utils.launching.affinity import encode_affinity
from rlpyt.utils.launching.exp_launcher import run_experiments
from rlpyt.utils.launching.variant import make_variants, VariantLevel

script = "rlpyt/experiments/scripts/atari/dqn/train/atari_r2d1_gpu.py"
affinity_code = encode_affinity(
    n_cpu_core=24,
    n_gpu=4,
    hyperthread_offset=24,
    n_socket=2,
)
runs_per_setting = 2
experiment_title = "atari_r2d1_long"
variant_levels = list()

games = ["gravitar", "asteroids"]
values = list(zip(games))
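# zip() over the single list wraps each game in a 1-tuple, so each value lines up with the
# single ("env", "game") key below.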
dir_names = ["{}".format(*v) for v in values]
keys = [("env", "game")]
variant_levels.append(VariantLevel(keys, values, dir_names))

variants, log_dirs = make_variants(*variant_levels)

default_config_key = "r2d1_long"

run_experiments(
    script=script,
    affinity_code=affinity_code,
    experiment_title=experiment_title,
    runs_per_setting=runs_per_setting,
    variants=variants,
Example #7
from rlpyt.utils.launching.affinity import encode_affinity
from rlpyt.utils.launching.exp_launcher import run_experiments
from rlpyt.utils.launching.variant import make_variants, VariantLevel

script = "rlpyt/experiments/scripts/atari/dqn/train/atari_r2d1_async_alt.py"
affinity_code = encode_affinity(
    n_cpu_core=20,
    n_gpu=4,
    async_sample=True,
    gpu_per_run=1,
    sample_gpu_per_run=2,
    # hyperthread_offset=24,
    # optim_sample_share_gpu=True,
    n_socket=1,  # Force this.
    alternating=True,
)
runs_per_setting = 1
experiment_title = "atari_r2d1_async_alt"
variant_levels = list()

games = ["seaquest"]
values = list(zip(games))
dir_names = ["{}".format(*v) for v in values]
keys = [("env", "game")]
variant_levels.append(VariantLevel(keys, values, dir_names))

variants, log_dirs = make_variants(*variant_levels)

default_config_key = "async_alt_got"

run_experiments(
Example #8
from rlpyt.utils.launching.affinity import encode_affinity
from rlpyt.utils.launching.exp_launcher import run_experiments
from rlpyt.utils.launching.variant import VariantLevel, make_variants

script = "rlpyt/experiments/scripts/mujoco/qpg/train/mujoco_sac_async_gpu.py"
affinity_code = encode_affinity(
    n_cpu_core=16,
    n_gpu=4,
    # contexts_per_gpu=2,
    async_sample=True,
    sample_gpu_per_run=1,
    # hyperthread_offset=2,
    # n_socket=1,
    # cpu_per_run=1,
)
runs_per_setting = 2
default_config_key = "async_gpu"
experiment_title = "sac_mujoco_async"
variant_levels = list()

env_ids = ["Hopper-v3", "HalfCheetah-v3"]  # , "Swimmer-v3"]
values = list(zip(env_ids))
dir_names = ["env_{}".format(*v) for v in values]
keys = [("env", "id")]
variant_levels.append(VariantLevel(keys, values, dir_names))

variants, log_dirs = make_variants(*variant_levels)

run_experiments(
    script=script,
    affinity_code=affinity_code,
Example #9
from rlpyt.utils.launching.affinity import encode_affinity
from rlpyt.utils.launching.exp_launcher import run_experiments
from rlpyt.utils.launching.variant import make_variants, VariantLevel

script = "rlpyt/experiments/scripts/atari/pg/train/atari_ff_a2c_gpu_multi.py"
affinity_code = encode_affinity(
    n_cpu_core=8,
    n_gpu=2,
    hyperthread_offset=8,
    n_socket=1,
    contexts_per_run=2,
    # cpu_per_run=2,
)
runs_per_setting = 1
experiment_title = "atari_ff_a2c_multi"
variant_levels = list()

games = ["pong"]  # , "seaquest", "qbert", "chopper_command"]
values = list(zip(games))
dir_names = ["{}".format(*v) for v in values]
keys = [("env", "game")]
variant_levels.append(VariantLevel(keys, values, dir_names))

variants, log_dirs = make_variants(*variant_levels)

default_config_key = "0"

run_experiments(
    script=script,
    affinity_code=affinity_code,
    experiment_title=experiment_title,
Example #10
import pathlib
from rlpyt.utils.launching.affinity import encode_affinity
from rlpyt.utils.launching.exp_launcher import run_experiments
from rlpyt.utils.launching.variant import make_variants, VariantLevel

PHYSX_N_THREADS = 4  # By default, isaac environments use 4 cores by themselves. Each sampler will use its own as well
affinity_code = encode_affinity(
    n_cpu_core=6,
    n_gpu=1,
    contexts_per_gpu=2,  # This thing chews GPU memory
    n_socket=1,
    alternating=False
)

runs_per_setting = 3  # 3 runs
# Paths
path_ppo = (pathlib.Path(__file__).resolve().parent.parent / 'train' / "isaac_ff_ppo_gpu.py").as_posix()
path_ppoc = (pathlib.Path(__file__).resolve().parent.parent / 'train' / "isaac_ff_ppoc_gpu.py").as_posix()
# Default keys
default_key = 'nv_ant'
default_oc_key = 'nv_ant_oc'
# Param options
PPO_LRS = list(zip([1e-4, 3e-4, 1e-3]))
OC_DELIB = list(zip([0., 0.01, 1.]))
OC_SIZES = list(zip([2,4]))
tasks = list(zip(['Ant']))
# Variant keys
lr_key = [("algo", "learning_rate")]
delib_key = [("algo", "delib_cost")]
oc_size_key = [("model", "option_size")]
interest_key = [("model", "use_interest")]
Example #11
from rlpyt.utils.launching.affinity import encode_affinity
from rlpyt.utils.launching.exp_launcher import run_experiments
from rlpyt.utils.launching.variant import make_variants, VariantLevel

script = "rlpyt/experiments/scripts/atari/pg/train/atari_lstm_a2c_cpu.py"
# default_config_key = "0"
affinity_code = encode_affinity(  # Let it be kwargs?
    n_cpu_core=6,
    n_gpu=2,
    hyperthread_offset=8,
    n_socket=1,
    # cpu_per_run=4,
)
runs_per_setting = 2
experiment_title = "lstm_test_gpu"
variant_levels = list()

# learning_rate = [7e-4] * 4
# entropy_loss_coeff = [0.01, 0.02, 0.04, 0.08]
# values = list(zip(learning_rate, entropy_loss_coeff))
# dir_names = ["test_{}lr_{}ent".format(*v) for v in values]
# keys = [("algo", "learning_rate"), ("algo", "entropy_loss_coeff")]
# variant_levels.append(VariantLevel(keys, values, dir_names))


learning_rate = [7e-4] * 2
batch_T = [5, 20]
values = list(zip(learning_rate, batch_T))
dir_names = ["test_{}lr_{}T".format(*v) for v in values]
keys = [("algo", "learning_rate"), ("sampler", "batch_T")]
Example #12
import argparse
import os

import gym
import torch

from rlpyt.utils.collections import namedarraytuple
from rlpyt.utils.launching.affinity import encode_affinity, affinity_from_code, prepend_run_slot
from rlpyt.samplers.parallel.gpu.sampler import GpuSampler
from rlpyt.samplers.parallel.gpu.collectors import GpuResetCollector
# Note: `configs` (used below) is assumed to come from this project's config module;
# its import is not shown in the original snippet.
from ul_gen.agents.discrete_sac_ae_agent import DiscreteSacAEAgent

parser = argparse.ArgumentParser()
parser.add_argument("--savepath", type=str, default="./ae_data/")

args = parser.parse_args()

os.makedirs(args.savepath, exist_ok=True)

EmptyAgentInfo = namedarraytuple("EmptyAgentInfo", [])
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

affinity_code = encode_affinity(
    n_cpu_core=4,
    n_gpu=1,
    n_socket=1,
)

affinity = affinity_from_code(prepend_run_slot(0, affinity_code))
# Get Params
config = configs["discrete_sac_ae"]

# Setup the data collection pipeline
# Edit the sampler kwargs to get a larger batch size
config["sampler"]["batch_T"] = 24
config["sampler"]["batch_B"] = 16

sampler = GpuSampler(EnvCls=gym.make,
                     env_kwargs=config["env"],
                     CollectorCls=GpuResetCollector,
Example #13
from rlpyt.utils.launching.affinity import encode_affinity
from rlpyt.utils.launching.exp_launcher import run_experiments
from rlpyt.utils.launching.variant import VariantLevel, make_variants

script = "rlpyt/experiments/scripts/atari/dqn/train/atari_dqn_cpu.py"
affinity_code = encode_affinity(
    n_cpu_core=24,
    n_gpu=6,
    # hyperthread_offset=24,
    n_socket=2,
    # cpu_per_run=2,
)
runs_per_setting = 2
experiment_title = "atari_dqn_basic_cpu"
variant_levels = list()

games = ["pong", "qbert", "chopper_command"]
values = list(zip(games))
dir_names = ["{}".format(*v) for v in values]
keys = [("env", "game")]
variant_levels.append(VariantLevel(keys, values, dir_names))

variants, log_dirs = make_variants(*variant_levels)

default_config_key = "dqn"

run_experiments(
    script=script,
    affinity_code=affinity_code,
    experiment_title=experiment_title,
    runs_per_setting=runs_per_setting,
Example #14
with many different inputs to encode, and see what comes out.

The results will be logged with a folder structure according to the
variant levels constructed here.

"""

from rlpyt.utils.launching.affinity import encode_affinity, quick_affinity_code
from rlpyt.utils.launching.exp_launcher import run_experiments
from rlpyt.utils.launching.variant import VariantLevel, make_variants

# Either manually set the resources for the experiment:
affinity_code = encode_affinity(
    n_cpu_core=2,
    n_gpu=0,
    # hyperthread_offset=8,  # if auto-detect doesn't work, number of CPU cores
    # n_socket=1,  # if auto-detect doesn't work, can force (or force to 1)
    cpu_per_run=1,
    set_affinity=True,  # it can help to restrict workers to individual CPUs
)
# Or try an automatic one, but results may vary:
# affinity_code = quick_affinity_code(n_parallel=None, use_gpu=True)

runs_per_setting = 2
experiment_title = "example_6"
variant_levels = list()

# Within a variant level, list each combination explicitly.
learning_rate = [7e-4, 1e-3]
batch_B = [16, 32]
values = list(zip(learning_rate, batch_B))
dir_names = ["example6_{}lr_{}B".format(*v) for v in values]
Example #15
from rlpyt.utils.launching.affinity import encode_affinity
from rlpyt.utils.launching.exp_launcher import run_experiments
from rlpyt.utils.launching.variant import make_variants, VariantLevel

script = "rlpyt/experiments/scripts/mujoco/pg/train/mujoco_ppo_serial.py"
affinity_code = encode_affinity(
    n_cpu_core=4,
    n_gpu=4,
    # contexts_per_gpu=1,
    # hyperthread_offset=24,
    # n_socket=2,
    # cpu_per_run=2,
)
runs_per_setting = 4
default_config_key = "ppo_1M_serial"
experiment_title = "ppo_mujoco_v3_serial_hc_tl"
# variant_levels_1M = list()
variant_levels_3M = list()

# n_steps = [1e6]
# values = list(zip(n_steps))
# dir_names = ["1M"]
# keys = [("runner", "n_steps")]
# variant_levels_1M.append(VariantLevel(keys, values, dir_names))

bootstrap_tls = [True]
values = list(zip(bootstrap_tls))
dir_names = ["bootstrap_timelimit"]
keys = [("algo", "bootstrap_timelimit")]
variant_levels_3M.append(VariantLevel(keys, values, dir_names))
Example #16
from rlpyt.utils.launching.affinity import encode_affinity
from rlpyt.utils.launching.exp_launcher import run_experiments
from rlpyt.utils.launching.variant import make_variants, VariantLevel

affinity_code = encode_affinity(
    n_cpu_core=2,
    n_gpu=0,
    hyperthread_offset=2,
    n_socket=1,
    cpu_per_run=1,
)
runs_per_setting = 2
variant_levels = list()

env_ids = ["Hopper-v2"]  # , "Swimmer-v3"]
values = list(zip(env_ids))
dir_names = ["env_{}".format(*v) for v in values]
keys = [("env", "id")]
variant_levels.append(VariantLevel(keys, values, dir_names))

variants, log_dirs = make_variants(*variant_levels)

default_config_key = "ddpg_from_td3_1M_serial"
script = "rlpyt/experiments/scripts/mujoco/qpg/train/mujoco_ddpg_serial.py"
experiment_title = "ddpg_mujoco"

run_experiments(
    script=script,
    affinity_code=affinity_code,
    experiment_title=experiment_title,
    runs_per_setting=runs_per_setting,
Example #17
from rlpyt.utils.launching.affinity import encode_affinity
from rlpyt.utils.launching.exp_launcher import run_experiments
from rlpyt.utils.launching.variant import make_variants, VariantLevel

script = "rlpyt/experiments/scripts/dm_control/qpg/sac/train/dm_control_sac.py"
affinity_code = encode_affinity(
    n_cpu_core=20,
    n_gpu=4,
    contexts_per_gpu=1,
    n_socket=2,
)

runs_per_setting = 1
default_config_key = "sac_pixels_clothv8"
experiment_title = "sac_dm_control_pixels_clothv8"
variant_levels = list()

domain = ['cloth_v8']
task = ['easy']
values = list(zip(domain, task))
dir_names = ["env_{}_{}".format(*v) for v in values]
keys = [('env', 'domain'), ('env', 'task')]
variant_levels.append(VariantLevel(keys, values, dir_names))

modes = ['corners', 'border', '3x3', '9x9']
values = list(zip(modes))
dir_names = ['mode_{}'.format(*v) for v in values]
keys = [('env', 'task_kwargs', 'mode')]
variant_levels.append(VariantLevel(keys, values, dir_names))

#distance_weight = [0.0, 2.0]
Example #18
from rlpyt.utils.launching.affinity import encode_affinity
from rlpyt.utils.launching.exp_launcher import run_experiments
from rlpyt.utils.launching.variant import make_variants, VariantLevel

script = "rlpyt/experiments/scripts/atari/dqn/train/atari_dqn_async_cpu.py"
affinity_code = encode_affinity(
    n_cpu_core=24,
    n_gpu=8,
    async_sample=True,
    sample_gpu_per_run=0,
    gpu_per_run=1,
    hyperthread_offset=24,
    # n_socket=2,
)
runs_per_setting = 2
experiment_title = "atari_dqn_async_cpu"
variant_levels = list()

games = ["pong", "seaquest", "qbert", "chopper_command"]
values = list(zip(games))
dir_names = ["{}".format(*v) for v in values]
keys = [("env", "game")]
variant_levels.append(VariantLevel(keys, values, dir_names))

variants, log_dirs = make_variants(*variant_levels)

default_config_key = "cpu"

run_experiments(
    script=script,
    affinity_code=affinity_code,
Example #19
from rlpyt.utils.launching.affinity import encode_affinity
from rlpyt.utils.launching.exp_launcher import run_experiments
from rlpyt.utils.launching.variant import make_variants, VariantLevel

default_config_key = "td3_1M_serial"
script = "rlpyt/experiments/scripts/mujoco/qpg/train/mujoco_td3_serial.py"
experiment_title = "td3_mujoco_v3"

affinity_code = encode_affinity(
    n_cpu_core=4,
    n_gpu=4,
    hyperthread_offset=20,
    n_socket=1,
    cpu_per_run=1,
    contexts_per_gpu=1,
)
runs_per_setting = 2
variant_levels_1M = list()
variant_levels_3M = list()

n_steps = [3e6]
values = list(zip(n_steps))
dir_names = ["3M"]
keys = [("runner", "n_steps")]
variant_levels_3M.append(VariantLevel(keys, values, dir_names))

n_steps = [1e6]
values = list(zip(n_steps))
dir_names = ["1M"]
keys = [("runner", "n_steps")]
variant_levels_1M.append(VariantLevel(keys, values, dir_names))
Example #20
from rlpyt.utils.launching.affinity import encode_affinity
from rlpyt.utils.launching.exp_launcher import run_experiments
from rlpyt.utils.launching.variant import make_variants, VariantLevel

affinity_code = encode_affinity(
    n_cpu_core=16,
    n_gpu=8,
    contexts_per_gpu=2,
    hyperthread_offset=24,
    n_socket=2,
)
runs_per_setting = 3
variant_levels = list()

env_ids = [
    "Hopper-v3", "HalfCheetah-v3", "Walker2d-v3", "Ant-v3", "Humanoid-v3"
]
values = list(zip(env_ids))
dir_names = ["env_{}".format(*v) for v in values]
keys = [("env", "id")]
variant_levels.append(VariantLevel(keys, values, dir_names))

variants, log_dirs = make_variants(*variant_levels)

default_config_key = "ddpg_from_td3_1M_serial"
script = "rlpyt/experiments/scripts/mujoco/qpg/train/mujoco_ddpg_serial.py"
experiment_title = "ddpg_mujoco"

run_experiments(
    script=script,
    affinity_code=affinity_code,
Example #21
from rlpyt.utils.launching.affinity import encode_affinity
from rlpyt.utils.launching.exp_launcher import run_experiments
from rlpyt.utils.launching.variant import make_variants, VariantLevel
import ul_gen
import os

affinity_code = encode_affinity(
    n_cpu_core=4,
    n_gpu=0,
    # hyperthread_offset=20,
    contexts_per_gpu=0,
    n_socket=1
    # cpu_per_run=2,
)

runs_per_setting = 3

variant_levels = list()

tasks = ['gym:CartPole-v1']
values = list(zip(tasks))
dir_names = ["{}".format(*v) for v in values]
keys = [("env", "id")]
variant_levels.append(VariantLevel(keys, values, dir_names))

variants, log_dirs = make_variants(*variant_levels)

print("Variants", variants)
print("Log_dirs", log_dirs)

script = "launch/train_discrete_sac.py"
Example #22
from rlpyt.utils.launching.affinity import encode_affinity
from rlpyt.utils.launching.exp_launcher import run_experiments
from rlpyt.utils.launching.variant import make_variants, VariantLevel

script = "rlpyt/experiments/scripts/atari/dqn/train/atari_dqn_cpu.py"
affinity_code = encode_affinity(
    n_cpu_core=4,
    n_gpu=1,
    hyperthread_offset=8,
    n_socket=1,
    # cpu_per_run=2,
)
runs_per_setting = 2
experiment_title = "atari_dqn_basic"
variant_levels = list()

games = ["pong", "seaquest", "qbert", "chopper_command"]
values = list(zip(games))
dir_names = ["{}".format(*v) for v in values]
keys = [("env", "game")]
variant_levels.append(VariantLevel(keys, values, dir_names))

variants, log_dirs = make_variants(*variant_levels)

default_config_key = "dqn"

run_experiments(
    script=script,
    affinity_code=affinity_code,
    experiment_title=experiment_title,
    runs_per_setting=runs_per_setting,
Example #23
from rlpyt.utils.launching.affinity import encode_affinity
from rlpyt.utils.launching.exp_launcher import run_experiments
from rlpyt.utils.launching.variant import make_variants, VariantLevel

script = "rlpyt/experiments/scripts/dm_control/qpg/sac/train/dm_control_sac_autoreg.py"
affinity_code = encode_affinity(
    n_cpu_core=16,
    n_gpu=4,
    contexts_per_gpu=2,
)

runs_per_setting = 2
default_config_key = "sac_state_clothv7"
experiment_title = "sac_dm_control_state_clothv7"
variant_levels = list()

modes = ['corners', 'border', 'inner_border', '3x3', '5x5', '9x9']
values = list(zip(modes))
dir_names = ['mode_{}'.format(*v) for v in values]
keys = [('env', 'task_kwargs', 'mode')]
variant_levels.append(VariantLevel(keys, values, dir_names))

distance_weight = [0.0, 2.0]
values = list(zip(distance_weight))
dir_names = ['distance_weight_{}'.format(*v) for v in values]
keys = [('env', 'task_kwargs', 'distance_weight')]
variant_levels.append(VariantLevel(keys, values, dir_names))

variants, log_dirs = make_variants(*variant_levels)
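# make_variants crosses the two levels above: 6 modes x 2 distance weights = 12 variants.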
Example #24
from rlpyt.utils.launching.affinity import encode_affinity
from rlpyt.utils.launching.exp_launcher import run_experiments
from rlpyt.utils.launching.variant import make_variants, VariantLevel

script = "rlpyt/experiments/scripts/atari/dqn/train/football_dqn_async_gpu.py"
affinity_code = encode_affinity(
    n_cpu_core=24,
    n_gpu=8,
    async_sample=True,
    sample_gpu_per_run=2,
    gpu_per_run=2,
    # hyperthread_offset=24,
    # optim_sample_share_gpu=True,
    # n_socket=2,
)
runs_per_setting = 2
experiment_title = "atari_dqn_async_gpu"
variant_levels = list()

games = ["pong"]  # , "seaquest", "qbert", "chopper_command"]
values = list(zip(games))
dir_names = ["{}".format(*v) for v in values]
keys = [("env", "game")]
variant_levels.append(VariantLevel(keys, values, dir_names))

priorities = [False, True]
values = list(zip(priorities))
dir_names = ["pri_{}".format(*v) for v in values]
keys = [("algo", "prioritized_replay")]
variant_levels.append(VariantLevel(keys, values, dir_names))
Example #25
from rlpyt.utils.launching.affinity import encode_affinity
from rlpyt.utils.launching.exp_launcher import run_experiments
from rlpyt.utils.launching.variant import make_variants, VariantLevel

script = "rlpyt/experiments/scripts/dm_control/qpg/sac/train/dm_control_sac.py"
affinity_code = encode_affinity(
    n_cpu_core=20,
    n_gpu=1,
    contexts_per_gpu=1,
)

runs_per_setting = 4
default_config_key = "sac_pixels_cloth_corner"
experiment_title = "pixels_cloth_point_rolled_back_lower_starting"
variant_levels = list()

domain = ['cloth_point']
task = ['easy']
values = list(zip(domain, task))
dir_names = ["env_{}_{}".format(*v) for v in values]
keys = [('env', 'domain'), ('env', 'task')]
variant_levels.append(VariantLevel(keys, values, dir_names))

model_cls = ['PiConvModel']  #, 'GumbelPiConvModel']
random_location = [True]  #, False]
sac_module = ['sac_v2']  #, 'sac_v2_generic']
sac_agent_module = ['sac_agent_v2']  #, 'sac_agent_v2_generic']
state_keys = [['location', 'pixels']]  #, ['pixels']]
values = list(
    zip(model_cls, random_location, sac_module, sac_agent_module, state_keys))
dir_names = ["model_cls_{}_rnd_loc_{}".format(*v) for v in values]
Example #26
from rlpyt.utils.launching.affinity import encode_affinity
from rlpyt.utils.launching.exp_launcher import run_experiments
from rlpyt.utils.launching.variant import VariantLevel, make_variants

script = "rlpyt/experiments/scripts/atari/pg/train/atari_ff_a2c_gpu.py"
affinity_code = encode_affinity(n_cpu_core=16,
                                n_gpu=4,
                                hyperthread_offset=20,
                                n_socket=2
                                # cpu_per_run=2,
                                )
runs_per_setting = 2
experiment_title = "atari_ff_a2c_basic"
variant_levels = list()

games = ["pong", "seaquest", "qbert", "chopper_command"]
values = list(zip(games))
dir_names = ["{}".format(*v) for v in values]
keys = [("env", "game")]
variant_levels.append(VariantLevel(keys, values, dir_names))

variants, log_dirs = make_variants(*variant_levels)

default_config_key = "0"

run_experiments(
    script=script,
    affinity_code=affinity_code,
    experiment_title=experiment_title,
    runs_per_setting=runs_per_setting,
    variants=variants,
Example #27
from rlpyt.utils.launching.affinity import encode_affinity
from rlpyt.utils.launching.exp_launcher import run_experiments
from rlpyt.utils.launching.variant import VariantLevel, make_variants

script = "rlpyt/experiments/scripts/atari/dqn/train/atari_r2d1_gpu.py"
affinity_code = encode_affinity(
    n_cpu_core=12,
    n_gpu=1,
    hyperthread_offset=20,
    n_socket=1,
)
runs_per_setting = 2
experiment_title = "atari_r2d1_long_4tr"
variant_levels = list()

games = ["gravitar"]
values = list(zip(games))
dir_names = ["{}".format(*v) for v in values]
keys = [("env", "game")]
variant_levels.append(VariantLevel(keys, values, dir_names))

variants, log_dirs = make_variants(*variant_levels)

default_config_key = "r2d1_long_4tr"

run_experiments(
    script=script,
    affinity_code=affinity_code,
    experiment_title=experiment_title,
    runs_per_setting=runs_per_setting,
    variants=variants,