Example #1
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    if slot_affinity_code == 'None':  # No affinity code given; build a default one.
        # `affinity_code` is assumed to be a module-level default in the source script.
        slot_affinity_code = prepend_run_slot(0, affinity_code)
    affinity = affinity_from_code(slot_affinity_code)

    config = configs[config_key]

    # load variant of experiment (there may not be a variant, though)
    variant = load_variant(log_dir)
    config = update_config(config, variant)

    sampler = CpuSampler(
        EnvCls=make_env,
        env_kwargs={},
        CollectorCls=CpuResetCollector,
        **config["sampler"]
    )
    algo = PPO(optim_kwargs=config["optim"], **config["algo"])
    agent = MujocoFfAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = config["env"]["id"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
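These build_and_train scripts are normally invoked with positional command-line arguments (slot_affinity_code, log_dir, run_ID, config_key). As a point of reference, a minimal entry point in the usual rlpyt style might look like the following sketch (not part of the example above):

import sys

if __name__ == "__main__":
    # Forward the CLI arguments positionally: slot_affinity_code, log_dir, run_ID, config_key.
    build_and_train(*sys.argv[1:])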
Example #2
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    assert isinstance(affinity, list)  # One for each GPU.
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)

    sampler = GpuSampler(
        EnvCls=AtariEnv,
        env_kwargs=config["env"],
        CollectorCls=GpuWaitResetCollector,
        TrajInfoCls=AtariTrajInfo,
        **config["sampler"]
    )
    algo = A2C(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariFfAgent(model_kwargs=config["model"], **config["agent"])
    runner = SyncRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = config["env"]["game"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
Example #3
def build_and_train(
        slot_affinity_code="0slt_1gpu_1cpu",
        log_dir="test",
        run_ID="0",
        config_key="atari_ats",
        ):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)

    pprint.pprint(config)

    algo = AugmentedTemporalSimilarity(
        optim_kwargs=config["optim"],
        encoder_kwargs=config["encoder"],
        **config["algo"]
    )
    runner = UnsupervisedLearning(
        algo=algo,
        affinity=affinity,
        **config["runner"]
    )
    name = config["name"]
    with logger_context(log_dir, run_ID, name, config,
            snapshot_mode="last"):
        runner.train()
Example #4
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)

    sampler = CpuSampler(
        EnvCls=gym_make,
        env_kwargs=config["env"],
        CollectorCls=CpuResetCollector,
        **config["sampler"]
    )
    algo = PPO(optim_kwargs=config["optim"], **config["algo"])
    agent = MujocoFfAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        seed=int(run_ID) * 1000,
        **config["runner"]
    )
    name = config["env"]["id"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
Example #5
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)

    sampler = AsyncSerialSampler(
        EnvCls=gym_make,
        env_kwargs=config["env"],
        eval_env_kwargs=config["env"],
        CollectorCls=DbCpuResetCollector,
        **config["sampler"]
    )
    algo = DDPG(optim_kwargs=config["optim"], **config["algo"])
    agent = DdpgAgent(**config["agent"])
    runner = AsyncRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = "async_ddpg_" + config["env"]["id"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
Example #6
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    print('Config', config)

    if 'pixel_wrapper_kwargs' in config['env']:
        info_keys = config.get('info_keys', None)
        state_keys = config.get('state_keys', None)
        init_namedtuples(info_keys=info_keys, state_keys=state_keys)

    sampler = CpuSampler(EnvCls=DMControlEnv,
                         env_kwargs=config["env"],
                         CollectorCls=CpuResetCollector,
                         eval_env_kwargs=config["env"],
                         **config["sampler"])
    algo = SAC(optim_kwargs=config["optim"], **config["algo"])
    agent = SacAgent(**config["agent"])
    runner = MinibatchRlEval(algo=algo,
                             agent=agent,
                             sampler=sampler,
                             affinity=affinity,
                             **config["runner"])
    name = "sac_{}_{}".format(config['env']['domain'], config['env']['task'])

    with logger_context(log_dir,
                        run_ID,
                        name,
                        log_params=config,
                        snapshot_mode='last'):
        runner.train()
Example #7
def build_and_train(slot_affinity_code, log_dir, run_ID):
    # (Or load from a central store of configs.)
    config = dict(
        env=dict(game="pong"),
        algo=dict(learning_rate=7e-4),
        sampler=dict(batch_B=16),
    )

    affinity = affinity_from_code(slot_affinity_code)
    variant = load_variant(log_dir)
    # global config  # Removed: config is assigned locally above, so a global statement is invalid here.
    config = update_config(config, variant)

    sampler = GpuSampler(
        EnvCls=AtariEnv,
        env_kwargs=config["env"],
        CollectorCls=GpuWaitResetCollector,
        batch_T=5,
        # batch_B=16,  # Get from config.
        max_decorrelation_steps=400,
        **config["sampler"])
    algo = A2C(**config["algo"])  # Run with defaults.
    agent = AtariFfAgent()
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        n_steps=50e6,
        log_interval_steps=1e5,
        affinity=affinity,
    )
    name = "a2c_" + config["env"]["game"]
    log_dir = "example_6"
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
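The load_variant / update_config pair used in most of these examples merges a variant dictionary (written by the launcher) into the base config. A small sketch of the intended behavior, assuming update_config performs a recursive dict merge as in rlpyt.utils.launching.variant:

# Only the leaves present in the variant override the base config; other entries are kept.
base = dict(
    algo=dict(learning_rate=7e-4, entropy_loss_coeff=0.01),
    sampler=dict(batch_B=16, batch_T=5),
)
variant = dict(algo=dict(learning_rate=1e-3))  # e.g. loaded from the run's variant file
merged = update_config(base, variant)
assert merged["algo"]["learning_rate"] == 1e-3
assert merged["sampler"]["batch_B"] == 16  # unchanged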
Example #8
def build_and_train(
    slot_affinity_code="0slt_1gpu_1cpu",
    log_dir="test",
    run_ID="0",
    config_key="sac_with_ul",
):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)

    pprint.pprint(config)

    sampler = SerialSampler(
        EnvCls=make,
        env_kwargs=config["env"],
        CollectorCls=CpuResetCollector,
        # TrajInfoCls=AtariTrajInfo,
        eval_env_kwargs=config["env"],  # Same args!
        **config["sampler"])
    algo = SacWithUl(**config["algo"])
    agent = SacAgent(conv_kwargs=config["conv"],
                     fc1_kwargs=config["fc1"],
                     pi_model_kwargs=config["pi_model"],
                     q_model_kwargs=config["q_model"],
                     **config["agent"])
    runner = MinibatchRlEvalEnvStep(algo=algo,
                                    agent=agent,
                                    sampler=sampler,
                                    affinity=affinity,
                                    frame_skip=config["env"]["frame_skip"],
                                    **config["runner"])
    name = config["env"]["domain_name"] + "_" + config["env"]["task_name"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
Example #9
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    print('Variant', variant)
    config = update_config(config, variant)

    sampler = SerialSampler(EnvCls=DMControlEnv,
                            env_kwargs=config["env"],
                            CollectorCls=CpuResetCollector,
                            eval_env_kwargs=config["eval_env"],
                            **config["sampler"])
    algo = SAC(optim_kwargs=config["optim"], **config["algo"])
    agent = SacAgent(**config["agent"])
    runner = MinibatchRlEval(algo=algo,
                             agent=agent,
                             sampler=sampler,
                             affinity=affinity,
                             **config["runner"])
    name = "sac_{}_{}".format(config['env']['domain'], config['env']['task'])

    with logger_context(log_dir,
                        run_ID,
                        name,
                        log_params=config,
                        snapshot_mode='last'):
        runner.train()
Example #10
def build_and_train(game="TowerBuilding", run_ID=0, cuda_idx=None):
    # Either manually set the resources for the experiment:
    affinity_code = encode_affinity(
        n_cpu_core=2,
        n_gpu=1,
        # hyperthread_offset=8,  # if auto-detect doesn't work, number of CPU cores
        # n_socket=1,  # if auto-detect doesn't work, can force (or force to 1)
        run_slot=0,
        cpu_per_run=1,
        set_affinity=True,  # it can help to restrict workers to individual CPUs
    )
    affinity = affinity_from_code(affinity_code)
    config = configs["r2d1"]
    config["env"]["game"] = game
    config["eval_env"]["game"] = config["env"]["game"]

    sampler = AsyncGpuSampler(EnvCls=voxel_make,
                              env_kwargs=config["env"],
                              CollectorCls=DbGpuResetCollector,
                              TrajInfoCls=AtariTrajInfo,
                              eval_env_kwargs=config["eval_env"],
                              **config["sampler"])
    algo = R2D1(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariR2d1Agent(model_kwargs=config["model"], **config["agent"])
    runner = AsyncRlEval(algo=algo,
                         agent=agent,
                         sampler=sampler,
                         affinity=affinity,
                         **config["runner"])
    config = dict(game=game)
    name = "r2d1_" + game
    log_dir = "tower_building"
    with logger_context(log_dir, run_ID, name, config, snapshot_mode="last"):
        runner.train()
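The affinity helpers used above can also be exercised on their own to see what a given code decodes to. A quick sketch (exact contents depend on the machine and rlpyt version):

from rlpyt.utils.launching.affinity import (
    encode_affinity, prepend_run_slot, affinity_from_code)

# Encode hardware resources into a string code, assign it run slot 0,
# then decode it into the structure consumed by the runner and sampler.
code = encode_affinity(n_cpu_core=2, n_gpu=1, cpu_per_run=1, set_affinity=True)
slot_code = prepend_run_slot(0, code)
affinity = affinity_from_code(slot_code)
print(slot_code)  # slot-prefixed affinity code string
print(affinity)   # includes fields such as "all_cpus" and "cuda_idx" (used by launch_experiment below)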
Example #11
def build_and_train(
    slot_affinity_code="0slt_1gpu_1cpu",
    log_dir="test",
    run_ID="0",
    config_key="ppo_ul_16env",
):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)

    pprint.pprint(config)

    sampler = SerialSampler(
        EnvCls=AtariEnv84,
        env_kwargs=config["env"],
        CollectorCls=CpuResetCollector,
        TrajInfoCls=AtariTrajInfo,
        eval_env_kwargs=config["env"],  # Same args!
        **config["sampler"])
    algo = PpoUl(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariPgAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRl(algo=algo,
                         agent=agent,
                         sampler=sampler,
                         affinity=affinity,
                         **config["runner"])
    name = config["env"]["game"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
Example #12
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    
    sampler = GpuSampler(
        EnvCls=gym.make,
        env_kwargs=config["env"],
        eval_env_kwargs=config["eval_env"],
        **config["sampler"]
    )
    algo = DiscreteSACAE(
        optim_kwargs=config["optim"],
        ae_optim_kwargs=config["ae_optim"],
        **config["algo"]
    )
    agent = DiscreteSacAEAgent(
        **config["agent"],
        encoder_kwargs=config["encoder"],
        model_kwargs=config["actor"],
        critic_kwargs=config["critic"]
    )
    runner = MinibatchRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = config["env"]["id"]
    
    with logger_context(log_dir, run_ID, name, config, snapshot_mode='last'):
        runner.train()
Example #13
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)

    eval_env_config = config["env"].copy()
    eval_env_config["start_level"] = config["env"]["num_levels"] + 100
    eval_env_config["num_levels"] = 100
    sampler = GpuSampler(EnvCls=make,
                         env_kwargs=config["env"],
                         CollectorCls=GpuResetCollector,
                         eval_env_kwargs=eval_env_config,
                         **config["sampler"])

    if config["checkpoint"]:
        model_state_dict = torch.load(config["checkpoint"])
        print("Loaded.")
    else:
        model_state_dict = None

    algo = PPO_AUG_VAE(optim_kwargs=config["optim"], **config["algo"])
    agent = RADPgVaeAgent(ModelCls=RadVaePolicy,
                          model_kwargs=config["model"],
                          initial_model_state_dict=model_state_dict,
                          **config["agent"])
    runner = MinibatchRlEval(algo=algo,
                             agent=agent,
                             sampler=sampler,
                             affinity=affinity,
                             **config["runner"])
    name = config["env"]["id"]

    with logger_context(log_dir, run_ID, name, config, snapshot_mode='last'):
        runner.train()
Example #14
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)

    sampler = CpuSampler(
        EnvCls=AtariEnv,
        env_kwargs=config["env"],
        CollectorCls=CpuWaitResetCollector,
        TrajInfoCls=AtariTrajInfo,
        **config["sampler"]
    )
    algo = Dreamer(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariDreamerAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = config["env"]["game"] + str(config["sampler"]["batch_T"])
    with logger_context(log_dir, run_ID, name, config):  # Might have to flatten config
        runner.train()
Example #15
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    config["algo_name"] = 'A2OC'
    t_env = pomdp_interface(**config["env"])
    config["algo"]["discount"] = t_env.discount

    sampler = GpuSampler(
        EnvCls=pomdp_interface,
        env_kwargs=config["env"],
        **config["sampler"]
    )
    algo = A2OC(optim_kwargs=config["optim"], **config["algo"])
    agent = PomdpOcFfAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = config["env"]["id"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
Example #16
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    config["eval_env"]["game"] = config["env"]["game"]

    sampler = GpuSampler(
        EnvCls=AtariEnv,
        env_kwargs=config["env"],
        CollectorCls=GpuWaitResetCollector,
        TrajInfoCls=AtariTrajInfo,
        eval_env_kwargs=config["eval_env"],
        **config["sampler"]
    )
    algo = CategoricalDQN(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariCatDqnAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = config["env"]["game"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
Example #17
def launch_experiment(script, run_slot, affinity_code, log_dir, variant,
                      run_ID, args):
    """Launches one learning run using ``subprocess.Popen()`` to call the
    python script.  Calls the script as:
    ``python {script} {slot_affinity_code} {log_dir} {run_ID} {*args}``
    If ``affinity_code["all_cpus"]`` is provided, then the call is prepended
    with ``tasket -c ..`` and the listed cpus (this is the most sure way to
    keep the run limited to these CPU cores).  Also saves the `variant` file.
    Returns the process handle, which can be monitored.
    """
    slot_affinity_code = prepend_run_slot(run_slot, affinity_code)
    affinity = affinity_from_code(slot_affinity_code)
    call_list = list()
    if isinstance(affinity, dict) and affinity.get("all_cpus", False):
        cpus = ",".join(str(c) for c in affinity["all_cpus"])
    elif isinstance(affinity, list) and affinity[0].get("all_cpus", False):
        cpus = ",".join(str(c) for aff in affinity for c in aff["all_cpus"])
    else:
        cpus = ()
    if cpus:
        call_list += ["taskset", "-c",
                      cpus]  # PyTorch obeys better than just psutil.
    call_list += ["python", script, slot_affinity_code, log_dir, str(run_ID)]
    call_list += [str(a) for a in args]
    save_variant(variant, log_dir)
    print("\ncall string:\n", " ".join(call_list))
    p = subprocess.Popen(call_list)
    return p
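launch_experiment is usually not called by hand: a higher-level launch script builds the affinity code and the variants, and run_experiments then spawns one such subprocess per run. A sketch in the style of rlpyt's launch scripts; the script path, experiment title, and config key here are hypothetical:

from rlpyt.utils.launching.affinity import encode_affinity
from rlpyt.utils.launching.exp_launcher import run_experiments
from rlpyt.utils.launching.variant import make_variants, VariantLevel

script = "experiments/scripts/atari/train/atari_ff_a2c.py"  # hypothetical training script
affinity_code = encode_affinity(n_cpu_core=8, n_gpu=2)

# One variant level: sweep over games; each tuple in `values` maps onto `keys`.
games = ["pong", "seaquest"]
variant_levels = [VariantLevel(keys=[("env", "game")],
                               values=list(zip(games)),
                               dir_names=games)]
variants, log_dirs = make_variants(*variant_levels)

run_experiments(
    script=script,
    affinity_code=affinity_code,
    experiment_title="atari_a2c_sweep",   # hypothetical
    runs_per_setting=2,
    variants=variants,
    log_dirs=log_dirs,
    common_args=("atari_a2c",),           # forwarded to the script, e.g. as config_key
)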
Example #18
File: train_rad.py  Project: jhejna/ul_gen
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)

    eval_env_config = config["env"].copy()
    eval_env_config["start_level"] = config["env"]["num_levels"] + 100
    eval_env_config["num_levels"] = 100
    sampler = GpuSampler(
        EnvCls=make,
        env_kwargs=config["env"],
        CollectorCls=GpuResetCollector,
        eval_env_kwargs=eval_env_config,
        **config["sampler"]
    )
    algo = PPO(optim_kwargs=config["optim"], **config["algo"])
    agent = RADPgAgent(ModelCls=RADModel, model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = config["env"]["id"]

    with logger_context(log_dir, run_ID, name, config, snapshot_mode='last'):
        runner.train()
Example #19
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    config["eval_env"]["game"] = config["env"]["game"]

    sampler = AsyncGpuSampler(
        EnvCls=AtariEnv,
        env_kwargs=config["env"],
        CollectorCls=DbGpuResetCollector,
        TrajInfoCls=AtariTrajInfo,
        eval_env_kwargs=config["eval_env"],
        **config["sampler"]
    )
    algo = R2D1(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariR2d1Agent(model_kwargs=config["model"], **config["agent"])
    runner = AsyncRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = "async_gpu_" + config["env"]["game"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
Example #20
def build_and_train(
        slot_affinity_code="0slt_1gpu_1cpu",
        log_dir="test",
        run_ID="0",
        config_key="dmlab_pc",
        ):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)

    pprint.pprint(config)

    algo = PixelControl(
        optim_kwargs=config["optim"],
        EncoderCls=DmlabAtcEncoderModel,
        encoder_kwargs=config["encoder"],
        pixel_control_model_kwargs=config["pixel_control_model"],
        **config["algo"]
    )
    runner = UnsupervisedLearning(
        algo=algo,
        affinity=affinity,
        **config["runner"]
    )
    name = config["name"]
    with logger_context(log_dir, run_ID, name, config,
            snapshot_mode="last"):
        runner.train()
Example #21
def build_and_train(
        slot_affinity_code="0slt_0gpu_1cpu_1cpr",
        log_dir="test",
        run_ID="0",
        config_key="LSTM",
        ):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)

    pprint.pprint(config)

    sampler = CpuSampler(
        EnvCls=safety_gym_make,
        env_kwargs=config["env"],
        TrajInfoCls=SafetyGymTrajInfo,
        **config["sampler"]
    )
    algo = CppoPID(**config["algo"])
    agent = CppoLstmAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"],
    )
    name = "cppo_" + config["env"]["id"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
Example #22
def launch_experiment(
    script,
    run_slot,
    affinity_code,
    log_dir,
    variant,
    run_ID,
    args,
    python_executable=None,
    set_egl_device=False,
):
    """Launches one learning run using ``subprocess.Popen()`` to call the
    python script.  Calls the script as:
    ``python {script} {slot_affinity_code} {log_dir} {run_ID} {*args}``

    If ``affinity_code["all_cpus"]`` is provided, then the call is prepended
    with ``tasket -c ..`` and the listed cpus (this is the most sure way to
    keep the run limited to these CPU cores).  Also saves the `variant` file.
    Returns the process handle, which can be monitored.
   
    Use ``set_egl_device=True`` to set an environment variable
    ``EGL_DEVICE_ID`` equal to the same value as the cuda index for the
    algorithm.  For example, can use with DMControl environment modified
    to look for this environment variable when selecting a GPU for headless
    rendering.
    """
    slot_affinity_code = prepend_run_slot(run_slot, affinity_code)
    affinity = affinity_from_code(slot_affinity_code)
    call_list = list()
    if isinstance(affinity, dict) and affinity.get("all_cpus", False):
        cpus = ",".join(str(c) for c in affinity["all_cpus"])
    elif isinstance(affinity, list) and affinity[0].get("all_cpus", False):
        cpus = ",".join(str(c) for aff in affinity for c in aff["all_cpus"])
    else:
        cpus = ()
    if cpus:
        call_list += ["taskset", "-c",
                      cpus]  # PyTorch obeys better than just psutil.
    py = python_executable if python_executable else sys.executable or "python"
    call_list += [py, script, slot_affinity_code, log_dir, str(run_ID)]
    call_list += [str(a) for a in args]
    save_variant(variant, log_dir)
    print("\ncall string:\n", " ".join(call_list))
    print(os.getcwd())
    if set_egl_device and affinity.get("cuda_idx", None) is not None:
        egl_device_id = str(affinity["cuda_idx"])
        egl_env = os.environ.copy()
        egl_env["EGL_DEVICE_ID"] = egl_device_id
        print(f"Assigning EGL_DEVICE_ID={egl_device_id}")
        p = subprocess.Popen(call_list, env=egl_env)
    else:
        p = subprocess.Popen(call_list)
    return p
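For a one-off run that needs headless GPU rendering, this variant can also be called directly. A minimal sketch; the script path, variant contents, and config key are hypothetical:

from rlpyt.utils.launching.affinity import encode_affinity

affinity_code = encode_affinity(n_cpu_core=4, n_gpu=1)
p = launch_experiment(
    script="experiments/scripts/dmc/train/dmc_sac_pixels.py",  # hypothetical path
    run_slot=0,
    affinity_code=affinity_code,
    log_dir="data/local/dmc_sac_test",
    variant=dict(env=dict(domain="cheetah", task="run")),
    run_ID=0,
    args=("sac_pixels",),  # extra positional args, e.g. the config_key
    set_egl_device=True,   # exports EGL_DEVICE_ID=<cuda_idx> to the child process
)
p.wait()  # launch_experiment returns the subprocess.Popen handle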
Example #23
def build_and_train(
    slot_affinity_code="0slt_1gpu_1cpu",
    log_dir="test",
    run_ID="0",
    config_key="ppo_16env",
    experiment_title="exp",
    snapshot_mode="none",
    snapshot_gap=None,
):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)

    # Hack: make the first part of the log_dir match the source of the pretrained model.
    model_base_dir = config["pretrain"]["model_dir"]
    if model_base_dir is not None:
        raw_log_dir = log_dir.split(experiment_title)[-1].lstrip(
            "/")  # get rid of ~/GitRepos/adam/rlpyt/data/local/<timestamp>/
        model_sub_dir = raw_log_dir.split("/RlFromUl/")[
            0]  # keep the UL part, which comes first
        config["agent"]["state_dict_filename"] = osp.join(
            model_base_dir, model_sub_dir, "run_0/params.pkl")
    pprint.pprint(config)

    sampler = AlternatingSampler(
        EnvCls=DmlabEnv,
        env_kwargs=config["env"],
        CollectorCls=GpuWaitResetCollector,
        # TrajInfoCls=AtariTrajInfo,
        # eval_env_kwargs=config["env"],  # Same args!
        **config["sampler"])
    algo = PPO(optim_kwargs=config["optim"], **config["algo"])
    agent = DmlabPgLstmAlternatingAgent(model_kwargs=config["model"],
                                        **config["agent"])
    runner = MinibatchRl(algo=algo,
                         agent=agent,
                         sampler=sampler,
                         affinity=affinity,
                         **config["runner"])
    name = config["env"]["level"]
    if snapshot_gap is not None:
        snapshot_gap = int(snapshot_gap)
    with logger_context(
            log_dir,
            run_ID,
            name,
            config,
            snapshot_mode=snapshot_mode,
            snapshot_gap=snapshot_gap,
    ):
        runner.train()
Example #24
def build_and_train(
    slot_affinity_code="0slt_1gpu_1cpu",
    log_dir="test",
    run_ID="0",
    config_key="serial_radsac",
    experiment_title="exp",
):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)

    # Hack: make the first part of the log_dir match the source of the pretrained model.
    model_base_dir = config["pretrain"]["model_dir"]
    if model_base_dir is not None:
        raw_log_dir = log_dir.split(experiment_title)[-1].lstrip(
            "/")  # get rid of ~/GitRepos/adam/rlpyt/data/local/<timestamp>/
        model_sub_dir = raw_log_dir.split("/RlFromUl/")[
            0]  # keep the UL part, which comes first
        pretrain_ID = config["pretrain"]["run_ID"]
        config["agent"]["state_dict_filename"] = osp.join(
            model_base_dir, model_sub_dir, f"run_{pretrain_ID}/params.pkl")

    pprint.pprint(config)

    sampler = SerialSampler(
        EnvCls=make,
        env_kwargs=config["env"],
        CollectorCls=CpuResetCollector,
        eval_env_kwargs=config["env"],  # Same args!
        **config["sampler"],
    )
    algo = RadSacFromUl(**config["algo"])
    agent = SacAgent(
        conv_kwargs=config["conv"],
        fc1_kwargs=config["fc1"],
        pi_model_kwargs=config["pi_model"],
        q_model_kwargs=config["q_model"],
        **config["agent"],
    )
    runner = MinibatchRlEvalEnvStep(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        frame_skip=config["env"]["frame_skip"],
        **config["runner"],
    )
    name = config["env"]["domain_name"] + "_" + config["env"]["task_name"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
Example #25
def run_task(vv, log_dir, exp_name):
    vv = update_env_kwargs(vv)
    run_ID = vv['seed']
    config_key = vv['config_key']
    slot_affinity_code = encode_affinity(
        n_cpu_core=20,
        n_gpu=2,
        n_socket=2,
        run_slot=0,
        set_affinity=True,  # it can help to restrict workers to individual CPUs
    )
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    config.update(**vv)
    # config["env"] = env_arg_dict[config['env_name']]
    vv['env_kwargs']['headless'] = True

    sac_module = 'rlpyt.algos.qpg.{}'.format(config['sac_module'])
    sac_agent_module = 'rlpyt.agents.qpg.{}'.format(config['sac_agent_module'])

    sac_module = importlib.import_module(sac_module)
    sac_agent_module = importlib.import_module(sac_agent_module)

    SAC = sac_module.SAC
    SacAgent = sac_agent_module.SacAgent

    if 'pixel_wrapper_kwargs' in config['env']:
        info_keys = config.get('info_keys', None)
        state_keys = config.get('state_keys', None)
        init_namedtuples(info_keys=info_keys, state_keys=state_keys)

    sampler = CpuSampler(
        EnvCls=SOFTGYM_ENVS[vv['env_name']],
        env_kwargs=vv['env_kwargs'],
        CollectorCls=CpuResetCollector,
        eval_env_kwargs=vv['env_kwargs'],
        **config["sampler"]
    )
    algo = SAC(optim_kwargs=config["optim"], **config["algo"])
    agent = SacAgent(**config["agent"])
    runner = MinibatchRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = "sac_{}".format(vv['env_name'])

    with logger_context(log_dir, run_ID, name, log_params=config, snapshot_mode='last'):
        runner.train()
Example #26
def build_and_train(
    slot_affinity_code="0slt_0gpu_4cpu_4cpr",
    log_dir="test",
    run_ID="0",
    config_key="sac_ul_compress",
):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    # variant = load_variant(log_dir)
    # config = update_config(config, variant)

    config["algo"]["min_steps_rl"] = 100
    config["algo"]["min_steps_ul"] = 150
    config["algo"]["replay_size"] = 1e4
    config["algo"]["batch_size"] = 64
    config["algo"]["ul_batch_size"] = 32
    config["runner"]["n_steps"] = 1e3
    config["runner"]["log_interval_steps"] = 1e2
    config["sampler"]["eval_n_envs"] = 1
    config["sampler"]["eval_max_steps"] = 500
    config["algo"]["stop_rl_conv_grad"] = True
    config["algo"]["ul_update_schedule"] = "cosine_8"

    pprint.pprint(config)

    sampler = SerialSampler(
        EnvCls=make,
        env_kwargs=config["env"],
        CollectorCls=CpuResetCollector,
        # TrajInfoCls=AtariTrajInfo,
        eval_env_kwargs=config["env"],  # Same args!
        **config["sampler"])
    algo = SacUl(**config["algo"])
    agent = SacWithUlAgent(conv_kwargs=config["conv"],
                           fc1_kwargs=config["fc1"],
                           pi_model_kwargs=config["pi_model"],
                           q_model_kwargs=config["q_model"],
                           **config["agent"])
    runner = MinibatchRlEvalEnvStep(algo=algo,
                                    agent=agent,
                                    sampler=sampler,
                                    affinity=affinity,
                                    frame_skip=config["env"]["frame_skip"],
                                    **config["runner"])
    name = config["env"]["domain_name"] + "_" + config["env"]["task_name"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
Example #27
def build_and_train(slot_affinity_code="0slt_0gpu_4cpu_4cpr",
                    log_dir="test",
                    run_ID="0",
                    config_key="ppo_ul_16env"):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    # variant = load_variant(log_dir)
    # config = update_config(config, variant)

    # config["sampler"]["batch_B"] = 4
    # config["sampler"]["batch_T"] = 5
    # config["runner"]["log_interval_steps"] = 100
    # config["runner"]["n_steps"] = 1000
    config["algo"]["ul_update_schedule"] = "constant_1"
    config["algo"]["min_steps_rl"] = 1e3
    config["algo"]["min_steps_ul"] = 200
    config["algo"]["max_steps_ul"] = 20e6
    config["model"]["stop_conv_grad"] = True
    config["sampler"]["max_decorrelation_steps"] = 0
    config["sampler"]["batch_B"] = 3
    config["sampler"]["batch_T"] = 20
    config["algo"]["ul_pri_alpha"] = 1.
    config["algo"]["ul_pri_n_step_return"] = 10
    config["algo"]["ul_replay_size"] = 900

    pprint.pprint(config)

    sampler = SerialSampler(
        EnvCls=AtariEnv84,
        env_kwargs=config["env"],
        CollectorCls=CpuResetCollector,
        TrajInfoCls=AtariTrajInfo,
        eval_env_kwargs=config["env"],  # Same args!
        **config["sampler"])
    algo = PpoUl(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariPgRlWithUlAgent(model_kwargs=config["model"],
                                 **config["agent"])
    runner = MinibatchRl(algo=algo,
                         agent=agent,
                         sampler=sampler,
                         affinity=affinity,
                         **config["runner"])
    name = config["env"]["game"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
Example #28
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)

    sampler = AlternatingSampler(EnvCls=ProcgenEnv,
                                 env_kwargs=config["env"],
                                 CollectorCls=GpuResetCollector,
                                 **config["sampler"])
    algo = PPO(optim_kwargs=config["optim"], **config["algo"])
    agent = ProcgenFfAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRl(algo=algo,
                         agent=agent,
                         sampler=sampler,
                         affinity=affinity,
                         **config["runner"])
    name = config["env"]["game"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
Example #29
def launch_experiment(script, run_slot, affinity_code, log_dir, variant,
                      run_ID, args):
    slot_affinity_code = prepend_run_slot(run_slot, affinity_code)
    affinity = affinity_from_code(slot_affinity_code)
    call_list = list()
    if isinstance(affinity, dict) and affinity.get("all_cpus", False):
        cpus = ",".join(str(c) for c in affinity["all_cpus"])
    elif isinstance(affinity, list) and affinity[0].get("all_cpus", False):
        cpus = ",".join(str(c) for aff in affinity for c in aff["all_cpus"])
    else:
        cpus = ()
    if cpus:
        call_list += ["taskset", "-c",
                      cpus]  # PyTorch obeys better than just psutil.
    call_list += ["python", script, slot_affinity_code, log_dir, str(run_ID)]
    call_list += [str(a) for a in args]
    save_variant(variant, log_dir)
    print("\ncall string:\n", " ".join(call_list))
    p = subprocess.Popen(call_list)
    return p
Example #30
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    # variant = load_variant(log_dir)
    # config = update_config(config, variant)

    sampler = CpuSampler(EnvCls=AtariEnv,
                         env_kwargs=config["env"],
                         CollectorCls=EpisodicLivesWaitResetCollector,
                         **config["sampler"])
    algo = A2C(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariLstmAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRl(algo=algo,
                         agent=agent,
                         sampler=sampler,
                         affinity=affinity,
                         **config["runner"])
    name = config["env"]["game"] + str(config["algo"]["entropy_loss_coeff"])
    with logger_context(log_dir, run_ID, name, config):
        runner.train()