Ejemplo n.º 1
0
def build_and_train(
    slot_affinity_code="0slt_1gpu_1cpu",
    log_dir="test",
    run_ID="0",
    config_key="ppo_16env",
    experiment_title="exp",
    snapshot_mode="none",
    snapshot_gap=None,
):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)

    # Hack that the first part of the log_dir matches the source of the model
    model_base_dir = config["pretrain"]["model_dir"]
    if model_base_dir is not None:
        raw_log_dir = log_dir.split(experiment_title)[-1].lstrip(
            "/")  # get rid of ~/GitRepos/adam/rlpyt/data/local/<timestamp>/
        model_sub_dir = raw_log_dir.split("/RlFromUl/")[
            0]  # keep the UL part, which comes first
        config["agent"]["state_dict_filename"] = osp.join(
            model_base_dir, model_sub_dir, "run_0/params.pkl")
    pprint.pprint(config)

    sampler = AlternatingSampler(
        EnvCls=DmlabEnv,
        env_kwargs=config["env"],
        CollectorCls=GpuWaitResetCollector,
        # TrajInfoCls=AtariTrajInfo,
        # eval_env_kwargs=config["env"],  # Same args!
        **config["sampler"])
    algo = PPO(optim_kwargs=config["optim"], **config["algo"])
    agent = DmlabPgLstmAlternatingAgent(model_kwargs=config["model"],
                                        **config["agent"])
    runner = MinibatchRl(algo=algo,
                         agent=agent,
                         sampler=sampler,
                         affinity=affinity,
                         **config["runner"])
    name = config["env"]["level"]
    if snapshot_gap is not None:
        snapshot_gap = int(snapshot_gap)
    with logger_context(
            log_dir,
            run_ID,
            name,
            config,
            snapshot_mode=snapshot_mode,
            snapshot_gap=snapshot_gap,
    ):
        runner.train()
Ejemplo n.º 2
0
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)

    sampler = AlternatingSampler(EnvCls=gym_make,
                                 env_kwargs=config["env"],
                                 **config["sampler"])
    algo = A2C(optim_kwargs=config["optim"], **config["algo"])
    agent = MujocoFfAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRl(algo=algo,
                         agent=agent,
                         sampler=sampler,
                         affinity=affinity,
                         **config["runner"])
    name = config["env"]["id"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
Ejemplo n.º 3
0
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)

    sampler = AlternatingSampler(EnvCls=ProcgenEnv,
                                 env_kwargs=config["env"],
                                 CollectorCls=GpuResetCollector,
                                 **config["sampler"])
    algo = PPO(optim_kwargs=config["optim"], **config["algo"])
    agent = ProcgenFfAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRl(algo=algo,
                         agent=agent,
                         sampler=sampler,
                         affinity=affinity,
                         **config["runner"])
    name = config["env"]["game"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
Ejemplo n.º 4
0
def build_and_train(
    slot_affinity_code="0slt_1gpu_1cpu",
    log_dir="test",
    run_ID="0",
    config_key="ppo_ul_16env",
    snapshot_mode="none",
    snapshot_gap=None,
):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)

    pprint.pprint(config)

    sampler = AlternatingSampler(
        EnvCls=DmlabEnv,
        env_kwargs=config["env"],
        CollectorCls=GpuWaitResetCollector,
        **config["sampler"]
    )
    algo = PpoUl(optim_kwargs=config["optim"], **config["algo"])
    agent = DmlabPgLstmAlternatingAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRl(
        algo=algo, agent=agent, sampler=sampler, affinity=affinity, **config["runner"]
    )
    name = config["env"]["level"]
    if snapshot_gap is not None:
        snapshot_gap = int(snapshot_gap)
    with logger_context(
        log_dir,
        run_ID,
        name,
        config,
        snapshot_mode=snapshot_mode,
        snapshot_gap=snapshot_gap,
    ):
        runner.train()