Code example #1
def build_and_train(game="pong", run_ID=0, cuda_idx=None):
    sampler = SerialSampler(
        EnvCls=AtariEnv,
        env_kwargs=dict(game=game),
        eval_env_kwargs=dict(game=game),
        batch_T=4,  # Four time-steps per sampler iteration (steps the collector takes per sampling loop).
        batch_B=1,  # Number of parallel environment instances.
        max_decorrelation_steps=0,
        eval_n_envs=10,
        eval_max_steps=int(10e3),
        eval_max_trajectories=5,
    )
    algo = DQN(min_steps_learn=1e3)  # Run with defaults.
    agent = AtariDqnAgent()  # Initialized inside the sampler.
    runner = MinibatchRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        n_steps=50e6,  # Total number of environment steps.
        log_interval_steps=1e3,  # Log every this many environment steps.
        affinity=dict(cuda_idx=cuda_idx),
    )
    config = dict(game=game)
    name = "dqn_" + game
    log_dir = "example_1"
    with logger_context(log_dir, run_ID, name, config, snapshot_mode="last"):
        runner.train()
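
This function is typically launched from the command line; a minimal entry-point sketch, modeled on rlpyt's example_1.py (import paths follow the rlpyt repository layout and may differ in forks; the argparse flags are illustrative):

from rlpyt.samplers.serial.sampler import SerialSampler
from rlpyt.envs.atari.atari_env import AtariEnv
from rlpyt.algos.dqn.dqn import DQN
from rlpyt.agents.dqn.atari.atari_dqn_agent import AtariDqnAgent
from rlpyt.runners.minibatch_rl import MinibatchRlEval
from rlpyt.utils.logging.context import logger_context


if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--game", help="Atari game", default="pong")
    parser.add_argument("--run_ID", help="run identifier (logging)", type=int, default=0)
    parser.add_argument("--cuda_idx", help="GPU to use", type=int, default=None)
    args = parser.parse_args()
    build_and_train(game=args.game, run_ID=args.run_ID, cuda_idx=args.cuda_idx)
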
Code example #2
File: example_1.py  Project: wwxFromTju/rlpyt
def build_and_train(game="pong", run_ID=0, cuda_idx=None):
    sampler = SerialSampler(
        EnvCls=AtariEnv,
        env_kwargs=dict(game=game),
        CollectorCls=ResetCollector,
        eval_env_kwargs=dict(game=game),
        batch_T=4,  # Four time-steps per sampler iteration.
        batch_B=1,
        max_decorrelation_steps=0,
        eval_n_envs=10,
        eval_max_steps=int(10e3),
        eval_max_trajectories=5,
    )
    algo = DQN(min_steps_learn=1e3)  # Run with defaults.
    agent = AtariDqnAgent()
    runner = MinibatchRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        n_steps=50e6,
        log_interval_steps=1e3,
        affinity=dict(cuda_idx=cuda_idx),
    )
    config = dict(game=game)
    name = "dqn_" + game
    log_dir = "example_1"
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
Code example #3
File: test_rlpyt.py  Project: nirbhayjm/rlpyt
def test_rlpyt_simple():
    """ partially copied from example 1 """
    game = "pong"
    run_ID = 0
    cuda_idx = None
    n_steps = 1
    sampler = SerialSampler(
        EnvCls=AtariEnv,
        TrajInfoCls=AtariTrajInfo,  # default traj info + GameScore
        env_kwargs=dict(game=game),
        eval_env_kwargs=dict(game=game),
        batch_T=4,  # Four time-steps per sampler iteration.
        batch_B=1,
        max_decorrelation_steps=0,
        eval_n_envs=10,
        eval_max_steps=int(10e3),
        eval_max_trajectories=5,
    )
    algo = DQN(min_steps_learn=1e3, replay_size=1e3)  # Small replay buffer to keep memory usage low in this test.
    agent = AtariDqnAgent()
    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        n_steps=n_steps,
        log_interval_steps=1e3,
        affinity=dict(cuda_idx=cuda_idx),
    )
    config = dict(game=game)
    name = "dqn_" + game
    log_dir = "test_example_1"
    with logger_context(log_dir, run_ID, name, config, snapshot_mode="last"):
        runner.train()
Code example #4
File: DQN.py  Project: SaharCarmel/football
def build_and_train(game="academy_empty_goal_close", run_ID=0, cuda_idx=None):
    sampler = SerialSampler(
        EnvCls=create_single_football_env,
        env_kwargs=dict(game=game),
        eval_env_kwargs=dict(game=game),
        batch_T=4,  # Four time-steps per sampler iteration.
        batch_B=1,
        max_decorrelation_steps=0,
        eval_n_envs=10,
        eval_max_steps=int(10e3),
        eval_max_trajectories=5,
    )
    algo = DQN(min_steps_learn=1e3)  # Run with defaults.
    agent = AtariDqnAgent()
    runner = MinibatchRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        n_steps=50e6,
        log_interval_steps=1e3,
        affinity=dict(cuda_idx=cuda_idx),
    )
    config = dict(game=game)
    name = "dqn_" + game
    log_dir = "example_1"
    with logger_context(log_dir, run_ID, name, config, snapshot_mode="last"):
        runner.train()
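
create_single_football_env is a project-specific factory not shown here. A purely hypothetical sketch of what it might look like, assuming the Google Research Football API (gfootball.env.create_environment) and rlpyt's GymEnvWrapper; everything else in it is illustrative:

# Hypothetical factory: builds a Google Research Football environment and wraps
# it so an rlpyt sampler can use it. The representation choice is an assumption.
import gfootball.env as football_env
from rlpyt.envs.gym import GymEnvWrapper


def create_single_football_env(game="academy_empty_goal_close", **kwargs):
    env = football_env.create_environment(env_name=game, representation="extracted")
    return GymEnvWrapper(env)
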
Code example #5
File: example_1_mmm.py  Project: MauriceManning/rlpyt
def build_and_train(game="pong", run_ID=0, cuda_idx=None):
    sampler = SerialSampler(
        EnvCls=AtariEnv,
        TrajInfoCls=AtariTrajInfo,  # default traj info + GameScore
        env_kwargs=dict(game=game),
        eval_env_kwargs=dict(game=game),
        batch_T=4,  # Four time-steps per sampler iteration.
        batch_B=1,
        max_decorrelation_steps=0,
        eval_n_envs=10,
        eval_max_steps=int(10e3),
        eval_max_trajectories=5,
    )
    algo = DQN(min_steps_learn=1e3)  # Run with defaults.
    agent = AtariDqnAgent()
    runner = MinibatchRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        n_steps=50e6,
        log_interval_steps=1e3,
        affinity=dict(cuda_idx=cuda_idx),
    )
    config = dict(game=game)
    name = "dqn_" + game
    #log_dir = "example_1"
    log_dir = get_outputs_path()
    with logger_context(log_dir, run_ID, name, config, snapshot_mode="last"):
        runner.train()
Code example #6
def build_and_train(game="pong", run_ID=0, cuda_idx=None, n_parallel=2):
    config = dict(
        env=dict(game=game),
        algo=dict(batch_size=128),
        sampler=dict(batch_T=2, batch_B=32),
    )
    sampler = GpuSampler(
        EnvCls=AtariEnv,
        env_kwargs=dict(game=game),
        CollectorCls=GpuWaitResetCollector,
        eval_env_kwargs=dict(game=game),
        max_decorrelation_steps=0,
        eval_n_envs=10,
        eval_max_steps=int(10e3),
        eval_max_trajectories=5,
        # batch_T=4,  # Get from config.
        # batch_B=1,
        **config["sampler"]  # More parallel environments for batched forward-pass.
    )
    algo = DQN(**config["algo"])  # Batch size from config; other settings default.
    agent = AtariDqnAgent()
    runner = MinibatchRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        n_steps=50e6,
        log_interval_steps=1e3,
        affinity=dict(cuda_idx=cuda_idx, workers_cpus=list(range(n_parallel))),
    )
    name = "dqn_" + game
    log_dir = "example_5"
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
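
A sketch of the imports and invocation this parallel variant assumes, with paths as they appear in the rlpyt repository (the remaining classes are imported as in the serial examples above):

# Sketch only: parallel GPU sampler imports, following the rlpyt repository layout.
from rlpyt.samplers.parallel.gpu.sampler import GpuSampler
from rlpyt.samplers.parallel.gpu.collectors import GpuWaitResetCollector

# Illustrative launch: two sampler worker CPUs feeding batched forward passes on GPU 0.
build_and_train(game="pong", run_ID=0, cuda_idx=0, n_parallel=2)
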
Code example #7
def build_and_train(slot_affinity_code, log_dir, run_ID, config_key):
    affinity = affinity_from_code(slot_affinity_code)
    config = configs[config_key]
    variant = load_variant(log_dir)
    config = update_config(config, variant)
    config["eval_env"]["game"] = config["env"]["game"]

    sampler = GpuSampler(
        EnvCls=AtariEnv,
        env_kwargs=config["env"],
        CollectorCls=WaitResetCollector,
        TrajInfoCls=AtariTrajInfo,
        eval_env_kwargs=config["eval_env"],
        **config["sampler"]
    )
    algo = DQN(optim_kwargs=config["optim"], **config["algo"])
    agent = AtariDqnAgent(model_kwargs=config["model"], **config["agent"])
    runner = MinibatchRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        affinity=affinity,
        **config["runner"]
    )
    name = config["env"]["game"]
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
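
Functions with this (slot_affinity_code, log_dir, run_ID, config_key) signature are normally driven by an rlpyt launch script rather than called directly. A minimal sketch of that pattern, with the script path, experiment title, and variant setup made up for illustration:

# In the train script itself, the command-line arguments are usually forwarded directly:
#
#     if __name__ == "__main__":
#         import sys
#         build_and_train(*sys.argv[1:])
#
# A separate launch script then encodes the hardware affinity and starts the runs.
from rlpyt.utils.launching.affinity import encode_affinity
from rlpyt.utils.launching.exp_launcher import run_experiments

affinity_code = encode_affinity(n_cpu_core=4, n_gpu=1)
run_experiments(
    script="path/to/this_train_script.py",  # illustrative path
    affinity_code=affinity_code,
    experiment_title="atari_dqn",
    runs_per_setting=1,
    variants=[{}],               # single default variant
    log_dirs=["atari_dqn"],
    common_args=("atari_dqn",),  # forwarded to build_and_train as config_key
)
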
Code example #8
File: rlpyt_train_ftwc.py  Project: gstrazds/twagents
def build_and_train(cfg, game="ftwc", run_ID=0):
    # GVS NOTE: for ftwc/qait, possibly use CpuWaitResetCollector (or CpuResetCollector).
    sampler = SerialSampler(
        EnvCls=AtariEnv,
        TrajInfoCls=AtariTrajInfo,  # default traj info + GameScore
        env_kwargs=dict(game=game),
        eval_env_kwargs=dict(game=game),
        batch_T=4,  # Four time-steps per sampler iteration.
        batch_B=1,
        max_decorrelation_steps=0,
        eval_n_envs=10,
        eval_max_steps=int(10e2),
        eval_max_trajectories=5,
    )
    algo = DQN(min_steps_learn=1e2)  # Run with defaults.
    agent = AtariDqnAgent()
    runner = MinibatchRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        n_steps=50e6,
        log_interval_steps=1e3,
        affinity=dict(cuda_idx=cfg.cuda_idx),
    )
    config = dict(game=game)
    name = "dqn_" + game
    log_dir = "ftwc"
    with logger_context(log_dir, run_ID, name, config, snapshot_mode="last"):
        runner.train()
Code example #9
File: lab_example_1.py  Project: DavidMChan/rlpyt
def build_and_train(level="nav_maze_random_goal_01", run_ID=0, cuda_idx=None):
    sampler = SerialSampler(
        EnvCls=DeepmindLabEnv,
        env_kwargs=dict(level=level),
        eval_env_kwargs=dict(level=level),
        batch_T=4,  # Four time-steps per sampler iteration.
        batch_B=1,
        max_decorrelation_steps=0,
        eval_n_envs=5,
        eval_max_steps=int(10e3),
        eval_max_trajectories=5,
    )
    algo = DQN(min_steps_learn=1e3)  # Run with defaults.
    agent = AtariDqnAgent()
    runner = MinibatchRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        n_steps=50e6,
        log_interval_steps=1e5,
        affinity=dict(cuda_idx=cuda_idx),
    )
    config = dict(level=level)
    name = "lab_dqn"
    log_dir = "lab_example_1"
    with logger_context(log_dir, run_ID, name, config, snapshot_mode="last"):
        runner.train()
Code example #10
def build_and_train(run_ID=0, cuda_idx=None, n_parallel=2, serial_sampling=False):
    affinity = dict(cuda_idx=cuda_idx, workers_cpus=list(range(n_parallel)))
    device = "cpu" if cuda_idx is None else f"gpu {cuda_idx}"
    if serial_sampling:
        Sampler = SerialSampler  # ignores workers_cpus
        print(f"Using serial sampler w/ {device} for action sampling and optimization")
    else:
        Sampler = CpuSampler if cuda_idx is None else GpuSampler
        print(f"Using parallel sampler w/ {device} for action sampling and optimization")

    game = "pong"

    sampler = Sampler(
        EnvCls=AtariEnv,
        TrajInfoCls=AtariTrajInfo,  # default traj info + GameScore
        env_kwargs=dict(game=game),
        eval_env_kwargs=dict(game=game),
        batch_T=1,
        batch_B=8,  # Number of games running in parallel.
        max_decorrelation_steps=0
    )

    # load target observations along with respective thresholds and target actions
    target_obs = []
    target_info = {}
    for idx, (tpath, tthresh, ttarg) in enumerate(TARGET_META):
        with open(tpath, "rb") as f:
            tob = pickle.load(f)
            target_obs.append(tob)
            target_info[idx] = (tthresh, ttarg)
    target_obs = np.asarray(target_obs).transpose(0, 3, 1, 2)  # N, H, W, C --> N, C, H, W

    # adversary algorithm (subsumes agent DQN algorithm)
    algo = FixedAttackerDQN(
        target_obs,
        target_info,
        contrast_sd_path,
        dqn_oracle_sd_path,
        delta_bound=1.0,
        first_poison_itr=1,
        min_steps_learn=1e3
    )

    agent = AtariDqnAgent()

    runner = MinibatchRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        n_steps=50e6,
        log_interval_steps=1e3,
        affinity=affinity
    )

    config = dict(game=game)
    name = "rp_fixed_attack_dqn_" + game
    log_dir = "rp_fixed_attack"
    with logger_context(log_dir, run_ID, name, config, snapshot_mode="last"):
        runner.train()
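
TARGET_META, contrast_sd_path, and dqn_oracle_sd_path are project-specific globals not shown here; from the unpacking in the loop above, TARGET_META is an iterable of (pickle path, threshold, target action) tuples. A purely hypothetical illustration of that structure, with invented paths and values:

# Hypothetical: each entry is (path to a pickled observation, trigger threshold, target action).
TARGET_META = [
    ("targets/obs_0.pkl", 0.9, 3),
    ("targets/obs_1.pkl", 0.9, 3),
]
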
Code example #11
def build_and_train(game="pong", run_ID=0):
    # Change these inputs to match local machine and desired parallelism.
    affinity = make_affinity(
        run_slot=0,
        n_cpu_core=8,  # Use 8 cores across all experiments.
        n_gpu=2,  # Use 2 GPUs across all experiments.
        gpu_per_run=1,
        sample_gpu_per_run=1,
        async_sample=True,
        optim_sample_share_gpu=False,
        # hyperthread_offset=24,  # If machine has 24 cores.
        # n_socket=2,  # Presume CPU socket affinity to lower/upper half GPUs.
        # gpu_per_run=2,  # How many GPUs to parallelize one run across.
        # cpu_per_run=1,
    )

    sampler = AsyncGpuSampler(
        EnvCls=AtariEnv,
        TrajInfoCls=AtariTrajInfo,
        env_kwargs=dict(game=game),
        batch_T=5,
        batch_B=36,
        max_decorrelation_steps=100,
        eval_env_kwargs=dict(game=game),
        eval_n_envs=2,
        eval_max_steps=int(10e3),
        eval_max_trajectories=4,
    )
    algo = DQN(
        replay_ratio=8,
        min_steps_learn=1e4,
        replay_size=int(1e5)
    )
    agent = AtariDqnAgent()
    runner = AsyncRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        n_steps=2e6,
        log_interval_steps=1e4,
        affinity=affinity,
    )
    config = dict(game=game)
    name = "async_dqn_" + game
    log_dir = "async_dqn"
    with logger_context(log_dir, run_ID, name, config):
        runner.train()
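
A sketch of the imports this asynchronous variant relies on, with paths as they appear in the rlpyt repository (they may differ across versions or forks):

# Sketch only: asynchronous sampling/optimization imports.
from rlpyt.utils.launching.affinity import make_affinity
from rlpyt.samplers.async_.gpu_sampler import AsyncGpuSampler
from rlpyt.runners.async_rl import AsyncRlEval
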
Code example #12
def build_and_train(run_ID=0,
                    cuda_idx=None,
                    n_parallel=2,
                    serial_sampling=False):
    affinity = dict(cuda_idx=cuda_idx, workers_cpus=list(range(n_parallel)))
    device = "CPU" if cuda_idx is None else f"GPU {cuda_idx}"
    if serial_sampling:
        Sampler = SerialSampler  # ignores workers_cpus
        print(
            f"Using serial sampler w/ {device} for action sampling and optimization"
        )
    else:
        Sampler = CpuSampler if cuda_idx is None else GpuSampler
        print(
            f"Using parallel sampler w/ {device} for action sampling and optimization"
        )

    game = "pong"

    sampler = Sampler(
        EnvCls=AtariEnv,
        TrajInfoCls=AtariTrajInfo,  # default traj info + GameScore
        env_kwargs=dict(game=game),
        eval_env_kwargs=dict(game=game),
        batch_T=1,
        batch_B=8,  # Number of games running in parallel.
        max_decorrelation_steps=0)

    ### ALGO GOES HERE: placeholder in the original project; an rlpyt algorithm
    ### instance (e.g. DQN) must be assigned to algo before this will run.
    algo = None

    agent = AtariDqnAgent()

    runner = MinibatchRl(algo=algo,
                         agent=agent,
                         sampler=sampler,
                         n_steps=50e6,
                         log_interval_steps=1e3,
                         affinity=affinity)

    config = dict(game=game)
    name = "rp_attack_dqn_" + game
    log_dir = "rp_attack"
    with logger_context(log_dir, run_ID, name, config, snapshot_mode="last"):
        runner.train()
Code example #13
def build_and_train(id="SurfaceCode-v0", name='run', log_dir='./logs'):
    # Change these inputs to match local machine and desired parallelism.
    # affinity = make_affinity(
    #     n_cpu_core=24,  # Use 24 cores across all experiments.
    #     n_gpu=1,  # Use 1 GPU across all experiments.
    #     async_sample=True,
    #     set_affinity=True
    # )
    # affinity['optimizer'][0]['cuda_idx'] = 1
    num_cpus = multiprocessing.cpu_count()
    affinity = make_affinity(n_cpu_core=num_cpus // 2, cpu_per_run=num_cpus // 2, n_gpu=0,
                             async_sample=False, set_affinity=True)
    affinity['workers_cpus'] = tuple(range(num_cpus))
    affinity['master_torch_threads'] = 28
    # env_kwargs = dict(id='SurfaceCode-v0', error_model='X', volume_depth=5)
    state_dict = None # torch.load('./logs/run_29/params.pkl', map_location='cpu')
    agent_state_dict = None #state_dict['agent_state_dict']['model']
    optim_state_dict = None #state_dict['optimizer_state_dict']

    # sampler = AsyncCpuSampler(
    sampler = CpuSampler(
        # sampler=SerialSampler(
        EnvCls=make_qec_env,
        # TrajInfoCls=AtariTrajInfo,
        env_kwargs=dict(error_rate=0.005, error_model='DP'),
        batch_T=10,
        batch_B=num_cpus * 10,
        max_decorrelation_steps=100,
        eval_env_kwargs=dict(error_rate=0.005, error_model='DP', fixed_episode_length=5000),
        eval_n_envs=num_cpus,
        eval_max_steps=int(1e6),
        eval_max_trajectories=num_cpus,
        TrajInfoCls=EnvInfoTrajInfo
    )
    algo = DQN(
        replay_ratio=8,
        learning_rate=1e-5,
        min_steps_learn=1e4,
        replay_size=int(5e4),
        batch_size=32,
        double_dqn=True,
        # target_update_tau=0.002,
        target_update_interval=5000,
        ReplayBufferCls=UniformReplayBuffer,
        initial_optim_state_dict=optim_state_dict,
        eps_steps=2e6,
    )
    agent = AtariDqnAgent(model_kwargs=dict(channels=[32, 64, 64],
                                            kernel_sizes=[3, 2, 2],
                                            strides=[2, 1, 1],
                                            paddings=[0, 0, 0],
                                            fc_sizes=[512, ],
                                            dueling=True),
                          ModelCls=QECModel,
                          eps_init=1,
                          eps_final=0.02,
                          eps_itr_max=int(5e6),
                          eps_eval=0,
                          initial_model_state_dict=agent_state_dict)
    # agent = DqnAgent(ModelCls=FfModel)
    runner = QECSynchronousRunner(
        # runner = MinibatchRlEval(
        algo=algo,
        agent=agent,
        sampler=sampler,
        n_steps=1e9,
        log_interval_steps=3e5,
        affinity=affinity,
    )
    config = dict(game=id)
    config_logger(log_dir, name=name, snapshot_mode='last', log_params=config)
    # with logger_context(log_dir, run_ID, name, config):
    runner.train()
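
The commented-out assignments above sketch a resume-from-checkpoint path; making that explicit, assuming a params.pkl snapshot written by a previous run (the path is illustrative):

# Sketch only: resume agent/optimizer state from a previous run's snapshot.
import torch

state_dict = torch.load("./logs/run_29/params.pkl", map_location="cpu")
agent_state_dict = state_dict["agent_state_dict"]["model"]
optim_state_dict = state_dict["optimizer_state_dict"]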