Esempio n. 1
0
 def _build_parallel_ctrl(self, n_worker):
     """Create the shared IPC primitives coordinating the master process
     with ``n_worker`` sampler workers: quit/eval flags, in/out barriers,
     the iteration counter, and trajectory-info queues.
     """
     n_parties = n_worker + 1  # All workers plus the master wait together.
     self.ctrl = AttrDict(
         quit=mp.RawValue(ctypes.c_bool, False),
         barrier_in=mp.Barrier(n_parties),
         barrier_out=mp.Barrier(n_parties),
         do_eval=mp.RawValue(ctypes.c_bool, False),
         itr=mp.RawValue(ctypes.c_long, 0),
     )
     # Separate queues for training vs. evaluation trajectory infos.
     self.traj_infos_queue = mp.Queue()
     self.eval_traj_infos_queue = mp.Queue()
     # Workers poll this flag to cut an evaluation phase short.
     self.sync = AttrDict(stop_eval=mp.RawValue(ctypes.c_bool, False))
Esempio n. 2
0
    def initialize(self, affinity):
        """Initialization inside the main sampler process.  Sets process hardware
        affinities, creates specified number of environment instances and
        instantiates the collector with them.  If applicable, does the same for
        evaluation environment instances.  Moves the agent to device (could be
        GPU), and calls ``agent.async_cpu()`` initialization.  Starts up
        collector.
        """
        p = psutil.Process()
        if affinity.get("set_affinity", True):
            # Pin this process to the master CPUs (if enabled in affinity).
            p.cpu_affinity(affinity["master_cpus"])
        # torch.set_num_threads(affinity["master_torch_threads"])
        torch.set_num_threads(1)  # Needed to prevent MKL hang :( .
        B = self.batch_spec.B  # Number of parallel environment instances.
        envs = [self.EnvCls(**self.env_kwargs) for _ in range(B)]
        sync = AttrDict(
            db_idx=AttrDict(value=0))  # Mimic the mp.RawValue format.
        collector = self.CollectorCls(
            rank=0,  # Single serial collector, so always rank 0.
            envs=envs,
            samples_np=self.double_buffer,
            batch_T=self.batch_spec.T,
            TrajInfoCls=self.TrajInfoCls,
            agent=self.agent,
            sync=sync,
        )
        if self.eval_n_envs > 0:
            eval_envs = [
                self.EnvCls(**self.eval_env_kwargs)
                for _ in range(self.eval_n_envs)
            ]
            eval_CollectorCls = self.eval_CollectorCls or SerialEvalCollector
            self.eval_collector = eval_CollectorCls(
                envs=eval_envs,
                agent=self.agent,
                TrajInfoCls=self.TrajInfoCls,
                # Spread the total eval step budget across the eval envs.
                max_T=self.eval_max_steps // self.eval_n_envs,
                max_trajectories=self.eval_max_trajectories,
            )
        self.agent.to_device(cuda_idx=affinity.get("cuda_idx", None))
        self.agent.async_cpu(share_memory=False)

        # Decorrelate envs before the first batch, then start the agent.
        agent_inputs, traj_infos = collector.start_envs(
            self.max_decorrelation_steps)
        collector.start_agent()

        self.collector = collector
        self.agent_inputs = agent_inputs
        self.traj_infos = traj_infos
        self.sync = sync
        logger.log("Serial sampler initialized.")
Esempio n. 3
0
 def launch_memcpy(self, sample_buffers, replay_buffer):
     """
     Fork one Python process per sampler double buffer; each runs
     ``memory_copier`` to move finished sample batches into the replay
     buffer.  (It may be overkill to use two separate processes here, may
     be able to simplify to one and still get good performance.)
     """
     args_list = []
     for buf_idx, buf in enumerate(sample_buffers):  # (2 for double-buffer.)
         per_buf_ctrl = AttrDict(
             quit=self.ctrl.quit,
             sample_ready=self.ctrl.sample_ready[buf_idx],
             sample_copied=self.ctrl.sample_copied[buf_idx],
         )
         args_list.append(
             (buf, self.algo.samples_to_buffer, replay_buffer, per_buf_ctrl))
     procs = [mp.Process(target=memory_copier, args=a) for a in args_list]
     for proc in procs:
         proc.start()
     self.memcpy_procs = procs
Esempio n. 4
0
 def _build_parallel_ctrl(self, n_worker):
     """Build the shared control and sync structures for parallel sampling,
     including curriculum bookkeeping (stage, difficulty, average return,
     per-worker seeds) shared across processes.
     """
     barrier_parties = n_worker + 1  # Workers plus master.
     self.ctrl = AttrDict(
         quit=mp.RawValue(ctypes.c_bool, False),
         barrier_in=mp.Barrier(barrier_parties),
         barrier_out=mp.Barrier(barrier_parties),
         do_eval=mp.RawValue(ctypes.c_bool, False),
         itr=mp.RawValue(ctypes.c_long, 0),  # TODO SAVE state of curriculum?
     )
     self.traj_infos_queue = mp.Queue()
     self.eval_traj_infos_queue = mp.Queue()
     # mp.Value entries carry a lock, unlike the RawValue stop flag.
     self.sync = AttrDict(
         stop_eval=mp.RawValue(ctypes.c_bool, False),
         glob_average_return=mp.Value('d', 0.0),
         curriculum_stage=mp.Value('i', 0),
         difficulty=mp.Value('d', 0.0),
         seeds=mp.Array('i', n_worker),
     )
Esempio n. 5
0
def assemble_workers_kwargs(affinity, seed, samples_np, n_envs_list,
                            step_buffer_np, sync, eval_n_envs,
                            eval_step_buffer_np):
    """Build one kwargs dict per sampler worker: its rank, seed, CPU set,
    slice of the shared sample/step buffers, and per-worker sync objects
    (plus an eval step-buffer slice when evaluation is enabled).
    """
    workers_kwargs = []
    env_offset = 0  # Running start index into the batch (B) dimension.
    for rank, worker_cpus in enumerate(affinity["workers_cpus"]):
        n_envs = n_envs_list[rank]
        batch_slice = slice(env_offset, env_offset + n_envs)
        env_offset += n_envs
        worker_sync = AttrDict(
            step_blocker=sync.step_blockers[rank],
            act_waiter=sync.act_waiters[rank],
            stop_eval=sync.stop_eval,
        )
        worker_kwargs = dict(
            rank=rank,
            seed=seed + rank,
            cpus=worker_cpus,
            n_envs=n_envs,
            samples_np=samples_np[:, batch_slice],
            step_buffer_np=step_buffer_np[batch_slice],
            sync=worker_sync,
        )
        if eval_n_envs > 0:
            eval_slice = slice(rank * eval_n_envs, (rank + 1) * eval_n_envs)
            worker_kwargs["eval_step_buffer_np"] = eval_step_buffer_np[
                eval_slice]
        workers_kwargs.append(worker_kwargs)
    return workers_kwargs
Esempio n. 6
0
def sampling_process(common_kwargs, worker_kwargs):
    """Arguments fed from the Sampler class in master process.

    Target function for each forked sampler worker: builds its own envs and
    collector (and optionally an eval collector), then loops on the shared
    in/out barriers collecting batches or running evaluation until the
    master sets the quit flag.
    """
    c, w = AttrDict(**common_kwargs), AttrDict(**worker_kwargs)
    initialize_worker(w.rank, w.seed, w.cpus, c.torch_threads,
                      w.get("group", None))
    envs = [c.EnvCls(**c.env_kwargs) for _ in range(w.n_envs)]
    collector = c.CollectorCls(
        rank=w.rank,
        envs=envs,
        samples_np=w.samples_np,
        batch_T=c.batch_T,
        TrajInfoCls=c.TrajInfoCls,
        agent=c.get("agent", None),  # Optional depending on parallel setup.
        sync=w.get("sync", None),
        step_buffer_np=w.get("step_buffer_np", None),
    )
    agent_inputs, traj_infos = collector.start_envs(c.max_decorrelation_steps)
    collector.start_agent()

    # c.eval_n_envs == 0 yields an empty list, which disables evaluation.
    eval_envs = [c.EnvCls(**c.eval_env_kwargs) for _ in range(c.eval_n_envs)]
    if eval_envs:  # May do evaluation.
        eval_collector = c.eval_CollectorCls(
            rank=w.rank,
            envs=eval_envs,
            TrajInfoCls=c.TrajInfoCls,
            traj_infos_queue=c.traj_infos_queue,
            max_T=c.eval_max_T,
            agent=c.get("agent", None),
            sync=w.get("sync", None),
            step_buffer_np=w.get("eval_step_buffer_np", None),
        )

    ctrl = c.ctrl
    # Signal setup complete; master waits on barrier_out before iterating.
    ctrl.barrier_out.wait()
    while True:
        agent_inputs = collector.reset_if_needed(
            agent_inputs)  # Outside barrier?
        ctrl.barrier_in.wait()
        # Master released barrier_in: check quit, then do this itr's work.
        if ctrl.quit.value:
            break
        if ctrl.do_eval.value:
            eval_collector.collect_evaluation(
                ctrl.itr.value)  # Traj_infos to queue inside.
        else:
            agent_inputs, traj_infos, completed_infos = collector.collect_batch(
                agent_inputs, traj_infos, ctrl.itr.value)
            for info in completed_infos:
                c.traj_infos_queue.put(info)
        ctrl.barrier_out.wait()

    for env in envs + eval_envs:
        env.close()
Esempio n. 7
0
 def build_par_objs(self, world_size):
     """Return the parallel-communication objects shared by ``world_size``
     runner processes: a barrier and a trajectory-info queue.
     """
     par = AttrDict(
         barrier=mp.Barrier(world_size),
         traj_infos_queue=mp.Queue(),
     )
     return par
Esempio n. 8
0
def build_affinities_gpu_1cpu_drive(slt, gpu, cpu, cxg=1, gpr=1, cpw=1,
        hto=None, skt=1):
    """OLD.
    Divides CPUs evenly among GPUs, with one CPU held open for each GPU, to
    drive it.  Workers assigned on the remaining CPUs.  Master permitted to use
    driver core + worker cores (good in case of multi-context per GPU and old
    alternating action server sampler, from accel_rl). GPU-driving CPUs grouped
    at the lowest numbered cores of each CPU socket.

    Abbreviated params: slt: run-slot index (0-based); gpu: total GPUs;
    cpu: total cores; cxg: contexts per GPU; gpr: GPUs per run (only 1
    supported); cpw: cores per worker; hto: hyperthread offset (None ->
    defaults to ``cpu``, 0 -> off); skt: number of CPU sockets.
    """
    if gpr > 1:
        raise NotImplementedError  # (parallel training)
    n_ctx = gpu * cxg  # Total GPU contexts across all GPUs.
    n_run_slots = n_ctx // gpr
    assert slt < n_run_slots
    cpu_per_gpu = cpu // gpu
    sim_cpu_per_gpu = cpu_per_gpu - 1  # One core per GPU reserved to drive it.
    n_sim_cpu = cpu - gpu
    sim_cpu_per_ctx = n_sim_cpu // n_ctx

    assert gpu >= skt
    assert gpu % skt == 0
    gpu_per_skt = gpu // skt
    assert cpu % skt == 0
    cpu_per_skt = cpu // skt

    my_ctx = slt  # Different for multi-context run, not implemented.
    my_gpu = my_ctx // cxg
    my_skt = my_gpu // gpu_per_skt
    gpu_in_skt = my_gpu % gpu_per_skt
    # Driver cores occupy the lowest-numbered cores of each socket.
    gpu_core = gpu_in_skt + my_skt * cpu_per_skt
    ctx_in_gpu = my_ctx % cxg

    # Simulation cores start after this socket's driver cores.
    min_sim_core = (my_skt * cpu_per_skt + gpu_per_skt +
        gpu_in_skt * sim_cpu_per_gpu + ctx_in_gpu * sim_cpu_per_ctx)
    sim_cores = tuple(range(min_sim_core, min_sim_core + sim_cpu_per_ctx))

    assert len(sim_cores) % cpw == 0
    if hto is None:
        hto = cpu  # Default: hyperthread siblings offset by total core count.
    if hto > 0:
        # Pair each core with its hyperthread sibling at core + hto.
        hyperthreads = tuple(c + hto for c in sim_cores)
        workers_cpus = tuple(sim_cores[i:i + cpw] + hyperthreads[i:i + cpw]
            for i in range(0, len(sim_cores), cpw))
        master_cpus = (gpu_core,) + sim_cores + (gpu_core + hto,) + hyperthreads
    else:
        workers_cpus = tuple(sim_cores[i:i + cpw]
            for i in range(0, len(sim_cores), cpw))
        master_cpus = (gpu_core,) + sim_cores

    affinity = AttrDict(
        all_cpus=master_cpus,
        master_cpus=master_cpus,
        workers_cpus=workers_cpus,
        master_torch_threads=1,
        worker_torch_threads=cpw,
        cuda_idx=my_gpu,
    )
    return affinity
Esempio n. 9
0
    def initialize(self, affinity):
        """Set process CPU affinity, build the environments and collector
        (plus eval collector if configured), move the agent to device, and
        start the collector.  Runs in the main sampler process.
        """
        p = psutil.Process()
        if affinity.get("set_affinity", True):
            # Pin this process to the master CPUs (if enabled in affinity).
            p.cpu_affinity(affinity["master_cpus"])
        # torch.set_num_threads(affinity["master_torch_threads"])
        torch.set_num_threads(1)  # Needed to prevent MKL hang :( .
        B = self.batch_spec.B  # Number of parallel environment instances.
        envs = [self.EnvCls(**self.env_kwargs) for _ in range(B)]
        sync = AttrDict(
            db_idx=AttrDict(value=0))  # Mimic the mp.RawValue format.
        collector = self.CollectorCls(
            rank=0,  # Single serial collector, so always rank 0.
            envs=envs,
            samples_np=self.double_buffer,
            batch_T=self.batch_spec.T,
            TrajInfoCls=self.TrajInfoCls,
            agent=self.agent,
            sync=sync,
        )
        if self.eval_n_envs > 0:
            eval_envs = [
                self.EnvCls(**self.eval_env_kwargs)
                for _ in range(self.eval_n_envs)
            ]
            eval_CollectorCls = self.eval_CollectorCls or SerialEvalCollector
            self.eval_collector = eval_CollectorCls(
                envs=eval_envs,
                agent=self.agent,
                TrajInfoCls=self.TrajInfoCls,
                # Spread the total eval step budget across the eval envs.
                max_T=self.eval_max_steps // self.eval_n_envs,
                max_trajectories=self.eval_max_trajectories,
            )
        self.agent.to_device(cuda_idx=affinity.get("cuda_idx", None))
        self.agent.async_cpu(share_memory=False)

        # Decorrelate envs before the first batch, then start the agent.
        agent_inputs, traj_infos = collector.start_envs(
            self.max_decorrelation_steps)
        collector.start_agent()

        self.collector = collector
        self.agent_inputs = agent_inputs
        self.traj_infos = traj_infos
        self.sync = sync
        logger.log("Serial sampler initialized.")
Esempio n. 10
0
 def build_par_objs(self, n_runners):
     """Create shared objects for coordinating ``n_runners`` runner
     processes: a barrier, a trajectory-info queue, and a managed dict
     for any other communications.
     """
     manager = mp.Manager()
     par = AttrDict(
         barrier=mp.Barrier(n_runners),
         traj_infos_queue=mp.Queue(),
         dict=manager.dict(),  # For any other comms.
     )
     return par
Esempio n. 11
0
    def launch_workers(self, double_buffer, traj_infos_queue, affinity, seed,
                       n_envs_list, eval_n_envs_per):
        """Create shared sync objects and step buffers, then fork one
        ``sampling_process`` per entry in ``affinity["workers_cpus"]``.

        NOTE(review): the ``eval_n_envs_per`` parameter is never read; the
        body uses ``self.eval_n_envs_per`` instead -- confirm intended.
        """
        n_worker = len(affinity["workers_cpus"])
        # Per-worker semaphores implement the step/act handshake with the
        # master; stop_eval lets the master cut evaluation short.
        sync = AttrDict(
            step_blockers=[mp.Semaphore(0) for _ in range(n_worker)],
            act_waiters=[mp.Semaphore(0) for _ in range(n_worker)],
            stop_eval=mp.RawValue(ctypes.c_bool, False),
        )
        step_buffer_pyt, step_buffer_np = build_step_buffer(
            self.examples, sum(n_envs_list))

        if self.eval_n_envs_per > 0:
            eval_n_envs = self.eval_n_envs_per * n_worker
            eval_step_buffer_pyt, eval_step_buffer_np = build_step_buffer(
                self.examples, eval_n_envs)
            self.eval_step_buffer_pyt = eval_step_buffer_pyt
            self.eval_step_buffer_np = eval_step_buffer_np
        else:
            eval_step_buffer_np = None

        # Kwargs shared by every worker; per-worker kwargs assembled below.
        common_kwargs = dict(
            EnvCls=self.EnvCls,
            env_kwargs=self.env_kwargs,
            agent=None,
            batch_T=self.batch_spec.T,
            CollectorCls=self.CollectorCls,
            TrajInfoCls=self.TrajInfoCls,
            traj_infos_queue=traj_infos_queue,
            ctrl=self.ctrl,
            max_decorrelation_steps=self.max_decorrelation_steps,
            eval_n_envs=self.eval_n_envs_per,
            eval_CollectorCls=self.eval_CollectorCls or EvalCollector,
            eval_env_kwargs=self.eval_env_kwargs,
            eval_max_T=self.eval_max_T,
        )
        workers_kwargs = assemble_workers_kwargs(affinity, seed, double_buffer,
                                                 n_envs_list, step_buffer_np,
                                                 sync, self.eval_n_envs_per,
                                                 eval_step_buffer_np)

        workers = [
            mp.Process(target=sampling_process,
                       kwargs=dict(common_kwargs=common_kwargs,
                                   worker_kwargs=w_kwargs))
            for w_kwargs in workers_kwargs
        ]
        for w in workers:
            w.start()

        self.workers = workers
        self.step_buffer_pyt = step_buffer_pyt
        self.step_buffer_np = step_buffer_np
        self.sync = sync
        self.mid_batch_reset = self.CollectorCls.mid_batch_reset
Esempio n. 12
0
 def build_ctrl(self, n_optim_runner):
     """Assemble the shared control objects for optimizer/sampler
     coordination; the throttle barrier exists only when there is more
     than one optimizing runner.
     """
     if n_optim_runner > 1:
         opt_throttle = mp.Barrier(n_optim_runner)
     else:
         opt_throttle = None
     ctrl = AttrDict(
         quit=mp.Value('b', lock=True),
         sample_ready=[mp.Semaphore(0) for _ in range(2)],  # Double buffer.
         sample_copied=[mp.Semaphore(1) for _ in range(2)],
         sample_itr=mp.Value('l', lock=True),
         opt_throttle=opt_throttle,
         eval_time=mp.Value('d', lock=True),
     )
     return ctrl
Esempio n. 13
0
 def build_ctrl(self, world_size):
     """Build the shared control structure (quit flags, double-buffer
     semaphores, locked counters) used across optimizer and sampler
     processes; throttle barrier only when ``world_size > 1``.
     """
     opt_throttle = None
     if world_size > 1:
         opt_throttle = mp.Barrier(world_size)
     ctrl = AttrDict(
         quit=mp.Value('b', lock=True),
         quit_opt=mp.RawValue('b'),
         sample_ready=[mp.Semaphore(0) for _ in range(2)],  # Double buffer.
         sample_copied=[mp.Semaphore(1) for _ in range(2)],
         sampler_itr=mp.Value('l', lock=True),
         opt_throttle=opt_throttle,
         eval_time=mp.Value('d', lock=True),
     )
     return ctrl
Esempio n. 14
0
 def launch_memcpy(self, sample_buffers, replay_buffer):
     """Start one ``memory_copier`` process per sample buffer, each with
     its own slice of the shared control objects.
     """
     procs = []
     for idx, buf in enumerate(sample_buffers):
         per_buf_ctrl = AttrDict(
             quit=self.ctrl.quit,
             sample_ready=self.ctrl.sample_ready[idx],
             sample_copied=self.ctrl.sample_copied[idx],
         )
         procs.append(mp.Process(
             target=memory_copier,
             args=(buf, replay_buffer, per_buf_ctrl),
         ))
     for proc in procs:
         proc.start()
     self.memcpy_procs = procs
Esempio n. 15
0
def build_par_objs(n, groups=1):
    """Build shared control objects for ``n`` workers in each of ``groups``
    worker groups: quit/eval flags, barriers, a trajectory-info queue, and
    per-worker step/act semaphores.
    """
    n_parties = n * groups + 1  # Every worker plus the master.
    ctrl = AttrDict(
        quit=mp.RawValue(ctypes.c_bool, False),
        barrier_in=mp.Barrier(n_parties),
        barrier_out=mp.Barrier(n_parties),
        do_eval=mp.RawValue(ctypes.c_bool, False),
        itr=mp.RawValue(ctypes.c_long, 0),
    )
    traj_infos_queue = mp.Queue()

    step_blockers = [[mp.Semaphore(0) for _ in range(n)]
                     for _ in range(groups)]
    act_waiters = [[mp.Semaphore(0) for _ in range(n)] for _ in range(groups)]
    if groups == 1:
        # Flatten: single-group callers expect plain lists, not nested.
        step_blockers, act_waiters = step_blockers[0], act_waiters[0]
    sync = AttrDict(
        step_blockers=step_blockers,
        act_waiters=act_waiters,
        stop_eval=mp.RawValue(ctypes.c_bool, False),
    )
    return ctrl, traj_infos_queue, sync
Esempio n. 16
0
 def __init__(self,
              task,
              seed=0,
              headless=True,
              num_envs=0,
              episode_length=1000,
              randomize=False):
     """Wrap an Isaac Gym task as an environment with rlpyt-style spaces.

     :param task: task name; must be in ``VALID_TASKS``.
     :param seed: RNG seed passed through the config args.
     :param headless: run the simulator without a viewer.
     :param num_envs: number of simulated envs requested (0 presumably
         falls back to a config default -- confirm against ``load_cfg``).
     :param episode_length: max steps per episode.
     :param randomize: enable domain randomization.
     """
     assert task in VALID_TASKS
     base_args = AttrDict(BASE_ARGS)  # Base args except for cfg
     # NOTE(review): this first retrieve_cfg() call appears redundant --
     # its result is recomputed below after the args are filled in; confirm.
     base_args.logdir, base_args.cfg_train, base_args.cfg_env = retrieve_cfg(
         base_args, False)
     base_args.headless = headless
     base_args.task = task
     base_args.seed = seed
     base_args.episode_length = episode_length
     base_args.num_envs = num_envs
     base_args.randomize = randomize
     base_args.logdir, base_args.cfg_train, base_args.cfg_env = retrieve_cfg(
         base_args, False)  # Update configs properly
     cfg, cfg_train, logdir = load_cfg(base_args)
     sim_params = parse_sim_params(base_args, cfg, cfg_train)
     self.task, self.env = parse_task(base_args, cfg, cfg_train,
                                      sim_params)  # Create environment
     self.num_envs = self.env.num_envs  # Number of environments
     self.device = self.env.rl_device  # cuda or cpu
     # Wrap the raw gym spaces so observations/actions carry batch size,
     # device, and float32 coercion.
     self._observation_space = IsaacSpaceWrapper(
         num_envs=self.num_envs,
         device=self.env.rl_device,
         space=self.env.observation_space,
         name="obs",
         force_float32=True,
     )
     self._action_space = IsaacSpaceWrapper(
         num_envs=self.num_envs,
         device=self.env.rl_device,
         space=self.env.action_space,
         name="act",
         force_float32=True,
     )
Esempio n. 17
0
 def launch_memcpy(self, sample_buffers, replay_buffer):
     """Fork a ``memory_copier`` process for each sampler buffer to move
     finished sample batches into the replay buffer.
     """
     procs = []
     for i, sample_buffer in enumerate(sample_buffers):  # (2 for double-buffer.)
         buffer_ctrl = AttrDict(
             quit=self.ctrl.quit,
             sample_ready=self.ctrl.sample_ready[i],
             sample_copied=self.ctrl.sample_copied[i],
         )
         proc_args = (sample_buffer, self.algo.samples_to_buffer,
                      replay_buffer, buffer_ctrl)
         procs.append(mp.Process(target=memory_copier, args=proc_args))
     for proc in procs:
         proc.start()
     self.memcpy_procs = procs
Esempio n. 18
0
    def _build_parallel_ctrl(self, n_worker):
        """
        Create the data structures used to control the parallel training
        process.

        multiprocessing.RawValue: a shared inter-process value without a lock.
        multiprocessing.Barrier: a simple synchronization primitive for a
        fixed number of processes to wait on each other; once every process
        has called wait(), all of them proceed together.
        multiprocessing.Queue: a message queue for passing data between
        processes.

        :param n_worker: the actual number of workers (not necessarily equal
            to the value originally configured by the user).
        """
        self.ctrl = AttrDict(
            quit=mp.RawValue(ctypes.c_bool, False),
            barrier_in=mp.Barrier(
                n_worker +
                1),  # n_worker + 1 wait() calls are needed before all the spawned processes "unblock".
            barrier_out=mp.Barrier(n_worker + 1),
            do_eval=mp.RawValue(ctypes.c_bool, False),
            itr=mp.RawValue(ctypes.c_long, 0),
        )
        self.traj_infos_queue = mp.Queue()  # Queue shared across processes.
        self.eval_traj_infos_queue = mp.Queue()
        # RawValue(typecode_or_type, *args) returns a ctypes object allocated
        # from shared memory; here a bool.
        self.sync = AttrDict(stop_eval=mp.RawValue(ctypes.c_bool, False))
Esempio n. 19
0
    def launch_workers(self, double_buffer_slice, affinity, seed, n_envs_list):
        """Fork one sampling process per worker, after building per-worker
        obs/act semaphores, shared step buffers, and agent-input views.
        """
        self.n_worker = n_worker = len(n_envs_list)
        # A little slight-of-hand to make 2-level signal:
        self.ctrl.stop_eval = self.sync.stop_eval
        self.sync = AttrDict(
            obs_ready=[mp.Semaphore(0) for _ in range(n_worker)],
            act_ready=[mp.Semaphore(0) for _ in range(n_worker)],
            stop_eval=mp.RawValue(ctypes.c_bool, False),  # Overwrite.
            # stop_eval=self.ctrl.stop_eval,  # No, make 2-level signal.
            db_idx=self.ctrl.
            db_idx,  # Copy into sync which passes to Collector.
        )
        # Shared step buffer across all workers' envs (torch + numpy views).
        self.step_buffer_pyt, self.step_buffer_np = build_step_buffer(
            self.examples, sum(n_envs_list))
        self.agent_inputs = AgentInputs(
            self.step_buffer_pyt.observation,
            self.step_buffer_pyt.action,
            self.step_buffer_pyt.reward,
        )

        if self.eval_n_envs > 0:
            eval_n_envs = self.eval_n_envs_per * n_worker
            eval_step_buffer_pyt, eval_step_buffer_np = build_step_buffer(
                self.examples, eval_n_envs)
            self.eval_step_buffer_pyt = eval_step_buffer_pyt
            self.eval_step_buffer_np = eval_step_buffer_np
            self.eval_agent_inputs = AgentInputs(
                self.eval_step_buffer_pyt.observation,
                self.eval_step_buffer_pyt.action,
                self.eval_step_buffer_pyt.reward,
            )
            # eval_max_T already made in earlier initialize.

        self.double_buffer = double_buffer_slice  # Now only see my part.
        common_kwargs = self._assemble_common_kwargs(affinity)
        common_kwargs["agent"] = None  # Remove.
        workers_kwargs = self._assemble_workers_kwargs(affinity, seed,
                                                       n_envs_list)

        # Yes, fork again.
        self.workers = [
            mp.Process(
                target=sampling_process,
                kwargs=dict(common_kwargs=common_kwargs,
                            worker_kwargs=w_kwargs),
            ) for w_kwargs in workers_kwargs
        ]
        for w in self.workers:
            w.start()
Esempio n. 20
0
 def build_ctrl(self, world_size):
     """
     Construct the shared control objects (quit flags, double-buffer
     semaphores, locked counters) that coordinate the workflow across
     optimizer and sampler processes.
     """
     if world_size > 1:
         opt_throttle = mp.Barrier(world_size)
     else:
         opt_throttle = None
     ctrl = AttrDict(
         quit=mp.Value('b', lock=True),
         quit_opt=mp.RawValue('b'),
         sample_ready=[mp.Semaphore(0) for _ in range(2)],  # Double buffer.
         sample_copied=[mp.Semaphore(1) for _ in range(2)],
         sampler_itr=mp.Value('l', lock=True),
         opt_throttle=opt_throttle,
         eval_time=mp.Value('d', lock=True),
     )
     return ctrl
Esempio n. 21
0
def build_cpu_affinity(slt,
                       cpu,
                       cpr,
                       cpw=1,
                       hto=None,
                       res=0,
                       skt=1,
                       gpu=0,
                       alt=0,
                       saf=1):
    """Build a CPU-only affinity assignment for run slot ``slt``.

    :param slt: run slot index (0-based).
    :param cpu: total number of physical cores.
    :param cpr: cores per run slot.
    :param cpw: cores per sampler worker.
    :param hto: hyperthread offset (None -> defaults to ``cpu``; 0 -> off).
    :param res: cores reserved to the master (excluded from workers).
    :param skt: number of CPU sockets.
    :param gpu: must be 0 (this is the CPU-only builder).
    :param alt: alternating-sampler flag (passed through).
    :param saf: whether to actually set process affinity.
    :return: AttrDict of master/worker CPU assignments and thread counts.
    """
    assert gpu == 0
    assert cpu % cpr == 0
    hto = cpu if hto is None else hto  # Default is None, 0 is OFF.
    assert (hto - cpu) % skt == 0
    n_run_slots = cpu // cpr
    # BUG FIX: slot indices are 0-based, so a valid slot must be strictly
    # less than the slot count (matches the GPU affinity builders); with
    # slt == n_run_slots the core arithmetic below would run off the end.
    assert slt < n_run_slots
    cpu_per_skt = max(cpu, hto) // skt
    if n_run_slots >= skt:
        # One or more whole run slots fit within a single socket.
        slt_per_skt = n_run_slots // skt
        my_skt = slt // slt_per_skt
        slt_in_skt = slt % slt_per_skt
        min_core = my_skt * cpu_per_skt + slt_in_skt * cpr
        cores = tuple(range(min_core, min_core + cpr))
    else:  # One run spans multiple sockets.
        skt_per_slt = skt // n_run_slots
        cores = list()
        low_skt = slt * skt_per_slt
        for s in range(skt_per_slt):
            min_core = (low_skt + s) * cpu_per_skt
            high_core = min_core + cpr // skt_per_slt
            cores.extend(list(range(min_core, high_core)))
        cores = tuple(cores)
    worker_cores = cores[res:]  # First ``res`` cores are master-only.
    assert len(worker_cores) % cpw == 0
    master_cpus = get_master_cpus(cores, hto)
    workers_cpus = get_workers_cpus(worker_cores, cpw, hto, alt)
    affinity = AttrDict(
        all_cpus=master_cpus,
        master_cpus=master_cpus,
        workers_cpus=workers_cpus,
        master_torch_threads=len(cores),
        worker_torch_threads=cpw,
        alternating=bool(alt),  # Just to pass through a check.
        set_affinity=bool(saf),
    )
    return affinity
Esempio n. 22
0
    def sample_runner_initialize(self, affinity):
        """Divide envs among workers, build shared ctrl objects, and fork
        one action-server process per entry in ``affinity``.

        BUG FIX: the servers were previously created with
        ``kwargs=s_kwargs.update(**common_kwargs)`` -- but ``dict.update``
        returns ``None``, so every ``mp.Process`` received ``kwargs=None``.
        The dicts are now merged before being passed.
        """
        n_server = len(affinity)
        n_worker = sum(len(aff["workers_cpus"]) for aff in affinity)
        # Distribute batch envs as evenly as possible across workers.
        n_envs_list = [self.batch_spec.B // n_worker] * n_worker
        if not self.batch_spec.B % n_worker == 0:
            logger.log(
                "WARNING: unequal number of envs per process, from "
                f"batch_B {self.batch_spec.B} and n_parallel {n_worker} "
                "(possible suboptimal speed).")
            for b in range(self.batch_spec.B % n_worker):
                n_envs_list[b] += 1

        if self.eval_n_envs > 0:
            eval_n_envs_per = max(1, self.eval_n_envs // len(n_envs_list))
            eval_n_envs = eval_n_envs_per * n_worker
            logger.log(f"Total parallel evaluation envs: {eval_n_envs}.")
            self.eval_max_T = 1 + int(self.eval_max_steps // eval_n_envs)
            self.eval_n_envs_per = eval_n_envs_per
        else:
            self.eval_n_envs_per = 0
            self.eval_max_T = 0

        # Servers, workers, and the master all meet at the barriers.
        ctrl = AttrDict(
            quit=mp.RawValue(ctypes.c_bool, False),
            barrier_in=mp.Barrier(n_server + n_worker + 1),
            barrier_out=mp.Barrier(n_server + n_worker + 1),
            do_eval=mp.RawValue(ctypes.c_bool, False),
            itr=mp.RawValue(ctypes.c_long, 0),
        )
        traj_infos_queue = mp.Queue()

        common_kwargs = dict(
            ctrl=ctrl,
            traj_infos_queue=traj_infos_queue,
        )
        servers_kwargs = assemble_servers_kwargs(affinity, n_envs_list,
                                                 self.seed, self.double_buffer)
        servers = []
        for s_kwargs in servers_kwargs:
            s_kwargs.update(**common_kwargs)  # In-place merge; returns None.
            servers.append(mp.Process(target=self.action_server_process,
                                      kwargs=s_kwargs))
        for s in servers:
            s.start()
        self.servers = servers
        self.ctrl = ctrl
        self.traj_infos_queue = traj_infos_queue
Esempio n. 23
0
 def reset_dones(self, done):
     """Snapshot the running statistics of every env flagged in ``done``,
     package each into an AttrDict, then reset those slots for the next
     trajectory.  Returns the list of completed-trajectory infos.
     """
     # Read out the stats for the finished envs before clearing them.
     lengths = self.Length[done]
     returns = self.Return[done]
     nonzero_rewards = self.NonzeroRewards[done]
     discounted_returns = self.DiscountedReturn[done]
     cur_discounts = self._cur_discount[done]
     completed_infos = []
     for length, ret, nzr, disc_ret, cur_disc in zip(
             lengths, returns, nonzero_rewards, discounted_returns,
             cur_discounts):
         completed_infos.append(AttrDict(
             Length=length,
             Return=ret,
             NonzeroRewards=nzr,
             DiscountedReturn=disc_ret,
             _cur_discount=cur_disc,
             _discount=self._discount,
         ))
     # Reset the running stats of the finished envs.
     self.Length[done] = 0
     self.Return[done] = 0.
     self.NonzeroRewards[done] = 0.
     self.DiscountedReturn[done] = 0.
     self._cur_discount[done] = 1.
     return completed_infos
Esempio n. 24
0
 def _assemble_workers_kwargs(self, affinity, seed, n_envs_list):
     """Extend the base per-worker kwargs with this sampler's sync objects
     and each worker's slice of the step buffer (plus an eval-buffer slice
     when evaluation is enabled).
     """
     workers_kwargs = super()._assemble_workers_kwargs(
         affinity, seed, n_envs_list)
     env_offset = 0  # Running start index into the batch (B) dimension.
     for rank, w_kwargs in enumerate(workers_kwargs):
         n_envs = n_envs_list[rank]
         w_kwargs["sync"] = AttrDict(
             stop_eval=self.sync.stop_eval,
             obs_ready=self.sync.obs_ready[rank],
             act_ready=self.sync.act_ready[rank],
         )
         w_kwargs["step_buffer_np"] = \
             self.step_buffer_np[env_offset:env_offset + n_envs]
         if self.eval_n_envs > 0:
             lo = self.eval_n_envs_per * rank
             hi = self.eval_n_envs_per * (rank + 1)
             w_kwargs["eval_step_buffer_np"] = self.eval_step_buffer_np[lo:hi]
         env_offset += n_envs
     return workers_kwargs
Esempio n. 25
0
def build_and_train():
    """Assemble affinities, sampler, SAC algorithm, agent, and async runner
    for the 'picking' env, then train inside a logger context.
    """
    # One optimizer process pinned to core 0.
    opt_affinities = list()
    opt_affinity = dict(cpus=[0],
                        cuda_idx=None,
                        torch_threads=1,
                        set_affinity=True)
    opt_affinities.append(opt_affinity)
    # Sampler: master on core 0, single worker on core 1.
    smp_affinity = AttrDict(
        all_cpus=[0, 1],
        master_cpus=[0],
        workers_cpus=[1],
        master_torch_threads=1,
        worker_torch_threads=1,
        cuda_idx=None,
        alternating=False,  # Just to pass through a check.
        set_affinity=True,
    )
    affinity = AttrDict(
        all_cpus=[0, 1],  # For exp launcher to use taskset.
        optimizer=opt_affinities,
        sampler=smp_affinity,
        set_affinity=True,
    )
    sampler = AsyncCpuSampler(EnvCls=_make_env,
                              env_kwargs=dict(rank=0),
                              batch_T=600,
                              batch_B=3,
                              max_decorrelation_steps=0,
                              CollectorCls=DbCpuResetCollector)
    algo = SAC(batch_size=256,
               min_steps_learn=10000,
               replay_size=1000000,
               replay_ratio=1,
               target_update_interval=1,
               target_entropy=-9,
               target_update_tau=0.01,
               learning_rate=0.00025,
               action_prior="uniform",
               reward_scale=1,
               reparameterize=True,
               clip_grad_norm=1e9,
               n_step_return=1,
               updates_per_sync=1,
               bootstrap_timelimit=False)  # Run with defaults.
    agent = SacAgent(model_kwargs={'hidden_sizes': [256, 256]})
    runner = AsyncRl(
        algo=algo,
        agent=agent,
        sampler=sampler,
        n_steps=50e6,
        log_interval_steps=10000,
        affinity=affinity,
    )
    config = dict(env_id='picking')
    name = "sac_rlpyt_picking"
    # Logs go in a directory next to this script.
    log_dir = os.path.join(os.path.dirname(__file__), "sac_rlpyt_picking")
    with logger_context(log_dir,
                        0,
                        name,
                        config,
                        use_summary_writer=False,
                        snapshot_mode='all'):
        runner.train()
Esempio n. 26
0
def _mux_sampler(common_kwargs, worker_kwargs):
    """Variant of `rlpyt.samplers.parallel.worker.sampling_process` that is
    able to supply different environment keyword arguments to each environment
    that makes up a batch.

    Runs in a forked worker process: builds envs and a collector, then loops
    on the shared ctrl barriers collecting batches (or evaluating) until the
    master sets the quit flag.
    """
    c, w = AttrDict(**common_kwargs), AttrDict(**worker_kwargs)
    initialize_worker(w.rank, w.seed, w.cpus, c.torch_threads)
    # vvv CHANGED LINES vvv
    if isinstance(c.env_kwargs, (list, tuple)):
        # Per-env kwargs: pick this worker's entries by global env rank.
        env_ranks = w["env_ranks"]
        envs = [c.EnvCls(**c.env_kwargs[rank]) for rank in env_ranks]
    else:
        envs = [c.EnvCls(**c.env_kwargs) for _ in range(w.n_envs)]
    # ^^^ CHANGED LINES ^^^
    collector = c.CollectorCls(
        rank=w.rank,
        envs=envs,
        samples_np=w.samples_np,
        batch_T=c.batch_T,
        TrajInfoCls=c.TrajInfoCls,
        agent=c.get("agent", None),  # Optional depending on parallel setup.
        sync=w.get("sync", None),
        step_buffer_np=w.get("step_buffer_np", None),
        global_B=c.get("global_B", 1),
        env_ranks=w.get("env_ranks", None),
    )
    agent_inputs, traj_infos = collector.start_envs(c.max_decorrelation_steps)
    collector.start_agent()

    if c.get("eval_n_envs", 0) > 0:
        eval_envs = [
            c.EnvCls(**c.eval_env_kwargs) for _ in range(c.eval_n_envs)
        ]
        eval_collector = c.eval_CollectorCls(
            rank=w.rank,
            envs=eval_envs,
            TrajInfoCls=c.TrajInfoCls,
            traj_infos_queue=c.eval_traj_infos_queue,
            max_T=c.eval_max_T,
            agent=c.get("agent", None),
            sync=w.get("sync", None),
            step_buffer_np=w.get("eval_step_buffer_np", None),
        )
    else:
        eval_envs = list()

    ctrl = c.ctrl
    # Signal setup complete; master waits on barrier_out before iterating.
    ctrl.barrier_out.wait()
    while True:
        # NOTE(review): the return value of reset_if_needed() is discarded
        # here (the stock sampling_process reassigns agent_inputs) --
        # confirm the collector resets in place.
        collector.reset_if_needed(agent_inputs)  # Outside barrier?
        ctrl.barrier_in.wait()
        if ctrl.quit.value:
            break
        if ctrl.do_eval.value:
            eval_collector.collect_evaluation(
                ctrl.itr.value)  # Traj_infos to queue inside.
        else:
            (agent_inputs, traj_infos,
             completed_infos) = collector.collect_batch(
                 agent_inputs, traj_infos, ctrl.itr.value)
            for info in completed_infos:
                c.traj_infos_queue.put(info)
        ctrl.barrier_out.wait()

    for env in envs + eval_envs:
        env.close()
Esempio n. 27
0
def build_async_affinity(run_slot,
                         gpu,
                         cpu,
                         gpr=1,
                         sgr=0,
                         oss=0,
                         cpw=1,
                         hto=None,
                         res=1,
                         skt=1,
                         alt=0,
                         saf=1):
    """Build the hardware affinity spec for one asynchronous run slot,
    partitioning the machine's GPUs and CPU cores between the optimizer
    and the sampler so concurrent runs do not contend.

    Terse parameter encodings (inferred from usage in this function --
    confirm against the string encoder/decoder used elsewhere):
        run_slot: index of this run among the concurrent runs on the machine.
        gpu: total number of GPUs on the machine.
        cpu: total number of physical CPU cores.
        gpr: GPUs per run assigned to the optimizer.
        sgr: sampler GPUs per run (overridden to ``gpr`` when ``oss``).
        oss: bool-like; optimizer and sampler share the same GPU(s).
        cpw: CPU cores per sampler worker.
        hto: hyperthread offset (None -> defaults to ``cpu``; 0 -> HT off).
        res: CPU cores reserved per optimizer GPU.
        skt: number of CPU sockets on the machine.
        alt: bool-like; alternating-sampler flag, passed through only.
        saf: bool-like; whether processes should pin their CPU affinity.

    Returns an AttrDict with ``all_cpus`` (for the launcher's taskset),
    ``optimizer`` (list of per-GPU dicts), ``sampler`` (list of AttrDicts
    for a GPU sampler, or a single AttrDict for a CPU-only sampler), and
    ``set_affinity``.
    """
    oss = bool(oss)
    sgr = gpr if oss else sgr  # Shared mode: sampler uses the optimizer GPUs.
    total_gpr = (gpr + sgr * (not oss))  # Distinct GPUs consumed per run.
    n_run_slots = gpu // total_gpr
    assert run_slot < n_run_slots
    cpr = cpu // n_run_slots  # CPU cores per run.
    smp_cpr = cpr - res * gpr  # Cores left for sampling after optimizer reserve.
    gpu_per_skt = gpu // skt
    hto = cpu if hto is None else hto  # Default is None, 0 is OFF.
    cpu_per_skt = max(cpu, hto) // skt
    opt_affinities = list()
    smp_affinities = list()
    all_cpus = tuple()
    # Case 1: a whole run's GPUs fit within one socket.
    if total_gpr <= gpu_per_skt:
        run_per_skt = n_run_slots // skt
        assert n_run_slots % skt == 0  # Relax later?
        skt_per_run = 1
        run_in_skt = run_slot % run_per_skt
        my_skt = run_slot // run_per_skt
        low_opt_gpu = my_skt * gpu_per_skt + run_in_skt * total_gpr
        high_opt_gpu = low_opt_gpu + gpr
        my_opt_gpus = list(range(low_opt_gpu, high_opt_gpu))
        my_smp_gpus = (my_opt_gpus if oss else list(
            range(high_opt_gpu, high_opt_gpu + sgr)))
    else:  # One run takes more than one socket: spread opt gpus across sockets.
        skt_per_run = skt // n_run_slots
        low_skt = run_slot * skt_per_run
        assert gpr % skt_per_run == 0, "Maybe try n_socket=1."
        assert sgr % skt_per_run == 0, "Maybe try n_socket=1."
        my_opt_gpus = list()
        my_smp_gpus = list()
        run_in_skt = run_per_skt = 0
        for s in range(skt_per_run):
            low_opt_gpu = (low_skt + s) * gpu_per_skt
            high_opt_gpu = low_opt_gpu + gpr // skt_per_run
            my_opt_gpus.extend(list(range(low_opt_gpu, high_opt_gpu)))
            if oss:
                my_smp_gpus = my_opt_gpus
            else:
                high_smp_gpu = high_opt_gpu + sgr // skt_per_run
                my_smp_gpus.extend(list(range(high_opt_gpu, high_smp_gpu)))
    # Give each optimizer GPU its own reserved block of CPU cores.
    for i, opt_gpu in enumerate(my_opt_gpus):
        gpu_in_skt = opt_gpu % gpu_per_skt
        gpu_skt = opt_gpu // gpu_per_skt
        gpu_res = i if run_per_skt >= 1 else gpu_in_skt
        low_opt_core = (gpu_skt * cpu_per_skt + run_in_skt * cpr +
                        gpu_res * res)
        high_opt_core = low_opt_core + res
        opt_cores = tuple(range(low_opt_core, high_opt_core))
        opt_cpus = get_master_cpus(opt_cores, hto)
        opt_affinity = dict(cpus=opt_cpus,
                            cuda_idx=opt_gpu,
                            torch_threads=len(opt_cores),
                            set_affinity=bool(saf))
        opt_affinities.append(opt_affinity)
        all_cpus += opt_cpus
    # Split the remaining cores evenly among GPU-sampler processes.
    wrkr_per_smp = smp_cpr // cpw
    smp_cpr = wrkr_per_smp * cpw  # Round down to a whole number of workers.
    smp_cpg = smp_cpr // max(1, sgr)  # Sampling cores per sampler GPU.
    for i, smp_gpu in enumerate(my_smp_gpus):
        gpu_skt = smp_gpu // gpu_per_skt
        gpu_in_skt = smp_gpu % gpu_per_skt
        smp_cpu_off = (i if run_per_skt >= 1 else gpu_in_skt -
                       (gpr // skt_per_run))
        low_smp_core = (gpu_skt * cpu_per_skt + run_in_skt * cpr +
                        (gpr // skt_per_run) * res + smp_cpu_off * smp_cpg)
        high_smp_core = low_smp_core + smp_cpg
        master_cores = tuple(range(low_smp_core, high_smp_core))
        master_cpus = get_master_cpus(master_cores, hto)
        workers_cpus = get_workers_cpus(master_cores, cpw, hto, alt)
        smp_affinity = AttrDict(
            all_cpus=master_cpus,
            master_cpus=master_cpus,
            workers_cpus=workers_cpus,
            master_torch_threads=len(master_cores),
            worker_torch_threads=cpw,
            cuda_idx=smp_gpu,
            alternating=bool(alt),  # Just to pass through a check.
            set_affinity=bool(saf),
        )
        smp_affinities.append(smp_affinity)
        all_cpus += master_cpus
    if not smp_affinities:  # sgr==0; CPU sampler.
        if total_gpr <= gpu_per_skt:
            low_smp_core = (my_skt * cpu_per_skt + run_in_skt * cpr +
                            gpr * res)
            master_cores = tuple(range(low_smp_core, low_smp_core + smp_cpr))
        else:
            master_cores = tuple()
            for s in range(skt_per_run):
                low_smp_core = ((low_skt + s) * cpu_per_skt +
                                (gpr // gpu_per_skt) * res)
                master_cores += tuple(
                    range(low_smp_core, low_smp_core + smp_cpr // skt_per_run))
        master_cpus = get_master_cpus(master_cores, hto)
        workers_cpus = get_workers_cpus(master_cores, cpw, hto, alt)
        # NOTE: single AttrDict here (not a list), unlike the GPU-sampler case.
        smp_affinities = AttrDict(
            all_cpus=master_cpus,
            master_cpus=master_cpus,
            workers_cpus=workers_cpus,
            master_torch_threads=len(master_cores),
            worker_torch_threads=cpw,
            cuda_idx=None,
            alternating=bool(alt),  # Just to pass through a check.
            set_affinity=bool(saf),
        )
        all_cpus += master_cpus
    affinity = AttrDict(
        all_cpus=all_cpus,  # For exp launcher to use taskset.
        optimizer=opt_affinities,
        sampler=smp_affinities,
        set_affinity=bool(saf),
    )

    return affinity
Esempio n. 28
0
def sampling_process(common_kwargs, worker_kwargs):
    """Target function used for forking parallel worker processes in the
    samplers. After ``initialize_worker()``, it creates the specified number
    of environment instances and gives them to the collector when
    instantiating it.  It then calls collector startup methods for
    environments and agent.  If applicable, instantiates evaluation
    environment instances and evaluation collector.

    Then enters infinite loop, waiting for signals from master to collect
    training samples or else run evaluation, until signaled to exit.

    :param common_kwargs: arguments shared by all workers.
    :param worker_kwargs: arguments that may differ per worker.
    """
    c, w = AttrDict(**common_kwargs), AttrDict(**worker_kwargs)
    initialize_worker(w.rank, w.seed, w.cpus, c.torch_threads)

    envs = [c.EnvCls(**c.env_kwargs) for _ in range(w.n_envs)]

    # Only the first env of this worker keeps heatmap logging enabled.
    if c.env_kwargs.get('log_heatmaps'):
        for env in envs[1:]:
            env.log_heatmaps = False

    if c.record_freq > 0:
        if c.env_kwargs['game'] in ATARI_ENVS:
            envs[0].record_env = True
            # exist_ok=True: every worker process executes this line for the
            # same path, so without it all but the first worker would crash
            # with FileExistsError.
            os.makedirs(os.path.join(c.log_dir, 'videos/frames'),
                        exist_ok=True)
        elif c.get("eval_n_envs", 0) == 0:
            # Only record workers if no evaluation processes are performed.
            envs[0] = Monitor(envs[0],
                              c.log_dir + '/videos',
                              video_callable=lambda episode_id: episode_id % c.
                              record_freq == 0)

    set_envs_seeds(envs, w.seed)

    collector = c.CollectorCls(
        rank=w.rank,
        envs=envs,
        samples_np=w.samples_np,
        batch_T=c.batch_T,
        TrajInfoCls=c.TrajInfoCls,
        agent=c.get("agent", None),  # Optional depending on parallel setup.
        sync=w.get("sync", None),
        step_buffer_np=w.get("step_buffer_np", None),
        global_B=c.get("global_B", 1),
        env_ranks=w.get("env_ranks", None),
        no_extrinsic=c.no_extrinsic)
    agent_inputs, traj_infos = collector.start_envs(c.max_decorrelation_steps)
    collector.start_agent()

    if c.get("eval_n_envs", 0) > 0:
        eval_envs = [
            c.EnvCls(**c.eval_env_kwargs) for _ in range(c.eval_n_envs)
        ]
        if c.record_freq > 0:
            eval_envs[0] = Monitor(eval_envs[0],
                                   c.log_dir + '/videos',
                                   video_callable=lambda episode_id: episode_id
                                   % c.record_freq == 0)
        set_envs_seeds(eval_envs, w.seed)
        eval_collector = c.eval_CollectorCls(
            rank=w.rank,
            envs=eval_envs,
            TrajInfoCls=c.TrajInfoCls,
            traj_infos_queue=c.eval_traj_infos_queue,
            max_T=c.eval_max_T,
            agent=c.get("agent", None),
            sync=w.get("sync", None),
            step_buffer_np=w.get("eval_step_buffer_np", None),
        )
    else:
        eval_envs = list()

    ctrl = c.ctrl
    # Matches the extra wait() in the master's initialize(): n_worker + 1.
    ctrl.barrier_out.wait()
    while True:
        collector.reset_if_needed(agent_inputs)  # Outside barrier?
        ctrl.barrier_in.wait()
        if ctrl.quit.value:
            logger.log('Quitting worker ...')
            break
        if ctrl.do_eval.value:
            eval_collector.collect_evaluation(
                ctrl.itr.value)  # Traj_infos to queue inside.
        else:
            agent_inputs, traj_infos, completed_infos = collector.collect_batch(
                agent_inputs, traj_infos, ctrl.itr.value)
            for info in completed_infos:
                c.traj_infos_queue.put(info)
        ctrl.barrier_out.wait()

    for env in envs + eval_envs:
        logger.log('Stopping env ...')
        env.close()
Esempio n. 29
0
def sampling_process(common_kwargs, worker_kwargs):
    """Target function forked as a parallel sampler worker process.

    After ``initialize_worker()``, builds this worker's environment
    instances and hands them to a freshly-instantiated collector, then runs
    the collector's startup for environments and agent.  If evaluation is
    configured, also builds evaluation environments and an evaluation
    collector.  Finally loops forever, synchronizing with the master via
    barriers, collecting a training batch or running evaluation as signaled,
    until told to quit.

    :param common_kwargs: arguments shared by all workers.
    :param worker_kwargs: arguments that may differ per worker.
    """
    common = AttrDict(**common_kwargs)
    worker = AttrDict(**worker_kwargs)
    initialize_worker(worker.rank, worker.seed, worker.cpus,
                      common.torch_threads)
    envs = [common.EnvCls(**common.env_kwargs)
            for _ in range(worker.n_envs)]
    set_envs_seeds(envs, worker.seed)

    collector = common.CollectorCls(
        rank=worker.rank,
        envs=envs,
        samples_np=worker.samples_np,
        batch_T=common.batch_T,
        TrajInfoCls=common.TrajInfoCls,
        agent=common.get("agent", None),  # Optional per parallel setup.
        sync=worker.get("sync", None),
        step_buffer_np=worker.get("step_buffer_np", None),
        global_B=common.get("global_B", 1),
        env_ranks=worker.get("env_ranks", None),
    )
    agent_inputs, traj_infos = collector.start_envs(
        common.max_decorrelation_steps)
    collector.start_agent()

    eval_envs = []
    if common.get("eval_n_envs", 0) > 0:
        eval_envs = [common.EnvCls(**common.eval_env_kwargs)
                     for _ in range(common.eval_n_envs)]
        set_envs_seeds(eval_envs, worker.seed)
        eval_collector = common.eval_CollectorCls(
            rank=worker.rank,
            envs=eval_envs,
            TrajInfoCls=common.TrajInfoCls,
            traj_infos_queue=common.eval_traj_infos_queue,
            max_T=common.eval_max_T,
            agent=common.get("agent", None),
            sync=worker.get("sync", None),
            step_buffer_np=worker.get("eval_step_buffer_np", None),
        )

    ctrl = common.ctrl
    ctrl.barrier_out.wait()  # Pairs with the master's wait in initialize().
    while True:
        collector.reset_if_needed(agent_inputs)  # Outside barrier?
        ctrl.barrier_in.wait()
        if ctrl.quit.value:
            break
        if ctrl.do_eval.value:
            # Eval collector puts traj_infos on the queue itself.
            eval_collector.collect_evaluation(ctrl.itr.value)
        else:
            batch = collector.collect_batch(agent_inputs, traj_infos,
                                            ctrl.itr.value)
            agent_inputs, traj_infos, completed_infos = batch
            for info in completed_infos:
                common.traj_infos_queue.put(info)
        ctrl.barrier_out.wait()

    for env in envs:
        env.close()
    for env in eval_envs:
        env.close()
Esempio n. 30
0
def sampling_process(common_kwargs, worker_kwargs):
    """
    Arguments fed from the Sampler class in master process.

    Target function for a sampling worker process.

    :param common_kwargs: arguments shared by all workers.
    :param worker_kwargs: arguments that may differ per worker.
    """
    c, w = AttrDict(**common_kwargs), AttrDict(**worker_kwargs)
    initialize_worker(w.rank, w.seed, w.cpus, c.torch_threads)
    # Build the environment and collector instances used for training.
    envs = [c.EnvCls(**c.env_kwargs) for _ in range(w.n_envs)]
    collector = c.CollectorCls(
        rank=w.rank,
        envs=envs,
        samples_np=w.samples_np,
        batch_T=c.batch_T,
        TrajInfoCls=c.TrajInfoCls,
        agent=c.get("agent", None),  # Optional depending on parallel setup.
        sync=w.get("sync", None),
        step_buffer_np=w.get("step_buffer_np", None),
        global_B=c.get("global_B", 1),
        env_ranks=w.get("env_ranks", None),
    )
    agent_inputs, traj_infos = collector.start_envs(
        c.max_decorrelation_steps)  # Collects (samples) the first batch of data.
    collector.start_agent()  # Collector-side agent initialization.

    # Build the environment and collector instances used for evaluation.
    if c.get("eval_n_envs", 0) > 0:
        eval_envs = [
            c.EnvCls(**c.eval_env_kwargs) for _ in range(c.eval_n_envs)
        ]
        eval_collector = c.eval_CollectorCls(
            rank=w.rank,
            envs=eval_envs,
            TrajInfoCls=c.TrajInfoCls,
            traj_infos_queue=c.eval_traj_infos_queue,
            max_T=c.eval_max_T,
            agent=c.get("agent", None),
            sync=w.get("sync", None),
            step_buffer_np=w.get("eval_step_buffer_np", None),
        )
    else:
        eval_envs = list()

    ctrl = c.ctrl  # Controller that keeps the concurrent worker processes in lockstep.
    ctrl.barrier_out.wait(
    )  # Each worker waits once; with the wait() in ParallelSamplerBase.initialize(), that is exactly n_worker + 1 parties.
    while True:
        collector.reset_if_needed(agent_inputs)  # Outside barrier?
        ctrl.barrier_in.wait()
        if ctrl.quit.value:  # Master set this True: all workers stop sampling.
            break
        if ctrl.do_eval.value:  # Master's evaluate_agent() set this True: collect evaluation data instead.
            eval_collector.collect_evaluation(
                ctrl.itr.value)  # Traj_infos to queue inside.
        else:  # Regular training-sample collection.
            agent_inputs, traj_infos, completed_infos = collector.collect_batch(
                agent_inputs, traj_infos, ctrl.itr.value)
            for info in completed_infos:
                c.traj_infos_queue.put(info)  # Push this worker's stats onto the queue shared by all workers.
        ctrl.barrier_out.wait()

    # Clean up environments.
    for env in envs + eval_envs:
        env.close()