Example #1
    # requires: from faster_fifo import Queue; from queue import Full, Empty
    def test_queue_usage(self):
        q = Queue(1000 * 1000)  # specify the size of the circular buffer (in bytes) in the ctor

        # any pickle-able Python object can be added to the queue
        py_obj = dict(a=42, b=33, c=(1, 2, 3), d=[1, 2, 3], e='123', f=b'kkk')
        q.put(py_obj)
        assert q.qsize() == 1

        retrieved = q.get()
        assert q.empty()
        assert py_obj == retrieved

        for i in range(100):
            try:
                q.put(py_obj, timeout=0.1)
            except Full:
                log.debug('Queue is full!')

        num_received = 0
        while num_received < 100:
            # get multiple messages at once, returns a list of messages for better performance in many-to-few scenarios
            # get_many does not guarantee that all max_messages_to_get will be received on the first call, in fact
            # no such guarantee can be made in multiprocessing systems.
            # get_many() will retrieve as many messages as there are available AND can fit in the pre-allocated memory
            # buffer. The size of the buffer is increased gradually to match demand.
            messages = q.get_many(max_messages_to_get=100)
            num_received += len(messages)

        try:
            q.get(timeout=0.1)
            assert False, 'This won\'t be called'
        except Empty:
            log.debug('Queue is empty')
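
A minimal companion sketch of the batched producer/consumer pattern, assuming put_many mirrors put's block/timeout interface (the payloads below are illustrative, not part of the test above):

from queue import Empty

from faster_fifo import Queue

q = Queue(1000 * 1000)  # circular buffer size in bytes
messages = [dict(step=i) for i in range(50)]

# put_many serializes the whole batch and takes the lock once,
# which is much cheaper than 50 individual put() calls
q.put_many(messages, timeout=0.1)

received = []
while len(received) < len(messages):
    try:
        # a single call may return fewer than max_messages_to_get
        received.extend(q.get_many(max_messages_to_get=50, timeout=0.1))
    except Empty:
        break

assert received == messages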
Example #2
    def test_spawn_ctx(self):
        ctx = multiprocessing.get_context('spawn')
        data_q = Queue(1000 * 1000)
        procs = [
            ctx.Process(target=spawn_producer, args=(data_q, ))
            for _ in range(2)
        ]
        procs.append(ctx.Process(target=spawn_consumer, args=(data_q, )))

        # add data to the queue and read some of it back to make sure all internal buffers are initialized before
        # the new processes are spawned (with the 'spawn' start method, everything sent to a child must be picklable)
        for i in range(10):
            data_q.put(self.test_spawn_ctx.__name__)
        msgs = data_q.get_many(max_messages_to_get=2)
        print(msgs)

        for p in procs:
            p.start()
        for p in procs:
            p.join()
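
The spawn_producer and spawn_consumer targets are not shown in this example. A hypothetical sketch of what they could look like (the message counts are assumptions); with the 'spawn' start method they must be importable module-level functions, because the child process re-imports the module instead of inheriting state via fork:

from queue import Empty


def spawn_producer(data_q):
    for i in range(10):
        data_q.put(f'produced_{i}')


def spawn_consumer(data_q):
    received = 0
    while received < 20:  # two producers x 10 messages each
        try:
            received += len(data_q.get_many(timeout=1.0))
        except Empty:
            continue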
Example #3
class AsyncEnvs:
    def __init__(self, env_fns, num_rollout_workers, storage):
        self.env_fns = env_fns
        self.num_rollout_workers = num_rollout_workers
        self.rollout_task_queues = [
            FastQueue(1000) for _ in range(num_rollout_workers)
        ]
        self.stats_queue = FastQueue(1000)
        self.policy_request_queue = FastQueue(1000)
        self.storage = storage
        assert len(env_fns) % self.num_rollout_workers == 0, \
            "number of rollout workers must divide the number of envs"
        self.num_envs_per_rollout_worker = len(env_fns) // self.num_rollout_workers

        for rollout_worker_idx in range(self.num_rollout_workers):
            mp.Process(target=self.start_rollout_worker,
                       args=(rollout_worker_idx, env_fns)).start()

    def start_rollout_worker(self, rollout_worker_idx, env_fns):
        sw = stopwatch.StopWatch()
        next_obs, next_done, obs, actions, logprobs, rewards, dones, values = self.storage
        env_idxs = range(
            rollout_worker_idx * self.num_envs_per_rollout_worker,
            rollout_worker_idx * self.num_envs_per_rollout_worker +
            self.num_envs_per_rollout_worker)
        envs = [None for _ in range(len(self.env_fns))]
        for env_idx in env_idxs:
            envs[env_idx] = self.env_fns[env_idx]()
            next_step = 0
            self.policy_request_queue.put(
                [next_step, env_idx, rollout_worker_idx])
            next_obs[env_idx] = torch.tensor(envs[env_idx].reset())
            next_done[env_idx] = 0

        local_step = 0
        while True:
            with sw.timer('act'):
                with sw.timer('wait_rollout_task_queue'):
                    tasks = self.rollout_task_queues[
                        rollout_worker_idx].get_many()

                with sw.timer('rollouts'):
                    for task in tasks:
                        step, env_idx = task
                        obs[step, env_idx] = next_obs[env_idx].copy()
                        dones[step, env_idx] = next_done[env_idx].copy()

                        next_obs[env_idx], r, d, info = envs[env_idx].step(
                            actions[step, env_idx])
                        if d:
                            next_obs[env_idx] = envs[env_idx].reset()
                        rewards[step, env_idx] = r
                        next_done[env_idx] = d
                        next_step = step + 1
                        local_step += 1

                        with sw.timer('logging'):
                            self.policy_request_queue.put(
                                [next_step, env_idx, rollout_worker_idx])
                            if 'episode' in info.keys():
                                # print(["charts/episode_reward", info['episode']['r']])
                                # self.stats_queue.put(['l', info['episode']['l']])
                                self.stats_queue.put([
                                    "charts/episode_reward",
                                    info['episode']['r']
                                ])

            if local_step % 1000 == 0:
                print(stopwatch.format_report(sw.get_last_aggregated_report()))
                print()
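
AsyncEnvs only consumes rollout tasks; the policy side that answers policy_request_queue and refills rollout_task_queues is not shown. A hypothetical counterpart loop (agent.get_action_and_value and the storage layout are assumptions modeled on the code above, not part of the original):

import torch


def policy_worker(policy_request_queue, rollout_task_queues, storage, agent):
    next_obs, next_done, obs, actions, logprobs, rewards, dones, values = storage
    while True:
        # drain all pending requests in one call to amortize locking overhead
        requests = policy_request_queue.get_many()
        for step, env_idx, rollout_worker_idx in requests:
            with torch.no_grad():
                action, logprob, _, value = agent.get_action_and_value(
                    next_obs[env_idx].unsqueeze(0))
            actions[step, env_idx] = action
            logprobs[step, env_idx] = logprob
            values[step, env_idx] = value.flatten()
            # hand the env back to its rollout worker for the next step
            rollout_task_queues[rollout_worker_idx].put([step, env_idx])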
Example #4
class DummySampler(AlgorithmBase):
    @classmethod
    def add_cli_args(cls, parser):
        p = parser
        super().add_cli_args(p)

        p.add_argument(
            '--num_workers',
            default=multiprocessing.cpu_count(),
            type=int,
            help='Number of processes to use to sample the environment.')
        p.add_argument(
            '--num_envs_per_worker',
            default=1,
            type=int,
            help='Number of envs on a single CPU sampled sequentially.')

        p.add_argument(
            '--sample_env_frames',
            default=int(2e6),
            type=int,
            help=
            'Stop after sampling this many env frames (this takes frameskip into account)'
        )
        p.add_argument(
            '--sample_env_frames_per_worker',
            default=int(1e5),
            type=int,
            help=
            'Stop after sampling this many env frames per worker (this takes frameskip into account)'
        )

        p.add_argument(
            '--set_workers_cpu_affinity',
            default=True,
            type=str2bool,
            help=(
                'Whether to assign workers to specific CPU cores or not. This is beneficial for most workloads '
                'because it prevents a lot of context switching. However, for some environments it can be better to '
                'disable it, to allow one worker to use all cores some of the time. This can be the case for some '
                'DMLab environments with a very expensive episode reset that can use parallel CPU cores for level '
                'generation.'),
        )

    def __init__(self, cfg):
        super().__init__(cfg)

        self.processes = []
        self.terminate = RawValue(ctypes.c_bool, False)

        self.start_event = multiprocessing.Event()
        self.start_event.clear()

        self.report_queue = Queue()
        self.report_every_sec = 1.0
        self.last_report = 0

        self.avg_stats_intervals = (1, 10, 60, 300, 600)
        self.fps_stats = deque([], maxlen=max(self.avg_stats_intervals))

    def initialize(self):
        # creating an environment in the main process tends to fix some very weird issues further down the line
        # https://stackoverflow.com/questions/60963839/importing-opencv-after-importing-pytorch-messes-with-cpu-affinity
        # do not delete this unless you know what you're doing
        tmp_env = create_env(self.cfg.env, cfg=self.cfg, env_config=None)
        tmp_env.close()

        for i in range(self.cfg.num_workers):
            p = multiprocessing.Process(target=self.sample, args=(i, ))
            self.processes.append(p)

    def sample(self, proc_idx):
        # workers should ignore Ctrl+C because the termination is handled in the event loop by a special msg
        signal.signal(signal.SIGINT, signal.SIG_IGN)

        timing = Timing()

        from threadpoolctl import threadpool_limits
        with threadpool_limits(limits=1, user_api=None):
            if self.cfg.set_workers_cpu_affinity:
                set_process_cpu_affinity(proc_idx, self.cfg.num_workers)

            initial_cpu_affinity = psutil.Process().cpu_affinity() if platform != 'darwin' else None
            psutil.Process().nice(10)

            with timing.timeit('env_init'):
                envs = []
                env_key = ['env' for _ in range(self.cfg.num_envs_per_worker)]

                for env_idx in range(self.cfg.num_envs_per_worker):
                    global_env_id = proc_idx * self.cfg.num_envs_per_worker + env_idx
                    env_config = AttrDict(worker_index=proc_idx,
                                          vector_index=env_idx,
                                          env_id=global_env_id)
                    env = create_env(self.cfg.env,
                                     cfg=self.cfg,
                                     env_config=env_config)
                    log.debug(
                        'CPU affinity after create_env: %r',
                        psutil.Process().cpu_affinity()
                        if platform != 'darwin' else 'MacOS - None')
                    env.seed(global_env_id)
                    envs.append(env)

                    # this is to track the performance for individual DMLab levels
                    if hasattr(env.unwrapped, 'level_name'):
                        env_key[env_idx] = env.unwrapped.level_name

                episode_length = [0 for _ in envs]
                episode_lengths = [deque([], maxlen=20) for _ in envs]

            try:
                with timing.timeit('first_reset'):
                    for env_idx, env in enumerate(envs):
                        env.reset()
                        log.info('Process %d finished resetting %d/%d envs',
                                 proc_idx, env_idx + 1, len(envs))

                    self.report_queue.put(
                        dict(proc_idx=proc_idx, finished_reset=True))

                self.start_event.wait()

                with timing.timeit('work'):
                    last_report = last_report_frames = total_env_frames = 0
                    while not self.terminate.value and total_env_frames < self.cfg.sample_env_frames_per_worker:
                        for env_idx, env in enumerate(envs):
                            action = env.action_space.sample()
                            with timing.add_time(f'{env_key[env_idx]}.step'):
                                obs, reward, done, info = env.step(action)

                            num_frames = info.get('num_frames', 1)
                            total_env_frames += num_frames
                            episode_length[env_idx] += num_frames

                            if done:
                                with timing.add_time(
                                        f'{env_key[env_idx]}.reset'):
                                    env.reset()

                                episode_lengths[env_idx].append(
                                    episode_length[env_idx])
                                episode_length[env_idx] = 0

                        with timing.add_time('report'):
                            now = time.time()
                            if now - last_report > self.report_every_sec:
                                last_report = now
                                frames_since_last_report = total_env_frames - last_report_frames
                                last_report_frames = total_env_frames
                                self.report_queue.put(
                                    dict(proc_idx=proc_idx,
                                         env_frames=frames_since_last_report))

                # Extra check to make sure cpu affinity is preserved throughout the execution.
                # I observed weird effect when some environments tried to alter affinity of the current process, leading
                # to decreased performance.
                # This can be caused by some interactions between deep learning libs, OpenCV, MKL, OpenMP, etc.
                # At least user should know about it if this is happening.
                cpu_affinity = psutil.Process().cpu_affinity() if platform != 'darwin' else None
                assert initial_cpu_affinity == cpu_affinity, \
                    f'Worker CPU affinity was changed from {initial_cpu_affinity} to {cpu_affinity}! ' \
                    f'This can significantly affect performance!'

            except Exception:
                log.exception('Unknown exception')
                log.error('Unknown exception in worker %d, terminating...',
                          proc_idx)
                self.report_queue.put(dict(proc_idx=proc_idx, crash=True))

            time.sleep(proc_idx * 0.01 + 0.01)
            log.info('Process %d finished sampling. Timing: %s', proc_idx,
                     timing)

            for env_idx, env in enumerate(envs):
                if len(episode_lengths[env_idx]) > 0:
                    log.warning('Level %s avg episode len %d',
                                env_key[env_idx],
                                np.mean(episode_lengths[env_idx]))

            for env in envs:
                env.close()

    def report(self, env_frames):
        now = time.time()
        self.last_report = now

        self.fps_stats.append((now, env_frames))
        if len(self.fps_stats) <= 1:
            return

        fps = []
        for avg_interval in self.avg_stats_intervals:
            past_moment, past_frames = self.fps_stats[max(0, len(self.fps_stats) - 1 - avg_interval)]
            fps.append((env_frames - past_frames) / (now - past_moment))

        fps_str = []
        for interval, fps_value in zip(self.avg_stats_intervals, fps):
            fps_str.append(
                f'{int(interval * self.report_every_sec)} sec: {fps_value:.1f}'
            )
        fps_str = f'({", ".join(fps_str)})'
        log.info('Sampling FPS: %s. Total frames collected: %d', fps_str,
                 env_frames)

    def run(self):
        for p in self.processes:
            p.start()

        finished_reset = np.zeros([self.cfg.num_workers], dtype=bool)
        while not all(finished_reset):
            try:
                msg = self.report_queue.get(timeout=0.1)
                if 'finished_reset' in msg:
                    finished_reset[msg['proc_idx']] = True
                    log.debug('Process %d finished reset! Status %r',
                              msg['proc_idx'], finished_reset)
            except Empty:
                pass

        log.debug('All workers finished reset!')
        time.sleep(3)
        self.start_event.set()

        start = time.time()
        env_frames = 0
        last_process_report = [time.time() for _ in self.processes]

        while not self.terminate.value:
            try:
                try:
                    msgs = self.report_queue.get_many(
                        timeout=self.report_every_sec * 1.5)
                    for msg in msgs:
                        last_process_report[msg['proc_idx']] = time.time()

                        if 'crash' in msg:
                            self.terminate.value = True
                            log.error(
                                'Terminating due to process %d crashing...',
                                msg['proc_idx'])
                            break

                        env_frames += msg['env_frames']

                    if env_frames >= self.cfg.sample_env_frames:
                        self.terminate.value = True
                except Empty:
                    pass
            except KeyboardInterrupt:
                self.terminate.value = True
                log.error('KeyboardInterrupt in main loop! Terminating...')
                break

            if time.time() - self.last_report > self.report_every_sec:
                self.report(env_frames)

            for proc_idx, p in enumerate(self.processes):
                delay = time.time() - last_process_report[proc_idx]
                if delay > 600:
                    # killing the whole script is the best way to know that some of the processes froze
                    log.error(
                        'Process %d has not responded in %.1f s! Terminating...',
                        proc_idx, delay)
                    self.terminate.value = True

            for p in self.processes:
                if not p.is_alive():
                    self.terminate.value = True
                    log.error('Process %r died! Terminating...', p)

        total_time = time.time() - start
        log.info('Collected %d frames in %.1f s, avg FPS: %.1f', env_frames,
                 total_time, env_frames / total_time)
        log.debug('Done sampling...')

    def finalize(self):
        try:
            self.report_queue.get_many_nowait()
        except Empty:
            pass

        log.debug('Joining worker processes...')
        for p in self.processes:
            p.join()
        log.debug('Done joining!')
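
DummySampler also relies on a couple of helpers that are not shown here (str2bool for argparse and set_process_cpu_affinity for pinning workers). Minimal sketches of what they might look like, using psutil for affinity control; these are illustrative, not the project's actual implementations:

import argparse

import psutil


def str2bool(v):
    if isinstance(v, bool):
        return v
    if v.lower() in ('true', 'yes', '1'):
        return True
    if v.lower() in ('false', 'no', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected')


def set_process_cpu_affinity(worker_idx, num_workers):
    # pin the worker to a single core, wrapping around when there are
    # more workers than available cores (cpu_affinity is not supported on macOS)
    process = psutil.Process()
    available_cores = process.cpu_affinity()
    core = available_cores[worker_idx % len(available_cores)]
    process.cpu_affinity([core])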