def _init(self):
    """
    Initialize env runners, that actually do all the work. Also we're doing some utility stuff here, e.g.
    setting process affinity (this is a performance optimization).
    """
    log.info('Initializing envs for env runner %d...', self.worker_idx)

    # Optionally pin BLAS/OpenMP pools to a single thread so env workers don't oversubscribe cores.
    if self.cfg.force_envs_single_thread:
        from threadpoolctl import threadpool_limits
        threadpool_limits(limits=1, user_api=None)

    if self.cfg.set_workers_cpu_affinity:
        set_process_cpu_affinity(self.worker_idx, self.cfg.num_workers)

    # De-prioritize env workers relative to the rest of the system.
    # NOTE(review): value is capped at 20, but POSIX niceness normally tops out at 19 — confirm intended.
    psutil.Process().nice(min(self.cfg.default_niceness + 10, 20))

    envs_per_split = self.vector_size // self.num_splits

    self.env_runners = []
    for split in range(self.num_splits):
        runner = VectorEnvRunner(
            self.cfg, envs_per_split, self.worker_idx, split,
            self.num_agents, self.shared_buffers, self.reward_shaping,
        )
        runner.init()
        self.env_runners.append(runner)
def _init(self):
    """Create and initialize one VectorEnvRunner per split for this worker process."""
    log.info('Initializing envs for env runner %d...', self.worker_idx)

    if self.cfg.force_envs_single_thread:
        # Keep native thread pools (BLAS/OpenMP/etc.) from spawning extra threads per env worker.
        from threadpoolctl import threadpool_limits
        threadpool_limits(limits=1, user_api=None)

    if self.cfg.set_workers_cpu_affinity:
        set_process_cpu_affinity(self.worker_idx, self.cfg.num_workers)

    # Run env workers at lower priority than the rest of the training pipeline.
    psutil.Process().nice(min(self.cfg.default_niceness + 10, 20))

    def _make_runner(split_idx):
        # Each split gets an equal share of this worker's env vector.
        runner = VectorEnvRunner(
            self.cfg, self.vector_size // self.num_splits, self.worker_idx,
            split_idx, self.num_agents, self.shared_buffers, self.reward_shaping,
        )
        runner.init()
        return runner

    self.env_runners = [_make_runner(i) for i in range(self.num_splits)]
def sample(self, proc_idx):
    """Worker-process sampling loop: step random actions through a batch of envs.

    Creates and seeds ``num_envs_per_worker`` environments, resets them, then
    steps them with randomly sampled actions until either termination is
    requested or ``sample_env_frames_per_worker`` frames have been collected.
    Progress is reported to the parent via ``self.report_queue``.

    :param proc_idx: index of this worker process; used for env seeding,
        CPU affinity, report identification and a staggered shutdown sleep.
    """
    # workers should ignore Ctrl+C because the termination is handled in the event loop by a special msg
    signal.signal(signal.SIGINT, signal.SIG_IGN)

    timing = Timing()

    from threadpoolctl import threadpool_limits
    with threadpool_limits(limits=1, user_api=None):
        if self.cfg.set_workers_cpu_affinity:
            set_process_cpu_affinity(proc_idx, self.cfg.num_workers)
        # cpu_affinity() is not available on macOS, hence the platform check.
        initial_cpu_affinity = psutil.Process().cpu_affinity() if platform != 'darwin' else None
        psutil.Process().nice(10)

        with timing.timeit('env_init'):
            envs = []
            env_key = ['env' for _ in range(self.cfg.num_envs_per_worker)]

            for env_idx in range(self.cfg.num_envs_per_worker):
                global_env_id = proc_idx * self.cfg.num_envs_per_worker + env_idx
                env_config = AttrDict(worker_index=proc_idx, vector_index=env_idx, env_id=global_env_id)

                env = create_env(self.cfg.env, cfg=self.cfg, env_config=env_config)
                log.debug(
                    'CPU affinity after create_env: %r',
                    psutil.Process().cpu_affinity() if platform != 'darwin' else 'MacOS - None')
                env.seed(global_env_id)
                envs.append(env)

                # this is to track the performance for individual DMLab levels
                if hasattr(env.unwrapped, 'level_name'):
                    env_key[env_idx] = env.unwrapped.level_name

            # Per-env running episode length and a window of recent completed lengths.
            episode_length = [0 for _ in envs]
            episode_lengths = [deque([], maxlen=20) for _ in envs]

        try:
            with timing.timeit('first_reset'):
                for env_idx, env in enumerate(envs):
                    env.reset()
                    log.info('Process %d finished resetting %d/%d envs', proc_idx, env_idx + 1, len(envs))

                self.report_queue.put(dict(proc_idx=proc_idx, finished_reset=True))

            # Wait until all workers have finished their first reset before sampling.
            self.start_event.wait()

            with timing.timeit('work'):
                last_report = last_report_frames = total_env_frames = 0
                while not self.terminate.value and total_env_frames < self.cfg.sample_env_frames_per_worker:
                    for env_idx, env in enumerate(envs):
                        action = env.action_space.sample()
                        with timing.add_time(f'{env_key[env_idx]}.step'):
                            obs, reward, done, info = env.step(action)

                        # Frame-skipping envs may report multiple frames per step.
                        num_frames = info.get('num_frames', 1)
                        total_env_frames += num_frames
                        episode_length[env_idx] += num_frames

                        if done:
                            with timing.add_time(f'{env_key[env_idx]}.reset'):
                                env.reset()

                            episode_lengths[env_idx].append(episode_length[env_idx])
                            episode_length[env_idx] = 0

                    with timing.add_time('report'):
                        now = time.time()
                        if now - last_report > self.report_every_sec:
                            last_report = now
                            frames_since_last_report = total_env_frames - last_report_frames
                            last_report_frames = total_env_frames
                            self.report_queue.put(dict(proc_idx=proc_idx, env_frames=frames_since_last_report))

            # Extra check to make sure cpu affinity is preserved throughout the execution.
            # I observed weird effect when some environments tried to alter affinity of the current process, leading
            # to decreased performance.
            # This can be caused by some interactions between deep learning libs, OpenCV, MKL, OpenMP, etc.
            # At least user should know about it if this is happening.
            cpu_affinity = psutil.Process().cpu_affinity() if platform != 'darwin' else None
            assert initial_cpu_affinity == cpu_affinity, \
                f'Worker CPU affinity was changed from {initial_cpu_affinity} to {cpu_affinity}! ' \
                f'This can significantly affect performance!'

        except Exception:
            # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt are not swallowed;
            # SIGINT is already ignored above, so shutdown behavior is unchanged.
            log.exception('Unknown exception')
            log.error('Unknown exception in worker %d, terminating...', proc_idx)
            self.report_queue.put(dict(proc_idx=proc_idx, crash=True))

        # Stagger worker shutdown slightly so log output and queue traffic don't collide.
        time.sleep(proc_idx * 0.01 + 0.01)
        log.info('Process %d finished sampling. Timing: %s', proc_idx, timing)

        for env_idx, env in enumerate(envs):
            if len(episode_lengths[env_idx]) > 0:
                log.warning('Level %s avg episode len %d', env_key[env_idx], np.mean(episode_lengths[env_idx]))

        for env in envs:
            env.close()