def forward_pass(device_type):
    """Benchmark forward passes of an actor-critic model on the given device.

    :param device_type: torch device string, e.g. 'cpu' or 'cuda'.
    Logs per-phase timings via the module-level Timing/log utilities.
    """
    env_name = 'atari_breakout'
    cfg = default_cfg(algo='appooc', env=env_name)
    cfg.actor_critic_share_weights = True
    cfg.hidden_size = 128
    cfg.use_rnn = True
    cfg.env_framestack = 4

    env = create_env(env_name, cfg=cfg)

    # single-threaded CPU ops + cudnn autotuner for stable benchmark numbers
    torch.set_num_threads(1)
    torch.backends.cudnn.benchmark = True

    actor_critic = create_actor_critic(cfg, env.observation_space, env.action_space)
    device = torch.device(device_type)
    actor_critic.to(device)

    timing = Timing()
    with timing.timeit('all'):
        batch_size = 128
        with timing.add_time('input'):
            # better avoid hardcoding here...
            # (84x84 with 4 stacked frames matches the atari config above)
            fake_obs = torch.rand([batch_size, 4, 84, 84]).to(device)
            observations = dict(obs=fake_obs)
            rnn_states = torch.rand([batch_size, get_hidden_size(cfg)]).to(device)

        num_iterations = 200
        for iteration in range(num_iterations):
            with timing.add_time('forward'):
                output = actor_critic(observations, rnn_states)
                log.debug('Progress %d/%d', iteration, num_iterations)

    log.debug('Timing: %s', timing)
def test_quad_env(self):
    """Smoke-test the single quadrotor env and measure its stepping FPS.

    Fix: the original created the env twice (the instance passed to
    assertIsNotNone was leaked) and never closed the env.
    """
    env = create_env('quadrotor_single')
    self.assertIsNotNone(env)

    obs = env.reset()
    n_frames = 10000

    timing = Timing()
    with timing.timeit('step'):
        for i in range(n_frames):
            obs, r, d, info = env.step(env.action_space.sample())
            if d:
                env.reset()

    log.debug('Time %s, FPS %.1f', timing, n_frames / timing.step)
    env.close()
def make_env_func(cfg, env_config):
    """Create an environment and normalize its interface.

    Ensures the returned env is multi-agent-shaped and exposes a Dict
    observation space, wrapping as needed.
    """
    wrapped = create_env(cfg.env, cfg=cfg, env_config=env_config)

    needs_multiagent_wrapper = not is_multiagent_env(wrapped)
    if needs_multiagent_wrapper:
        wrapped = MultiAgentWrapper(wrapped)

    obs_space_is_dict = isinstance(wrapped.observation_space, spaces.Dict)
    if not obs_space_is_dict:
        wrapped = DictObservationsWrapper(wrapped)

    return wrapped
def __init__(
        self,
        game=None,
        frame_skip=4,  # Frames per step (>=1).
        num_img_obs=4,  # Number of (past) frames in observation (>=1).
        clip_reward=True,
        episodic_lives=True,
        max_start_noops=30,
        repeat_action_probability=0.,
        horizon=27000,
):
    """Adapter that wraps a Doom env behind an rlpyt-style interface.

    NOTE(review): only `game` is consumed here; the remaining parameters
    appear to exist for signature compatibility with a sibling wrapper.
    """
    if not game:
        game = 'doom_battle'  # sensible default scenario

    env_cfg = default_cfg(env=game)
    env_cfg.wide_aspect_ratio = False

    self.env = create_env(game, cfg=env_cfg)
    self._observation_space = self.env.observation_space

    discrete_space = self.env.action_space
    # only for discrete space
    self._action_space = IntBox(low=0, high=discrete_space.n)

    self.first_reset = True
def __init__(self, level, config, num_action_repeats, seed, runfiles_path=None, level_cache=None):
    """Doom benchmark env behind a DMLab-style constructor signature.

    A file lock serializes the first reset across processes; the loop
    retries indefinitely until the lock is acquired.
    """
    self._observation_spec = ['RGB_INTERLEAVED']

    env_name = 'doom_benchmark'
    doom_cfg = default_cfg(env=env_name, algo=None)
    doom_cfg.pixel_format = 'HWC'
    doom_cfg.res_w = DOOM_W
    doom_cfg.res_h = DOOM_H
    doom_cfg.wide_aspect_ratio = False

    self._env = create_env(env_name, cfg=doom_cfg)

    doom_lock = FileLock(DOOM_LOCK_PATH)
    num_attempts = 0
    while True:
        num_attempts += 1
        try:
            with doom_lock.acquire(timeout=10):
                print('Env created, resetting...')
                self._env.reset()
                print('Env reset completed!')
                break
        except Timeout:
            print('Another instance of this application currently holds the lock, attempt:', num_attempts)
def make_env_func(cfg, env_config):
    """Create an environment and normalize its interface.

    Single-agent envs (no `num_agents` attribute, or num_agents <= 1) are
    wrapped to look multi-agent; non-Dict observation spaces are wrapped
    into a Dict.
    """
    env = create_env(cfg.env, cfg=cfg, env_config=env_config)

    # equivalent to: no num_agents attribute, or num_agents <= 1
    if getattr(env, 'num_agents', 1) <= 1:
        env = MultiAgentWrapper(env)

    if not isinstance(env.observation_space, spaces.Dict):
        env = DictObservationsWrapper(env)

    return env
def test_quad_multi_env(self):
    """Smoke-test the multi-quadrotor env and measure stepping FPS.

    Fix: the original created the env twice (the instance passed to
    assertIsNotNone was leaked) and never closed the env.
    """
    env_name = 'quadrotor_multi'
    cfg = default_cfg(env=env_name)

    env = create_env(env_name, cfg=cfg)
    self.assertIsNotNone(env)

    env.reset()
    n_frames = 1000

    timing = Timing()
    with timing.timeit('step'):
        for i in range(n_frames):
            obs, r, d, info = env.step(
                [env.action_space.sample() for _ in range(env.num_agents)])

    log.debug('Time %s, FPS %.1f', timing, n_frames / timing.step)
    env.close()
def initialize(self):
    """Prepare sampler workers (processes are created but not started here)."""
    # creating an environment in the main process tends to fix some very weird issues further down the line
    # https://stackoverflow.com/questions/60963839/importing-opencv-after-importing-pytorch-messes-with-cpu-affinity
    # do not delete this unless you know what you're doing
    warmup_env = create_env(self.cfg.env, cfg=self.cfg, env_config=None)
    warmup_env.close()

    for worker_idx in range(self.cfg.num_workers):
        worker_process = multiprocessing.Process(target=self.sample, args=(worker_idx, ))
        self.processes.append(worker_process)
def main():
    """Play one random-policy episode of doom_battle with rendering.

    Fix: the original never closed the env, leaking the renderer/window.
    """
    env_name = 'doom_battle'
    env = create_env(env_name, cfg=default_cfg(env=env_name))

    env.reset()
    done = False
    while not done:
        env.render()
        obs, rew, done, info = env.step(env.action_space.sample())

    env.close()
    log.info('Done!')
def run_multi_quadrotor_env(env_name, cfg):
    """Warm up, then benchmark stepping FPS of a multi-quadrotor env.

    :param env_name: registered environment name.
    :param cfg: config object passed through to create_env.

    Fix: the original created a second env without closing the warm-up env,
    leaking it for the duration of the benchmark.
    """
    # warm-up run (e.g. to trigger JIT compilation) on a throwaway env
    env = create_env(env_name, cfg=cfg)
    env.reset()
    for i in range(100):
        obs, r, d, info = env.step(
            [env.action_space.sample() for _ in range(env.num_agents)])
    env.close()  # release the warm-up env before the timed run

    n_frames = 1000
    env = create_env(env_name, cfg=cfg)
    env.reset()

    timing = Timing()
    with timing.timeit('step'):
        for i in range(n_frames):
            obs, r, d, info = env.step(
                [env.action_space.sample() for _ in range(env.num_agents)])

    # FPS counts one frame per agent per step
    log.debug('Time %s, FPS %.1f', timing, n_frames * env.num_agents / timing.step)
    env.close()
def test_minigrid_env(self):
    """Smoke-test a MiniGrid env with a random policy.

    Fix: the original accumulated total_rew but never used it (dead
    computation — the sibling voxel test logs it), and never closed the env.
    """
    env_name = 'MiniGrid-Empty-Random-5x5-v0'
    env = create_env(env_name, cfg=default_cfg(env=env_name))

    log.info('Env action space: %r', env.action_space)
    log.info('Env obs space: %r', env.observation_space)

    env.reset()
    total_rew = 0
    for i in range(1000):
        obs, rew, done, info = env.step(env.action_space.sample())
        total_rew += rew
        if done:
            env.reset()

    log.info('Total rew: %.3f', total_rew)
    env.close()
def test_voxel_env(self):
    """Smoke-test the multi-agent voxel Sokoban env with random actions."""
    env_name = 'voxel_env_Sokoban'
    env = create_env(env_name, cfg=default_cfg(env=env_name))

    log.info('Env action space: %r', env.action_space)
    log.info('Env obs space: %r', env.observation_space)

    env.reset()
    total_rew = 0
    for step_idx in range(1000):
        # one independent random action per agent
        actions = [env.action_space.sample() for _ in range(env.num_agents)]
        obs, rew, done, info = env.step(actions)
        total_rew += sum(rew)

    log.info('Total rew: %.3f', total_rew)
def __init__(self, level, config, num_action_repeats, seed, runfiles_path=None, level_cache=None):
    """Atari Breakout env behind a DMLab-style constructor signature.

    NOTE(review): the DMLab-style parameters (level, seed, ...) are accepted
    for interface compatibility but not consumed here.
    """
    self._observation_spec = ['RGB_INTERLEAVED']

    env_name = 'atari_breakout'
    atari_cfg = default_cfg(env=env_name, algo=None)
    atari_cfg.pixel_format = 'HWC'
    atari_cfg.res_w = ATARI_W
    atari_cfg.res_h = ATARI_H

    self._env = create_env(env_name, cfg=atari_cfg)
def __init__(
        self,
        game=None,
        frame_skip=4,  # Frames per step (>=1).
        num_img_obs=4,  # Number of (past) frames in observation (>=1).
        clip_reward=True,
        episodic_lives=True,
        max_start_noops=30,
        repeat_action_probability=0.,
        horizon=27000,
):
    """Adapter wrapping a DMLab env behind an rlpyt-style interface.

    NOTE(review): unlike the Doom sibling wrapper there is no fallback
    when game is None — default_cfg(env=None) is passed through as-is;
    confirm callers always supply a game name.
    """
    dmlab_cfg = default_cfg(env=game)
    dmlab_cfg.res_w = 96
    dmlab_cfg.res_h = 72
    # throughput-benchmark mode with the software renderer
    dmlab_cfg.dmlab_throughput_benchmark = True
    dmlab_cfg.dmlab_renderer = 'software'

    self.env = create_env(game, cfg=dmlab_cfg)
    self._observation_space = self.env.observation_space

    discrete_space = self.env.action_space
    # only for discrete space
    self._action_space = IntBox(low=0, high=discrete_space.n)
def make_env_func(env_config):
    # Environment factory passed to the sampler/runner.
    # NOTE(review): `cfg` is a closure variable captured from the enclosing
    # scope, not a parameter — verify it is defined where this is used.
    return create_env(cfg.env, cfg=cfg, env_config=env_config)
def test_quad_multi_env_with_numba(self):
    """Run the multi-quadrotor benchmark with numba acceleration enabled.

    Fix: the original leaked the env instance it created just to assert
    non-None; it is now closed before the benchmark creates its own envs.
    """
    env_name = 'quadrotor_multi'
    cfg = default_cfg(env=env_name)
    cfg.quads_use_numba = True

    env = create_env(env_name, cfg=cfg)
    self.assertIsNotNone(env)
    env.close()

    run_multi_quadrotor_env(env_name, cfg)
def test_quad_multi_env(self):
    """Run the multi-quadrotor benchmark with default settings.

    Fix: the original leaked the env instance it created just to assert
    non-None; it is now closed before the benchmark creates its own envs.
    """
    env_name = 'quadrotor_multi'
    cfg = default_cfg(env=env_name)

    env = create_env(env_name, cfg=cfg)
    self.assertIsNotNone(env)
    env.close()

    run_multi_quadrotor_env(env_name, cfg)
def make_env_func(env_config):
    # Environment factory bound to a fixed env name.
    # NOTE(review): `ENV_NAME` and `common_config` are captured from the
    # enclosing scope (closure) — verify they are defined where this is used.
    return create_env(ENV_NAME, cfg=common_config, env_config=env_config)
def sample(self, proc_idx):
    """Worker-process entry point: step a batch of envs with random actions
    and report frame counts back to the parent via self.report_queue.

    :param proc_idx: index of this worker process (used for env ids, CPU
        affinity, and staggered shutdown sleeps).
    NOTE(review): nesting reconstructed from whitespace-mangled source —
    confirm against the original file.
    """
    # workers should ignore Ctrl+C because the termination is handled in the event loop by a special msg
    signal.signal(signal.SIGINT, signal.SIG_IGN)

    timing = Timing()

    # limit BLAS/OpenMP thread pools to 1 thread for the whole worker
    from threadpoolctl import threadpool_limits
    with threadpool_limits(limits=1, user_api=None):
        if self.cfg.set_workers_cpu_affinity:
            set_process_cpu_affinity(proc_idx, self.cfg.num_workers)

        # cpu_affinity() is unavailable on macOS
        initial_cpu_affinity = psutil.Process().cpu_affinity() if platform != 'darwin' else None
        psutil.Process().nice(10)  # lower priority so the main process stays responsive

        with timing.timeit('env_init'):
            envs = []
            # per-env timing key; overwritten with the level name for DMLab envs
            env_key = ['env' for _ in range(self.cfg.num_envs_per_worker)]

            for env_idx in range(self.cfg.num_envs_per_worker):
                global_env_id = proc_idx * self.cfg.num_envs_per_worker + env_idx
                env_config = AttrDict(worker_index=proc_idx, vector_index=env_idx, env_id=global_env_id)

                env = create_env(self.cfg.env, cfg=self.cfg, env_config=env_config)
                log.debug(
                    'CPU affinity after create_env: %r',
                    psutil.Process().cpu_affinity() if platform != 'darwin' else 'MacOS - None')
                env.seed(global_env_id)
                envs.append(env)

                # this is to track the performance for individual DMLab levels
                if hasattr(env.unwrapped, 'level_name'):
                    env_key[env_idx] = env.unwrapped.level_name

            # current episode length and a rolling window of finished lengths, per env
            episode_length = [0 for _ in envs]
            episode_lengths = [deque([], maxlen=20) for _ in envs]

        try:
            with timing.timeit('first_reset'):
                for env_idx, env in enumerate(envs):
                    env.reset()
                    log.info('Process %d finished resetting %d/%d envs', proc_idx, env_idx + 1, len(envs))

                self.report_queue.put(dict(proc_idx=proc_idx, finished_reset=True))

            # wait until the parent signals all workers to start sampling
            self.start_event.wait()

            with timing.timeit('work'):
                last_report = last_report_frames = total_env_frames = 0
                while not self.terminate.value and total_env_frames < self.cfg.sample_env_frames_per_worker:
                    for env_idx, env in enumerate(envs):
                        action = env.action_space.sample()
                        with timing.add_time(f'{env_key[env_idx]}.step'):
                            obs, reward, done, info = env.step(action)

                        # envs with frame-skip may report >1 frame per step
                        num_frames = info.get('num_frames', 1)
                        total_env_frames += num_frames
                        episode_length[env_idx] += num_frames

                        if done:
                            with timing.add_time(f'{env_key[env_idx]}.reset'):
                                env.reset()

                            episode_lengths[env_idx].append(episode_length[env_idx])
                            episode_length[env_idx] = 0

                    with timing.add_time('report'):
                        now = time.time()
                        # throttle reports to at most one per report_every_sec
                        if now - last_report > self.report_every_sec:
                            last_report = now
                            frames_since_last_report = total_env_frames - last_report_frames
                            last_report_frames = total_env_frames
                            self.report_queue.put(dict(proc_idx=proc_idx, env_frames=frames_since_last_report))

            # Extra check to make sure cpu affinity is preserved throughout the execution.
            # I observed weird effect when some environments tried to alter affinity of the current process, leading
            # to decreased performance.
            # This can be caused by some interactions between deep learning libs, OpenCV, MKL, OpenMP, etc.
            # At least user should know about it if this is happening.
            cpu_affinity = psutil.Process().cpu_affinity() if platform != 'darwin' else None
            assert initial_cpu_affinity == cpu_affinity, \
                f'Worker CPU affinity was changed from {initial_cpu_affinity} to {cpu_affinity}!' \
                f'This can significantly affect performance!'

        except:
            # NOTE(review): bare except — intentionally catches everything so the
            # crash is reported to the parent instead of dying silently
            log.exception('Unknown exception')
            log.error('Unknown exception in worker %d, terminating...', proc_idx)
            self.report_queue.put(dict(proc_idx=proc_idx, crash=True))

        # stagger shutdown slightly per worker to avoid a thundering herd
        time.sleep(proc_idx * 0.01 + 0.01)
        log.info('Process %d finished sampling. Timing: %s', proc_idx, timing)

        for env_idx, env in enumerate(envs):
            if len(episode_lengths[env_idx]) > 0:
                log.warning('Level %s avg episode len %d', env_key[env_idx], np.mean(episode_lengths[env_idx]))

        for env in envs:
            env.close()
def sample(self, proc_idx):
    """Worker-process entry point for populating the DMLab level cache:
    repeatedly reset one env so each reset generates a new cached level.

    :param proc_idx: index of this worker process (used for env id and
        staggered shutdown sleep).
    NOTE(review): nesting reconstructed from whitespace-mangled source
    (notably the time.sleep(4) and disk-space check placement) — confirm
    against the original file.
    """
    # workers should ignore Ctrl+C because the termination is handled in the event loop by a special msg
    signal.signal(signal.SIGINT, signal.SIG_IGN)

    timing = Timing()

    psutil.Process().nice(10)  # lower priority; this is a background cache-filling job

    # one worker group per cached DMLab level
    num_envs = len(DMLAB30_LEVELS_THAT_USE_LEVEL_CACHE)
    assert self.cfg.num_workers % num_envs == 0, f'should have an integer number of workers per env, e.g. {1 * num_envs}, {2 * num_envs}, etc...'
    assert self.cfg.num_envs_per_worker == 1, 'use populate_cache with 1 env per worker'

    with timing.timeit('env_init'):
        env_key = 'env'  # overwritten with the DMLab level name below
        env_desired_num_levels = 0

        global_env_id = proc_idx * self.cfg.num_envs_per_worker
        env_config = AttrDict(worker_index=proc_idx, vector_index=0, env_id=global_env_id)
        env = create_env(self.cfg.env, cfg=self.cfg, env_config=env_config)
        env.seed(global_env_id)

        # this is to track the performance for individual DMLab levels
        if hasattr(env.unwrapped, 'level_name'):
            env_key = env.unwrapped.level_name
            env_level = env.unwrapped.level

            # estimate how many distinct levels this worker must generate so the
            # cache covers DESIRED_TRAINING_LENGTH frames of training
            approx_num_episodes_per_1b_frames = DMLAB30_APPROX_NUM_EPISODES_PER_BILLION_FRAMES[env_key]
            num_billions = DESIRED_TRAINING_LENGTH / int(1e9)
            num_workers_for_env = self.cfg.num_workers // num_envs
            env_desired_num_levels = int((approx_num_episodes_per_1b_frames * num_billions) / num_workers_for_env)

            # levels already present in the global cache, split evenly across workers
            env_num_levels_generated = len(dmlab_level_cache.DMLAB_GLOBAL_LEVEL_CACHE[0].all_seeds[env_level]) // num_workers_for_env

            log.warning('Worker %d (env %s) generated %d/%d levels!', proc_idx, env_key, env_num_levels_generated, env_desired_num_levels)
            time.sleep(4)

        env.reset()
        env_uses_level_cache = env.unwrapped.env_uses_level_cache

        self.report_queue.put(dict(proc_idx=proc_idx, finished_reset=True))

    # wait until the parent signals all workers to start
    self.start_event.wait()

    try:
        with timing.timeit('work'):
            last_report = last_report_frames = total_env_frames = 0
            while not self.terminate.value and total_env_frames < self.cfg.sample_env_frames_per_worker:
                action = env.action_space.sample()
                with timing.add_time(f'{env_key}.step'):
                    env.step(action)

                total_env_frames += 1

                # reset every step: each reset generates (and caches) a new level
                with timing.add_time(f'{env_key}.reset'):
                    env.reset()

                env_num_levels_generated += 1
                log.debug('Env %s done %d/%d resets', env_key, env_num_levels_generated, env_desired_num_levels)

                if env_num_levels_generated >= env_desired_num_levels:
                    log.debug('%s finished %d/%d resets, sleeping...', env_key, env_num_levels_generated, env_desired_num_levels)
                    time.sleep(30)  # free up CPU time for other envs

                # if env does not use level cache, there is no need to run it
                # let other workers proceed
                if not env_uses_level_cache:
                    log.debug('Env %s does not require cache, sleeping...', env_key)
                    time.sleep(200)

                with timing.add_time('report'):
                    now = time.time()
                    # throttle reports to at most one per report_every_sec
                    if now - last_report > self.report_every_sec:
                        last_report = now
                        frames_since_last_report = total_env_frames - last_report_frames
                        last_report_frames = total_env_frames
                        self.report_queue.put(dict(proc_idx=proc_idx, env_frames=frames_since_last_report))

                        # back off when the disk holding the cache is nearly full (< 3 GiB)
                        if get_free_disk_space_mb(self.cfg) < 3 * 1024:
                            log.error('Not enough disk space! %d', get_free_disk_space_mb(self.cfg))
                            time.sleep(200)
    except:
        # NOTE(review): bare except — intentionally catches everything so the
        # crash is reported to the parent instead of dying silently
        log.exception('Unknown exception')
        log.error('Unknown exception in worker %d, terminating...', proc_idx)
        self.report_queue.put(dict(proc_idx=proc_idx, crash=True))

    # stagger shutdown slightly per worker to avoid a thundering herd
    time.sleep(proc_idx * 0.1 + 0.1)
    log.info('Process %d finished sampling. Timing: %s', proc_idx, timing)
    env.close()