def _game_init(self, with_locking=True, max_parallel=10):
    """Initialize the VizDoom game instance, optionally serializing init across processes.

    VizDoom initialization is unreliable when many processes init at once, so when
    with_locking is True we guard game.init() with an inter-process file lock that
    admits at most max_parallel concurrent initializations.

    Args:
        with_locking: whether to guard game.init() with an inter-process file lock.
        max_parallel: max number of processes allowed to initialize simultaneously
            (only used when with_locking is True).

    Raises:
        EnvCriticalError: if game.init() itself raises (worker should be terminated).
    """
    lock_file = lock = None
    if with_locking:
        lock_file = doom_lock_file(max_parallel)
        lock = FileLock(lock_file)

    init_attempt = 0
    while True:
        init_attempt += 1
        try:
            if with_locking:
                with lock.acquire(timeout=20):
                    self.game.init()
            else:
                self.game.init()
            break
        except Timeout:
            # lock is held by another initializing process -- just retry
            if with_locking:
                log.debug(
                    'Another process currently holds the lock %s, attempt: %d',
                    lock_file,
                    init_attempt,
                )
        except Exception as exc:
            log.warning('VizDoom game.init() threw an exception %r. Terminate process...', exc)
            from seed_rl.envs.env_utils import EnvCriticalError
            # chain the original exception so the root cause stays in the traceback
            raise EnvCriticalError() from exc
def find_available_port(start_port, increment=1000):
    """Scan upwards from `start_port` in steps of `increment` until a free UDP port is found."""
    candidate = start_port
    while candidate < 65535 and not is_udp_port_available(candidate):
        candidate += increment

    log.debug('Port %r is available', candidate)
    return candidate
def load_from_checkpoint(cfg):
    """Restore a saved experiment configuration from disk, then layer CLI overrides on top."""
    filename = cfg_file(cfg)
    if not os.path.isfile(filename):
        raise Exception(f'Could not load saved parameters for experiment {cfg.experiment}')

    with open(filename, 'r') as json_file:
        json_params = json.load(json_file)
        log.warning('Loading existing experiment configuration from %s', filename)
        loaded_cfg = AttrDict(json_params)

    # override the parameters in config file with values passed from command line
    for arg_name, arg_value in vars(cfg.cli_args).items():
        if loaded_cfg[arg_name] != arg_value:
            log.debug('Overriding arg %r with value %r passed from command line', arg_name, arg_value)
            loaded_cfg[arg_name] = arg_value

    # incorporate extra CLI parameters that were not present in JSON file
    for arg_name, arg_value in vars(cfg).items():
        if arg_name not in loaded_cfg:
            log.debug('Adding new argument %r=%r that is not in the saved config file!', arg_name, arg_value)
            loaded_cfg[arg_name] = arg_value

    return loaded_cfg
def __init__(self, cfg, obs_space, timing):
    """Encoder combining a standard visual encoder with an LSTM over text instructions.

    The instruction LSTM output is concatenated with the visual features, so the
    final encoder output size is basic encoder size + LSTM hidden units.
    NOTE(review): module creation order is deliberately preserved -- reordering
    would change parameter-initialization RNG consumption.
    """
    super().__init__(cfg, timing)

    self.basic_encoder = create_standard_encoder(cfg, obs_space, timing)
    self.encoder_out_size = self.basic_encoder.encoder_out_size

    # same as IMPALA paper
    self.embedding_size = 20
    self.instructions_lstm_units = 64
    self.instructions_lstm_layers = 1

    # index 0 is reserved for padding tokens (contributes zero embedding)
    padding_idx = 0
    self.word_embedding = nn.Embedding(
        num_embeddings=DMLAB_VOCABULARY_SIZE,
        embedding_dim=self.embedding_size,
        padding_idx=padding_idx)

    self.instructions_lstm = nn.LSTM(
        input_size=self.embedding_size,
        hidden_size=self.instructions_lstm_units,
        num_layers=self.instructions_lstm_layers,
        batch_first=True,
    )

    # learnable initial state?
    # initial_hidden_values = torch.normal(0, 1, size=(self.instructions_lstm_units, ))
    # self.lstm_h0 = nn.Parameter(initial_hidden_values, requires_grad=True)
    # self.lstm_c0 = nn.Parameter(initial_hidden_values, requires_grad=True)

    # instruction features are concatenated with visual features downstream
    self.encoder_out_size += self.instructions_lstm_units
    log.debug('Policy head output size: %r', self.encoder_out_size)

    self.cpu_device = torch.device('cpu')
def predict(self, imagined_action_lists):
    """Step the 'imagined' (deep-copied) envs with the given action lists.

    Args:
        imagined_action_lists: one action list per env (len == self.num_envs);
            split evenly across workers and dispatched as STEP_IMAGINED tasks.

    Returns:
        Tuple (observations, rewards, dones) aggregated across all workers.
    """
    start = time.time()

    assert len(imagined_action_lists) == self.num_envs
    imagined_action_lists = np.split(np.array(imagined_action_lists), self.num_workers)
    for worker, imagined_action_list in zip(self.workers, imagined_action_lists):
        worker.task_queue.put((imagined_action_list, MsgType.STEP_IMAGINED))

    observations = []
    rewards = []
    dones = []
    for worker in self.workers:
        # wait until the worker has consumed its task before reading results
        worker.task_queue.join()
        results_per_worker = safe_get(
            worker.result_queue,
            timeout=1.0,
            msg='Took a surprisingly long time to predict the future, retrying...',
        )

        assert len(results_per_worker) == len(imagined_action_lists[0])
        for result in results_per_worker:
            # each result is a sequence of (obs, reward, done, info) tuples; info is discarded
            o, r, d, _ = zip(*result)
            observations.append(o)
            rewards.append(r)
            dones.append(d)
        worker.result_queue.task_done()

    if self._verbose:
        log.debug('Prediction step took %.4f s', time.time() - start)
    return observations, rewards, dones
def step(self, actions):
    """Advance the multiplayer episode by a single tick with the given actions.

    Returns (None, None, None, None) on ticks where the game state is not
    updated (frame skipping is coordinated externally in multi-agent mode);
    otherwise returns the usual (obs, reward, done, info) tuple.
    """
    if self.skip_frames > 1 or self.num_agents == 1:
        # not used in multi-agent mode due to VizDoom limitations
        # this means that we have only one agent (+ maybe some bots, which is why we're in multiplayer mode)
        return super().step(actions)

    self._ensure_initialized()

    actions_binary = self._convert_actions(actions)

    self.game.set_action(actions_binary)
    # advance exactly one tick; state is refreshed only when self.update_state is set
    self.game.advance_action(1, self.update_state)
    self.timestep += 1

    if not self.update_state:
        return None, None, None, None

    state = self.game.get_state()
    reward = self.game.get_last_reward()
    done = self.game.is_episode_finished()

    if self.record_to is not None:
        # send 'stop recording' command 1 tick before the end of the episode
        # otherwise it does not get saved to disk
        if self.game.get_episode_time() + 1 == self.game.get_episode_timeout():
            log.debug('Calling stop recording command!')
            self.game.send_game_command('stop')

    observation, done, info = self._process_game_step(state, done, {})
    return observation, reward, done, info
def main():
    """Replay a recorded VizDoom demo and dump every rendered frame as a PNG.

    Frames are written to '<demo_path>_frames'; any previous contents of that
    directory are removed first.
    """
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--env', type=str, default=None, required=True)
    parser.add_argument('--demo_path', type=str, default=None, required=True)
    args = parser.parse_args()

    spec = doom_env_by_name(args.env)
    cfg = default_cfg(env=args.env)
    if spec.num_agents <= 1:
        env = make_doom_env(args.env, cfg=cfg, custom_resolution='640x480')
    else:
        env = make_doom_env_impl(
            spec,
            cfg=cfg,
            custom_resolution='640x480',
            player_id=0,
            num_agents=spec.num_agents,
            max_num_players=spec.num_agents,
            num_bots=spec.num_bots,
        )

    mode = 'replay'
    env.unwrapped.mode = mode
    env.unwrapped.initialize()
    game = env.unwrapped.game
    game.replay_episode(args.demo_path)

    frames_dir = args.demo_path + '_frames'
    if os.path.exists(frames_dir):
        shutil.rmtree(frames_dir)
    os.makedirs(frames_dir)

    frame_id = 0
    while not game.is_episode_finished():
        # Use advance_action instead of make_action.
        game.advance_action()
        img = env.render(mode='rgb_array')
        # BUG FIX: check for None BEFORE cv2.cvtColor -- the original converted
        # first and only then checked, which crashes when render() returns None
        if img is not None:
            frame_name = f'{frame_id:05d}.png'
            img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
            cv2.imwrite(join(frames_dir, frame_name), img)
            frame_id += 1

        r = game.get_last_reward()
        log.debug('Reward %.3f at frame %d', r, frame_id)

    game.close()
def dbg_print(self):
    """Log a single-line summary of the distribution's diagnostic statistics."""
    stats = {
        'entropy': self.entropy().mean(),
        'kl_prior': self.kl_prior().mean(),
        'min_logit': self.logits.min(),
        'max_logit': self.logits.max(),
        'min_prob': self.probs.min(),
        'max_prob': self.probs.max(),
    }
    parts = [f'{name}={tensor.cpu().item():.3f} ' for name, tensor in stats.items()]
    log.debug(''.join(parts))
def __init__(self, env, initial_difficulty=None):
    """Env wrapper implementing an adaptive bot-difficulty curriculum.

    When initial_difficulty equals the maximum, the adaptive curriculum is
    disabled and difficulty stays fixed.
    """
    super().__init__(env)

    # difficulty scale and curriculum parameters
    self._min_difficulty = 0
    self._max_difficulty = 150
    self._difficulty_step = 10

    if initial_difficulty is None:
        self._curr_difficulty = 20
    else:
        self._curr_difficulty = initial_difficulty
    self._difficulty_std = 10

    log.info('Starting with bot difficulty %d', self._curr_difficulty)

    self._adaptive_curriculum = True
    if initial_difficulty == self._max_difficulty:
        log.debug('Starting at max difficulty, disable adaptive skill curriculum')
        self._adaptive_curriculum = False
def _ensure_initialized(self):
    """Lazily spawn and initialize one worker per agent (idempotent).

    A free UDP port is found for the match and the whole init sequence is
    guarded by an inter-process file lock; lock timeouts simply retry the
    entire attempt, while any other failure aborts the process.
    """
    if self.initialized:
        return

    self.workers = [
        MultiAgentEnvWorker(i, self.make_env_func, self.env_config, reset_on_init=self.reset_on_init)
        for i in range(self.num_agents)
    ]

    init_attempt = 0
    while True:
        init_attempt += 1
        try:
            port_to_use = udp_port_num(self.env_config)
            port = find_available_port(port_to_use, increment=1000)
            log.debug('Using port %d', port)
            init_info = dict(port=port)

            lock_file = doom_lock_file(max_parallel=20)
            lock = FileLock(lock_file)
            with lock.acquire(timeout=10):
                for i, worker in enumerate(self.workers):
                    worker.task_queue.put((init_info, TaskType.INIT))
                    # stagger worker init to avoid overloading the machine
                    if self.safe_init:
                        time.sleep(1.0)  # just in case
                    else:
                        time.sleep(0.05)

                # wait for every worker to confirm successful initialization
                for i, worker in enumerate(self.workers):
                    worker.result_queue.get(timeout=5)
                    worker.result_queue.task_done()
                    worker.task_queue.join()
        except filelock.Timeout:
            # another process holds the init lock -- retry the whole attempt
            continue
        except Exception:
            raise RuntimeError('Critical error: worker stuck on initialization. Abort!')
        else:
            break

    log.debug('%d agent workers initialized for env %d!', len(self.workers), self.env_config.worker_index)
    self.initialized = True
def initialize(self):
    """Restore state from the latest checkpoint if one exists, then log the config."""
    checkpoint_dict = self._load_checkpoint(self._checkpoint_dir())
    if checkpoint_dict is not None:
        log.debug('Loading model from checkpoint')
        self._load_state(checkpoint_dict)
    else:
        log.debug('Did not load from checkpoint, starting from scratch!')

    log.debug('Experiment parameters:')
    for param_name, param_value in self._cfg_dict().items():
        log.debug('\t %s: %r', param_name, param_value)
def init_multiplayer_env(make_env_func, player_id, env_config, init_info=None):
    """Create one player's env for a multiplayer match and seed it deterministically.

    If init_info is not supplied, a free UDP port is located and recorded in it.
    """
    env = make_env_func(player_id=player_id)

    if env_config is not None:
        if 'worker_index' in env_config:
            env.unwrapped.worker_index = env_config.worker_index
        if 'vector_index' in env_config:
            env.unwrapped.vector_index = env_config.vector_index

    if init_info is None:
        port_to_use = udp_port_num(env_config)
        port = find_available_port(port_to_use, increment=1000)
        log.debug('Using port %d', port)
        init_info = dict(port=port)

    env.unwrapped.init_info = init_info

    # unique deterministic seed per (worker, vector slot, player)
    seed = env.unwrapped.worker_index * 1000 + env.unwrapped.vector_index * 10 + player_id
    env.seed(seed)
    return env
def __init__(self, num_envs, num_workers, make_env_func, stats_episodes, use_multiprocessing=True):
    """Aggregator over multi-agent envs; requires make_env_func to build multi-agent envs.

    Also rebinds the module-level DEFAULT_UDP_PORT to a currently-free port so
    concurrent matches on this machine do not collide.
    """
    tmp_env = make_env_func(None)
    if not hasattr(tmp_env, 'num_agents'):
        raise Exception('Expected multi-agent environment')
    self.num_agents = tmp_env.num_agents

    global DEFAULT_UDP_PORT
    DEFAULT_UDP_PORT = find_available_port(DEFAULT_UDP_PORT)
    log.debug('Default UDP port changed to %r', DEFAULT_UDP_PORT)
    time.sleep(0.1)

    super().__init__(num_envs, num_workers, make_env_func, stats_episodes, use_multiprocessing)
def record_used_seed(self, level, seed):
    """Mark `seed` as consumed for `level`: bump the shared counter, append to the
    on-disk log, and update the in-memory set."""
    self.num_seeds_used_in_current_run[level].value += 1
    log.debug('Updated number of used seeds for level %s (%d)', level,
              self.num_seeds_used_in_current_run[level].value)

    used_lvl_seeds_dir = self.get_used_seeds_dir()
    used_seeds_filename = join(used_lvl_seeds_dir, level_to_filename(level))
    safe_ensure_dir_exists(os.path.dirname(used_seeds_filename))

    with open(used_seeds_filename, 'a') as fobj:
        fobj.write(f'{seed}\n')

    # this data structure is not shared across processes, but we mostly care about the initial
    # seeds anyway, which are initialized before the processes are forked
    self.used_seeds.setdefault(level, set()).add(seed)
def _save(self):
    """Write the current training state to a new checkpoint file and prune old ones."""
    checkpoint = self._get_checkpoint_dict()
    assert checkpoint is not None

    filepath = join(
        self._checkpoint_dir(),
        f'checkpoint_{self.train_step:09d}_{self.env_steps}.pth')
    log.info('Saving %s...', filepath)
    torch.save(checkpoint, filepath)

    # prune oldest checkpoints until within the configured limit;
    # re-list every iteration since files are deleted as we go
    while True:
        checkpoints = self._get_checkpoints(self._checkpoint_dir())
        if len(checkpoints) <= self.cfg.keep_checkpoints:
            break
        oldest_checkpoint = checkpoints[0]
        if os.path.isfile(oldest_checkpoint):
            log.debug('Removing %s', oldest_checkpoint)
            os.remove(oldest_checkpoint)

    self._save_cfg()
def __init__(self, env_indices, make_env_func, use_multiprocessing):
    """Worker owning a subset of envs; runs `start` in a child process or a thread."""
    self._verbose = False
    self.make_env_func = make_env_func
    self.env_indices = env_indices
    self.timestep = 0
    self.is_multiagent = False

    if use_multiprocessing:
        self.task_queue, self.result_queue = JoinableQueue(), JoinableQueue()
        self.process = Process(target=self.start, daemon=False)
    else:
        log.debug('Not using multiprocessing!')
        self.task_queue, self.result_queue = Queue(), Queue()
        # shouldn't be named "process" here, but who cares
        self.process = threading.Thread(target=self.start)

    self.process.start()
def __init__(self, num_agents, make_env_func, env_config, skip_frames):
    """Multi-agent VizDoom env wrapper managing one underlying env per agent.

    Args:
        num_agents: number of player-controlled agents in the match.
        make_env_func: factory producing a single-player env (called with player_id).
        env_config: optional dict-like config (worker_index, safe_init, ...).
        skip_frames: number of frames to skip (1 = no skip).
    """
    self.num_agents = num_agents
    log.debug('Multi agent env, num agents: %d', self.num_agents)
    self.skip_frames = skip_frames  # number of frames to skip (1 = no skip)

    env = make_env_func(
        player_id=-1
    )  # temporary env just to query observation_space and stuff

    self.action_space = env.action_space
    self.observation_space = env.observation_space

    # we can probably do this in a more generic way, but good enough for now
    self.default_reward_shaping = None
    if hasattr(env.unwrapped, '_reward_shaping_wrapper'):
        # noinspection PyProtectedMember
        self.default_reward_shaping = env.unwrapped._reward_shaping_wrapper.reward_shaping_scheme

    env.close()

    self.make_env_func = make_env_func

    self.safe_init = env_config is not None and env_config.get('safe_init', False)

    if self.safe_init:
        # stagger env creation across workers to avoid spawning everything at once
        sleep_seconds = env_config.worker_index * 1.0
        log.info('Sleeping %.3f seconds to avoid creating all envs at once', sleep_seconds)
        time.sleep(sleep_seconds)
        log.info('Done sleeping at %d', env_config.worker_index)

    self.env_config = env_config
    self.workers = None

    # only needed when rendering
    self.enable_rendering = False
    self.last_obs = None

    self.reset_on_init = True
    self.initialized = False
def make_dmlab_env_impl(spec, cfg, env_config, **kwargs):
    """Construct a DMLab gym env for the given level spec with the configured wrappers."""
    skip_frames = cfg.env_frameskip

    gpu_idx = 0
    if len(cfg.dmlab_gpus) > 0:
        # NOTE(review): this reads kwargs.get('env_config') even though env_config is
        # also a positional parameter of this function -- verify callers pass it via
        # kwargs here, otherwise gpu_idx always stays 0.
        if kwargs.get('env_config') is not None:
            vector_index = kwargs['env_config']['vector_index']
            gpu_idx = cfg.dmlab_gpus[vector_index % len(cfg.dmlab_gpus)]
            log.debug('Using GPU %d for DMLab rendering!', gpu_idx)

    task_id = get_task_id(env_config, spec, cfg)
    level = task_id_to_level(task_id, spec)
    log.debug('%r level %s task id %d', env_config, level, task_id)

    env = DmlabGymEnv(
        task_id,
        level,
        skip_frames,
        cfg.res_w,
        cfg.res_h,
        cfg.dmlab_throughput_benchmark,
        cfg.dmlab_renderer,
        get_dataset_path(cfg),
        cfg.dmlab_with_instructions,
        cfg.dmlab_extended_action_set,
        cfg.dmlab_use_level_cache,
        gpu_idx,
        spec.extra_cfg,
    )

    # deterministic seeding per env slot when available
    if env_config and 'env_id' in env_config:
        env.seed(env_config['env_id'])

    if 'record_to' in cfg and cfg.record_to is not None:
        env = RecordingWrapper(env, cfg.record_to, 0)

    if cfg.pixel_format == 'CHW':
        env = PixelFormatChwWrapper(env)

    env = DmlabRewardShapingWrapper(env)
    return env
def create_multi_env(num_envs, num_workers, make_env_func, stats_episodes):
    """ Create a vectorized env for single- and multi-agent case. This is only required for synchronous algorithms such as PPO and A2C. APPO uses a different mechanism with separate worker processes. """
    tmp_env = make_env_func(None)
    is_multiagent = hasattr(tmp_env, 'num_agents') and tmp_env.num_agents > 1

    if not is_multiagent:
        from seed_rl.algorithms.utils.multi_env import MultiEnv
        multi_env = MultiEnv(num_envs, num_workers, make_env_func, stats_episodes)
    else:
        # num_envs counts individual agents; convert to number of multi-agent envs
        assert num_envs % tmp_env.num_agents == 0
        log.debug('Num envs %d agents %d', num_envs, tmp_env.num_agents)
        num_envs //= tmp_env.num_agents
        from seed_rl.envs.doom.multiplayer.doom_multiagent_wrapper import MultiAgentEnvAggregator
        multi_env = MultiAgentEnvAggregator(num_envs, num_workers, make_env_func, stats_episodes)

    tmp_env.close()
    return multi_env
def test_doom_multiagent_multi_env(self):
    """Smoke-test the MultiAgentEnvAggregator: reset, step repeatedly, report FPS."""
    agents_per_env = 6
    num_envs = 2
    num_workers = 2
    skip_frames = 2  # hardcoded

    multi_env = MultiAgentEnvAggregator(
        num_envs=num_envs,
        num_workers=num_workers,
        make_env_func=self.make_standard_dm,
        stats_episodes=10,
        use_multiprocessing=True,
    )

    log.info('Before reset...')
    multi_env.reset()
    log.info('After reset...')

    actions = [multi_env.action_space.sample()] * (agents_per_env * num_envs)
    obs, rew, done, info = multi_env.step(actions)
    log.info('Rewards: %r', rew)

    start = time.time()
    num_steps = 300
    for step_i in range(num_steps):
        obs, rew, done, info = multi_env.step(actions)
        if step_i % 50 == 0:
            log.debug('Steps %d, rew: %r', step_i, rew)

    elapsed = time.time() - start
    log.debug('Took %.3f sec to run %d steps, steps/sec: %.1f', elapsed, num_steps, num_steps / elapsed)
    agent_frames = num_steps * multi_env.num_agents * num_envs
    log.debug('Observations fps: %.1f', agent_frames / elapsed)
    log.debug('Environment fps: %.1f', agent_frames * skip_frames / elapsed)

    multi_env.close()
    log.info('Done!')
def _ensure_initialized(self):
    """Lazily create and initialize the VizDoom game for multiplayer (idempotent).

    The host process configures deathmatch server flags and waits for all other
    players; non-host processes join the host on the agreed UDP port. Locking
    during game init is handled by the multi-agent wrapper, not here.
    """
    if self.initialized:
        # Doom env already initialized!
        return

    self._create_doom_game(self.mode)

    port = DEFAULT_UDP_PORT if self.init_info is None else self.init_info.get('port', DEFAULT_UDP_PORT)

    if self._is_server():
        log.info('Using port %d on host...', port)

        if not is_udp_port_available(port):
            raise Exception('Port %r unavailable', port)

        # This process will function as a host for a multiplayer game with this many players (including the host).
        # It will wait for other machines to connect using the -join parameter and then
        # start the game when everyone is connected.
        game_args_list = [
            f'-host {self.max_num_players}',
            f'-port {port}',
            '-deathmatch',  # Deathmatch rules are used for the game.
            f'+timelimit {self.timelimit}',  # The game (episode) will end after this many minutes have elapsed.
            '+sv_forcerespawn 1',  # Players will respawn automatically after they die.
            '+sv_noautoaim 1',  # Autoaim is disabled for all players.
            '+sv_respawnprotect 1',  # Players will be invulnerable for two second after spawning.
            '+sv_spawnfarthest 1',  # Players will be spawned as far as possible from any other players.
            '+sv_nocrouch 1',  # Disables crouching.
            '+sv_nojump 1',  # Disables jumping.
            '+sv_nofreelook 1',  # Disables free look with a mouse (only keyboard).
            '+sv_noexit 1',  # Prevents players from exiting the level in deathmatch before timelimit is hit.
            f'+viz_respawn_delay {self.respawn_delay}',  # Sets delay between respawns (in seconds).
            '+viz_connect_timeout 4',  # In seconds
        ]
        self.game.add_game_args(' '.join(game_args_list))

        # Additional commands:
        #
        # disables depth and labels buffer and the ability to use commands
        # that could interfere with multiplayer game (should use this in evaluation)
        # '+viz_nocheat 1'

        # Name your agent and select color
        # colors:
        # 0 - green, 1 - gray, 2 - brown, 3 - red, 4 - light gray, 5 - light brown, 6 - light red, 7 - light blue
        self.game.add_game_args(f'+name AI{self.player_id}_host +colorset 0')

        if self.record_to is not None:
            # reportedly this does not work with bots
            demo_path = self.demo_path(self._num_episodes)
            log.debug('Recording multiplayer demo to %s', demo_path)
            self.game.add_game_args(f'-record {demo_path}')
    else:
        # Join existing game.
        self.game.add_game_args(
            f'-join 127.0.0.1:{port} '  # Connect to a host for a multiplayer game.
            '+viz_connect_timeout 4 ')

        # Name your agent and select color
        # colors:
        # 0 - green, 1 - gray, 2 - brown, 3 - red, 4 - light gray, 5 - light brown, 6 - light red, 7 - light blue
        self.game.add_game_args(f'+name AI{self.player_id} +colorset 0')

    # episode timeout in ticks = minutes * 60 s/min * ticks/s
    self.game.set_episode_timeout(int(self.timelimit * 60 * self.game.get_ticrate()))

    self._game_init(with_locking=False)  # locking is handled by the multi-agent wrapper

    log.info('Initialized w:%d v:%d player:%d', self.worker_index, self.vector_index, self.player_id)

    self.initialized = True
def start(self):
    """Worker main loop: consume tasks from task_queue and push results to result_queue.

    Handles INIT/TERMINATE control messages, RESET/INFO queries, STEP_REAL(_RESET)
    steps on the real envs, and STEP_IMAGINED steps on deep-copied 'imagined' envs.
    """
    real_envs = []
    imagined_envs = None

    timing = AttrDict({'copying': 0, 'prediction': 0})

    while True:
        actions, msg_type = safe_get(self.task_queue)

        if msg_type == MsgType.INIT:
            self._init(real_envs)
            self.task_queue.task_done()
            continue

        if msg_type == MsgType.TERMINATE:
            self._terminate(real_envs, imagined_envs)
            self.task_queue.task_done()
            break

        # handling actual workload
        envs = real_envs
        if msg_type == MsgType.RESET or msg_type == MsgType.STEP_REAL or msg_type == MsgType.STEP_REAL_RESET:
            # a real step/reset invalidates any imagined env copies
            if imagined_envs is not None:
                for imagined_env in imagined_envs:
                    imagined_env.close()
            imagined_envs = None
        elif msg_type == MsgType.INFO:
            pass
        else:
            # STEP_IMAGINED: lazily deep-copy the real envs, one copy per imagined action
            if imagined_envs is None:
                # initializing new prediction, let's report timing for the previous one
                if timing.prediction > 0 and self._verbose:
                    log.debug(
                        'Multi-env copy took %.6f s, prediction took %.6f s',
                        timing.copying,
                        timing.prediction,
                    )

                timing.prediction = 0
                timing.copying = time.time()

                imagined_envs = []
                # we expect a list of actions for every environment in this worker (list of lists)
                assert len(actions) == len(real_envs)
                for env_idx in range(len(actions)):
                    for _ in actions[env_idx]:
                        imagined_env = copy.deepcopy(real_envs[env_idx])
                        imagined_envs.append(imagined_env)
                timing.copying = time.time() - timing.copying

            envs = imagined_envs
            actions = np.asarray(actions).flatten()

        if msg_type == MsgType.RESET:
            results = [env.reset() for env in envs]
        elif msg_type == MsgType.INFO:
            results = [self._get_info(env) for env in envs]
        else:
            assert len(envs) == len(actions)

            reset = [False] * len(actions)
            if msg_type == MsgType.STEP_REAL_RESET:
                # actions arrive paired with per-env reset flags
                actions, reset = zip(*actions)

            # Collect obs, reward, done, and info
            prediction_start = time.time()
            results = [env.step(action) for env, action in zip(envs, actions)]
            self.timestep += 1

            # pack results per-env
            results = np.split(np.array(results), len(real_envs))

            if msg_type == MsgType.STEP_IMAGINED:
                timing.prediction += time.time() - prediction_start

        # If this is a real step and the env is done, reset
        if msg_type == MsgType.STEP_REAL or msg_type == MsgType.STEP_REAL_RESET:
            for i, result in enumerate(results):
                obs, reward, done, info = result[0]

                # multi-agent envs are considered done only when ALL agents are done
                if self.is_multiagent and all(done):
                    is_done = True
                elif not self.is_multiagent and done:
                    is_done = True
                else:
                    is_done = False

                if is_done or reset[i]:
                    obs = real_envs[i].reset()
                    if not self.is_multiagent:
                        info = self._get_info(real_envs[i])  # info for the new episode
                    results[i] = (obs, reward, done, info)

        self.result_queue.put(results)
        self.task_queue.task_done()
def write(self, key, pk3_path):
    """Environment object itself acts as a proxy to the global level cache."""
    seed = self.last_reset_seed
    log.debug('Add new level to cache! Level %s seed %r key %s', self.level_name, seed, key)
    self.curr_cache.add_new_level(self.level, seed, key, pk3_path)
def __init__(self, cache_dir, experiment_dir, all_levels_for_experiment, policy_idx):
    """Global DMLab level cache shared by all envs of one experiment.

    Reads all previously generated level seeds from cache_dir, subtracts the
    seeds already consumed by this experiment, and shuffles the remainder into
    per-level lists of available seeds.
    """
    self.cache_dir = cache_dir
    self.experiment_dir = experiment_dir
    self.policy_idx = policy_idx

    self.all_seeds = dict()
    self.available_seeds = dict()
    self.used_seeds = dict()
    self.num_seeds_used_in_current_run = dict()
    self.locks = dict()

    for lvl in all_levels_for_experiment:
        self.all_seeds[lvl] = []
        self.available_seeds[lvl] = []
        # process-shared counter + lock, one pair per level
        self.num_seeds_used_in_current_run[lvl] = multiprocessing.RawValue(ctypes.c_int32, 0)
        self.locks[lvl] = multiprocessing.Lock()

    log.debug('Reading the DMLab level cache...')
    cache_dir = ensure_dir_exists(cache_dir)

    lvl_seed_files = Path(cache_dir).rglob(f'*.{LEVEL_SEEDS_FILE_EXT}')
    for lvl_seed_file in lvl_seed_files:
        lvl_seed_file = str(lvl_seed_file)

        level = filename_to_level(os.path.relpath(lvl_seed_file, cache_dir))
        self.all_seeds[level] = read_seeds_file(lvl_seed_file, has_keys=True)
        self.all_seeds[level] = list(set(self.all_seeds[level]))  # leave only unique seeds
        log.debug('Level %s has %d total seeds available', level, len(self.all_seeds[level]))

    log.debug('Updating level cache for the current experiment...')
    used_lvl_seeds_dir = self.get_used_seeds_dir()
    used_seeds_files = Path(used_lvl_seeds_dir).rglob(f'*.{LEVEL_SEEDS_FILE_EXT}')
    self.used_seeds = dict()
    for used_seeds_file in used_seeds_files:
        used_seeds_file = str(used_seeds_file)
        level = filename_to_level(os.path.relpath(used_seeds_file, used_lvl_seeds_dir))
        self.used_seeds[level] = read_seeds_file(used_seeds_file, has_keys=False)
        log.debug('%d seeds already used in this experiment for level %s',
                  len(self.used_seeds[level]), level)

        self.used_seeds[level] = set(self.used_seeds[level])

    for lvl in all_levels_for_experiment:
        lvl_seeds = self.all_seeds.get(lvl, [])
        lvl_used_seeds = self.used_seeds.get(lvl, [])

        lvl_remaining_seeds = set(lvl_seeds) - set(lvl_used_seeds)
        self.available_seeds[lvl] = list(lvl_remaining_seeds)

        # flag is hardcoded off; kept for the alternative fixed-seed shuffling mode
        same_levels_for_population = False
        if same_levels_for_population:
            # shuffle with fixed seed so agents in population get the same levels
            random.Random(42).shuffle(self.available_seeds[lvl])
        else:
            random.shuffle(self.available_seeds[lvl])

        log.debug('Env %s has %d remaining unused seeds', lvl, len(self.available_seeds[lvl]))

    log.debug('Done initializing global DMLab level cache!')
def sample(self, proc_idx):
    """Worker entry point: step a single env with random actions to populate the level cache.

    Runs until the global terminate flag is set or the per-worker frame budget
    is exhausted; periodically reports progress through report_queue.
    """
    # workers should ignore Ctrl+C because the termination is handled in the event loop by a special msg
    signal.signal(signal.SIGINT, signal.SIG_IGN)

    timing = Timing()

    psutil.Process().nice(10)

    num_envs = len(DMLAB30_LEVELS_THAT_USE_LEVEL_CACHE)
    assert self.cfg.num_workers % num_envs == 0, f'should have an integer number of workers per env, e.g. {1 * num_envs}, {2 * num_envs}, etc...'
    assert self.cfg.num_envs_per_worker == 1, 'use populate_cache with 1 env per worker'

    with timing.timeit('env_init'):
        env_key = 'env'
        env_desired_num_levels = 0

        global_env_id = proc_idx * self.cfg.num_envs_per_worker
        env_config = AttrDict(worker_index=proc_idx, vector_index=0, env_id=global_env_id)
        env = create_env(self.cfg.env, cfg=self.cfg, env_config=env_config)
        env.seed(global_env_id)

        # this is to track the performance for individual DMLab levels
        if hasattr(env.unwrapped, 'level_name'):
            env_key = env.unwrapped.level_name
            env_level = env.unwrapped.level

            # estimate how many level resets this worker should produce for its level
            approx_num_episodes_per_1b_frames = DMLAB30_APPROX_NUM_EPISODES_PER_BILLION_FRAMES[env_key]
            num_billions = DESIRED_TRAINING_LENGTH / int(1e9)
            num_workers_for_env = self.cfg.num_workers // num_envs
            env_desired_num_levels = int((approx_num_episodes_per_1b_frames * num_billions) / num_workers_for_env)

            env_num_levels_generated = len(
                dmlab_level_cache.DMLAB_GLOBAL_LEVEL_CACHE[0].all_seeds[env_level]
            ) // num_workers_for_env

            log.warning('Worker %d (env %s) generated %d/%d levels!', proc_idx, env_key,
                        env_num_levels_generated, env_desired_num_levels)
            time.sleep(4)

        env.reset()
        env_uses_level_cache = env.unwrapped.env_uses_level_cache

        self.report_queue.put(dict(proc_idx=proc_idx, finished_reset=True))

    self.start_event.wait()

    try:
        with timing.timeit('work'):
            last_report = last_report_frames = total_env_frames = 0
            while not self.terminate.value and total_env_frames < self.cfg.sample_env_frames_per_worker:
                action = env.action_space.sample()
                with timing.add_time(f'{env_key}.step'):
                    env.step(action)

                total_env_frames += 1

                with timing.add_time(f'{env_key}.reset'):
                    env.reset()

                env_num_levels_generated += 1
                log.debug('Env %s done %d/%d resets', env_key, env_num_levels_generated, env_desired_num_levels)

                if env_num_levels_generated >= env_desired_num_levels:
                    log.debug('%s finished %d/%d resets, sleeping...', env_key,
                              env_num_levels_generated, env_desired_num_levels)
                    time.sleep(30)  # free up CPU time for other envs

                # if env does not use level cache, there is no need to run it
                # let other workers proceed
                if not env_uses_level_cache:
                    log.debug('Env %s does not require cache, sleeping...', env_key)
                    time.sleep(200)

                with timing.add_time('report'):
                    now = time.time()
                    if now - last_report > self.report_every_sec:
                        last_report = now
                        frames_since_last_report = total_env_frames - last_report_frames
                        last_report_frames = total_env_frames
                        self.report_queue.put(dict(proc_idx=proc_idx, env_frames=frames_since_last_report))

                        if get_free_disk_space_mb() < 3 * 1024:
                            log.error('Not enough disk space! %d', get_free_disk_space_mb())
                            time.sleep(200)
    except:
        # NOTE(review): bare except also swallows SystemExit/KeyboardInterrupt;
        # SIGINT is ignored above, but consider `except Exception:` here
        log.exception('Unknown exception')
        log.error('Unknown exception in worker %d, terminating...', proc_idx)
        self.report_queue.put(dict(proc_idx=proc_idx, crash=True))

    # stagger shutdown so workers don't hammer the queue simultaneously
    time.sleep(proc_idx * 0.1 + 0.1)
    log.info('Process %d finished sampling. Timing: %s', proc_idx, timing)

    env.close()
def play_human_mode(env, skip_frames=1, num_episodes=3, num_actions=None):
    """Let a human play the Doom env via the keyboard, rendering at ~35 fps.

    Args:
        env: a (wrapped) Doom env; its unwrapped instance must expose the
            keyboard-listener callbacks and `_current_actions`.
        skip_frames: how many env steps to repeat each chosen action for.
        num_episodes: number of episodes to play.
        num_actions: size of the action vector (defaults to 14 when None).
    """
    from pynput.keyboard import Listener

    doom = env.unwrapped
    doom.skip_frames = 1  # handled by this script separately

    # noinspection PyProtectedMember
    def start_listener():
        with Listener(on_press=doom._keyboard_on_press,
                      on_release=doom._keyboard_on_release) as listener:
            listener.join()

    listener_thread = Thread(target=start_listener)
    listener_thread.start()

    for episode in range(num_episodes):
        doom.mode = 'human'
        env.reset()
        last_render_time = time.time()
        time_between_frames = 1.0 / 35.0  # VizDoom's native tic rate

        total_rew = 0.0

        while not doom.game.is_episode_finished() and not doom._terminate:
            num_actions = 14 if num_actions is None else num_actions
            turn_delta_action_idx = num_actions - 1

            actions = [0] * num_actions
            for action in doom._current_actions:
                if isinstance(action, int):
                    actions[action] = 1  # 1 for buttons currently pressed, 0 otherwise
                else:
                    if action == 'turn_left':
                        actions[turn_delta_action_idx] = -doom.delta_actions_scaling_factor
                    elif action == 'turn_right':
                        actions[turn_delta_action_idx] = doom.delta_actions_scaling_factor

            for frame in range(skip_frames):
                doom._actions_flattened = actions
                _, rew, _, _ = env.step(actions)

                new_total_rew = total_rew + rew
                if new_total_rew != total_rew:
                    log.info('Reward: %.3f, total: %.3f', rew, new_total_rew)
                total_rew = new_total_rew
                state = doom.game.get_state()

                verbose = True
                if state is not None and verbose:
                    info = doom.get_info()
                    print(
                        'Health:', info['HEALTH'],
                        # 'Weapon:', info['SELECTED_WEAPON'],
                        # 'ready:', info['ATTACK_READY'],
                        # 'ammo:', info['SELECTED_WEAPON_AMMO'],
                        # 'pc:', info['PLAYER_COUNT'],
                        # 'dmg:', info['DAMAGECOUNT'],
                    )

                time_since_last_render = time.time() - last_render_time
                time_wait = time_between_frames - time_since_last_render

                if doom.show_automap and state.automap_buffer is not None:
                    map_ = state.automap_buffer
                    map_ = np.swapaxes(map_, 0, 2)
                    map_ = np.swapaxes(map_, 0, 1)
                    cv2.imshow('ViZDoom Automap Buffer', map_)
                    if time_wait > 0:
                        # BUG FIX: was cv2.waitKey(int(time_wait) * 1000) -- time_wait is a
                        # fraction of a second, so int(time_wait) == 0 and waitKey(0) would
                        # block forever waiting for a key press. Convert to milliseconds
                        # first and clamp to at least 1 ms (waitKey(0) means "wait forever").
                        cv2.waitKey(max(1, int(time_wait * 1000)))
                else:
                    if time_wait > 0:
                        time.sleep(time_wait)

                last_render_time = time.time()

    if doom.show_automap:
        cv2.destroyAllWindows()

    log.debug('Press ESC to exit...')
    listener_thread.join()