def _game_init(self, with_locking=True, max_parallel=10):
    lock_file = lock = None
    if with_locking:
        lock_file = doom_lock_file(max_parallel)
        lock = FileLock(lock_file)

    init_attempt = 0
    while True:
        init_attempt += 1
        try:
            if with_locking:
                with lock.acquire(timeout=20):
                    self.game.init()
            else:
                self.game.init()

            break
        except Timeout:
            if with_locking:
                log.debug(
                    'Another process currently holds the lock %s, attempt: %d',
                    lock_file, init_attempt,
                )
        except Exception as exc:
            log.warning('VizDoom game.init() threw an exception %r. Terminate process...', exc)
            from envs.env_utils import EnvCriticalError
            raise EnvCriticalError()

def add_edge(self, i1, i2, loop_closure=False):
    initial_success = 0.01  # add to params?

    if i2 in self.graph[i1]:
        log.warning('Edge %d-%d already exists (%r)! Overriding!', i1, i2, self.graph[i1])

    self.graph.add_edge(
        i1, i2,
        success=initial_success, last_traversal_frames=math.inf, attempted_traverse=0, loop_closure=loop_closure,
    )

    if not self.directed_graph:
        if i1 in self.graph[i2]:
            log.warning('Edge %d-%d already exists (%r)! Overriding!', i2, i1, self.graph[i2])

        self.graph.add_edge(
            i2, i1,
            success=initial_success, last_traversal_frames=math.inf, attempted_traverse=0, loop_closure=loop_closure,
        )

def safe_get(q, timeout=1e6, msg='Queue timeout'):
    """Using queue.get() with timeout is necessary, otherwise KeyboardInterrupt is not handled."""
    while True:
        try:
            return q.get(timeout=timeout)
        except Empty:
            log.warning(msg)

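# Illustrative usage sketch for safe_get() above (the consumer loop and the shutdown
# sentinel are hypothetical, not part of the original code). The point of the wrapper
# is that a blocking q.get() with no timeout can prevent KeyboardInterrupt from being
# handled, so we poll with a finite timeout and merely log while the queue stays empty.
def _example_consumer_loop(q):
    while True:
        item = safe_get(q, timeout=5.0, msg='Still waiting for work...')
        if item is None:  # assumed shutdown sentinel for this sketch
            break
        log.debug('Processing work item %r', item)
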
def cat(self, dict_of_tensor_arrays, macro_batch_size, use_pinned_memory, timing):
    """
    Here 'macro_batch' is the overall size of experience per iteration.
    Macro-batch = mini-batch * num_batches_per_iteration
    """
    tensor_batch = self.batch_pool.get()

    if tensor_batch is not None:
        old_batch_size = tensor_batch_size(tensor_batch)
        if old_batch_size != macro_batch_size:
            # this can happen due to PBT changing batch size during the experiment
            log.warning('Tensor macro-batch size changed from %d to %d!', old_batch_size, macro_batch_size)
            log.warning('Discarding the cached tensor batch!')
            del tensor_batch
            tensor_batch = None

    if tensor_batch is None:
        tensor_batch = copy_dict_structure(dict_of_tensor_arrays)
        log.info('Allocating new CPU tensor batch (could not get from the pool)')

        for d1, cache_d, key, tensor_arr, _ in iter_dicts_recursively(dict_of_tensor_arrays, tensor_batch):
            cache_d[key] = torch.cat(tensor_arr, dim=0)
            if use_pinned_memory:
                cache_d[key] = cache_d[key].pin_memory()
    else:
        with timing.add_time('batcher_mem'):
            for d1, cache_d, key, tensor_arr, cache_t in iter_dicts_recursively(dict_of_tensor_arrays, tensor_batch):
                offset = 0
                for t in tensor_arr:
                    first_dim = t.shape[0]
                    cache_t[offset:offset + first_dim].copy_(t)
                    offset += first_dim

    return tensor_batch

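# Worked example of the macro-batch arithmetic from the docstring above (the numbers
# are illustrative, not from any particular config): with a mini-batch of 1024 samples
# and num_batches_per_iteration = 4, cat() assembles a macro-batch of 1024 * 4 = 4096
# samples, i.e. every tensor in the returned dict has 4096 rows along dim 0.
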
def load_from_checkpoint(cfg):
    filename = cfg_file(cfg)
    if not os.path.isfile(filename):
        raise Exception(f'Could not load saved parameters for experiment {cfg.experiment}')

    with open(filename, 'r') as json_file:
        json_params = json.load(json_file)
        log.warning('Loading existing experiment configuration from %s', filename)
        loaded_cfg = AttrDict(json_params)

    # override the parameters in config file with values passed from command line
    for key, value in cfg.cli_args.items():
        if key in loaded_cfg and loaded_cfg[key] != value:
            log.debug('Overriding arg %r with value %r passed from command line', key, value)
            loaded_cfg[key] = value

    # incorporate extra CLI parameters that were not present in JSON file
    for key, value in vars(cfg).items():
        if key not in loaded_cfg:
            log.debug('Adding new argument %r=%r that is not in the saved config file!', key, value)
            loaded_cfg[key] = value

    return loaded_cfg

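# Illustrative precedence example for load_from_checkpoint() above (the parameter name
# and values are made up): if the saved JSON config contains learning_rate=1e-4 and the
# command line passes learning_rate=3e-4, the CLI value wins and
# loaded_cfg.learning_rate == 3e-4; any argument present on the CLI but missing from
# the older JSON file is simply added to loaded_cfg.
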
def add_new_level(self, level, seed, key, pk3_path):
    with self.locks[level]:
        num_used_seeds = self.num_seeds_used_in_current_run[level].value
        if num_used_seeds < len(self.available_seeds.get(level, [])):
            log.warning('We should only add new levels to cache if we ran out of pre-generated levels (seeds)')
            log.warning(
                'Num used seeds: %d, available seeds: %d, level: %s, seed %r, key %r',
                num_used_seeds, len(self.available_seeds.get(level, [])), level, seed, key,
            )

            # some DMLab-30 environments, e.g. language_select_located_object may require different levels even
            # for the same seed. This is most likely a bug in DeepMind Lab, because the same seed should generate
            # identical environments

        path = os.path.join(self.cache_dir, key)
        if not os.path.isfile(path):
            # copy the cached file DeepMind Lab has written to the cache directory
            shutil.copyfile(pk3_path, path)

        # add new map to the list of available seeds for this level
        # so it can be used next time we run the experiment
        lvl_seeds_filename = join(self.cache_dir, level_to_filename(level))
        safe_ensure_dir_exists(os.path.dirname(lvl_seeds_filename))
        with open(lvl_seeds_filename, 'a') as fobj:
            fobj.write(f'{seed} {key}\n')

def action_name_to_action(action_name):
    for action, name in ACTION_MEANING.items():
        if name == action_name:
            return action

    log.warning('Unknown action %s', action_name)
    return None

def make_voxel_env(env_name, cfg=None, env_config=None, **kwargs):
    scenario_name = env_name.split('voxel_env_')[-1].casefold()
    log.debug('Using scenario %s', scenario_name)

    if 'multitask' in scenario_name:
        if env_config is not None and 'worker_index' in env_config:
            task_idx = env_config['worker_index']
        else:
            log.warning('Could not find information about task id. Use task_id=0. (It is okay if this message appears once)')
            task_idx = 0

        env = make_env_multitask(
            scenario_name,
            task_idx,
            num_envs=cfg.voxel_num_envs_per_instance,
            num_agents_per_env=cfg.voxel_num_agents_per_env,
            num_simulation_threads=cfg.voxel_num_simulation_threads,
            use_vulkan=cfg.voxel_use_vulkan,
        )
    else:
        env = VoxelEnv(
            scenario_name=scenario_name,
            num_envs=cfg.voxel_num_envs_per_instance,
            num_agents_per_env=cfg.voxel_num_agents_per_env,
            num_simulation_threads=cfg.voxel_num_simulation_threads,
            use_vulkan=cfg.voxel_use_vulkan,
        )

    env = Wrapper(env, cfg.voxel_increase_team_spirit, cfg.voxel_max_team_spirit_steps)
    return env

def _add_shortcuts(self, m, pairwise_distances):
    self.remove_shortcuts(m)  # first - remove all existing shortcuts

    shortcuts = self._shortcuts_distance(
        m, pairwise_distances, self.params.min_shortcut_dist, self.params.shortcut_window,
    )

    if len(shortcuts) <= 0:
        log.warning('Could not find any shortcuts')
        return

    random.shuffle(shortcuts)
    shortcut_risks = [s[0] for s in shortcuts]

    shortcuts_to_keep = int(self.params.shortcuts_to_keep_fraction * m.num_landmarks())
    keep = min(shortcuts_to_keep, len(shortcuts))

    percentile = (keep / len(shortcuts)) * 100
    max_risk = np.percentile(shortcut_risks, percentile)
    max_risk = min(max_risk, self.params.shortcut_risk_threshold)

    log.debug('Keep shortcuts with risk <= %.3f...', max_risk)
    shortcuts = [s for s in shortcuts if s[0] <= max_risk][:keep]
    shortcuts.sort(key=lambda x: x[-1], reverse=True)  # sort according to ground truth distance for logging
    log.debug('Kept %d shortcuts: %r...', len(shortcuts), shortcuts[:5])

    for shortcut in shortcuts:
        risk, i1, i2, d, coord_dist = shortcut
        m.add_edge(i1, i2, loop_closure=True)

def _ensure_initialized(self):
    if self.initialized:
        return

    num_attempts = 25
    attempt = 0
    for attempt in range(num_attempts):
        self.workers = [
            MultiAgentEnvWorker(i, self.make_env_func, self.env_config) for i in range(self.num_agents)
        ]

        try:
            port_to_use = udp_port_num(self.env_config)
            port = find_available_port(port_to_use, increment=1000)
            log.debug('Using port %d', port)
            init_info = dict(port=port)

            for i, worker in enumerate(self.workers):
                worker.task_queue.put((init_info, TaskType.INIT))
                if self.safe_init:
                    time.sleep(1.0)  # just in case
                else:
                    time.sleep(0.01)

            for i, worker in enumerate(self.workers):
                worker.result_queue.get(timeout=5)
                worker.result_queue.task_done()
                worker.task_queue.join()
        except Exception as exc:
            for worker in self.workers:
                if isinstance(worker.process, threading.Thread):
                    log.info('We cannot really kill a thread, so let the whole process die')
                    raise RuntimeError('Critical error: worker stuck on initialization. Abort!')
                else:
                    log.info('Killing process %r', worker.process.pid)
                    kill(worker.process.pid)
            del self.workers
            log.warning('Could not initialize env, try again! Error: %r', exc)
            time.sleep(1)
        else:
            break

    if attempt >= num_attempts:
        log.error('Could not initialize env even after %d attempts. Fail!', attempt)
        raise RuntimeError('Critical error: worker stuck on initialization, num attempts exceeded. Abort!')

    log.debug('%d agent workers initialized for env %d!', len(self.workers), self.env_config.worker_index)
    log.debug('Took %d attempts!\n', attempt + 1)
    self.initialized = True

def maybe_load_from_checkpoint(cfg):
    filename = cfg_file(cfg)
    if not os.path.isfile(filename):
        log.warning('Saved parameter configuration for experiment %s not found!', cfg.experiment)
        log.warning('Starting experiment from scratch!')
        return AttrDict(vars(cfg))

    return load_from_checkpoint(cfg)

def close(self):
    try:
        if self.game is not None:
            self.game.close()
    except RuntimeError as exc:
        log.warning('Runtime error in VizDoom game close(): %r', exc)

    if self.viewer is not None:
        self.viewer.close()

def _actors_update_shaping_scheme(self, policy_id):
    log.debug('Sending latest reward scheme to actors for policy %d...', policy_id)
    for actor_worker in self.actor_workers:
        reward_scheme_task = (PbtTask.UPDATE_REWARD_SCHEME, (policy_id, self.policy_reward_shaping[policy_id]))
        task = (TaskType.PBT, reward_scheme_task)
        try:
            actor_worker.task_queue.put(task, timeout=0.1)
        except Full:
            log.warning('Could not add task %r to queue, it is likely that worker died', task)

def is_udp_port_available(port):
    try:
        sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        sock.bind(('', port))
        sock.close()
    except OSError as exc:
        log.warning(f'UDP port {port} cannot be used {str(exc)}')
        return False
    else:
        return True

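# A minimal sketch built on top of is_udp_port_available() (the helper name and the
# linear scanning strategy are assumptions for illustration; the project has its own
# find_available_port(), used in _ensure_initialized() above):
def _example_first_free_udp_port(base_port, max_tries=100):
    for offset in range(max_tries):
        port = base_port + offset
        if is_udp_port_available(port):
            return port
    raise RuntimeError(f'No free UDP port in range {base_port}-{base_port + max_tries - 1}')
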
def dmlab_env_by_name(name):
    for spec in DMLAB_ENVS:
        if spec.name == name:
            return spec

    # not a known "named" environment with a predefined spec
    log.warning('Level %s not found. Interpreting the level name as an unmodified DMLab-30 env name!', name)
    level = name.split('dmlab_')[1]
    spec = DmLabSpec(name, level)
    return spec

def save_model(self, timeout=None):
    self.model_saved_event.clear()
    save_task = (PbtTask.SAVE_MODEL, self.policy_id)
    self.task_queue.put((TaskType.PBT, save_task))
    log.debug('Wait while learner %d saves the model...', self.policy_id)
    if self.model_saved_event.wait(timeout=timeout):
        log.debug('Learner %d saved the model!', self.policy_id)
    else:
        log.warning('Model saving request timed out!')
    self.model_saved_event.clear()

def get_algo_class(algo):
    algo_class = Agent

    if algo == 'PPO':
        from algorithms.ppo.agent_ppo import AgentPPO
        algo_class = AgentPPO
    elif algo == 'APPO':
        from algorithms.appo.appo import APPO
        algo_class = APPO
    else:
        log.warning('Algorithm %s is not supported', algo)

    return algo_class

def get_algo_class(algo):
    algo_class = None

    if algo == 'APPO':
        from algorithms.appo.appo import APPO
        algo_class = APPO
    elif algo == 'DUMMY_SAMPLER':
        from algorithms.dummy_sampler.sampler import DummySampler
        algo_class = DummySampler
    else:
        log.warning('Algorithm %s is not supported', algo)

    return algo_class

def add_trajectory_to_sparse_map_fixed_landmarks(existing_map, traj, dist_between_landmarks=30):
    m = existing_map
    is_new_landmark = [False] * len(traj)  # is frame a landmark

    nodes = m.graph.nodes
    nodes[0]['traj_idx'] = 0
    nodes[0]['frame_idx'] = 0

    next_landmark = dist_between_landmarks
    num_exploration_frames = 0

    if not hasattr(traj, 'mode'):
        log.warning('Trajectory must have mode')

    for i in range(len(traj)):
        obs = traj.obs[i]
        info = traj.infos[i]

        if hasattr(traj, 'mode'):
            mode = traj.mode[i]
        else:
            mode = TmaxMode.EXPLORATION

        if mode == TmaxMode.EXPLORATION:
            num_exploration_frames += 1

        if num_exploration_frames >= next_landmark and i > 0:
            new_landmark_idx = m.add_landmark(obs, info, update_curr_landmark=True)

            nodes[new_landmark_idx]['traj_idx'] = m.num_trajectories
            nodes[new_landmark_idx]['frame_idx'] = i

            log.info(
                'Added frame %d as a landmark %d to sparse map, traj %d',
                i, new_landmark_idx, m.num_trajectories,
            )
            is_new_landmark[i] = True
            next_landmark += dist_between_landmarks

    m.num_trajectories += 1
    return is_new_landmark

def wait_for_traj_buffers(self):
    """
    In very rare cases the learner might not have freed the shared memory buffer by the time we need it.
    Here we wait until the learner is done with it.
    """
    print_warning = True
    while self.traj_tensors_available[:, :, self.traj_buffer_idx].min() == 0:
        if print_warning:
            log.warning(
                'Waiting for trajectory buffer %d on actor %d-%d',
                self.traj_buffer_idx, self.worker_idx, self.split_idx,
            )
            print_warning = False
        time.sleep(0.002)

def get_info(self, landmark_idx):
    x = y = angle = 0
    try:
        x, y = self.graph.node[landmark_idx]['pos']
        angle = self.graph.node[landmark_idx]['angle']
    except (KeyError, TypeError):
        log.warning(f'No coordinate information in landmark {landmark_idx}')

    pos = {
        'agent_x': x,
        'agent_y': y,
        'agent_a': angle,
    }
    return {'pos': pos}

def fetch(self, key, pk3_path):
    """Environment object itself acts as a proxy to the global level cache."""
    if not self.env_uses_level_cache:
        self.env_uses_level_cache = True
        # log.debug('Env %s uses level cache!', self.level_name)

    path = join(self.level_cache_path, key)

    if os.path.isfile(path):
        # copy the cached file to the path expected by DeepMind Lab
        shutil.copyfile(path, pk3_path)
        return True
    else:
        log.warning('Cache miss in environment %s key: %s!', self.level_name, key)
        return False

def load_checkpoint(checkpoints, device):
    if len(checkpoints) <= 0:
        log.warning('No checkpoints found')
        return None
    else:
        latest_checkpoint = checkpoints[-1]

        # extra safety mechanism to recover from spurious filesystem errors
        num_attempts = 3
        for attempt in range(num_attempts):
            try:
                log.warning('Loading state from checkpoint %s...', latest_checkpoint)
                checkpoint_dict = torch.load(latest_checkpoint, map_location=device)
                return checkpoint_dict
            except Exception:
                log.exception(f'Could not load from checkpoint, attempt {attempt}')

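# Illustrative caller-side sketch for load_checkpoint() above (the helper name and the
# 'model' checkpoint key are assumptions): the function returns None both when the
# checkpoint list is empty and when every load attempt fails, so callers should be
# prepared to fall back to freshly initialized weights.
def _example_restore_or_init(checkpoints, device, model):
    checkpoint_dict = load_checkpoint(checkpoints, device)
    if checkpoint_dict is None:
        log.warning('No usable checkpoint, starting with freshly initialized weights')
        return model
    model.load_state_dict(checkpoint_dict['model'])  # assumed checkpoint key
    return model
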
def reset(self):
    self._ensure_initialized()

    if self.record_to is not None and not self.is_multiplayer:
        # does not work in multiplayer (uses different mechanism)
        if not os.path.exists(self.record_to):
            os.makedirs(self.record_to)

        demo_path = self.demo_path(self._num_episodes)
        log.warning('Recording episode demo to %s', demo_path)
        self.game.new_episode(demo_path)
    else:
        if self._num_episodes > 0:
            # no demo recording (default)
            self.game.new_episode()

    self.state = self.game.get_state()
    img = None
    try:
        img = self.state.screen_buffer
    except AttributeError:
        # sometimes Doom does not return screen buffer at all??? Rare bug
        pass

    if img is None:
        log.error('Game returned None screen buffer! This is not supposed to happen!')
        img = self._black_screen()

    # Swap current and previous histogram
    if self.current_histogram is not None and self.previous_histogram is not None:
        swap = self.current_histogram
        self.current_histogram = self.previous_histogram
        self.previous_histogram = swap
        self.current_histogram.fill(0)

    self._actions_flattened = None
    self._last_episode_info = copy.deepcopy(self._prev_info)
    self._prev_info = None

    self._num_episodes += 1

    return np.transpose(img, (1, 2, 0))

def cuda_envvars(policy_id):
    orig_visible_devices = os.environ[f'{CUDA_ENVVAR}_backup_']
    available_gpus = [int(g) for g in orig_visible_devices.split(',') if g]
    log.info('Available GPUs: %r', available_gpus)

    # it is crucial to set CUDA_VISIBLE_DEVICES properly before calling any torch.cuda methods, e.g. device_count()
    # this is why we're forced to use the env vars

    num_gpus = len(available_gpus)
    if num_gpus == 0:
        log.warning('Not using a GPU for policy %d', policy_id)
        os.environ[CUDA_ENVVAR] = ''
    else:
        gpu_idx_to_use = available_gpus[policy_id % num_gpus]
        os.environ[CUDA_ENVVAR] = str(gpu_idx_to_use)
        log.info('Set environment var %s to %r for policy %d', CUDA_ENVVAR, os.environ[CUDA_ENVVAR], policy_id)
        log.debug('Visible devices: %r', torch.cuda.device_count())

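# Worked example of the policy-to-GPU mapping above (illustrative numbers): with
# available_gpus = [0, 1] and four policies, policy_id % num_gpus assigns policies 0
# and 2 to GPU 0 and policies 1 and 3 to GPU 1. Because CUDA_VISIBLE_DEVICES is set to
# a single index, torch.cuda.device_count() should report 1 inside each such process.
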
def distances(self, session, obs_first_encoded, obs_second_encoded, infos_first=None, infos_second=None):
    if len(obs_first_encoded) <= 0:
        return []

    if infos_first is None or infos_second is None:
        # fall back to standard distance net
        return super().distances(session, obs_first_encoded, obs_second_encoded)

    assert len(infos_first) == len(infos_second)

    far_distance = 250.0

    d = []
    for i in range(len(infos_first)):
        try:
            pos1, pos2 = infos_first[i]['pos'], infos_second[i]['pos']
        except (KeyError, TypeError):
            log.warning('No coordinate information provided!')
            pos1 = pos2 = self._default_pos()

        x1, y1 = pos1['agent_x'], pos1['agent_y']
        x2, y2 = pos2['agent_x'], pos2['agent_y']
        ground_truth_distance = math.sqrt((x1 - x2) ** 2 + (y1 - y2) ** 2)
        ground_truth_distance = max(0.0, ground_truth_distance)  # just in case, to avoid numerical issues

        # linear interpolation: 0 maps to 0.0, >= far_distance (250) maps to 1.0
        distance_normalized = min(ground_truth_distance / far_distance, 1.0)
        d.append(distance_normalized)

    return d

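# Worked example of the distance normalization above (coordinates are illustrative):
# agents at (0, 0) and (125, 0) are 125 world units apart, which maps to
# 125 / 250 = 0.5; any pair at or beyond far_distance (250 units) saturates at 1.0.
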
def _process_training_data(self, data, timing, wait_stats=None):
    self.is_training = True

    buffer, batch_size, samples, env_steps = data
    assert samples == batch_size * self.cfg.num_batches_per_iteration

    self.env_steps += env_steps
    experience_size = buffer.rewards.shape[0]

    stats = dict(learner_env_steps=self.env_steps, policy_id=self.policy_id)

    with timing.add_time('train'):
        discarding_rate = self._discarding_rate()

        self._update_pbt()

        train_stats = self._train(buffer, batch_size, experience_size, timing)

        if train_stats is not None:
            stats['train'] = train_stats

            if wait_stats is not None:
                wait_avg, wait_min, wait_max = wait_stats
                stats['train']['wait_avg'] = wait_avg
                stats['train']['wait_min'] = wait_min
                stats['train']['wait_max'] = wait_max

            stats['train']['discarded_rollouts'] = self.num_discarded_rollouts
            stats['train']['discarding_rate'] = discarding_rate

            stats['stats'] = memory_stats('learner', self.device)

    self.is_training = False

    try:
        self.report_queue.put(stats)
    except Full:
        log.warning('Could not report training stats, the report queue is full!')

def _load_checkpoint(self, checkpoints_dir):
    checkpoints = self._get_checkpoints(checkpoints_dir)
    if len(checkpoints) <= 0:
        log.warning('No checkpoints found in %s', experiment_dir(cfg=self.cfg))
        return None
    else:
        latest_checkpoint = checkpoints[-1]
        log.warning('Loading state from checkpoint %s...', latest_checkpoint)
        if str(self.device) == 'cuda':
            # the checkpoint will try to load onto the GPU storage unless specified
            checkpoint_dict = torch.load(latest_checkpoint)
        else:
            checkpoint_dict = torch.load(latest_checkpoint, map_location=lambda storage, loc: storage)
        return checkpoint_dict

def _process_rollouts(self, rollouts, timing):
    # batch_size can potentially change through PBT, so we should keep it the same and pass it around
    # using function arguments, instead of using global self.cfg
    batch_size = self.cfg.batch_size
    rollouts_in_macro_batch = self._macro_batch_size(batch_size) // self.cfg.rollout

    if len(rollouts) < rollouts_in_macro_batch:
        return rollouts

    discard_rollouts = 0
    policy_version = self.train_step
    for r in rollouts:
        rollout_min_version = r['t']['policy_version'].min().item()
        if policy_version - rollout_min_version >= self.cfg.max_policy_lag:
            discard_rollouts += 1
            self._mark_rollout_buffer_free(r)
        else:
            break

    if discard_rollouts > 0:
        log.warning(
            'Discarding %d old rollouts, cut by policy lag threshold %d (learner %d)',
            discard_rollouts, self.cfg.max_policy_lag, self.policy_id,
        )
        rollouts = rollouts[discard_rollouts:]
        self.num_discarded_rollouts += discard_rollouts

    if len(rollouts) >= rollouts_in_macro_batch:
        # process newest rollouts
        rollouts_to_process = rollouts[:rollouts_in_macro_batch]
        rollouts = rollouts[rollouts_in_macro_batch:]

        self._process_macro_batch(rollouts_to_process, batch_size, timing)
        # log.info('Unprocessed rollouts: %d (%d samples)', len(rollouts), len(rollouts) * self.cfg.rollout)

    return rollouts

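# Worked example of the rollout bookkeeping above (illustrative numbers, assuming
# _macro_batch_size() follows the formula in cat()'s docstring): with batch_size = 1024,
# num_batches_per_iteration = 4 (a macro-batch of 4096 samples) and rollout = 32 steps,
# rollouts_in_macro_batch = 4096 // 32 = 128, i.e. the learner waits for at least 128
# non-stale rollouts before assembling a macro-batch.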