Example 1
    def _game_init(self, with_locking=True, max_parallel=10):
        lock_file = lock = None
        if with_locking:
            lock_file = doom_lock_file(max_parallel)
            lock = FileLock(lock_file)

        init_attempt = 0
        while True:
            init_attempt += 1
            try:
                if with_locking:
                    with lock.acquire(timeout=20):
                        self.game.init()
                else:
                    self.game.init()

                break
            except Timeout:
                if with_locking:
                    log.debug(
                        'Another process currently holds the lock %s, attempt: %d',
                        lock_file,
                        init_attempt,
                    )
            except Exception as exc:
                log.warning('VizDoom game.init() threw an exception %r. Terminate process...', exc)
                from envs.env_utils import EnvCriticalError
                raise EnvCriticalError()
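
The locking above presumably relies on the filelock package (FileLock / Timeout; the imports are not shown in the snippet). A minimal standalone sketch of the same acquire-with-timeout pattern, using a hypothetical lock path:

from filelock import FileLock, Timeout

lock = FileLock('/tmp/doom_init.lock')  # hypothetical lock file path
try:
    # only one process at a time gets past this point; others raise Timeout after 20 seconds
    with lock.acquire(timeout=20):
        pass  # game.init() would go here
except Timeout:
    print('another process currently holds the lock')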
Example 2
    def add_edge(self, i1, i2, loop_closure=False):
        initial_success = 0.01  # add to params?

        if i2 in self.graph[i1]:
            log.warning('Edge %d-%d already exists (%r)! Overriding!', i1, i2,
                        self.graph[i1])

        self.graph.add_edge(
            i1,
            i2,
            success=initial_success,
            last_traversal_frames=math.inf,
            attempted_traverse=0,
            loop_closure=loop_closure,
        )
        if not self.directed_graph:
            if i1 in self.graph[i2]:
                log.warning('Edge %d-%d already exists (%r)! Overriding!', i2,
                            i1, self.graph[i2])

            self.graph.add_edge(
                i2,
                i1,
                success=initial_success,
                last_traversal_frames=math.inf,
                attempted_traverse=0,
                loop_closure=loop_closure,
            )
Example 3
def safe_get(q, timeout=1e6, msg='Queue timeout'):
    """Using queue.get() with timeout is necessary, otherwise KeyboardInterrupt is not handled."""
    while True:
        try:
            return q.get(timeout=timeout)
        except Empty:
            log.warning(msg)
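
A brief usage sketch (the multiprocessing queue and the queue.Empty import are shown here for completeness; they are not part of the snippet itself):

from multiprocessing import Queue
from queue import Empty  # safe_get() relies on catching queue.Empty

q = Queue()
q.put({'step': 1})
item = safe_get(q, timeout=5.0, msg='Still waiting for data...')  # returns {'step': 1}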
Example 4
    def cat(self, dict_of_tensor_arrays, macro_batch_size, use_pinned_memory, timing):
        """
        Here 'macro_batch' is the overall size of experience per iteration.
        Macro-batch = mini-batch * num_batches_per_iteration
        """

        tensor_batch = self.batch_pool.get()

        if tensor_batch is not None:
            old_batch_size = tensor_batch_size(tensor_batch)
            if old_batch_size != macro_batch_size:
                # this can happen due to PBT changing batch size during the experiment
                log.warning('Tensor macro-batch size changed from %d to %d!', old_batch_size, macro_batch_size)
                log.warning('Discarding the cached tensor batch!')
                del tensor_batch
                tensor_batch = None

        if tensor_batch is None:
            tensor_batch = copy_dict_structure(dict_of_tensor_arrays)
            log.info('Allocating new CPU tensor batch (could not get from the pool)')

            for d1, cache_d, key, tensor_arr, _ in iter_dicts_recursively(dict_of_tensor_arrays, tensor_batch):
                cache_d[key] = torch.cat(tensor_arr, dim=0)
                if use_pinned_memory:
                    cache_d[key] = cache_d[key].pin_memory()
        else:
            with timing.add_time('batcher_mem'):
                for d1, cache_d, key, tensor_arr, cache_t in iter_dicts_recursively(dict_of_tensor_arrays, tensor_batch):
                    offset = 0
                    for t in tensor_arr:
                        first_dim = t.shape[0]
                        cache_t[offset:offset + first_dim].copy_(t)
                        offset += first_dim

        return tensor_batch
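
As a worked example of the docstring's arithmetic (illustrative numbers, not taken from any config): with a mini-batch of 1024 and num_batches_per_iteration = 4, the macro-batch holds 4096 samples, so each cached tensor in tensor_batch has a leading dimension of 4096 and is simply overwritten in place on subsequent iterations instead of being reallocated.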
Example 5
def load_from_checkpoint(cfg):
    filename = cfg_file(cfg)
    if not os.path.isfile(filename):
        raise Exception(
            f'Could not load saved parameters for experiment {cfg.experiment}')

    with open(filename, 'r') as json_file:
        json_params = json.load(json_file)
        log.warning('Loading existing experiment configuration from %s',
                    filename)
        loaded_cfg = AttrDict(json_params)

    # override the parameters in config file with values passed from command line
    for key, value in cfg.cli_args.items():
        if key in loaded_cfg and loaded_cfg[key] != value:
            log.debug(
                'Overriding arg %r with value %r passed from command line',
                key, value)
            loaded_cfg[key] = value

    # incorporate extra CLI parameters that were not present in JSON file
    for key, value in vars(cfg).items():
        if key not in loaded_cfg:
            log.debug(
                'Adding new argument %r=%r that is not in the saved config file!',
                key, value)
            loaded_cfg[key] = value

    return loaded_cfg
Example 6
    def add_new_level(self, level, seed, key, pk3_path):
        with self.locks[level]:
            num_used_seeds = self.num_seeds_used_in_current_run[level].value
            if num_used_seeds < len(self.available_seeds.get(level, [])):
                log.warning(
                    'We should only add new levels to cache if we ran out of pre-generated levels (seeds)'
                )
                log.warning(
                    'Num used seeds: %d, available seeds: %d, level: %s, seed %r, key %r',
                    num_used_seeds,
                    len(self.available_seeds.get(level, [])),
                    level,
                    seed,
                    key,
                )

                # some DMLab-30 environments, e.g. language_select_located_object may require different levels even
                # for the same seed. This is most likely a bug in DeepMind Lab, because the same seed should generate
                # identical environments

            path = os.path.join(self.cache_dir, key)
            if not os.path.isfile(path):
                # copy the cached file DeepMind Lab has written to the cache directory
                shutil.copyfile(pk3_path, path)

            # add new map to the list of available seeds for this level
            # so it can be used next time we run the experiment
            lvl_seeds_filename = join(self.cache_dir, level_to_filename(level))
            safe_ensure_dir_exists(os.path.dirname(lvl_seeds_filename))
            with open(lvl_seeds_filename, 'a') as fobj:
                fobj.write(f'{seed} {key}\n')
Example 7
def action_name_to_action(action_name):
    for action, name in ACTION_MEANING.items():
        if name == action_name:
            return action

    log.warning('Unknown action %s', action_name)
    return None
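
For illustration, assuming the usual ACTION_MEANING layout (integer action id mapped to a human-readable name; the entries below are hypothetical), the reverse lookup behaves as follows:

ACTION_MEANING = {0: 'NOOP', 1: 'FIRE', 2: 'UP'}  # hypothetical contents

assert action_name_to_action('FIRE') == 1
assert action_name_to_action('JUMP') is None  # logs 'Unknown action JUMP' and returns None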
Example 8
def make_voxel_env(env_name, cfg=None, env_config=None, **kwargs):
    scenario_name = env_name.split('voxel_env_')[-1].casefold()
    log.debug('Using scenario %s', scenario_name)

    if 'multitask' in scenario_name:
        if env_config is not None and 'worker_index' in env_config:
            task_idx = env_config['worker_index']
        else:
            log.warning(
                'Could not find information about task id. Use task_id=0. (It is okay if this message appears once)'
            )
            task_idx = 0

        env = make_env_multitask(
            scenario_name,
            task_idx,
            num_envs=cfg.voxel_num_envs_per_instance,
            num_agents_per_env=cfg.voxel_num_agents_per_env,
            num_simulation_threads=cfg.voxel_num_simulation_threads,
            use_vulkan=cfg.voxel_use_vulkan,
        )
    else:
        env = VoxelEnv(
            scenario_name=scenario_name,
            num_envs=cfg.voxel_num_envs_per_instance,
            num_agents_per_env=cfg.voxel_num_agents_per_env,
            num_simulation_threads=cfg.voxel_num_simulation_threads,
            use_vulkan=cfg.voxel_use_vulkan,
        )

    env = Wrapper(env, cfg.voxel_increase_team_spirit,
                  cfg.voxel_max_team_spirit_steps)
    return env
Example 9
    def _add_shortcuts(self, m, pairwise_distances):
        self.remove_shortcuts(m)  # first - remove all existing shortcuts

        shortcuts = self._shortcuts_distance(
            m,
            pairwise_distances,
            self.params.min_shortcut_dist,
            self.params.shortcut_window,
        )
        if len(shortcuts) <= 0:
            log.warning('Could not find any shortcuts')
            return

        random.shuffle(shortcuts)
        shortcut_risks = [s[0] for s in shortcuts]

        shortcuts_to_keep = int(self.params.shortcuts_to_keep_fraction *
                                m.num_landmarks())

        keep = min(shortcuts_to_keep, len(shortcuts))
        percentile = (keep / len(shortcuts)) * 100
        max_risk = np.percentile(shortcut_risks, percentile)
        max_risk = min(max_risk, self.params.shortcut_risk_threshold)

        log.debug('Keep shortcuts with risk <= %.3f...', max_risk)
        shortcuts = [s for s in shortcuts if s[0] <= max_risk][:keep]
        # sort according to ground truth distance for logging
        shortcuts.sort(key=lambda x: x[-1], reverse=True)

        log.debug('Kept %d shortcuts: %r...', len(shortcuts), shortcuts[:5])

        for shortcut in shortcuts:
            risk, i1, i2, d, coord_dist = shortcut
            m.add_edge(i1, i2, loop_closure=True)
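
Worked example of the percentile logic above (illustrative numbers): with 200 candidate shortcuts, 100 landmarks and shortcuts_to_keep_fraction = 0.2, keep = 20, so max_risk is the 10th percentile of the risk values (20 / 200 * 100), further capped by shortcut_risk_threshold; only shortcuts at or below that risk survive.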
Example 10
    def _ensure_initialized(self):
        if self.initialized:
            return

        num_attempts = 25
        attempt = 0
        for attempt in range(num_attempts):
            self.workers = [
                MultiAgentEnvWorker(i, self.make_env_func, self.env_config)
                for i in range(self.num_agents)
            ]

            try:
                port_to_use = udp_port_num(self.env_config)
                port = find_available_port(port_to_use, increment=1000)
                log.debug('Using port %d', port)
                init_info = dict(port=port)

                for i, worker in enumerate(self.workers):
                    worker.task_queue.put((init_info, TaskType.INIT))
                    if self.safe_init:
                        time.sleep(1.0)  # just in case
                    else:
                        time.sleep(0.01)

                for i, worker in enumerate(self.workers):
                    worker.result_queue.get(timeout=5)
                    worker.result_queue.task_done()
                    worker.task_queue.join()
            except Exception as exc:
                for worker in self.workers:
                    if isinstance(worker.process, threading.Thread):
                        log.info(
                            'We cannot really kill a thread, so let the whole process die'
                        )
                        raise RuntimeError(
                            'Critical error: worker stuck on initialization. Abort!'
                        )
                    else:
                        log.info('Killing process %r', worker.process.pid)
                        kill(worker.process.pid)
                del self.workers
                log.warning('Could not initialize env, try again! Error: %r',
                            exc)
                time.sleep(1)
            else:
                break

        else:
            # for-else: reached only if every attempt failed (the loop never executed 'break')
            log.error('Could not initialize env even after %d attempts. Fail!',
                      num_attempts)
            raise RuntimeError(
                'Critical error: worker stuck on initialization, num attempts exceeded. Abort!'
            )

        log.debug('%d agent workers initialized for env %d!',
                  len(self.workers), self.env_config.worker_index)
        log.debug('Took %d attempts!\n', attempt + 1)
        self.initialized = True
Example 11
def maybe_load_from_checkpoint(cfg):
    filename = cfg_file(cfg)
    if not os.path.isfile(filename):
        log.warning('Saved parameter configuration for experiment %s not found!', cfg.experiment)
        log.warning('Starting experiment from scratch!')
        return AttrDict(vars(cfg))

    return load_from_checkpoint(cfg)
Example 12
    def close(self):
        try:
            if self.game is not None:
                self.game.close()
        except RuntimeError as exc:
            log.warning('Runtime error in VizDoom game close(): %r', exc)

        if self.viewer is not None:
            self.viewer.close()
Example 13
    def _actors_update_shaping_scheme(self, policy_id):
        log.debug('Sending latest reward scheme to actors for policy %d...', policy_id)
        for actor_worker in self.actor_workers:
            reward_scheme_task = (PbtTask.UPDATE_REWARD_SCHEME, (policy_id, self.policy_reward_shaping[policy_id]))
            task = (TaskType.PBT, reward_scheme_task)
            try:
                actor_worker.task_queue.put(task, timeout=0.1)
            except Full:
                log.warning('Could not add task %r to queue, it is likely that worker died', task)
Example 14
def is_udp_port_available(port):
    try:
        sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        sock.bind(('', port))
        sock.close()
    except OSError as exc:
        log.warning(f'UDP port {port} cannot be used {str(exc)}')
        return False
    else:
        return True
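
A hedged sketch of how a port-probing helper in the spirit of find_available_port (referenced in Example 10) could be built on top of this check; this is an illustration, not the repository's actual implementation:

def find_free_udp_port(preferred_port, increment=1000, max_tries=10):
    # probe the preferred port first, then candidates spaced `increment` apart
    for i in range(max_tries):
        candidate = preferred_port + i * increment
        if is_udp_port_available(candidate):
            return candidate
    raise RuntimeError('No available UDP port found')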
Example 15
    def wait_for_traj_buffers(self):
        print_warning = True
        while self.traj_tensors_available[:, :, self.traj_buffer_idx].min() == 0:
            if print_warning:
                log.warning(
                    'Waiting for trajectory buffer %d on actor %d-%d',
                    self.traj_buffer_idx, self.worker_idx, self.split_idx,
                )
                print_warning = False
            time.sleep(0.002)
Example 16
def dmlab_env_by_name(name):
    for spec in DMLAB_ENVS:
        if spec.name == name:
            return spec

    # not a known "named" environment with a predefined spec
    log.warning('Level %s not found. Interpreting the level name as an unmodified DMLab-30 env name!', name)
    level = name.split('dmlab_')[1]
    spec = DmLabSpec(name, level)
    return spec
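
For instance, a hypothetical name like 'dmlab_some_custom_level' with no predefined spec would be split on the 'dmlab_' prefix and wrapped as DmLabSpec('dmlab_some_custom_level', 'some_custom_level'), i.e. the remainder of the name is passed through verbatim as the DMLab level.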
Example 17
    def save_model(self, timeout=None):
        self.model_saved_event.clear()
        save_task = (PbtTask.SAVE_MODEL, self.policy_id)
        self.task_queue.put((TaskType.PBT, save_task))
        log.debug('Wait while learner %d saves the model...', self.policy_id)
        if self.model_saved_event.wait(timeout=timeout):
            log.debug('Learner %d saved the model!', self.policy_id)
        else:
            log.warning('Model saving request timed out!')
        self.model_saved_event.clear()
Example 18
def get_algo_class(algo):
    algo_class = Agent

    if algo == 'PPO':
        from algorithms.ppo.agent_ppo import AgentPPO
        algo_class = AgentPPO
    elif algo == 'APPO':
        from algorithms.appo.appo import APPO
        algo_class = APPO
    else:
        log.warning('Algorithm %s is not supported', algo)

    return algo_class
Example 19
def get_algo_class(algo):
    algo_class = None

    if algo == 'APPO':
        from algorithms.appo.appo import APPO
        algo_class = APPO
    elif algo == 'DUMMY_SAMPLER':
        from algorithms.dummy_sampler.sampler import DummySampler
        algo_class = DummySampler
    else:
        log.warning('Algorithm %s is not supported', algo)

    return algo_class
Example 20
    def add_trajectory_to_sparse_map_fixed_landmarks(
            existing_map, traj, dist_between_landmarks=30):
        m = existing_map

        is_new_landmark = [False] * len(traj)  # is frame a landmark

        nodes = m.graph.nodes
        nodes[0]['traj_idx'] = 0
        nodes[0]['frame_idx'] = 0

        next_landmark = dist_between_landmarks
        num_exploration_frames = 0

        if not hasattr(traj, 'mode'):
            log.warning('Trajectory must have mode')

        for i in range(len(traj)):
            obs = traj.obs[i]
            info = traj.infos[i]

            if hasattr(traj, 'mode'):
                mode = traj.mode[i]
            else:
                mode = TmaxMode.EXPLORATION

            if mode == TmaxMode.EXPLORATION:
                num_exploration_frames += 1

            if num_exploration_frames >= next_landmark and i > 0:
                new_landmark_idx = m.add_landmark(obs,
                                                  info,
                                                  update_curr_landmark=True)

                nodes[new_landmark_idx]['traj_idx'] = m.num_trajectories
                nodes[new_landmark_idx]['frame_idx'] = i

                log.info(
                    'Added frame %d as a landmark %d to sparse map, traj %d',
                    i,
                    new_landmark_idx,
                    m.num_trajectories,
                )
                is_new_landmark[i] = True
                next_landmark += dist_between_landmarks

        m.num_trajectories += 1
        return is_new_landmark
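
As a quick illustration of the spacing logic (hypothetical trajectory): with dist_between_landmarks = 30 and a trajectory whose frames are all in EXPLORATION mode, landmarks are added roughly every 30 frames, since next_landmark advances by 30 each time the exploration-frame counter crosses it; frames in other modes do not advance the counter and therefore stretch the gaps between landmarks.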
Example 21
    def wait_for_traj_buffers(self):
        """
        In very rare cases the learner might not have freed the shared memory buffer by the time we need it.
        Here we wait until the learner is done with it.
        """

        print_warning = True
        while self.traj_tensors_available[:, :, self.traj_buffer_idx].min() == 0:
            if print_warning:
                log.warning(
                    'Waiting for trajectory buffer %d on actor %d-%d',
                    self.traj_buffer_idx, self.worker_idx, self.split_idx,
                )
                print_warning = False
            time.sleep(0.002)
Example 22
    def get_info(self, landmark_idx):
        x = y = angle = 0
        try:
            x, y = self.graph.node[landmark_idx]['pos']
            angle = self.graph.node[landmark_idx]['angle']
        except (KeyError, TypeError):
            log.warning(
                f'No coordinate information in landmark {landmark_idx}')

        pos = {
            'agent_x': x,
            'agent_y': y,
            'agent_a': angle,
        }
        return {'pos': pos}
Example 23
    def fetch(self, key, pk3_path):
        """Environment object itself acts as a proxy to the global level cache."""
        if not self.env_uses_level_cache:
            self.env_uses_level_cache = True
            # log.debug('Env %s uses level cache!', self.level_name)

        path = join(self.level_cache_path, key)

        if os.path.isfile(path):
            # copy the cached file to the path expected by DeepMind Lab
            shutil.copyfile(path, pk3_path)
            return True
        else:
            log.warning('Cache miss in environment %s key: %s!',
                        self.level_name, key)
            return False
Example 24
    def load_checkpoint(checkpoints, device):
        if len(checkpoints) <= 0:
            log.warning('No checkpoints found')
            return None
        else:
            latest_checkpoint = checkpoints[-1]

            # extra safety mechanism to recover from spurious filesystem errors
            num_attempts = 3
            for attempt in range(num_attempts):
                try:
                    log.warning('Loading state from checkpoint %s...', latest_checkpoint)
                    checkpoint_dict = torch.load(latest_checkpoint, map_location=device)
                    return checkpoint_dict
                except Exception:
                    log.exception(f'Could not load from checkpoint, attempt {attempt}')
Example 25
    def reset(self):
        self._ensure_initialized()

        if self.record_to is not None and not self.is_multiplayer:
            # does not work in multiplayer (uses different mechanism)
            if not os.path.exists(self.record_to):
                os.makedirs(self.record_to)

            demo_path = self.demo_path(self._num_episodes)
            log.warning('Recording episode demo to %s', demo_path)
            self.game.new_episode(demo_path)
        else:
            if self._num_episodes > 0:
                # no demo recording (default)
                self.game.new_episode()

        self.state = self.game.get_state()
        img = None
        try:
            img = self.state.screen_buffer
        except AttributeError:
            # sometimes Doom does not return screen buffer at all??? Rare bug
            pass

        if img is None:
            log.error(
                'Game returned None screen buffer! This is not supposed to happen!'
            )
            img = self._black_screen()

        # Swap current and previous histogram
        if self.current_histogram is not None and self.previous_histogram is not None:
            swap = self.current_histogram
            self.current_histogram = self.previous_histogram
            self.previous_histogram = swap
            self.current_histogram.fill(0)

        self._actions_flattened = None
        self._last_episode_info = copy.deepcopy(self._prev_info)
        self._prev_info = None

        self._num_episodes += 1

        return np.transpose(img, (1, 2, 0))
Example 26
def cuda_envvars(policy_id):
    orig_visible_devices = os.environ[f'{CUDA_ENVVAR}_backup_']
    available_gpus = [int(g) for g in orig_visible_devices.split(',') if g]
    log.info('Available GPUs: %r', available_gpus)

    # it is crucial to set CUDA_VISIBLE_DEVICES properly before calling any torch.cuda methods, e.g. device_count()
    # this is why we're forced to use the env vars

    num_gpus = len(available_gpus)
    if num_gpus == 0:
        log.warning('Not using a GPU for policy %d', policy_id)
        os.environ[CUDA_ENVVAR] = ''
    else:
        gpu_idx_to_use = available_gpus[policy_id % num_gpus]
        os.environ[CUDA_ENVVAR] = str(gpu_idx_to_use)
        log.info('Set environment var %s to %r for policy %d', CUDA_ENVVAR,
                 os.environ[CUDA_ENVVAR], policy_id)

        log.debug('Visible devices: %r', torch.cuda.device_count())
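
Worked example of the round-robin GPU assignment (assuming, as elsewhere in the codebase, that CUDA_ENVVAR is 'CUDA_VISIBLE_DEVICES'): if the backed-up variable was '0,1', then available_gpus is [0, 1] and policy 3 gets gpu_idx_to_use = available_gpus[3 % 2] = 1, so that policy's process sees only GPU 1.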
Example 27
    def distances(self,
                  session,
                  obs_first_encoded,
                  obs_second_encoded,
                  infos_first=None,
                  infos_second=None):
        if len(obs_first_encoded) <= 0:
            return []

        if infos_first is None or infos_second is None:
            # fall back to standard distance net
            return super().distances(session, obs_first_encoded,
                                     obs_second_encoded)

        assert len(infos_first) == len(infos_second)

        far_distance = 250.0

        d = []
        for i in range(len(infos_first)):
            try:
                pos1, pos2 = infos_first[i]['pos'], infos_second[i]['pos']
            except (KeyError, TypeError):
                log.warning('No coordinate information provided!')
                pos1 = pos2 = self._default_pos()

            x1, y1 = pos1['agent_x'], pos1['agent_y']
            x2, y2 = pos2['agent_x'], pos2['agent_y']

            ground_truth_distance = math.sqrt((x1 - x2)**2 + (y1 - y2)**2)
            ground_truth_distance = max(
                0.0, ground_truth_distance
            )  # just in case, to avoid numerical issues

            # linear interpolation: 0 units -> 0.0, anything >= far_distance (250) -> 1.0
            distance_normalized = min(ground_truth_distance / far_distance,
                                      1.0)
            d.append(distance_normalized)

        return d
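
For example, with far_distance = 250.0 a ground-truth separation of 125 world units is normalized to 0.5, and any separation of 250 or more saturates at 1.0; missing coordinate info falls back to _default_pos() for both endpoints, which makes the pair look maximally close (distance 0.0).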
Example 28
    def _process_training_data(self, data, timing, wait_stats=None):
        self.is_training = True

        buffer, batch_size, samples, env_steps = data
        assert samples == batch_size * self.cfg.num_batches_per_iteration

        self.env_steps += env_steps
        experience_size = buffer.rewards.shape[0]

        stats = dict(learner_env_steps=self.env_steps,
                     policy_id=self.policy_id)

        with timing.add_time('train'):
            discarding_rate = self._discarding_rate()

            self._update_pbt()

            train_stats = self._train(buffer, batch_size, experience_size,
                                      timing)

            if train_stats is not None:
                stats['train'] = train_stats

                if wait_stats is not None:
                    wait_avg, wait_min, wait_max = wait_stats
                    stats['train']['wait_avg'] = wait_avg
                    stats['train']['wait_min'] = wait_min
                    stats['train']['wait_max'] = wait_max

                stats['train']['discarded_rollouts'] = self.num_discarded_rollouts
                stats['train']['discarding_rate'] = discarding_rate

                stats['stats'] = memory_stats('learner', self.device)

        self.is_training = False

        try:
            self.report_queue.put(stats)
        except Full:
            log.warning(
                'Could not report training stats, the report queue is full!')
Example 29
    def _load_checkpoint(self, checkpoints_dir):
        checkpoints = self._get_checkpoints(checkpoints_dir)
        if len(checkpoints) <= 0:
            log.warning('No checkpoints found in %s',
                        experiment_dir(cfg=self.cfg))
            return None
        else:
            latest_checkpoint = checkpoints[-1]
            log.warning('Loading state from checkpoint %s...',
                        latest_checkpoint)

            # the checkpoint will try to load onto the GPU storage unless specified otherwise
            if str(self.device) == 'cuda':
                checkpoint_dict = torch.load(latest_checkpoint)
            else:
                checkpoint_dict = torch.load(
                    latest_checkpoint,
                    map_location=lambda storage, loc: storage)

            return checkpoint_dict
Example 30
    def _process_rollouts(self, rollouts, timing):
        # batch_size can potentially change through PBT, so we should keep it the same and pass it around
        # using function arguments, instead of using global self.cfg

        batch_size = self.cfg.batch_size
        rollouts_in_macro_batch = self._macro_batch_size(
            batch_size) // self.cfg.rollout

        if len(rollouts) < rollouts_in_macro_batch:
            return rollouts

        discard_rollouts = 0
        policy_version = self.train_step
        for r in rollouts:
            rollout_min_version = r['t']['policy_version'].min().item()
            if policy_version - rollout_min_version >= self.cfg.max_policy_lag:
                discard_rollouts += 1
                self._mark_rollout_buffer_free(r)
            else:
                break

        if discard_rollouts > 0:
            log.warning(
                'Discarding %d old rollouts, cut by policy lag threshold %d (learner %d)',
                discard_rollouts,
                self.cfg.max_policy_lag,
                self.policy_id,
            )
            rollouts = rollouts[discard_rollouts:]
            self.num_discarded_rollouts += discard_rollouts

        if len(rollouts) >= rollouts_in_macro_batch:
            # process newest rollouts
            rollouts_to_process = rollouts[:rollouts_in_macro_batch]
            rollouts = rollouts[rollouts_in_macro_batch:]

            self._process_macro_batch(rollouts_to_process, batch_size, timing)
            # log.info('Unprocessed rollouts: %d (%d samples)', len(rollouts), len(rollouts) * self.cfg.rollout)

        return rollouts
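
Worked example with illustrative values (not from any particular config), assuming _macro_batch_size(batch_size) is batch_size * num_batches_per_iteration as described in Example 4's docstring: with batch_size = 1024, num_batches_per_iteration = 2 and rollout = 32, the macro batch is 2048 samples, so rollouts_in_macro_batch = 64; nothing is processed until at least 64 rollouts have accumulated, and rollouts older than max_policy_lag policy versions are discarded before a macro batch is formed.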