Beispiel #1
0
def load_from_checkpoint(cfg):
    filename = cfg_file(cfg)
    if not os.path.isfile(filename):
        raise Exception(
            f'Could not load saved parameters for experiment {cfg.experiment}')

    with open(filename, 'r') as json_file:
        json_params = json.load(json_file)
        log.warning('Loading existing experiment configuration from %s',
                    filename)
        loaded_cfg = AttrDict(json_params)

    # override the parameters in config file with values passed from command line
    for key, value in cfg.cli_args.items():
        if key in loaded_cfg and loaded_cfg[key] != value:
            log.debug(
                'Overriding arg %r with value %r passed from command line',
                key, value)
            loaded_cfg[key] = value

    # incorporate extra CLI parameters that were not present in JSON file
    for key, value in vars(cfg).items():
        if key not in loaded_cfg:
            log.debug(
                'Adding new argument %r=%r that is not in the saved config file!',
                key, value)
            loaded_cfg[key] = value

    return loaded_cfg
Beispiel #2
0
    def add_new_level(self, level, seed, key, pk3_path):
        with self.locks[level]:
            num_used_seeds = self.num_seeds_used_in_current_run[level].value
            if num_used_seeds < len(self.available_seeds.get(level, [])):
                log.warning(
                    'We should only add new levels to cache if we ran out of pre-generated levels (seeds)'
                )
                log.warning(
                    'Num used seeds: %d, available seeds: %d, level: %s, seed %r, key %r',
                    num_used_seeds,
                    len(self.available_seeds.get(level, [])),
                    level,
                    seed,
                    key,
                )

                # some DMLab-30 environments, e.g. language_select_located_object may require different levels even
                # for the same seed. This is most likely a bug in DeepMind Lab, because the same seed should generate
                # identical environments

            path = os.path.join(self.cache_dir, key)
            if not os.path.isfile(path):
                # copy the cached file DeepMind Lab has written to the cache directory
                shutil.copyfile(pk3_path, path)

            # add new map to the list of available seeds for this level
            # so it can be used next time we run the experiment
            lvl_seeds_filename = join(self.cache_dir, level_to_filename(level))
            safe_ensure_dir_exists(os.path.dirname(lvl_seeds_filename))
            with open(lvl_seeds_filename, 'a') as fobj:
                fobj.write(f'{seed} {key}\n')
Beispiel #3
0
def safe_get(q, timeout=1e6, msg='Queue timeout'):
    """Using queue.get() with timeout is necessary, otherwise KeyboardInterrupt is not handled."""
    while True:
        try:
            return q.get(timeout=timeout)
        except Empty:
            log.warning(msg)
Beispiel #4
0
    def _game_init(self, with_locking=True, max_parallel=10):
        lock_file = lock = None
        if with_locking:
            lock_file = doom_lock_file(max_parallel)
            lock = FileLock(lock_file)

        init_attempt = 0
        while True:
            init_attempt += 1
            try:
                if with_locking:
                    with lock.acquire(timeout=20):
                        self.game.init()
                else:
                    self.game.init()

                break
            except Timeout:
                if with_locking:
                    log.debug(
                        'Another process currently holds the lock %s, attempt: %d',
                        lock_file,
                        init_attempt,
                    )
            except Exception as exc:
                log.warning(
                    'VizDoom game.init() threw an exception %r. Terminate process...',
                    exc)
                from sample_factory.envs.env_utils import EnvCriticalError
                raise EnvCriticalError()
    def cat(self, dict_of_tensor_arrays, macro_batch_size, use_pinned_memory, timing):
        """
        Here 'macro_batch' is the overall size of experience per iteration.
        Macro-batch = mini-batch * num_batches_per_iteration
        """

        tensor_batch = self.batch_pool.get()

        if tensor_batch is not None:
            old_batch_size = tensor_batch_size(tensor_batch)
            if old_batch_size != macro_batch_size:
                # this can happen due to PBT changing batch size during the experiment
                log.warning('Tensor macro-batch size changed from %d to %d!', old_batch_size, macro_batch_size)
                log.warning('Discarding the cached tensor batch!')
                del tensor_batch
                tensor_batch = None

        if tensor_batch is None:
            tensor_batch = copy_dict_structure(dict_of_tensor_arrays)
            log.info('Allocating new CPU tensor batch (could not get from the pool)')

            for d1, cache_d, key, tensor_arr, _ in iter_dicts_recursively(dict_of_tensor_arrays, tensor_batch):
                cache_d[key] = torch.cat(tensor_arr, dim=0)
                if use_pinned_memory:
                    cache_d[key] = cache_d[key].pin_memory()
        else:
            with timing.add_time('batcher_mem'):
                for d1, cache_d, key, tensor_arr, cache_t in iter_dicts_recursively(dict_of_tensor_arrays, tensor_batch):
                    offset = 0
                    for t in tensor_arr:
                        first_dim = t.shape[0]
                        cache_t[offset:offset + first_dim].copy_(t)
                        offset += first_dim

        return tensor_batch
Beispiel #6
0
    def resolve_env_name(self, full_env_name):
        """
        :param full_env_name: complete name of the environment, to be passed to the make_env_func, e.g. atari_breakout
        :return: env registry entry
        :rtype: EnvRegistryEntry
        """
        # we find a match with a registered env family prefix
        for env_prefix, registry_entry in self.registry.items():
            if not full_env_name.startswith(env_prefix):
                continue

            # We found a match. If it's a callable, we should first handle a deferred registry entry
            if callable(registry_entry):
                make_env_func, add_extra_params_func, override_default_params_func = registry_entry(
                )
                self.register_env(env_prefix, make_env_func,
                                  add_extra_params_func,
                                  override_default_params_func)

            return self.registry[env_prefix]

        msg = (
            f'Could not resolve {full_env_name}. '
            'Did you register the family of environments in the registry? See sample_factory_examples for details.'
        )
        log.warning(msg)
        raise RuntimeError(msg)
def make_voxel_env(env_name, cfg=None, env_config=None, **kwargs):
    scenario_name = env_name.split('voxel_env_')[-1].casefold()
    log.debug('Using scenario %s', scenario_name)

    if 'multitask' in scenario_name:
        if env_config is not None and 'worker_index' in env_config:
            task_idx = env_config['worker_index']
        else:
            log.warning('Could not find information about task id. Use task_id=0. (It is okay if this message appears once)')
            task_idx = 0

        env = make_env_multitask(
            scenario_name,
            task_idx,
            num_envs=cfg.voxel_num_envs_per_instance,
            num_agents_per_env=cfg.voxel_num_agents_per_env,
            num_simulation_threads=cfg.voxel_num_simulation_threads,
            use_vulkan=cfg.voxel_use_vulkan,
        )
    else:
        env = VoxelEnv(
            scenario_name=scenario_name,
            num_envs=cfg.voxel_num_envs_per_instance,
            num_agents_per_env=cfg.voxel_num_agents_per_env,
            num_simulation_threads=cfg.voxel_num_simulation_threads,
            use_vulkan=cfg.voxel_use_vulkan,
        )

    env = Wrapper(env, cfg.voxel_increase_team_spirit, cfg.voxel_max_team_spirit_steps)
    return env
Beispiel #8
0
    def get_trajectory_buffers(self,
                               num_buffers: int,
                               timing: Optional = None):
        """
        :param num_buffers: number of free buffer indices to obtain
        :param timing: for performance analysis
        :return: a list of indices of free buffers
        """
        indices: List[int] = []
        block = False

        while len(indices) < num_buffers:
            with timing.add_time(
                    'wait_buffers'
            ) if timing is not None else contextlib.suppress():
                try:
                    indices.extend(
                        self.free_buffers_queue.get_many(
                            max_messages_to_get=num_buffers - len(indices),
                            timeout=5,
                            block=block,
                        ))
                except faster_fifo.Empty:
                    log.warning('Waiting for %d trajectory buffers...',
                                num_buffers - len(indices))

                if len(indices) < num_buffers:
                    block = True

        return indices
Beispiel #9
0
    def close(self):
        try:
            if self.game is not None:
                self.game.close()
        except RuntimeError as exc:
            log.warning('Runtime error in VizDoom game close(): %r', exc)

        if self.viewer is not None:
            self.viewer.close()
Beispiel #10
0
def is_udp_port_available(port):
    try:
        sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        sock.bind(('', port))
        sock.close()
    except OSError as exc:
        log.warning(f'UDP port {port} cannot be used {str(exc)}')
        return False
    else:
        return True
Beispiel #11
0
def maybe_load_from_checkpoint(cfg):
    filename = cfg_file(cfg)
    if not os.path.isfile(filename):
        log.warning(
            'Saved parameter configuration for experiment %s not found!',
            cfg.experiment)
        log.warning('Starting experiment from scratch!')
        return AttrDict(vars(cfg))

    return load_from_checkpoint(cfg)
Beispiel #12
0
def register_custom_encoder(custom_encoder_name, encoder_cls):
    if custom_encoder_name in ENCODER_REGISTRY:
        log.warning('Encoder %s already registered', custom_encoder_name)

    assert issubclass(
        encoder_cls,
        EncoderBase), 'Custom encoders must be derived from EncoderBase'

    log.debug('Adding model class %r to registry (with name %s)', encoder_cls,
              custom_encoder_name)
    ENCODER_REGISTRY[custom_encoder_name] = encoder_cls
Beispiel #13
0
def dmlab_env_by_name(name):
    for spec in DMLAB_ENVS:
        if spec.name == name:
            return spec

    # not a known "named" environment with a predefined spec
    log.warning(
        'Level %s not found. Interpreting the level name as an unmodified DMLab-30 env name!',
        name)
    level = name.split('dmlab_')[1]
    spec = DmLabSpec(name, level)
    return spec
Beispiel #14
0
def get_algo_class(algo):
    algo_class = None

    if algo == 'APPO':
        from sample_factory.algorithms.appo.appo import APPO
        algo_class = APPO
    elif algo == 'DUMMY_SAMPLER':
        from sample_factory.algorithms.dummy_sampler import DummySampler
        algo_class = DummySampler
    else:
        log.warning('Algorithm %s is not supported', algo)

    return algo_class
Beispiel #15
0
 def _actors_update_shaping_scheme(self, policy_id):
     log.debug('Sending latest reward scheme to actors for policy %d...',
               policy_id)
     for actor_worker in self.actor_workers:
         reward_scheme_task = (PbtTask.UPDATE_REWARD_SCHEME,
                               (policy_id,
                                self.policy_reward_shaping[policy_id]))
         task = (TaskType.PBT, reward_scheme_task)
         try:
             actor_worker.task_queue.put(task, timeout=0.1)
         except Full:
             log.warning(
                 'Could not add task %r to queue, it is likely that worker died',
                 task)
Beispiel #16
0
    def fetch(self, key, pk3_path):
        """Environment object itself acts as a proxy to the global level cache."""
        if not self.env_uses_level_cache:
            self.env_uses_level_cache = True
            # log.debug('Env %s uses level cache!', self.level_name)

        path = join(self.level_cache_path, key)

        if os.path.isfile(path):
            # copy the cached file to the path expected by DeepMind Lab
            shutil.copyfile(path, pk3_path)
            return True
        else:
            log.warning('Cache miss in environment %s key: %s!',
                        self.level_name, key)
            return False
Beispiel #17
0
    def wait_for_traj_buffers(self):
        """
        In very rare cases the learner might not have freed the shared memory buffer by the time we need it.
        Here we wait until the learner is done with it.
        """

        print_warning = True
        while self.traj_tensors_available[:, :,
                                          self.traj_buffer_idx].min() == 0:
            if print_warning:
                log.warning(
                    'Waiting for trajectory buffer %d on actor %d-%d',
                    self.traj_buffer_idx,
                    self.worker_idx,
                    self.split_idx,
                )
                print_warning = False
            time.sleep(0.002)
Beispiel #18
0
    def reset(self):
        self._ensure_initialized()

        if self.record_to is not None and not self.is_multiplayer:
            # does not work in multiplayer (uses different mechanism)
            if not os.path.exists(self.record_to):
                os.makedirs(self.record_to)

            demo_path = self.demo_path(self._num_episodes)
            log.warning('Recording episode demo to %s', demo_path)
            self.game.new_episode(demo_path)
        else:
            if self._num_episodes > 0:
                # no demo recording (default)
                self.game.new_episode()

        self.state = self.game.get_state()
        img = None
        try:
            img = self.state.screen_buffer
        except AttributeError:
            # sometimes Doom does not return screen buffer at all??? Rare bug
            pass

        if img is None:
            log.error(
                'Game returned None screen buffer! This is not supposed to happen!'
            )
            img = self._black_screen()

        # Swap current and previous histogram
        if self.current_histogram is not None and self.previous_histogram is not None:
            swap = self.current_histogram
            self.current_histogram = self.previous_histogram
            self.previous_histogram = swap
            self.current_histogram.fill(0)

        self._actions_flattened = None
        self._last_episode_info = copy.deepcopy(self._prev_info)
        self._prev_info = None

        self._num_episodes += 1

        return np.transpose(img, (1, 2, 0))
Beispiel #19
0
    def generate_experiments(self, experiment_arg_name, customize_experiment_name, param_prefix):
        """Yields tuples of (cmd, experiment_name)"""
        num_experiments = 1 if len(self.params) == 0 else len(self.params)

        for experiment_idx in range(num_experiments):
            cmd_tokens = [self.cmd]
            experiment_name_tokens = [self.base_name]

            # abbreviations for parameter names that we've used
            param_shorthands = []

            if len(self.params) > 0:
                params = self.params[experiment_idx]
                for param, value in params.items():
                    param_str = f'{param_prefix}{param}={value}'
                    cmd_tokens.append(param_str)

                    param_tokens = re.split('[._-]', param)
                    shorthand_tokens = [t[0] for t in param_tokens[:-1]]

                    last_token_l = min(3, len(param_tokens[-1]))
                    shorthand = '.'.join(shorthand_tokens + [param_tokens[-1][:last_token_l]])
                    while last_token_l <= len(param_tokens[-1]) and shorthand in param_shorthands:
                        last_token_l += 1
                        shorthand = '.'.join(shorthand_tokens + [param_tokens[-1][:last_token_l]])

                    param_shorthands.append(shorthand)
                    experiment_name_token = f'{shorthand}_{value}'
                    experiment_name_tokens.append(experiment_name_token)

            if customize_experiment_name:
                experiment_name = f'{experiment_idx:02d}_' + '_'.join(experiment_name_tokens)
                if len(experiment_name) > 100:
                    log.warning('Experiment name is extra long! (%d characters)', len(experiment_name))
            else:
                experiment_name = self.base_name

            cmd_tokens.append(f'{experiment_arg_name}={experiment_name}')
            param_str = ' '.join(cmd_tokens)

            yield param_str, experiment_name
Beispiel #20
0
    def _run(self):
        """
        Main loop of the actor worker (rollout worker).
        Process tasks (mainly ROLLOUT_STEP) until we get the termination signal, which usually means end of training.
        Currently there is no mechanism to restart dead workers if something bad happens during training. We can only
        retry on the initial reset(). This is definitely something to work on.
        """
        log.info('Initializing vector env runner %d...', self.worker_idx)

        # workers should ignore Ctrl+C because the termination is handled in the event loop by a special msg
        signal.signal(signal.SIGINT, signal.SIG_IGN)

        if self.cfg.actor_worker_gpus:
            set_gpus_for_process(
                self.worker_idx,
                num_gpus_per_process=1,
                process_type='actor',
                gpu_mask=self.cfg.actor_worker_gpus,
            )

        torch.multiprocessing.set_sharing_strategy('file_system')

        timing = Timing()

        last_report = time.time()
        with torch.no_grad():
            while not self.terminate:
                try:
                    try:
                        with timing.add_time('waiting'), timing.timeit(
                                'wait_actor'):
                            tasks = self.task_queue.get_many(timeout=0.1)
                    except Empty:
                        tasks = []

                    for task in tasks:
                        task_type, data = task

                        if task_type == TaskType.INIT:
                            self._init()
                            continue

                        if task_type == TaskType.TERMINATE:
                            self._terminate()
                            break

                        # handling actual workload
                        if task_type == TaskType.ROLLOUT_STEP:
                            if 'work' not in timing:
                                timing.waiting = 0  # measure waiting only after real work has started

                            with timing.add_time('work'), timing.timeit(
                                    'one_step'):
                                self._advance_rollouts(data, timing)
                        elif task_type == TaskType.RESET:
                            with timing.add_time('reset'):
                                self._handle_reset()
                        elif task_type == TaskType.PBT:
                            self._process_pbt_task(data)
                        elif task_type == TaskType.UPDATE_ENV_STEPS:
                            for env in self.env_runners:
                                env.update_env_steps(data)

                    if time.time() - last_report > 5.0 and 'one_step' in timing:
                        timing_stats = dict(wait_actor=timing.wait_actor,
                                            step_actor=timing.one_step)
                        memory_mb = memory_consumption_mb()
                        stats = dict(memory_actor=memory_mb)
                        safe_put(self.report_queue,
                                 dict(timing=timing_stats, stats=stats),
                                 queue_name='report')
                        last_report = time.time()

                except RuntimeError as exc:
                    log.warning(
                        'Error while processing data w: %d, exception: %s',
                        self.worker_idx, exc)
                    log.warning('Terminate process...')
                    self.terminate = True
                    safe_put(self.report_queue,
                             dict(critical_error=self.worker_idx),
                             queue_name='report')
                except KeyboardInterrupt:
                    self.terminate = True
                except:
                    log.exception('Unknown exception in rollout worker')
                    self.terminate = True

        if self.worker_idx <= 1:
            time.sleep(0.1)
            log.info(
                'Env runner %d, CPU aff. %r, rollouts %d: timing %s',
                self.worker_idx,
                psutil.Process().cpu_affinity(),
                self.num_complete_rollouts,
                timing,
            )
Beispiel #21
0
    def run(self):
        for p in self.processes:
            time.sleep(0.3)
            p.start()

        finished_reset = np.zeros([self.cfg.num_workers], dtype=np.bool)
        while not all(finished_reset):
            try:
                msg = self.report_queue.get(timeout=0.1)
                if 'finished_reset' in msg:
                    finished_reset[msg['proc_idx']] = True
                    log.debug('Process %d finished reset! Status %r',
                              msg['proc_idx'], finished_reset)
            except Empty:
                pass

        log.debug('All workers finished reset!')
        time.sleep(2)
        self.start_event.set()

        start = time.time()
        env_frames = 0
        last_process_report = [time.time() for _ in self.processes]

        while not self.terminate.value:
            try:
                try:
                    msgs = self.report_queue.get_many(
                        timeout=self.report_every_sec * 1.5)
                    for msg in msgs:
                        last_process_report[msg['proc_idx']] = time.time()

                        if 'crash' in msg:
                            self.terminate.value = True
                            log.error(
                                'Terminating due to process %d crashing...',
                                msg['proc_idx'])
                            break

                        env_frames += msg['env_frames']

                    if env_frames >= self.cfg.sample_env_frames:
                        log.warning('Desired number of frames reached')
                        self.terminate.value = True

                    if time.time() - start > self.cfg.timeout_seconds:
                        log.warning('Terminated by timer')
                        self.terminate.value = True
                except Empty:
                    pass
            except KeyboardInterrupt:
                self.terminate.value = True
                log.error('KeyboardInterrupt in main loop! Terminating...')
                break

            if time.time() - self.last_report > self.report_every_sec:
                self.report(env_frames)

            for proc_idx, p in enumerate(self.processes):
                delay = time.time() - last_process_report[proc_idx]
                if delay > 600:
                    # killing the whole script is the best way to know that some of the processes froze
                    log.error(
                        'Process %d had not responded in %.1f s!!! Terminating...',
                        proc_idx, delay)
                    self.terminate.value = True

            for p in self.processes:
                if not p.is_alive():
                    self.terminate.value = True
                    log.error('Process %r died! terminating...', p)

        total_time = time.time() - start
        log.info('Collected %d frames in %.1f s, avg FPS: %.1f', env_frames,
                 total_time, env_frames / total_time)
        log.debug('Done sampling...')
Beispiel #22
0
    def sample(self, proc_idx):
        # workers should ignore Ctrl+C because the termination is handled in the event loop by a special msg
        signal.signal(signal.SIGINT, signal.SIG_IGN)

        if self.cfg.sampler_worker_gpus:
            set_gpus_for_process(
                proc_idx,
                num_gpus_per_process=1,
                process_type='sampler_proc',
                gpu_mask=self.cfg.sampler_worker_gpus,
            )

        timing = Timing()

        from threadpoolctl import threadpool_limits
        with threadpool_limits(limits=1, user_api=None):
            if self.cfg.set_workers_cpu_affinity:
                set_process_cpu_affinity(proc_idx, self.cfg.num_workers)

            initial_cpu_affinity = psutil.Process().cpu_affinity(
            ) if platform != 'darwin' else None
            psutil.Process().nice(10)

            with timing.timeit('env_init'):
                envs = []
                env_key = ['env' for _ in range(self.cfg.num_envs_per_worker)]

                for env_idx in range(self.cfg.num_envs_per_worker):
                    global_env_id = proc_idx * self.cfg.num_envs_per_worker + env_idx
                    env_config = AttrDict(worker_index=proc_idx,
                                          vector_index=env_idx,
                                          env_id=global_env_id)

                    env = make_env_func(cfg=self.cfg, env_config=env_config)
                    log.debug(
                        'CPU affinity after create_env: %r',
                        psutil.Process().cpu_affinity()
                        if platform != 'darwin' else 'MacOS - None')
                    env.seed(global_env_id)
                    envs.append(env)

                    # this is to track the performance for individual DMLab levels
                    if hasattr(env.unwrapped, 'level_name'):
                        env_key[env_idx] = env.unwrapped.level_name

                episode_length = [0 for _ in envs]
                episode_lengths = [deque([], maxlen=20) for _ in envs]

            # sample a lot of random actions once, otherwise it is pretty slow in Python
            total_random_actions = 500
            actions = [[
                env.action_space.sample() for _ in range(env.num_agents)
            ] for _ in range(total_random_actions)]
            action_i = 0

            try:
                with timing.timeit('first_reset'):
                    for env_idx, env in enumerate(envs):
                        env.reset()
                        log.info('Process %d finished resetting %d/%d envs',
                                 proc_idx, env_idx + 1, len(envs))

                    self.report_queue.put(
                        dict(proc_idx=proc_idx, finished_reset=True))

                self.start_event.wait()

                with timing.timeit('work'):
                    last_report = last_report_frames = total_env_frames = 0
                    while not self.terminate.value and total_env_frames < self.cfg.sample_env_frames_per_worker:
                        for env_idx, env in enumerate(envs):
                            with timing.add_time(f'{env_key[env_idx]}.step'):
                                obs, rewards, dones, infos = env.step(
                                    actions[action_i])
                                action_i = (action_i +
                                            1) % total_random_actions

                            num_frames = sum(
                                [info.get('num_frames', 1) for info in infos])
                            total_env_frames += num_frames
                            episode_length[env_idx] += num_frames

                            if all(dones):
                                episode_lengths[env_idx].append(
                                    episode_length[env_idx] / env.num_agents)
                                episode_length[env_idx] = 0

                        with timing.add_time('report'):
                            now = time.time()
                            if now - last_report > self.report_every_sec:
                                last_report = now
                                frames_since_last_report = total_env_frames - last_report_frames
                                last_report_frames = total_env_frames
                                self.report_queue.put(
                                    dict(proc_idx=proc_idx,
                                         env_frames=frames_since_last_report))

                                if proc_idx == 0:
                                    log.debug('Memory usage: %.4f Mb',
                                              memory_consumption_mb())

                # Extra check to make sure cpu affinity is preserved throughout the execution.
                # I observed weird effect when some environments tried to alter affinity of the current process, leading
                # to decreased performance.
                # This can be caused by some interactions between deep learning libs, OpenCV, MKL, OpenMP, etc.
                # At least user should know about it if this is happening.
                cpu_affinity = psutil.Process().cpu_affinity(
                ) if platform != 'darwin' else None
                assert initial_cpu_affinity == cpu_affinity, \
                    f'Worker CPU affinity was changed from {initial_cpu_affinity} to {cpu_affinity}!' \
                    f'This can significantly affect performance!'

            except:
                log.exception('Unknown exception')
                log.error('Unknown exception in worker %d, terminating...',
                          proc_idx)
                self.report_queue.put(dict(proc_idx=proc_idx, crash=True))

            time.sleep(proc_idx * 0.01 + 0.01)
            log.info('Process %d finished sampling. Timing: %s', proc_idx,
                     timing)

            for env_idx, env in enumerate(envs):
                if len(episode_lengths[env_idx]) > 0:
                    log.warning('Level %s avg episode len %d',
                                env_key[env_idx],
                                np.mean(episode_lengths[env_idx]))

            for env in envs:
                env.close()
Beispiel #23
0
    def run(self):
        """
        This function contains the main loop of the algorithm, as well as initialization/cleanup code.

        :return: ExperimentStatus (SUCCESS, FAILURE, INTERRUPTED). Useful in testing.
        """

        status = ExperimentStatus.SUCCESS

        if os.path.isfile(done_filename(self.cfg)):
            log.warning(
                'Training already finished! Remove "done" file to continue training'
            )
            return status

        self.init_workers()
        self.init_pbt()
        self.finish_initialization()

        log.info('Collecting experience...')

        timing = Timing()
        with timing.timeit('experience'):
            # noinspection PyBroadException
            try:
                while not self._should_end_training():
                    try:
                        reports = self.report_queue.get_many(timeout=0.1)
                        for report in reports:
                            self.process_report(report)
                    except Empty:
                        pass

                    if time.time() - self.last_report > self.report_interval:
                        self.report()

                        now = time.time()
                        self.total_train_seconds += now - self.last_report
                        self.last_report = now

                        self.update_env_steps_actor()

                    self.pbt.update(self.env_steps, self.policy_avg_stats)

            except Exception:
                log.exception('Exception in driver loop')
                status = ExperimentStatus.FAILURE
            except KeyboardInterrupt:
                log.warning(
                    'Keyboard interrupt detected in driver loop, exiting...')
                status = ExperimentStatus.INTERRUPTED

        for learner in self.learner_workers.values():
            # timeout is needed here because some environments may crash on KeyboardInterrupt (e.g. VizDoom)
            # Therefore the learner train loop will never do another iteration and will never save the model.
            # This is not an issue with normal exit, e.g. due to desired number of frames reached.
            learner.save_model(timeout=5.0)

        all_workers = self.actor_workers
        for workers in self.policy_workers.values():
            all_workers.extend(workers)
        all_workers.extend(self.learner_workers.values())

        child_processes = list_child_processes()

        time.sleep(0.1)
        log.debug('Closing workers...')
        for i, w in enumerate(all_workers):
            w.close()
            time.sleep(0.01)
        for i, w in enumerate(all_workers):
            w.join()
        log.debug('Workers joined!')

        finish_wandb(self.cfg)

        # VizDoom processes often refuse to die for an unidentified reason, so we're force killing them with a hack
        kill_processes(child_processes)

        fps = self.total_env_steps_since_resume / timing.experience
        log.info('Collected %r, FPS: %.1f', self.env_steps, fps)
        log.info('Timing: %s', timing)

        if self._should_end_training():
            with open(done_filename(self.cfg), 'w') as fobj:
                fobj.write(f'{self.env_steps}')

        time.sleep(0.5)
        log.info('Done!')

        return status
Beispiel #24
0
    def _run(self):
        # workers should ignore Ctrl+C because the termination is handled in the event loop by a special msg
        signal.signal(signal.SIGINT, signal.SIG_IGN)

        psutil.Process().nice(min(self.cfg.default_niceness + 2, 20))

        cuda_envvars_for_policy(self.policy_id, 'inference')
        torch.multiprocessing.set_sharing_strategy('file_system')

        timing = Timing()

        with timing.timeit('init'):
            # initialize the Torch modules
            log.info('Initializing model on the policy worker %d-%d...',
                     self.policy_id, self.worker_idx)
            log.info(
                f'POLICY worker {self.policy_id}-{self.worker_idx}\tpid {os.getpid()}\tparent {os.getppid()}'
            )

            torch.set_num_threads(1)

            if self.cfg.device == 'gpu':
                # we should already see only one CUDA device, because of env vars
                assert torch.cuda.device_count() == 1
                self.device = torch.device('cuda', index=0)
            else:
                self.device = torch.device('cpu')

            self.actor_critic = create_actor_critic(self.cfg, self.obs_space,
                                                    self.action_space, timing)
            self.actor_critic.model_to_device(self.device)
            for p in self.actor_critic.parameters():
                p.requires_grad = False  # we don't train anything here

            log.info('Initialized model on the policy worker %d-%d!',
                     self.policy_id, self.worker_idx)

        last_report = last_cache_cleanup = time.time()
        last_report_samples = 0
        request_count = deque(maxlen=50)

        # very conservative limit on the minimum number of requests to wait for
        # this will almost guarantee that the system will continue collecting experience
        # at max rate even when 2/3 of workers are stuck for some reason (e.g. doing a long env reset)
        # Although if your workflow involves very lengthy operations that often freeze workers, it can be beneficial
        # to set min_num_requests to 1 (at a cost of potential inefficiency, i.e. policy worker will use very small
        # batches)
        min_num_requests = self.cfg.num_workers // (
            self.cfg.num_policies * self.cfg.policy_workers_per_policy)
        min_num_requests //= 3
        min_num_requests = max(1, min_num_requests)
        log.info('Min num requests: %d', min_num_requests)

        # Again, very conservative timer. Only wait a little bit, then continue operation.
        wait_for_min_requests = 0.025

        while not self.terminate:
            try:
                while self.shared_buffers.stop_experience_collection[
                        self.policy_id]:
                    with self.resume_experience_collection_cv:
                        self.resume_experience_collection_cv.wait(timeout=0.05)

                waiting_started = time.time()
                while len(self.requests) < min_num_requests and time.time(
                ) - waiting_started < wait_for_min_requests:
                    try:
                        with timing.timeit('wait_policy'), timing.add_time(
                                'wait_policy_total'):
                            policy_requests = self.policy_queue.get_many(
                                timeout=0.005)
                        self.requests.extend(policy_requests)
                    except Empty:
                        pass

                self._update_weights(timing)

                with timing.timeit('one_step'), timing.add_time(
                        'handle_policy_step'):
                    if self.initialized:
                        if len(self.requests) > 0:
                            request_count.append(len(self.requests))
                            self._handle_policy_steps(timing)

                try:
                    task_type, data = self.task_queue.get_nowait()

                    # task from the task_queue
                    if task_type == TaskType.INIT:
                        self._init()
                    elif task_type == TaskType.TERMINATE:
                        self.terminate = True
                        break
                    elif task_type == TaskType.INIT_MODEL:
                        self._init_model(data)

                    self.task_queue.task_done()
                except Empty:
                    pass

                if time.time() - last_report > 3.0 and 'one_step' in timing:
                    timing_stats = dict(wait_policy=timing.wait_policy,
                                        step_policy=timing.one_step)
                    samples_since_last_report = self.total_num_samples - last_report_samples

                    stats = memory_stats('policy_worker', self.device)
                    if len(request_count) > 0:
                        stats['avg_request_count'] = np.mean(request_count)

                    self.report_queue.put(
                        dict(
                            timing=timing_stats,
                            samples=samples_since_last_report,
                            policy_id=self.policy_id,
                            stats=stats,
                        ))
                    last_report = time.time()
                    last_report_samples = self.total_num_samples

                if time.time() - last_cache_cleanup > 300.0 or (
                        not self.cfg.benchmark
                        and self.total_num_samples < 1000):
                    if self.cfg.device == 'gpu':
                        torch.cuda.empty_cache()
                    last_cache_cleanup = time.time()

            except KeyboardInterrupt:
                log.warning('Keyboard interrupt detected on worker %d-%d',
                            self.policy_id, self.worker_idx)
                self.terminate = True
            except:
                log.exception('Unknown exception on policy worker')
                self.terminate = True

        time.sleep(0.2)
        log.info('Policy worker avg. requests %.2f, timing: %s',
                 np.mean(request_count), timing)
Beispiel #25
0
    def __init__(
        self,
        task_id,
        level,
        action_repeat,
        res_w,
        res_h,
        benchmark_mode,
        renderer,
        dataset_path,
        with_instructions,
        extended_action_set,
        use_level_cache,
        level_cache_path,
        gpu_index,
        extra_cfg=None,
    ):
        self.width = res_w
        self.height = res_h

        # self._main_observation = 'DEBUG.CAMERA_INTERLEAVED.PLAYER_VIEW_NO_RETICLE'
        self.main_observation = 'RGB_INTERLEAVED'
        self.instructions_observation = DMLAB_INSTRUCTIONS
        self.with_instructions = with_instructions and not benchmark_mode

        self.action_repeat = action_repeat

        self.random_state = None

        self.task_id = task_id
        self.level = level
        self.level_name = dmlab_level_to_level_name(self.level)

        # the policy index which currently acts in the environment
        self.curr_policy_idx = 0
        self.curr_cache = dmlab_level_cache.DMLAB_GLOBAL_LEVEL_CACHE[
            self.curr_policy_idx]

        self.instructions = np.zeros([DMLAB_MAX_INSTRUCTION_LEN],
                                     dtype=np.int32)

        observation_format = [self.main_observation]
        if self.with_instructions:
            observation_format += [self.instructions_observation]

        config = {
            'width': self.width,
            'height': self.height,
            'gpuDeviceIndex': str(gpu_index),
            'datasetPath': dataset_path,
        }

        if extra_cfg is not None:
            config.update(extra_cfg)
        config = {k: str(v) for k, v in config.items()}

        self.use_level_cache = use_level_cache
        self.level_cache_path = ensure_dir_exists(level_cache_path)

        env_level_cache = self if use_level_cache else None
        self.env_uses_level_cache = False  # will be set to True when this env instance queries the cache
        self.last_reset_seed = None

        if env_level_cache is not None:
            if not isinstance(self.curr_cache,
                              dmlab_level_cache.DmlabLevelCacheGlobal):
                raise Exception(
                    'DMLab global level cache object is not initialized! Make sure to call'
                    'dmlab_ensure_global_cache_initialized() in the main thread before you fork any child processes'
                    'or create any DMLab envs')

        self.dmlab = deepmind_lab.Lab(
            level,
            observation_format,
            config=config,
            renderer=renderer,
            level_cache=env_level_cache,
        )

        self.action_set = EXTENDED_ACTION_SET if extended_action_set else ACTION_SET
        self.action_list = np.array(
            self.action_set,
            dtype=np.intc)  # DMLAB requires intc type for actions

        self.last_observation = None

        self.render_scale = 5
        self.render_fps = 30
        self.last_frame = time.time()

        self.action_space = gym.spaces.Discrete(len(self.action_set))

        self.observation_space = gym.spaces.Dict(
            obs=gym.spaces.Box(low=0,
                               high=255,
                               shape=(self.height, self.width, 3),
                               dtype=np.uint8))
        if self.with_instructions:
            self.observation_space.spaces[
                self.instructions_observation] = gym.spaces.Box(
                    low=0,
                    high=DMLAB_VOCABULARY_SIZE,
                    shape=[DMLAB_MAX_INSTRUCTION_LEN],
                    dtype=np.int32,
                )

        self.benchmark_mode = benchmark_mode
        if self.benchmark_mode:
            log.warning(
                'DmLab benchmark mode is true! Use this only for testing, not for actual training runs!'
            )

        self.seed()
Beispiel #26
0
def multi_agent_match(policy_indices, max_num_episodes=int(1e9), max_num_frames=1e10):
    log.debug('Starting eval process with policies %r', policy_indices)
    for i, rival in enumerate(RIVALS):
        rival.policy_index = policy_indices[i]

    curr_dir = os.path.dirname(os.path.abspath(__file__))
    evaluation_filename = join(curr_dir, f'eval_{"vs".join([str(pi) for pi in policy_indices])}.txt')
    with open(evaluation_filename, 'w') as fobj:
        fobj.write('start\n')

    common_config = RIVALS[0].cfg

    render_action_repeat = common_config.render_action_repeat if common_config.render_action_repeat is not None else common_config.env_frameskip
    if render_action_repeat is None:
        log.warning('Not using action repeat!')
        render_action_repeat = 1
    log.debug('Using action repeat %d during evaluation', render_action_repeat)

    common_config.env_frameskip = 1  # for evaluation
    common_config.num_envs = 1
    common_config.timelimit = 4.0  # for faster evaluation

    def make_env_func(env_config):
        return create_env(ENV_NAME, cfg=common_config, env_config=env_config)

    env = make_env_func(AttrDict({'worker_index': 0, 'vector_index': 0}))
    env.seed(0)

    is_multiagent = is_multiagent_env(env)
    if not is_multiagent:
        env = MultiAgentWrapper(env)
    else:
        assert env.num_agents == len(RIVALS)

    device = torch.device('cuda')
    for rival in RIVALS:
        rival.actor_critic = create_actor_critic(rival.cfg, env.observation_space, env.action_space)
        rival.actor_critic.model_to_device(device)

        policy_id = rival.policy_index
        checkpoints = LearnerWorker.get_checkpoints(LearnerWorker.checkpoint_dir(rival.cfg, policy_id))
        checkpoint_dict = LearnerWorker.load_checkpoint(checkpoints, device)
        rival.actor_critic.load_state_dict(checkpoint_dict['model'])

    episode_rewards = []
    num_frames = 0

    last_render_start = time.time()

    def max_frames_reached(frames):
        return max_num_frames is not None and frames > max_num_frames

    wins = [0 for _ in RIVALS]
    ties = 0
    frag_differences = []

    with torch.no_grad():
        for _ in range(max_num_episodes):
            obs = env.reset()
            obs_dict_torch = dict()

            done = [False] * len(obs)
            for rival in RIVALS:
                rival.rnn_states = torch.zeros([1, rival.cfg.hidden_size], dtype=torch.float32, device=device)

            episode_reward = 0
            prev_frame = time.time()

            while True:
                actions = []
                for i, obs_dict in enumerate(obs):
                    for key, x in obs_dict.items():
                        obs_dict_torch[key] = torch.from_numpy(x).to(device).float().view(1, *x.shape)

                    rival = RIVALS[i]
                    policy_outputs = rival.actor_critic(obs_dict_torch, rival.rnn_states)
                    rival.rnn_states = policy_outputs.rnn_states
                    actions.append(policy_outputs.actions[0].cpu().numpy())

                for _ in range(render_action_repeat):
                    if not NO_RENDER:
                        target_delay = 1.0 / FPS if FPS > 0 else 0
                        current_delay = time.time() - last_render_start
                        time_wait = target_delay - current_delay

                        if time_wait > 0:
                            # log.info('Wait time %.3f', time_wait)
                            time.sleep(time_wait)

                        last_render_start = time.time()
                        env.render()

                    obs, rew, done, infos = env.step(actions)
                    if all(done):
                        log.debug('Finished episode!')

                        frag_diff = infos[0]['PLAYER1_FRAGCOUNT'] - infos[0]['PLAYER2_FRAGCOUNT']
                        if frag_diff > 0:
                            wins[0] += 1
                        elif frag_diff < 0:
                            wins[1] += 1
                        else:
                            ties += 1

                        frag_differences.append(frag_diff)
                        avg_frag_diff = np.mean(frag_differences)

                        report = f'wins: {wins}, ties: {ties}, avg_frag_diff: {avg_frag_diff}'
                        with open(evaluation_filename, 'a') as fobj:
                            fobj.write(report + '\n')

                    # log.info('%d:%d', infos[0]['PLAYER1_FRAGCOUNT'], infos[0]['PLAYER2_FRAGCOUNT'])

                    episode_reward += np.mean(rew)
                    num_frames += 1

                    if num_frames % 100 == 0:
                        log.debug('%.1f', render_action_repeat / (time.time() - prev_frame))
                    prev_frame = time.time()

                    if all(done):
                        log.info('Episode finished at %d frames', num_frames)
                        break

                if all(done) or max_frames_reached(num_frames):
                    break

            if not NO_RENDER:
                env.render()
            time.sleep(0.01)

            episode_rewards.append(episode_reward)
            last_episodes = episode_rewards[-100:]
            avg_reward = sum(last_episodes) / len(last_episodes)
            log.info(
                'Episode reward: %f, avg reward for %d episodes: %f', episode_reward, len(last_episodes), avg_reward,
            )

            if max_frames_reached(num_frames):
                break

    env.close()
Beispiel #27
0
def enjoy(cfg, max_num_frames=1e9):
    cfg = load_from_checkpoint(cfg)

    render_action_repeat = cfg.render_action_repeat if cfg.render_action_repeat is not None else cfg.env_frameskip
    if render_action_repeat is None:
        log.warning('Not using action repeat!')
        render_action_repeat = 1
    log.debug('Using action repeat %d during evaluation', render_action_repeat)

    cfg.env_frameskip = 1  # for evaluation
    cfg.num_envs = 1

    def make_env_func(env_config):
        return create_env(cfg.env, cfg=cfg, env_config=env_config)

    env = make_env_func(AttrDict({'worker_index': 0, 'vector_index': 0}))
    # env.seed(0)

    is_multiagent = is_multiagent_env(env)
    if not is_multiagent:
        env = MultiAgentWrapper(env)

    if hasattr(env.unwrapped, 'reset_on_init'):
        # reset call ruins the demo recording for VizDoom
        env.unwrapped.reset_on_init = False

    actor_critic = create_actor_critic(cfg, env.observation_space,
                                       env.action_space)

    device = torch.device('cpu' if cfg.device == 'cpu' else 'cuda')
    actor_critic.model_to_device(device)

    policy_id = cfg.policy_index
    checkpoints = LearnerWorker.get_checkpoints(
        LearnerWorker.checkpoint_dir(cfg, policy_id))
    checkpoint_dict = LearnerWorker.load_checkpoint(checkpoints, device)
    actor_critic.load_state_dict(checkpoint_dict['model'])

    episode_rewards = [deque([], maxlen=100) for _ in range(env.num_agents)]
    true_rewards = [deque([], maxlen=100) for _ in range(env.num_agents)]
    num_frames = 0

    last_render_start = time.time()

    def max_frames_reached(frames):
        return max_num_frames is not None and frames > max_num_frames

    obs = env.reset()
    rnn_states = torch.zeros(
        [env.num_agents, get_hidden_size(cfg)],
        dtype=torch.float32,
        device=device)
    episode_reward = np.zeros(env.num_agents)
    finished_episode = [False] * env.num_agents

    with torch.no_grad():
        while not max_frames_reached(num_frames):
            obs_torch = AttrDict(transform_dict_observations(obs))
            for key, x in obs_torch.items():
                obs_torch[key] = torch.from_numpy(x).to(device).float()

            policy_outputs = actor_critic(obs_torch,
                                          rnn_states,
                                          with_action_distribution=True)

            # sample actions from the distribution by default
            actions = policy_outputs.actions

            action_distribution = policy_outputs.action_distribution
            if isinstance(action_distribution, ContinuousActionDistribution):
                if not cfg.continuous_actions_sample:  # TODO: add similar option for discrete actions
                    actions = action_distribution.means

            actions = actions.cpu().numpy()

            rnn_states = policy_outputs.rnn_states

            for _ in range(render_action_repeat):
                if not cfg.no_render:
                    target_delay = 1.0 / cfg.fps if cfg.fps > 0 else 0
                    current_delay = time.time() - last_render_start
                    time_wait = target_delay - current_delay

                    if time_wait > 0:
                        # log.info('Wait time %.3f', time_wait)
                        time.sleep(time_wait)

                    last_render_start = time.time()
                    env.render()

                obs, rew, done, infos = env.step(actions)

                episode_reward += rew
                num_frames += 1

                for agent_i, done_flag in enumerate(done):
                    if done_flag:
                        finished_episode[agent_i] = True
                        episode_rewards[agent_i].append(
                            episode_reward[agent_i])
                        true_rewards[agent_i].append(infos[agent_i].get(
                            'true_reward', episode_reward[agent_i]))
                        log.info(
                            'Episode finished for agent %d at %d frames. Reward: %.3f, true_reward: %.3f',
                            agent_i, num_frames, episode_reward[agent_i],
                            true_rewards[agent_i][-1])
                        rnn_states[agent_i] = torch.zeros(
                            [get_hidden_size(cfg)],
                            dtype=torch.float32,
                            device=device)
                        episode_reward[agent_i] = 0

                # if episode terminated synchronously for all agents, pause a bit before starting a new one
                if all(done):
                    if not cfg.no_render:
                        env.render()
                    time.sleep(0.05)

                if all(finished_episode):
                    finished_episode = [False] * env.num_agents
                    avg_episode_rewards_str, avg_true_reward_str = '', ''
                    for agent_i in range(env.num_agents):
                        avg_rew = np.mean(episode_rewards[agent_i])
                        avg_true_rew = np.mean(true_rewards[agent_i])
                        if not np.isnan(avg_rew):
                            if avg_episode_rewards_str:
                                avg_episode_rewards_str += ', '
                            avg_episode_rewards_str += f'#{agent_i}: {avg_rew:.3f}'
                        if not np.isnan(avg_true_rew):
                            if avg_true_reward_str:
                                avg_true_reward_str += ', '
                            avg_true_reward_str += f'#{agent_i}: {avg_true_rew:.3f}'

                    log.info('Avg episode rewards: %s, true rewards: %s',
                             avg_episode_rewards_str, avg_true_reward_str)
                    log.info(
                        'Avg episode reward: %.3f, avg true_reward: %.3f',
                        np.mean([
                            np.mean(episode_rewards[i])
                            for i in range(env.num_agents)
                        ]),
                        np.mean([
                            np.mean(true_rewards[i])
                            for i in range(env.num_agents)
                        ]))

                # VizDoom multiplayer stuff
                # for player in [1, 2, 3, 4, 5, 6, 7, 8]:
                #     key = f'PLAYER{player}_FRAGCOUNT'
                #     if key in infos[0]:
                #         log.debug('Score for player %d: %r', player, infos[0][key])

    env.close()

    return ExperimentStatus.SUCCESS, np.mean(episode_rewards)
Beispiel #28
0
    def sample(self, proc_idx):
        # workers should ignore Ctrl+C because the termination is handled in the event loop by a special msg
        signal.signal(signal.SIGINT, signal.SIG_IGN)

        timing = Timing()

        psutil.Process().nice(10)

        num_envs = len(DMLAB30_LEVELS_THAT_USE_LEVEL_CACHE)
        assert self.cfg.num_workers % num_envs == 0, f'should have an integer number of workers per env, e.g. {1 * num_envs}, {2 * num_envs}, etc...'
        assert self.cfg.num_envs_per_worker == 1, 'use populate_cache with 1 env per worker'

        with timing.timeit('env_init'):
            env_key = 'env'
            env_desired_num_levels = 0

            global_env_id = proc_idx * self.cfg.num_envs_per_worker
            env_config = AttrDict(worker_index=proc_idx, vector_index=0, env_id=global_env_id)
            env = create_env(self.cfg.env, cfg=self.cfg, env_config=env_config)
            env.seed(global_env_id)

            # this is to track the performance for individual DMLab levels
            if hasattr(env.unwrapped, 'level_name'):
                env_key = env.unwrapped.level_name
                env_level = env.unwrapped.level

                approx_num_episodes_per_1b_frames = DMLAB30_APPROX_NUM_EPISODES_PER_BILLION_FRAMES[env_key]
                num_billions = DESIRED_TRAINING_LENGTH / int(1e9)
                num_workers_for_env = self.cfg.num_workers // num_envs
                env_desired_num_levels = int((approx_num_episodes_per_1b_frames * num_billions) / num_workers_for_env)

                env_num_levels_generated = len(dmlab_level_cache.DMLAB_GLOBAL_LEVEL_CACHE[0].all_seeds[env_level]) // num_workers_for_env

                log.warning('Worker %d (env %s) generated %d/%d levels!', proc_idx, env_key, env_num_levels_generated, env_desired_num_levels)
                time.sleep(4)

            env.reset()
            env_uses_level_cache = env.unwrapped.env_uses_level_cache

            self.report_queue.put(dict(proc_idx=proc_idx, finished_reset=True))

        self.start_event.wait()

        try:
            with timing.timeit('work'):
                last_report = last_report_frames = total_env_frames = 0
                while not self.terminate.value and total_env_frames < self.cfg.sample_env_frames_per_worker:
                    action = env.action_space.sample()
                    with timing.add_time(f'{env_key}.step'):
                        env.step(action)

                    total_env_frames += 1

                    with timing.add_time(f'{env_key}.reset'):
                        env.reset()
                        env_num_levels_generated += 1
                        log.debug('Env %s done %d/%d resets', env_key, env_num_levels_generated, env_desired_num_levels)

                    if env_num_levels_generated >= env_desired_num_levels:
                        log.debug('%s finished %d/%d resets, sleeping...', env_key, env_num_levels_generated, env_desired_num_levels)
                        time.sleep(30)  # free up CPU time for other envs

                    # if env does not use level cache, there is no need to run it
                    # let other workers proceed
                    if not env_uses_level_cache:
                        log.debug('Env %s does not require cache, sleeping...', env_key)
                        time.sleep(200)

                    with timing.add_time('report'):
                        now = time.time()
                        if now - last_report > self.report_every_sec:
                            last_report = now
                            frames_since_last_report = total_env_frames - last_report_frames
                            last_report_frames = total_env_frames
                            self.report_queue.put(dict(proc_idx=proc_idx, env_frames=frames_since_last_report))

                            if get_free_disk_space_mb(self.cfg) < 3 * 1024:
                                log.error('Not enough disk space! %d', get_free_disk_space_mb(self.cfg))
                                time.sleep(200)
        except:
            log.exception('Unknown exception')
            log.error('Unknown exception in worker %d, terminating...', proc_idx)
            self.report_queue.put(dict(proc_idx=proc_idx, crash=True))

        time.sleep(proc_idx * 0.1 + 0.1)
        log.info('Process %d finished sampling. Timing: %s', proc_idx, timing)

        env.close()
Beispiel #29
0
    def __init__(self,
                 action_space,
                 config_file,
                 coord_limits=None,
                 max_histogram_length=200,
                 show_automap=False,
                 skip_frames=1,
                 async_mode=False,
                 record_to=None):
        self.initialized = False

        # essential game data
        self.game = None
        self.state = None
        self.curr_seed = 0
        self.rng = None
        self.skip_frames = skip_frames
        self.async_mode = async_mode

        # optional - for topdown view rendering and visitation heatmaps
        self.show_automap = show_automap
        self.coord_limits = coord_limits

        # can be adjusted after the environment is created (but before any reset() call) via observation space wrapper
        self.screen_w, self.screen_h, self.channels = 640, 480, 3
        self.screen_resolution = ScreenResolution.RES_640X480
        self.calc_observation_space()

        self.black_screen = None

        # provided as a part of environment definition, since these depend on the scenario and
        # can be quite complex multi-discrete spaces
        self.action_space = action_space
        self.composite_action_space = hasattr(self.action_space, 'spaces')

        self.delta_actions_scaling_factor = 7.5

        if os.path.isabs(config_file):
            self.config_path = config_file
        else:
            scenarios_dir = join(os.path.dirname(__file__), 'scenarios')
            self.config_path = join(scenarios_dir, config_file)
            if not os.path.isfile(self.config_path):
                log.warning(
                    'File %s not found in scenarios dir %s. Consider providing absolute path?',
                    config_file, scenarios_dir,
                )

        self.variable_indices = self._parse_variable_indices(self.config_path)

        # only created if we call render() method
        self.viewer = None

        # record full episodes using VizDoom recording functionality
        self.record_to = record_to

        self.is_multiplayer = False  # overridden in derived classes

        # (optional) histogram to track positional coverage
        # do not pass coord_limits if you don't need this, to avoid extra calculation
        self.max_histogram_length = max_histogram_length
        self.current_histogram, self.previous_histogram = None, None
        if self.coord_limits:
            x = (self.coord_limits[2] - self.coord_limits[0])
            y = (self.coord_limits[3] - self.coord_limits[1])
            if x > y:
                len_x = self.max_histogram_length
                len_y = int((y / x) * self.max_histogram_length)
            else:
                len_x = int((x / y) * self.max_histogram_length)
                len_y = self.max_histogram_length
            self.current_histogram = np.zeros((len_x, len_y), dtype=np.int32)
            self.previous_histogram = np.zeros_like(self.current_histogram)

        # helpers for human play with pynput keyboard input
        self._terminate = False
        self._current_actions = []
        self._actions_flattened = None

        self._prev_info = None
        self._last_episode_info = None

        self._num_episodes = 0

        self.mode = 'algo'

        self.seed()