Example #1
    def _game_init(self, with_locking=True, max_parallel=10):
        lock_file = lock = None
        if with_locking:
            lock_file = doom_lock_file(max_parallel)
            lock = FileLock(lock_file)

        init_attempt = 0
        while True:
            init_attempt += 1
            try:
                if with_locking:
                    with lock.acquire(timeout=20):
                        self.game.init()
                else:
                    self.game.init()

                break
            except Timeout:
                if with_locking:
                    log.debug(
                        'Another process currently holds the lock %s, attempt: %d',
                        lock_file,
                        init_attempt,
                    )
            except Exception as exc:
                log.warning(
                    'VizDoom game.init() threw an exception %r. Terminate process...',
                    exc)
                from seed_rl.envs.env_utils import EnvCriticalError
                raise EnvCriticalError()
Example #2
def find_available_port(start_port, increment=1000):
    port = start_port
    while port < 65535 and not is_udp_port_available(port):
        port += increment

    log.debug('Port %r is available', port)
    return port
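
The helper is_udp_port_available is not shown on this page. A minimal sketch of how it could look, assuming a plain bind test with the standard socket module is good enough (an illustration, not the project's actual implementation):

import socket

def is_udp_port_available(port):
    # try to bind a UDP socket; if the bind fails, assume the port is taken
    try:
        with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as sock:
            sock.bind(('0.0.0.0', port))
        return True
    except OSError:
        return False

# find_available_port would then probe upwards from a base port, e.g.:
# port = find_available_port(40300, increment=1000)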
Example #3
def load_from_checkpoint(cfg):
    filename = cfg_file(cfg)
    if not os.path.isfile(filename):
        raise Exception(
            f'Could not load saved parameters for experiment {cfg.experiment}')

    with open(filename, 'r') as json_file:
        json_params = json.load(json_file)
        log.warning('Loading existing experiment configuration from %s',
                    filename)
        loaded_cfg = AttrDict(json_params)

    # override the parameters in config file with values passed from command line
    for key, value in vars(cfg.cli_args).items():
        if loaded_cfg[key] != value:
            log.debug(
                'Overriding arg %r with value %r passed from command line',
                key, value)
            loaded_cfg[key] = value

    # incorporate extra CLI parameters that were not present in JSON file
    for key, value in vars(cfg).items():
        if key not in loaded_cfg:
            log.debug(
                'Adding new argument %r=%r that is not in the saved config file!',
                key, value)
            loaded_cfg[key] = value

    return loaded_cfg
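
For context, a minimal sketch of what the companion cfg_file helper might look like; the train_dir field and the cfg.json file name are assumptions made for illustration, not the project's actual layout:

from os.path import join

def cfg_file(cfg):
    # assumed layout: one JSON file with the saved parameters per experiment directory
    return join(cfg.train_dir, cfg.experiment, 'cfg.json')

The override order in load_from_checkpoint matters: the saved JSON values are the baseline, explicitly passed command-line arguments (cfg.cli_args) win over them, and any new arguments missing from the JSON file are simply added on top.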
Example #4
    def __init__(self, cfg, obs_space, timing):
        super().__init__(cfg, timing)

        self.basic_encoder = create_standard_encoder(cfg, obs_space, timing)
        self.encoder_out_size = self.basic_encoder.encoder_out_size

        # same as IMPALA paper
        self.embedding_size = 20
        self.instructions_lstm_units = 64
        self.instructions_lstm_layers = 1

        padding_idx = 0
        self.word_embedding = nn.Embedding(
            num_embeddings=DMLAB_VOCABULARY_SIZE,
            embedding_dim=self.embedding_size,
            padding_idx=padding_idx)

        self.instructions_lstm = nn.LSTM(
            input_size=self.embedding_size,
            hidden_size=self.instructions_lstm_units,
            num_layers=self.instructions_lstm_layers,
            batch_first=True,
        )

        # learnable initial state?
        # initial_hidden_values = torch.normal(0, 1, size=(self.instructions_lstm_units, ))
        # self.lstm_h0 = nn.Parameter(initial_hidden_values, requires_grad=True)
        # self.lstm_c0 = nn.Parameter(initial_hidden_values, requires_grad=True)

        self.encoder_out_size += self.instructions_lstm_units
        log.debug('Policy head output size: %r', self.encoder_out_size)

        self.cpu_device = torch.device('cpu')
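
A self-contained sketch of how the instruction pathway defined above might be used in a forward pass: embed the padded instruction token ids, run them through the LSTM, and concatenate the final hidden state with the visual features. The toy dimensions and the choice of the last hidden state are assumptions; the real encoder may pool the sequence differently:

import torch
from torch import nn

# toy stand-ins for DMLAB_VOCABULARY_SIZE and the visual encoder output size
vocab_size, embedding_size, lstm_units, visual_features = 1000, 20, 64, 512

word_embedding = nn.Embedding(vocab_size, embedding_size, padding_idx=0)
instructions_lstm = nn.LSTM(embedding_size, lstm_units, num_layers=1, batch_first=True)

tokens = torch.randint(1, vocab_size, (4, 16))  # batch of 4 instructions, 16 token ids each
visual = torch.randn(4, visual_features)        # output of the basic (image) encoder

embedded = word_embedding(tokens)               # (4, 16, 20)
_, (h_n, _) = instructions_lstm(embedded)       # h_n: (num_layers, 4, 64)
instruction_features = h_n[-1]                  # (4, 64), last layer's final hidden state

head_input = torch.cat([visual, instruction_features], dim=1)  # (4, 512 + 64)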
Example #5
    def predict(self, imagined_action_lists):
        start = time.time()
        assert len(imagined_action_lists) == self.num_envs
        imagined_action_lists = np.split(np.array(imagined_action_lists),
                                         self.num_workers)
        for worker, imagined_action_list in zip(self.workers,
                                                imagined_action_lists):
            worker.task_queue.put(
                (imagined_action_list, MsgType.STEP_IMAGINED))

        observations = []
        rewards = []
        dones = []
        for worker in self.workers:
            worker.task_queue.join()
            results_per_worker = safe_get(
                worker.result_queue,
                timeout=1.0,
                msg='Took a surprisingly long time to predict the future, retrying...',
            )

            assert len(results_per_worker) == len(imagined_action_lists[0])
            for result in results_per_worker:
                o, r, d, _ = zip(*result)
                observations.append(o)
                rewards.append(r)
                dones.append(d)
            worker.result_queue.task_done()

        if self._verbose:
            log.debug('Prediction step took %.4f s', time.time() - start)
        return observations, rewards, dones
Example #6
    def step(self, actions):
        if self.skip_frames > 1 or self.num_agents == 1:
            # not used in multi-agent mode due to VizDoom limitations
            # this means that we have only one agent (+ maybe some bots, which is why we're in multiplayer mode)
            return super().step(actions)

        self._ensure_initialized()

        actions_binary = self._convert_actions(actions)

        self.game.set_action(actions_binary)
        self.game.advance_action(1, self.update_state)
        self.timestep += 1

        if not self.update_state:
            return None, None, None, None

        state = self.game.get_state()
        reward = self.game.get_last_reward()
        done = self.game.is_episode_finished()

        if self.record_to is not None:
            # send 'stop recording' command 1 tick before the end of the episode
            # otherwise it does not get saved to disk
            if self.game.get_episode_time() + 1 == self.game.get_episode_timeout():
                log.debug('Calling stop recording command!')
                self.game.send_game_command('stop')

        observation, done, info = self._process_game_step(state, done, {})
        return observation, reward, done, info
Example #7
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--env', type=str, default=None, required=True)
    parser.add_argument('--demo_path', type=str, default=None, required=True)
    args = parser.parse_args()

    spec = doom_env_by_name(args.env)
    cfg = default_cfg(env=args.env)
    if spec.num_agents <= 1:
        env = make_doom_env(args.env, cfg=cfg, custom_resolution='640x480')
    else:
        env = make_doom_env_impl(
            spec,
            cfg=cfg,
            custom_resolution='640x480',
            player_id=0,
            num_agents=spec.num_agents,
            max_num_players=spec.num_agents,
            num_bots=spec.num_bots,
        )

    mode = 'replay'
    env.unwrapped.mode = mode
    env.unwrapped.initialize()
    game = env.unwrapped.game

    game.replay_episode(args.demo_path)

    frames_dir = args.demo_path + '_frames'
    if os.path.exists(frames_dir):
        shutil.rmtree(frames_dir)
    os.makedirs(frames_dir)

    frame_id = 0
    while not game.is_episode_finished():
        # Use advance_action instead of make_action.
        game.advance_action()
        img = env.render(mode='rgb_array')

        frame_name = f'{frame_id:05d}.png'
        if img is not None:
            img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
            cv2.imwrite(join(frames_dir, frame_name), img)

        frame_id += 1

        r = game.get_last_reward()
        log.debug('Reward %.3f at frame %d', r, frame_id)

    game.close()
Example #8
    def dbg_print(self):
        dbg_info = dict(
            entropy=self.entropy().mean(),
            kl_prior=self.kl_prior().mean(),
            min_logit=self.logits.min(),
            max_logit=self.logits.max(),
            min_prob=self.probs.min(),
            max_prob=self.probs.max(),
        )

        msg = ''
        for key, value in dbg_info.items():
            msg += f'{key}={value.cpu().item():.3f} '
        log.debug(msg)
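
A standalone sketch of the same kind of summary on a plain torch.distributions.Categorical; the kl_prior term from the example is specific to that code base and is omitted here, and the logits are random placeholders:

import torch
from torch.distributions import Categorical

dist = Categorical(logits=torch.randn(8, 6))  # toy action distribution: batch of 8, 6 actions

summary = dict(
    entropy=dist.entropy().mean(),
    min_logit=dist.logits.min(),
    max_logit=dist.logits.max(),
    min_prob=dist.probs.min(),
    max_prob=dist.probs.max(),
)
print(' '.join(f'{key}={value.item():.3f}' for key, value in summary.items()))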
Example #9
    def __init__(self, env, initial_difficulty=None):
        super().__init__(env)

        self._min_difficulty = 0
        self._max_difficulty = 150
        self._difficulty_step = 10
        self._curr_difficulty = 20 if initial_difficulty is None else initial_difficulty
        self._difficulty_std = 10

        log.info('Starting with bot difficulty %d', self._curr_difficulty)

        self._adaptive_curriculum = True
        if initial_difficulty == self._max_difficulty:
            log.debug(
                'Starting at max difficulty, disabling adaptive skill curriculum'
            )
            self._adaptive_curriculum = False
Example #10
    def _ensure_initialized(self):
        if self.initialized:
            return

        self.workers = [
            MultiAgentEnvWorker(i,
                                self.make_env_func,
                                self.env_config,
                                reset_on_init=self.reset_on_init)
            for i in range(self.num_agents)
        ]

        init_attempt = 0
        while True:
            init_attempt += 1
            try:
                port_to_use = udp_port_num(self.env_config)
                port = find_available_port(port_to_use, increment=1000)
                log.debug('Using port %d', port)
                init_info = dict(port=port)

                lock_file = doom_lock_file(max_parallel=20)
                lock = FileLock(lock_file)
                with lock.acquire(timeout=10):
                    for i, worker in enumerate(self.workers):
                        worker.task_queue.put((init_info, TaskType.INIT))
                        if self.safe_init:
                            time.sleep(1.0)  # just in case
                        else:
                            time.sleep(0.05)

                    for i, worker in enumerate(self.workers):
                        worker.result_queue.get(timeout=5)
                        worker.result_queue.task_done()
                        worker.task_queue.join()
            except filelock.Timeout:
                continue
            except Exception:
                raise RuntimeError(
                    'Critical error: worker stuck on initialization. Abort!')
            else:
                break

        log.debug('%d agent workers initialized for env %d!',
                  len(self.workers), self.env_config.worker_index)
        self.initialized = True
Example #11
    def initialize(self):
        checkpoint_dict = self._load_checkpoint(self._checkpoint_dir())
        if checkpoint_dict is None:
            log.debug('Did not load from checkpoint, starting from scratch!')
        else:
            log.debug('Loading model from checkpoint')
            self._load_state(checkpoint_dict)

        log.debug('Experiment parameters:')
        for key, value in self._cfg_dict().items():
            log.debug('\t %s: %r', key, value)
Example #12
def init_multiplayer_env(make_env_func, player_id, env_config, init_info=None):
    env = make_env_func(player_id=player_id)

    if env_config is not None and 'worker_index' in env_config:
        env.unwrapped.worker_index = env_config.worker_index
    if env_config is not None and 'vector_index' in env_config:
        env.unwrapped.vector_index = env_config.vector_index

    if init_info is None:
        port_to_use = udp_port_num(env_config)
        port = find_available_port(port_to_use, increment=1000)
        log.debug('Using port %d', port)
        init_info = dict(port=port)

    env.unwrapped.init_info = init_info

    env.seed(env.unwrapped.worker_index * 1000 +
             env.unwrapped.vector_index * 10 + player_id)
    return env
Example #13
    def __init__(self,
                 num_envs,
                 num_workers,
                 make_env_func,
                 stats_episodes,
                 use_multiprocessing=True):
        tmp_env = make_env_func(None)
        if hasattr(tmp_env, 'num_agents'):
            self.num_agents = tmp_env.num_agents
        else:
            raise Exception('Expected multi-agent environment')

        global DEFAULT_UDP_PORT
        DEFAULT_UDP_PORT = find_available_port(DEFAULT_UDP_PORT)
        log.debug('Default UDP port changed to %r', DEFAULT_UDP_PORT)
        time.sleep(0.1)

        super().__init__(num_envs, num_workers, make_env_func, stats_episodes,
                         use_multiprocessing)
Example #14
    def record_used_seed(self, level, seed):
        self.num_seeds_used_in_current_run[level].value += 1
        log.debug('Updated number of used seeds for level %s (%d)', level,
                  self.num_seeds_used_in_current_run[level].value)

        used_lvl_seeds_dir = self.get_used_seeds_dir()
        used_seeds_filename = join(used_lvl_seeds_dir,
                                   level_to_filename(level))
        safe_ensure_dir_exists(os.path.dirname(used_seeds_filename))

        with open(used_seeds_filename, 'a') as fobj:
            fobj.write(f'{seed}\n')

        # this data structure is not shared across processes, but we mostly care about the initial
        # seeds anyway, which are initialized before the processes are forked
        if level not in self.used_seeds:
            self.used_seeds[level] = {seed}
        else:
            self.used_seeds[level].add(seed)
Example #15
    def _save(self):
        checkpoint = self._get_checkpoint_dict()
        assert checkpoint is not None

        filepath = join(
            self._checkpoint_dir(),
            f'checkpoint_{self.train_step:09d}_{self.env_steps}.pth')
        log.info('Saving %s...', filepath)
        torch.save(checkpoint, filepath)

        while len(self._get_checkpoints(
                self._checkpoint_dir())) > self.cfg.keep_checkpoints:
            oldest_checkpoint = self._get_checkpoints(
                self._checkpoint_dir())[0]
            if os.path.isfile(oldest_checkpoint):
                log.debug('Removing %s', oldest_checkpoint)
                os.remove(oldest_checkpoint)

        self._save_cfg()
Example #16
    def __init__(self, env_indices, make_env_func, use_multiprocessing):
        self._verbose = False

        self.make_env_func = make_env_func
        self.env_indices = env_indices
        self.timestep = 0

        if use_multiprocessing:
            self.task_queue, self.result_queue = JoinableQueue(), JoinableQueue()
            self.process = Process(target=self.start, daemon=False)
        else:
            log.debug('Not using multiprocessing!')
            self.task_queue, self.result_queue = Queue(), Queue()
            # shouldn't be named "process" here, but who cares
            self.process = threading.Thread(target=self.start)

        self.is_multiagent = False
        self.process.start()
Example #17
    def __init__(self, num_agents, make_env_func, env_config, skip_frames):
        self.num_agents = num_agents
        log.debug('Multi agent env, num agents: %d', self.num_agents)
        self.skip_frames = skip_frames  # number of frames to skip (1 = no skip)

        # temporary env just to query observation_space and action_space
        env = make_env_func(player_id=-1)
        self.action_space = env.action_space
        self.observation_space = env.observation_space

        # we can probably do this in a more generic way, but good enough for now
        self.default_reward_shaping = None
        if hasattr(env.unwrapped, '_reward_shaping_wrapper'):
            # noinspection PyProtectedMember
            self.default_reward_shaping = env.unwrapped._reward_shaping_wrapper.reward_shaping_scheme

        env.close()

        self.make_env_func = make_env_func

        self.safe_init = env_config is not None and env_config.get(
            'safe_init', False)

        if self.safe_init:
            sleep_seconds = env_config.worker_index * 1.0
            log.info(
                'Sleeping %.3f seconds to avoid creating all envs at once',
                sleep_seconds)
            time.sleep(sleep_seconds)
            log.info('Done sleeping at %d', env_config.worker_index)

        self.env_config = env_config
        self.workers = None

        # only needed when rendering
        self.enable_rendering = False
        self.last_obs = None

        self.reset_on_init = True

        self.initialized = False
Example #18
def make_dmlab_env_impl(spec, cfg, env_config, **kwargs):
    skip_frames = cfg.env_frameskip

    gpu_idx = 0
    if len(cfg.dmlab_gpus) > 0:
        if env_config is not None:
            vector_index = env_config['vector_index']
            gpu_idx = cfg.dmlab_gpus[vector_index % len(cfg.dmlab_gpus)]
            log.debug('Using GPU %d for DMLab rendering!', gpu_idx)

    task_id = get_task_id(env_config, spec, cfg)
    level = task_id_to_level(task_id, spec)
    log.debug('%r level %s task id %d', env_config, level, task_id)

    env = DmlabGymEnv(
        task_id,
        level,
        skip_frames,
        cfg.res_w,
        cfg.res_h,
        cfg.dmlab_throughput_benchmark,
        cfg.dmlab_renderer,
        get_dataset_path(cfg),
        cfg.dmlab_with_instructions,
        cfg.dmlab_extended_action_set,
        cfg.dmlab_use_level_cache,
        gpu_idx,
        spec.extra_cfg,
    )

    if env_config and 'env_id' in env_config:
        env.seed(env_config['env_id'])

    if 'record_to' in cfg and cfg.record_to is not None:
        env = RecordingWrapper(env, cfg.record_to, 0)

    if cfg.pixel_format == 'CHW':
        env = PixelFormatChwWrapper(env)

    env = DmlabRewardShapingWrapper(env)
    return env
Example #19
def create_multi_env(num_envs, num_workers, make_env_func, stats_episodes):
    """
    Create a vectorized env for single- and multi-agent case. This is only required for synchronous algorithms
    such as PPO and A2C. APPO uses a different mechanism with separate worker processes.
    """

    tmp_env = make_env_func(None)
    is_multiagent = hasattr(tmp_env, 'num_agents') and tmp_env.num_agents > 1

    if is_multiagent:
        assert num_envs % tmp_env.num_agents == 0
        log.debug('Num envs %d agents %d', num_envs, tmp_env.num_agents)
        num_envs = num_envs // tmp_env.num_agents
        from seed_rl.envs.doom.multiplayer.doom_multiagent_wrapper import MultiAgentEnvAggregator
        multi_env = MultiAgentEnvAggregator(num_envs, num_workers,
                                            make_env_func, stats_episodes)
    else:
        from seed_rl.algorithms.utils.multi_env import MultiEnv
        multi_env = MultiEnv(num_envs, num_workers, make_env_func,
                             stats_episodes)

    tmp_env.close()

    return multi_env
Example #20
    def test_doom_multiagent_multi_env(self):
        agents_per_env = 6
        num_envs = 2
        num_workers = 2

        skip_frames = 2  # hardcoded

        multi_env = MultiAgentEnvAggregator(
            num_envs=num_envs,
            num_workers=num_workers,
            make_env_func=self.make_standard_dm,
            stats_episodes=10,
            use_multiprocessing=True,
        )
        log.info('Before reset...')
        multi_env.reset()
        log.info('After reset...')

        actions = [multi_env.action_space.sample()] * (agents_per_env * num_envs)
        obs, rew, done, info = multi_env.step(actions)
        log.info('Rewards: %r', rew)

        start = time.time()
        num_steps = 300
        for i in range(num_steps):
            obs, rew, done, info = multi_env.step(actions)
            if i % 50 == 0:
                log.debug('Steps %d, rew: %r', i, rew)

        took = time.time() - start
        log.debug('Took %.3f sec to run %d steps, steps/sec: %.1f', took, num_steps, num_steps / took)
        log.debug('Observations fps: %.1f', num_steps * multi_env.num_agents * num_envs / took)
        log.debug('Environment fps: %.1f', num_steps * multi_env.num_agents * num_envs * skip_frames / took)

        multi_env.close()
        log.info('Done!')
Example #21
    def _ensure_initialized(self):
        if self.initialized:
            # Doom env already initialized!
            return

        self._create_doom_game(self.mode)
        port = DEFAULT_UDP_PORT if self.init_info is None else self.init_info.get(
            'port', DEFAULT_UDP_PORT)

        if self._is_server():
            log.info('Using port %d on host...', port)
            if not is_udp_port_available(port):
                raise Exception(f'Port {port} unavailable')

            # This process will function as a host for a multiplayer game with this many players (including the host).
            # It will wait for other machines to connect using the -join parameter and then
            # start the game when everyone is connected.
            game_args_list = [
                f'-host {self.max_num_players}',
                f'-port {port}',
                '-deathmatch',  # Deathmatch rules are used for the game.
                f'+timelimit {self.timelimit}',  # The game (episode) will end after this many minutes have elapsed.
                '+sv_forcerespawn 1',  # Players will respawn automatically after they die.
                '+sv_noautoaim 1',  # Autoaim is disabled for all players.
                '+sv_respawnprotect 1',  # Players will be invulnerable for two seconds after spawning.
                '+sv_spawnfarthest 1',  # Players will be spawned as far as possible from any other players.
                '+sv_nocrouch 1',  # Disables crouching.
                '+sv_nojump 1',  # Disables jumping.
                '+sv_nofreelook 1',  # Disables free look with a mouse (only keyboard).
                '+sv_noexit 1',  # Prevents players from exiting the level in deathmatch before timelimit is hit.
                f'+viz_respawn_delay {self.respawn_delay}',  # Sets delay between respawns (in seconds).
                '+viz_connect_timeout 4',  # In seconds
            ]
            self.game.add_game_args(' '.join(game_args_list))

            # Additional commands:
            #
            # disables depth and labels buffer and the ability to use commands
            # that could interfere with multiplayer game (should use this in evaluation)
            # '+viz_nocheat 1'

            # Name your agent and select color
            # colors:
            # 0 - green, 1 - gray, 2 - brown, 3 - red, 4 - light gray, 5 - light brown, 6 - light red, 7 - light blue
            self.game.add_game_args(
                f'+name AI{self.player_id}_host +colorset 0')

            if self.record_to is not None:
                # reportedly this does not work with bots
                demo_path = self.demo_path(self._num_episodes)
                log.debug('Recording multiplayer demo to %s', demo_path)
                self.game.add_game_args(f'-record {demo_path}')
        else:
            # Join existing game.
            self.game.add_game_args(
                f'-join 127.0.0.1:{port} '  # Connect to a host for a multiplayer game.
                '+viz_connect_timeout 4 ')

            # Name your agent and select color
            # colors:
            # 0 - green, 1 - gray, 2 - brown, 3 - red, 4 - light gray, 5 - light brown, 6 - light red, 7 - light blue
            self.game.add_game_args(f'+name AI{self.player_id} +colorset 0')

        self.game.set_episode_timeout(
            int(self.timelimit * 60 * self.game.get_ticrate()))

        # locking is handled by the multi-agent wrapper
        self._game_init(with_locking=False)
        log.info('Initialized w:%d v:%d player:%d', self.worker_index,
                 self.vector_index, self.player_id)
        self.initialized = True
Example #22
    def start(self):
        real_envs = []
        imagined_envs = None

        timing = AttrDict({'copying': 0, 'prediction': 0})

        while True:
            actions, msg_type = safe_get(self.task_queue)

            if msg_type == MsgType.INIT:
                self._init(real_envs)
                self.task_queue.task_done()
                continue

            if msg_type == MsgType.TERMINATE:
                self._terminate(real_envs, imagined_envs)
                self.task_queue.task_done()
                break

            # handling actual workload
            envs = real_envs
            if msg_type == MsgType.RESET or msg_type == MsgType.STEP_REAL or msg_type == MsgType.STEP_REAL_RESET:
                if imagined_envs is not None:
                    for imagined_env in imagined_envs:
                        imagined_env.close()
                imagined_envs = None
            elif msg_type == MsgType.INFO:
                pass
            else:

                if imagined_envs is None:
                    # initializing new prediction, let's report timing for the previous one
                    if timing.prediction > 0 and self._verbose:
                        log.debug(
                            'Multi-env copy took %.6f s, prediction took %.6f s',
                            timing.copying,
                            timing.prediction,
                        )

                    timing.prediction = 0
                    timing.copying = time.time()

                    imagined_envs = []
                    # we expect a list of actions for every environment in this worker (list of lists)
                    assert len(actions) == len(real_envs)
                    for env_idx in range(len(actions)):
                        for _ in actions[env_idx]:
                            imagined_env = copy.deepcopy(real_envs[env_idx])
                            imagined_envs.append(imagined_env)
                    timing.copying = time.time() - timing.copying

                envs = imagined_envs
                actions = np.asarray(actions).flatten()

            if msg_type == MsgType.RESET:
                results = [env.reset() for env in envs]
            elif msg_type == MsgType.INFO:
                results = [self._get_info(env) for env in envs]
            else:
                assert len(envs) == len(actions)

                reset = [False] * len(actions)
                if msg_type == MsgType.STEP_REAL_RESET:
                    actions, reset = zip(*actions)

                # Collect obs, reward, done, and info
                prediction_start = time.time()
                results = [
                    env.step(action) for env, action in zip(envs, actions)
                ]
                self.timestep += 1

                # pack results per-env
                results = np.split(np.array(results), len(real_envs))

                if msg_type == MsgType.STEP_IMAGINED:
                    timing.prediction += time.time() - prediction_start

                # If this is a real step and the env is done, reset
                if msg_type == MsgType.STEP_REAL or msg_type == MsgType.STEP_REAL_RESET:
                    for i, result in enumerate(results):
                        obs, reward, done, info = result[0]

                        if self.is_multiagent and all(done):
                            is_done = True
                        elif not self.is_multiagent and done:
                            is_done = True
                        else:
                            is_done = False

                        if is_done or reset[i]:
                            obs = real_envs[i].reset()
                            if not self.is_multiagent:
                                info = self._get_info(
                                    real_envs[i])  # info for the new episode

                        results[i] = (obs, reward, done, info)

            self.result_queue.put(results)
            self.task_queue.task_done()
Example #23
    def write(self, key, pk3_path):
        """Environment object itself acts as a proxy to the global level cache."""
        log.debug('Add new level to cache! Level %s seed %r key %s',
                  self.level_name, self.last_reset_seed, key)
        self.curr_cache.add_new_level(self.level, self.last_reset_seed, key,
                                      pk3_path)
Example #24
    def __init__(self, cache_dir, experiment_dir, all_levels_for_experiment,
                 policy_idx):
        self.cache_dir = cache_dir
        self.experiment_dir = experiment_dir
        self.policy_idx = policy_idx

        self.all_seeds = dict()
        self.available_seeds = dict()
        self.used_seeds = dict()
        self.num_seeds_used_in_current_run = dict()
        self.locks = dict()

        for lvl in all_levels_for_experiment:
            self.all_seeds[lvl] = []
            self.available_seeds[lvl] = []
            self.num_seeds_used_in_current_run[lvl] = multiprocessing.RawValue(
                ctypes.c_int32, 0)
            self.locks[lvl] = multiprocessing.Lock()

        log.debug('Reading the DMLab level cache...')
        cache_dir = ensure_dir_exists(cache_dir)

        lvl_seed_files = Path(cache_dir).rglob(f'*.{LEVEL_SEEDS_FILE_EXT}')
        for lvl_seed_file in lvl_seed_files:
            lvl_seed_file = str(lvl_seed_file)
            level = filename_to_level(os.path.relpath(lvl_seed_file,
                                                      cache_dir))
            self.all_seeds[level] = read_seeds_file(lvl_seed_file,
                                                    has_keys=True)
            self.all_seeds[level] = list(set(
                self.all_seeds[level]))  # leave only unique seeds
            log.debug('Level %s has %d total seeds available', level,
                      len(self.all_seeds[level]))

        log.debug('Updating level cache for the current experiment...')
        used_lvl_seeds_dir = self.get_used_seeds_dir()
        used_seeds_files = Path(used_lvl_seeds_dir).rglob(
            f'*.{LEVEL_SEEDS_FILE_EXT}')
        self.used_seeds = dict()
        for used_seeds_file in used_seeds_files:
            used_seeds_file = str(used_seeds_file)
            level = filename_to_level(
                os.path.relpath(used_seeds_file, used_lvl_seeds_dir))
            self.used_seeds[level] = read_seeds_file(used_seeds_file,
                                                     has_keys=False)
            log.debug('%d seeds already used in this experiment for level %s',
                      len(self.used_seeds[level]), level)

            self.used_seeds[level] = set(self.used_seeds[level])

        for lvl in all_levels_for_experiment:
            lvl_seeds = self.all_seeds.get(lvl, [])
            lvl_used_seeds = self.used_seeds.get(lvl, [])

            lvl_remaining_seeds = set(lvl_seeds) - set(lvl_used_seeds)
            self.available_seeds[lvl] = list(lvl_remaining_seeds)

            same_levels_for_population = False
            if same_levels_for_population:
                # shuffle with fixed seed so agents in population get the same levels
                random.Random(42).shuffle(self.available_seeds[lvl])
            else:
                random.shuffle(self.available_seeds[lvl])

            log.debug('Env %s has %d remaining unused seeds', lvl,
                      len(self.available_seeds[lvl]))

        log.debug('Done initializing global DMLab level cache!')
Example #25
    def sample(self, proc_idx):
        # workers should ignore Ctrl+C because the termination is handled in the event loop by a special msg
        signal.signal(signal.SIGINT, signal.SIG_IGN)

        timing = Timing()

        psutil.Process().nice(10)

        num_envs = len(DMLAB30_LEVELS_THAT_USE_LEVEL_CACHE)
        assert self.cfg.num_workers % num_envs == 0, f'should have an integer number of workers per env, e.g. {1 * num_envs}, {2 * num_envs}, etc...'
        assert self.cfg.num_envs_per_worker == 1, 'use populate_cache with 1 env per worker'

        with timing.timeit('env_init'):
            env_key = 'env'
            env_desired_num_levels = 0
            env_num_levels_generated = 0  # overridden below if the env exposes per-level info

            global_env_id = proc_idx * self.cfg.num_envs_per_worker
            env_config = AttrDict(worker_index=proc_idx,
                                  vector_index=0,
                                  env_id=global_env_id)
            env = create_env(self.cfg.env, cfg=self.cfg, env_config=env_config)
            env.seed(global_env_id)

            # this is to track the performance for individual DMLab levels
            if hasattr(env.unwrapped, 'level_name'):
                env_key = env.unwrapped.level_name
                env_level = env.unwrapped.level

                approx_num_episodes_per_1b_frames = DMLAB30_APPROX_NUM_EPISODES_PER_BILLION_FRAMES[
                    env_key]
                num_billions = DESIRED_TRAINING_LENGTH / int(1e9)
                num_workers_for_env = self.cfg.num_workers // num_envs
                env_desired_num_levels = int(
                    (approx_num_episodes_per_1b_frames * num_billions) /
                    num_workers_for_env)

                env_num_levels_generated = len(
                    dmlab_level_cache.DMLAB_GLOBAL_LEVEL_CACHE[0].
                    all_seeds[env_level]) // num_workers_for_env

                log.warning('Worker %d (env %s) generated %d/%d levels!',
                            proc_idx, env_key, env_num_levels_generated,
                            env_desired_num_levels)
                time.sleep(4)

            env.reset()
            env_uses_level_cache = env.unwrapped.env_uses_level_cache

            self.report_queue.put(dict(proc_idx=proc_idx, finished_reset=True))

        self.start_event.wait()

        try:
            with timing.timeit('work'):
                last_report = last_report_frames = total_env_frames = 0
                while not self.terminate.value and total_env_frames < self.cfg.sample_env_frames_per_worker:
                    action = env.action_space.sample()
                    with timing.add_time(f'{env_key}.step'):
                        env.step(action)

                    total_env_frames += 1

                    with timing.add_time(f'{env_key}.reset'):
                        env.reset()
                        env_num_levels_generated += 1
                        log.debug('Env %s done %d/%d resets', env_key,
                                  env_num_levels_generated,
                                  env_desired_num_levels)

                    if env_num_levels_generated >= env_desired_num_levels:
                        log.debug('%s finished %d/%d resets, sleeping...',
                                  env_key, env_num_levels_generated,
                                  env_desired_num_levels)
                        time.sleep(30)  # free up CPU time for other envs

                    # if env does not use level cache, there is no need to run it
                    # let other workers proceed
                    if not env_uses_level_cache:
                        log.debug('Env %s does not require cache, sleeping...',
                                  env_key)
                        time.sleep(200)

                    with timing.add_time('report'):
                        now = time.time()
                        if now - last_report > self.report_every_sec:
                            last_report = now
                            frames_since_last_report = total_env_frames - last_report_frames
                            last_report_frames = total_env_frames
                            self.report_queue.put(
                                dict(proc_idx=proc_idx,
                                     env_frames=frames_since_last_report))

                            if get_free_disk_space_mb() < 3 * 1024:
                                log.error('Not enough disk space! %d',
                                          get_free_disk_space_mb())
                                time.sleep(200)
        except:
            log.exception('Unknown exception')
            log.error('Unknown exception in worker %d, terminating...',
                      proc_idx)
            self.report_queue.put(dict(proc_idx=proc_idx, crash=True))

        time.sleep(proc_idx * 0.1 + 0.1)
        log.info('Process %d finished sampling. Timing: %s', proc_idx, timing)

        env.close()
Example #26
    def play_human_mode(env, skip_frames=1, num_episodes=3, num_actions=None):
        from pynput.keyboard import Listener

        doom = env.unwrapped
        doom.skip_frames = 1  # handled by this script separately

        # noinspection PyProtectedMember
        def start_listener():
            with Listener(on_press=doom._keyboard_on_press,
                          on_release=doom._keyboard_on_release) as listener:
                listener.join()

        listener_thread = Thread(target=start_listener)
        listener_thread.start()

        for episode in range(num_episodes):
            doom.mode = 'human'
            env.reset()
            last_render_time = time.time()
            time_between_frames = 1.0 / 35.0

            total_rew = 0.0

            while not doom.game.is_episode_finished() and not doom._terminate:
                num_actions = 14 if num_actions is None else num_actions
                turn_delta_action_idx = num_actions - 1

                actions = [0] * num_actions
                for action in doom._current_actions:
                    if isinstance(action, int):
                        # 1 for buttons currently pressed, 0 otherwise
                        actions[action] = 1
                    elif action == 'turn_left':
                        actions[turn_delta_action_idx] = -doom.delta_actions_scaling_factor
                    elif action == 'turn_right':
                        actions[turn_delta_action_idx] = doom.delta_actions_scaling_factor

                for frame in range(skip_frames):
                    doom._actions_flattened = actions
                    _, rew, _, _ = env.step(actions)

                    new_total_rew = total_rew + rew
                    if new_total_rew != total_rew:
                        log.info('Reward: %.3f, total: %.3f', rew,
                                 new_total_rew)
                    total_rew = new_total_rew
                    state = doom.game.get_state()

                    verbose = True
                    if state is not None and verbose:
                        info = doom.get_info()
                        print(
                            'Health:',
                            info['HEALTH'],
                            # 'Weapon:', info['SELECTED_WEAPON'],
                            # 'ready:', info['ATTACK_READY'],
                            # 'ammo:', info['SELECTED_WEAPON_AMMO'],
                            # 'pc:', info['PLAYER_COUNT'],
                            # 'dmg:', info['DAMAGECOUNT'],
                        )

                    time_since_last_render = time.time() - last_render_time
                    time_wait = time_between_frames - time_since_last_render

                    if doom.show_automap and state is not None and state.automap_buffer is not None:
                        map_ = state.automap_buffer
                        map_ = np.swapaxes(map_, 0, 2)
                        map_ = np.swapaxes(map_, 0, 1)
                        cv2.imshow('ViZDoom Automap Buffer', map_)
                        if time_wait > 0:
                            cv2.waitKey(int(time_wait * 1000))
                    else:
                        if time_wait > 0:
                            time.sleep(time_wait)

                    last_render_time = time.time()

            if doom.show_automap:
                cv2.destroyAllWindows()

        log.debug('Press ESC to exit...')
        listener_thread.join()