Example 1
    def forward_pass(device_type):
        env_name = 'atari_breakout'
        cfg = default_cfg(algo='appooc', env=env_name)
        cfg.actor_critic_share_weights = True
        cfg.hidden_size = 128
        cfg.use_rnn = True
        cfg.env_framestack = 4

        env = create_env(env_name, cfg=cfg)

        torch.set_num_threads(1)
        torch.backends.cudnn.benchmark = True

        actor_critic = create_actor_critic(cfg, env.observation_space, env.action_space)
        device = torch.device(device_type)
        actor_critic.to(device)

        timing = Timing()
        with timing.timeit('all'):
            batch = 128
            with timing.add_time('input'):
                # NOTE: the observation shape is hardcoded here (framestack x 84 x 84 for Atari);
                # deriving it from env.observation_space would be more robust
                observations = dict(obs=torch.rand([batch, 4, 84, 84]).to(device))
                rnn_states = torch.rand([batch, get_hidden_size(cfg)]).to(device)

            n = 200
            for i in range(n):
                with timing.add_time('forward'):
                    output = actor_critic(observations, rnn_states)

                log.debug('Progress %d/%d', i, n)

        log.debug('Timing: %s', timing)
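A minimal driver for the benchmark above (a sketch; the helper name run_forward_pass_benchmarks is hypothetical, and torch.cuda.is_available() is the standard guard for the GPU path):

    import torch

    def run_forward_pass_benchmarks():
        # benchmark the model on CPU first, then on GPU if one is present
        forward_pass('cpu')
        if torch.cuda.is_available():
            forward_pass('cuda')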
Example 2
    def test_quad_env(self):
        # create the env once and reuse it (avoids leaking a second instance)
        env = create_env('quadrotor_single')
        self.assertIsNotNone(env)
        obs = env.reset()

        n_frames = 10000

        timing = Timing()
        with timing.timeit('step'):
            for i in range(n_frames):
                obs, r, d, info = env.step(env.action_space.sample())
                if d:
                    env.reset()

        log.debug('Time %s, FPS %.1f', timing, n_frames / timing.step)
Example 3
def make_env_func(cfg, env_config):
    env = create_env(cfg.env, cfg=cfg, env_config=env_config)
    if not is_multiagent_env(env):
        env = MultiAgentWrapper(env)
    if not isinstance(env.observation_space, spaces.Dict):
        env = DictObservationsWrapper(env)
    return env
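The two wrappers give every env a uniform interface: multi-agent semantics plus dict observations. A sketch of a call site, borrowing the AttrDict env_config fields (worker_index, vector_index, env_id) from Example 19; the concrete values here are hypothetical:

    # one env per (worker, vector) slot; field names taken from Example 19
    env_config = AttrDict(worker_index=0, vector_index=0, env_id=0)
    env = make_env_func(cfg, env_config)
    obs = env.reset()  # dict observations, guaranteed by DictObservationsWrapper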
Example 4
    def __init__(
        self,
        game=None,
        frame_skip=4,  # Frames per step (>=1).
        num_img_obs=4,  # Number of (past) frames in observation (>=1).
        clip_reward=True,
        episodic_lives=True,
        max_start_noops=30,
        repeat_action_probability=0.,
        horizon=27000,
    ):

        if not game:
            game = 'doom_battle'

        cfg = default_cfg(env=game)
        cfg.wide_aspect_ratio = False

        self.env = create_env(game, cfg=cfg)
        self._observation_space = self.env.observation_space

        gym_action_space = self.env.action_space
        self._action_space = IntBox(
            low=0, high=gym_action_space.n)  # only for discrete space

        self.first_reset = True
Example 5
    def __init__(self,
                 level,
                 config,
                 num_action_repeats,
                 seed,
                 runfiles_path=None,
                 level_cache=None):
        self._observation_spec = ['RGB_INTERLEAVED']
        env_name = 'doom_benchmark'
        cfg = default_cfg(env=env_name, algo=None)
        cfg.pixel_format = 'HWC'
        cfg.res_w = DOOM_W
        cfg.res_h = DOOM_H
        cfg.wide_aspect_ratio = False
        self._env = create_env(env_name, cfg=cfg)

        lock = FileLock(DOOM_LOCK_PATH)
        attempt = 0
        while True:
            attempt += 1
            try:
                with lock.acquire(timeout=10):
                    print('Env created, resetting...')
                    self._env.reset()
                    print('Env reset completed!')
                    break
            except Timeout:
                print(
                    'Another instance of this application currently holds the lock, attempt:',
                    attempt)
Example 6
def make_env_func(cfg, env_config):
    env = create_env(cfg.env, cfg=cfg, env_config=env_config)
    if not hasattr(env, 'num_agents') or env.num_agents <= 1:
        env = MultiAgentWrapper(env)
    if not isinstance(env.observation_space, spaces.Dict):
        env = DictObservationsWrapper(env)
    return env
Example 7
    def test_quad_multi_env(self):
        env_name = 'quadrotor_multi'
        cfg = default_cfg(env=env_name)
        env = create_env(env_name, cfg=cfg)
        self.assertIsNotNone(env)
        env.reset()

        n_frames = 1000

        timing = Timing()
        with timing.timeit('step'):
            for i in range(n_frames):
                obs, r, d, info = env.step(
                    [env.action_space.sample() for _ in range(env.num_agents)])

        log.debug('Time %s, FPS %.1f', timing, n_frames / timing.step)
Example 8
    def initialize(self):
        # creating an environment in the main process tends to fix some very weird issues further down the line
        # https://stackoverflow.com/questions/60963839/importing-opencv-after-importing-pytorch-messes-with-cpu-affinity
        # do not delete this unless you know what you're doing
        tmp_env = create_env(self.cfg.env, cfg=self.cfg, env_config=None)
        tmp_env.close()

        for i in range(self.cfg.num_workers):
            p = multiprocessing.Process(target=self.sample, args=(i, ))
            self.processes.append(p)
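Example 8 only creates the worker processes. A sketch of the complementary start/join half (the method name run is hypothetical; self.sample is the worker entry point shown in Example 19):

    def run(self):
        # launch all sampling workers, then block until they finish
        for p in self.processes:
            p.start()
        for p in self.processes:
            p.join()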
Example 9
def main():
    env_name = 'doom_battle'
    env = create_env(env_name, cfg=default_cfg(env=env_name))

    env.reset()
    done = False
    while not done:
        env.render()
        obs, rew, done, info = env.step(env.action_space.sample())

    log.info('Done!')
Example 10
def run_multi_quadrotor_env(env_name, cfg):
    env = create_env(env_name, cfg=cfg)
    env.reset()
    for i in range(100):
        obs, r, d, info = env.step(
            [env.action_space.sample() for _ in range(env.num_agents)])

    n_frames = 1000
    env.close()  # close the warmup env before creating a fresh one for timing
    env = create_env(env_name, cfg=cfg)
    env.reset()

    timing = Timing()
    with timing.timeit('step'):
        for i in range(n_frames):
            obs, r, d, info = env.step(
                [env.action_space.sample() for _ in range(env.num_agents)])

    log.debug('Time %s, FPS %.1f', timing,
              n_frames * env.num_agents / timing.step)
    env.close()
Example 11
    def test_minigrid_env(self):
        env_name = 'MiniGrid-Empty-Random-5x5-v0'
        env = create_env(env_name, cfg=default_cfg(env=env_name))
        log.info('Env action space: %r', env.action_space)
        log.info('Env obs space: %r', env.observation_space)

        env.reset()
        total_rew = 0
        for i in range(1000):
            obs, rew, done, info = env.step(env.action_space.sample())
            total_rew += rew
            if done:
                env.reset()
        log.info('Total rew: %.3f', total_rew)
Example 12
    def test_voxel_env(self):
        env_name = 'voxel_env_Sokoban'
        env = create_env(env_name, cfg=default_cfg(env=env_name))
        log.info('Env action space: %r', env.action_space)
        log.info('Env obs space: %r', env.observation_space)

        env.reset()
        total_rew = 0
        for i in range(1000):
            obs, rew, done, info = env.step([env.action_space.sample() for _ in range(env.num_agents)])
            total_rew += sum(rew)

        log.info('Total rew: %.3f', total_rew)
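The multi-agent tests above all rely on the same step contract, sketched here (an assumption inferred from Examples 7, 10, and 12, not a documented API): one action per agent goes in, and per-agent lists come out.

    actions = [env.action_space.sample() for _ in range(env.num_agents)]
    obs, rew, done, info = env.step(actions)  # each a list with one entry per agent
    assert len(obs) == len(rew) == len(done) == env.num_agents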
Example 13
 def __init__(self,
              level,
              config,
              num_action_repeats,
              seed,
              runfiles_path=None,
              level_cache=None):
     self._observation_spec = ['RGB_INTERLEAVED']
     env_name = 'atari_breakout'
     cfg = default_cfg(env=env_name, algo=None)
     cfg.pixel_format = 'HWC'
     cfg.res_w = ATARI_W
     cfg.res_h = ATARI_H
     self._env = create_env(env_name, cfg=cfg)
Example 14
    def __init__(
        self,
        game=None,
        frame_skip=4,  # Frames per step (>=1).
        num_img_obs=4,  # Number of (past) frames in observation (>=1).
        clip_reward=True,
        episodic_lives=True,
        max_start_noops=30,
        repeat_action_probability=0.,
        horizon=27000,
    ):

        cfg = default_cfg(env=game)
        cfg.res_w = 96
        cfg.res_h = 72
        cfg.dmlab_throughput_benchmark = True
        cfg.dmlab_renderer = 'software'

        self.env = create_env(game, cfg=cfg)
        self._observation_space = self.env.observation_space

        gym_action_space = self.env.action_space
        self._action_space = IntBox(
            low=0, high=gym_action_space.n)  # only for discrete space
Example 15
 def make_env_func(env_config):
     return create_env(cfg.env, cfg=cfg, env_config=env_config)
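Unlike Examples 3 and 6, this factory captures cfg from the enclosing scope rather than taking it as a parameter. A sketch of the call site (env_config fields borrowed from Example 19; the values are hypothetical):

 # cfg comes from the closure, so only env_config varies per worker
 env = make_env_func(AttrDict(worker_index=0, vector_index=0, env_id=0))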
Example 16
 def test_quad_multi_env_with_numba(self):
     env_name = 'quadrotor_multi'
     cfg = default_cfg(env=env_name)
     cfg.quads_use_numba = True
     self.assertIsNotNone(create_env(env_name, cfg=cfg))
     run_multi_quadrotor_env(env_name, cfg)
Example 17
 def test_quad_multi_env(self):
     env_name = 'quadrotor_multi'
     cfg = default_cfg(env=env_name)
     self.assertIsNotNone(create_env(env_name, cfg=cfg))
     run_multi_quadrotor_env(env_name, cfg)
Example 18
 def make_env_func(env_config):
     return create_env(ENV_NAME, cfg=common_config, env_config=env_config)
Example 19
    def sample(self, proc_idx):
        # workers should ignore Ctrl+C because the termination is handled in the event loop by a special msg
        signal.signal(signal.SIGINT, signal.SIG_IGN)

        timing = Timing()

        from threadpoolctl import threadpool_limits
        with threadpool_limits(limits=1, user_api=None):
            if self.cfg.set_workers_cpu_affinity:
                set_process_cpu_affinity(proc_idx, self.cfg.num_workers)

            initial_cpu_affinity = psutil.Process().cpu_affinity() if platform != 'darwin' else None
            psutil.Process().nice(10)

            with timing.timeit('env_init'):
                envs = []
                env_key = ['env' for _ in range(self.cfg.num_envs_per_worker)]

                for env_idx in range(self.cfg.num_envs_per_worker):
                    global_env_id = proc_idx * self.cfg.num_envs_per_worker + env_idx
                    env_config = AttrDict(worker_index=proc_idx,
                                          vector_index=env_idx,
                                          env_id=global_env_id)
                    env = create_env(self.cfg.env,
                                     cfg=self.cfg,
                                     env_config=env_config)
                    log.debug(
                        'CPU affinity after create_env: %r',
                        psutil.Process().cpu_affinity()
                        if platform != 'darwin' else 'MacOS - None')
                    env.seed(global_env_id)
                    envs.append(env)

                    # this is to track the performance for individual DMLab levels
                    if hasattr(env.unwrapped, 'level_name'):
                        env_key[env_idx] = env.unwrapped.level_name

                episode_length = [0 for _ in envs]
                episode_lengths = [deque([], maxlen=20) for _ in envs]

            try:
                with timing.timeit('first_reset'):
                    for env_idx, env in enumerate(envs):
                        env.reset()
                        log.info('Process %d finished resetting %d/%d envs',
                                 proc_idx, env_idx + 1, len(envs))

                    self.report_queue.put(
                        dict(proc_idx=proc_idx, finished_reset=True))

                self.start_event.wait()

                with timing.timeit('work'):
                    last_report = last_report_frames = total_env_frames = 0
                    while not self.terminate.value and total_env_frames < self.cfg.sample_env_frames_per_worker:
                        for env_idx, env in enumerate(envs):
                            action = env.action_space.sample()
                            with timing.add_time(f'{env_key[env_idx]}.step'):
                                obs, reward, done, info = env.step(action)

                            num_frames = info.get('num_frames', 1)
                            total_env_frames += num_frames
                            episode_length[env_idx] += num_frames

                            if done:
                                with timing.add_time(
                                        f'{env_key[env_idx]}.reset'):
                                    env.reset()

                                episode_lengths[env_idx].append(
                                    episode_length[env_idx])
                                episode_length[env_idx] = 0

                        with timing.add_time('report'):
                            now = time.time()
                            if now - last_report > self.report_every_sec:
                                last_report = now
                                frames_since_last_report = total_env_frames - last_report_frames
                                last_report_frames = total_env_frames
                                self.report_queue.put(
                                    dict(proc_idx=proc_idx,
                                         env_frames=frames_since_last_report))

                # Extra check to make sure cpu affinity is preserved throughout the execution.
                # I observed weird effect when some environments tried to alter affinity of the current process, leading
                # to decreased performance.
                # This can be caused by some interactions between deep learning libs, OpenCV, MKL, OpenMP, etc.
                # At least user should know about it if this is happening.
                cpu_affinity = psutil.Process().cpu_affinity() if platform != 'darwin' else None
                assert initial_cpu_affinity == cpu_affinity, \
                    f'Worker CPU affinity was changed from {initial_cpu_affinity} to {cpu_affinity}! ' \
                    f'This can significantly affect performance!'

            except Exception:
                log.exception('Unknown exception')
                log.error('Unknown exception in worker %d, terminating...',
                          proc_idx)
                self.report_queue.put(dict(proc_idx=proc_idx, crash=True))

            time.sleep(proc_idx * 0.01 + 0.01)
            log.info('Process %d finished sampling. Timing: %s', proc_idx,
                     timing)

            for env_idx, env in enumerate(envs):
                if len(episode_lengths[env_idx]) > 0:
                    log.warning('Level %s avg episode len %d',
                                env_key[env_idx],
                                np.mean(episode_lengths[env_idx]))

            for env in envs:
                env.close()
Example 20
    def sample(self, proc_idx):
        # workers should ignore Ctrl+C because the termination is handled in the event loop by a special msg
        signal.signal(signal.SIGINT, signal.SIG_IGN)

        timing = Timing()

        psutil.Process().nice(10)

        num_envs = len(DMLAB30_LEVELS_THAT_USE_LEVEL_CACHE)
        assert self.cfg.num_workers % num_envs == 0, f'should have an integer number of workers per env, e.g. {1 * num_envs}, {2 * num_envs}, etc...'
        assert self.cfg.num_envs_per_worker == 1, 'use populate_cache with 1 env per worker'

        with timing.timeit('env_init'):
            env_key = 'env'
            env_desired_num_levels = 0
            env_num_levels_generated = 0  # initialize here to avoid a NameError for envs without level_name

            global_env_id = proc_idx * self.cfg.num_envs_per_worker
            env_config = AttrDict(worker_index=proc_idx, vector_index=0, env_id=global_env_id)
            env = create_env(self.cfg.env, cfg=self.cfg, env_config=env_config)
            env.seed(global_env_id)

            # this is to track the performance for individual DMLab levels
            if hasattr(env.unwrapped, 'level_name'):
                env_key = env.unwrapped.level_name
                env_level = env.unwrapped.level

                approx_num_episodes_per_1b_frames = DMLAB30_APPROX_NUM_EPISODES_PER_BILLION_FRAMES[
                    env_key]
                num_billions = DESIRED_TRAINING_LENGTH / int(1e9)
                num_workers_for_env = self.cfg.num_workers // num_envs
                env_desired_num_levels = int(
                    (approx_num_episodes_per_1b_frames * num_billions) / num_workers_for_env)

                env_num_levels_generated = len(
                    dmlab_level_cache.DMLAB_GLOBAL_LEVEL_CACHE[0].all_seeds[env_level]
                ) // num_workers_for_env

                log.warning('Worker %d (env %s) generated %d/%d levels!',
                            proc_idx,
                            env_key,
                            env_num_levels_generated,
                            env_desired_num_levels)
                time.sleep(4)

            env.reset()
            env_uses_level_cache = env.unwrapped.env_uses_level_cache

            self.report_queue.put(dict(proc_idx=proc_idx, finished_reset=True))

        self.start_event.wait()

        try:
            with timing.timeit('work'):
                last_report = last_report_frames = total_env_frames = 0
                while not self.terminate.value and total_env_frames < self.cfg.sample_env_frames_per_worker:
                    action = env.action_space.sample()
                    with timing.add_time(f'{env_key}.step'):
                        env.step(action)

                    total_env_frames += 1

                    with timing.add_time(f'{env_key}.reset'):
                        env.reset()
                        env_num_levels_generated += 1
                        log.debug('Env %s done %d/%d resets',
                                  env_key,
                                  env_num_levels_generated,
                                  env_desired_num_levels)

                    if env_num_levels_generated >= env_desired_num_levels:
                        log.debug('%s finished %d/%d resets, sleeping...',
                                  env_key,
                                  env_num_levels_generated,
                                  env_desired_num_levels)
                        time.sleep(30)  # free up CPU time for other envs

                    # if env does not use level cache, there is no need to run it
                    # let other workers proceed
                    if not env_uses_level_cache:
                        log.debug('Env %s does not require cache, sleeping...', env_key)
                        time.sleep(200)

                    with timing.add_time('report'):
                        now = time.time()
                        if now - last_report > self.report_every_sec:
                            last_report = now
                            frames_since_last_report = total_env_frames - last_report_frames
                            last_report_frames = total_env_frames
                            self.report_queue.put(
                                dict(proc_idx=proc_idx, env_frames=frames_since_last_report))

                            if get_free_disk_space_mb(self.cfg) < 3 * 1024:
                                log.error('Not enough disk space! %d',
                                          get_free_disk_space_mb(self.cfg))
                                time.sleep(200)
        except Exception:
            log.exception('Unknown exception')
            log.error('Unknown exception in worker %d, terminating...', proc_idx)
            self.report_queue.put(dict(proc_idx=proc_idx, crash=True))

        time.sleep(proc_idx * 0.1 + 0.1)
        log.info('Process %d finished sampling. Timing: %s', proc_idx, timing)

        env.close()