Example #1
def set_gpus_for_process(process_idx, num_gpus_per_process, process_type, gpu_mask=None):
    available_gpus = get_available_gpus()
    if gpu_mask is not None:
        assert len(available_gpus) >= len(gpu_mask)
        available_gpus = [available_gpus[g] for g in gpu_mask]
    num_gpus = len(available_gpus)
    gpus_to_use = []

    if num_gpus == 0:
        os.environ[CUDA_ENVVAR] = ''
        log.debug('Not using GPUs for %s process %d', process_type, process_idx)
    else:
        first_gpu_idx = process_idx * num_gpus_per_process
        for i in range(num_gpus_per_process):
            index_mod_num_gpus = (first_gpu_idx + i) % num_gpus
            gpus_to_use.append(available_gpus[index_mod_num_gpus])

        os.environ[CUDA_ENVVAR] = ','.join([str(g) for g in gpus_to_use])
        log.info(
            'Set environment var %s to %r for %s process %d',
            CUDA_ENVVAR, os.environ[CUDA_ENVVAR], process_type, process_idx,
        )
        log.debug('Visible devices: %r', torch.cuda.device_count())

    return gpus_to_use
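A minimal usage sketch (hypothetical values; assumes CUDA_ENVVAR resolves to CUDA_VISIBLE_DEVICES and get_available_gpus() returns [0, 1]):

# hypothetical: called from inside actor process #2 on a 2-GPU machine
gpus = set_gpus_for_process(process_idx=2, num_gpus_per_process=1, process_type='actor')
# round-robin index: (2 * 1 + 0) % 2 == 0, so gpus == [0] and CUDA_VISIBLE_DEVICES == '0'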
Example #2
    def _game_init(self, with_locking=True, max_parallel=10):
        lock_file = lock = None
        if with_locking:
            lock_file = doom_lock_file(max_parallel)
            lock = FileLock(lock_file)

        init_attempt = 0
        while True:
            init_attempt += 1
            try:
                if with_locking:
                    with lock.acquire(timeout=20):
                        self.game.init()
                else:
                    self.game.init()

                break
            except Timeout:
                if with_locking:
                    log.debug(
                        'Another process currently holds the lock %s, attempt: %d',
                        lock_file,
                        init_attempt,
                    )
            except Exception as exc:
                log.warning(
                    'VizDoom game.init() threw an exception %r. Terminate process...',
                    exc)
                from sample_factory.envs.env_utils import EnvCriticalError
                raise EnvCriticalError()
Example #3
def find_available_port(start_port, increment=1000):
    port = start_port
    while port < 65535 and not is_udp_port_available(port):
        port += increment

    log.debug('Port %r is available', port)
    return port
Example #4
    def print_stats(self, fps, sample_throughput, total_env_steps):
        fps_str = []
        for interval, fps_value in zip(self.avg_stats_intervals, fps):
            fps_str.append(
                f'{int(interval * self.report_interval)} sec: {fps_value:.1f}')
        fps_str = f'({", ".join(fps_str)})'

        samples_per_policy = ', '.join(
            [f'{p}: {s:.1f}' for p, s in sample_throughput.items()])

        lag_stats = self.policy_lag[0]
        lag = AttrDict()
        for key in ['min', 'avg', 'max']:
            lag[key] = lag_stats.get(f'version_diff_{key}', -1)
        policy_lag_str = f'min: {lag.min:.1f}, avg: {lag.avg:.1f}, max: {lag.max:.1f}'

        log.debug(
            'Fps is %s. Total num frames: %d. Throughput: %s. Samples: %d. Policy #0 lag: (%s)',
            fps_str,
            total_env_steps,
            samples_per_policy,
            sum(self.samples_collected),
            policy_lag_str,
        )

        if 'reward' in self.policy_avg_stats:
            policy_reward_stats = []
            for policy_id in range(self.cfg.num_policies):
                reward_stats = self.policy_avg_stats['reward'][policy_id]
                if len(reward_stats) > 0:
                    policy_reward_stats.append(
                        (policy_id, f'{np.mean(reward_stats):.3f}'))
            log.debug('Avg episode reward: %r', policy_reward_stats)
Example #5
    def _learner_load_model(self, policy_id, replacement_policy):
        log.debug('Asking learner %d to load model from %d', policy_id,
                  replacement_policy)

        load_task = (PbtTask.LOAD_MODEL, (policy_id, replacement_policy))
        learner_worker = self.learner_workers[policy_id]
        learner_worker.task_queue.put((TaskType.PBT, load_task))
Example #6
def load_from_checkpoint(cfg):
    filename = cfg_file(cfg)
    if not os.path.isfile(filename):
        raise Exception(
            f'Could not load saved parameters for experiment {cfg.experiment}')

    with open(filename, 'r') as json_file:
        json_params = json.load(json_file)
        log.warning('Loading existing experiment configuration from %s',
                    filename)
        loaded_cfg = AttrDict(json_params)

    # override the parameters in config file with values passed from command line
    for key, value in cfg.cli_args.items():
        if key in loaded_cfg and loaded_cfg[key] != value:
            log.debug(
                'Overriding arg %r with value %r passed from command line',
                key, value)
            loaded_cfg[key] = value

    # incorporate extra CLI parameters that were not present in JSON file
    for key, value in vars(cfg).items():
        if key not in loaded_cfg:
            log.debug(
                'Adding new argument %r=%r that is not in the saved config file!',
                key, value)
            loaded_cfg[key] = value

    return loaded_cfg
Example #7
    def _learner_update_cfg(self, policy_id):
        learner_worker = self.learner_workers[policy_id]

        log.debug('Sending learning configuration to learner %d...', policy_id)
        cfg_task = (PbtTask.UPDATE_CFG, (policy_id,
                                         self.policy_cfg[policy_id]))
        learner_worker.task_queue.put((TaskType.PBT, cfg_task))
Example #8
    def __init__(self, cfg, obs_space, timing):
        super().__init__(cfg, timing)

        obs_shape = get_obs_shape(obs_space)
        input_ch = obs_shape.obs[0]
        log.debug('Num input channels: %d', input_ch)

        if cfg.encoder_subtype == 'convnet_simple':
            conv_filters = [[input_ch, 32, 8, 4], [32, 64, 4, 2],
                            [64, 128, 3, 2]]
        elif cfg.encoder_subtype == 'convnet_impala':
            conv_filters = [[input_ch, 16, 8, 4], [16, 32, 4, 2]]
        elif cfg.encoder_subtype == 'minigrid_convnet_tiny':
            conv_filters = [[3, 16, 3, 1], [16, 32, 2, 1], [32, 64, 2, 1]]
        else:
            raise NotImplementedError(f'Unknown encoder {cfg.encoder_subtype}')

        activation = nonlinearity(self.cfg)
        fc_layer_size = fc_after_encoder_size(self.cfg)
        encoder_extra_fc_layers = self.cfg.encoder_extra_fc_layers

        enc = self.ConvEncoderImpl(activation, conv_filters, fc_layer_size,
                                   encoder_extra_fc_layers, obs_shape)
        self.enc = torch.jit.script(enc)

        self.encoder_out_size = calc_num_elements(self.enc, obs_shape.obs)
        log.debug('Encoder output size: %r', self.encoder_out_size)
Example #9
    def _save_reward_shaping(self, policy_id):
        policy_reward_shaping_filename = policy_reward_shaping_file(
            self.cfg, policy_id)
        with open(policy_reward_shaping_filename, 'w') as json_file:
            log.debug('Saving policy-specific reward shaping %d to file %s',
                      policy_id, policy_reward_shaping_filename)
            json.dump(self.policy_reward_shaping[policy_id], json_file)
Example #10
def make_voxel_env(env_name, cfg=None, env_config=None, **kwargs):
    scenario_name = env_name.split('voxel_env_')[-1].casefold()
    log.debug('Using scenario %s', scenario_name)

    if 'multitask' in scenario_name:
        if env_config is not None and 'worker_index' in env_config:
            task_idx = env_config['worker_index']
        else:
            log.warning('Could not find information about task id. Use task_id=0. (It is okay if this message appears once)')
            task_idx = 0

        env = make_env_multitask(
            scenario_name,
            task_idx,
            num_envs=cfg.voxel_num_envs_per_instance,
            num_agents_per_env=cfg.voxel_num_agents_per_env,
            num_simulation_threads=cfg.voxel_num_simulation_threads,
            use_vulkan=cfg.voxel_use_vulkan,
        )
    else:
        env = VoxelEnv(
            scenario_name=scenario_name,
            num_envs=cfg.voxel_num_envs_per_instance,
            num_agents_per_env=cfg.voxel_num_agents_per_env,
            num_simulation_threads=cfg.voxel_num_simulation_threads,
            use_vulkan=cfg.voxel_use_vulkan,
        )

    env = Wrapper(env, cfg.voxel_increase_team_spirit, cfg.voxel_max_team_spirit_steps)
    return env
Example #11
    def __init__(self, cfg, obs_space, timing):
        super().__init__(cfg, timing)

        self.basic_encoder = create_standard_encoder(cfg, obs_space, timing)
        self.encoder_out_size = self.basic_encoder.encoder_out_size

        # same as IMPALA paper
        self.embedding_size = 20
        self.instructions_lstm_units = 64
        self.instructions_lstm_layers = 1

        padding_idx = 0
        self.word_embedding = nn.Embedding(
            num_embeddings=DMLAB_VOCABULARY_SIZE,
            embedding_dim=self.embedding_size,
            padding_idx=padding_idx
        )

        self.instructions_lstm = nn.LSTM(
            input_size=self.embedding_size,
            hidden_size=self.instructions_lstm_units,
            num_layers=self.instructions_lstm_layers,
            batch_first=True,
        )

        # learnable initial state?
        # initial_hidden_values = torch.normal(0, 1, size=(self.instructions_lstm_units, ))
        # self.lstm_h0 = nn.Parameter(initial_hidden_values, requires_grad=True)
        # self.lstm_c0 = nn.Parameter(initial_hidden_values, requires_grad=True)

        self.encoder_out_size += self.instructions_lstm_units
        log.debug('Policy head output size: %r', self.encoder_out_size)

        self.cpu_device = torch.device('cpu')
Example #12
    def step(self, actions):
        if self.skip_frames > 1 or self.num_agents == 1:
            # not used in multi-agent mode due to VizDoom limitations
            # this means that we have only one agent (+ maybe some bots, which is why we're in multiplayer mode)
            return super().step(actions)

        self._ensure_initialized()

        actions_binary = self._convert_actions(actions)

        self.game.set_action(actions_binary)
        self.game.advance_action(1, self.update_state)
        self.timestep += 1

        if not self.update_state:
            return None, None, None, None

        state = self.game.get_state()
        reward = self.game.get_last_reward()
        done = self.game.is_episode_finished()

        if self.record_to is not None:
            # send 'stop recording' command 1 tick before the end of the episode
            # otherwise it does not get saved to disk
            if self.game.get_episode_time() + 1 == self.game.get_episode_timeout():
                log.debug('Calling stop recording command!')
                self.game.send_game_command('stop')

        observation, done, info = self._process_game_step(state, done, {})
        return observation, reward, done, info
Example #13
    def _perturb_param(self, param, param_name, default_param):
        # toss a coin whether we perturb the parameter at all
        if random.random() > self.cfg.pbt_mutation_rate:
            return param

        if param != default_param and random.random() < 0.05:
            # small chance to replace parameter with a default value
            log.debug('%s changed to default value %r', param_name,
                      default_param)
            return default_param

        if param_name in SPECIAL_PERTURBATION:
            new_value = SPECIAL_PERTURBATION[param_name](param, self.cfg)
        elif type(param) is bool:
            new_value = not param
        elif isinstance(param, numbers.Number):
            perturb_amount = random.uniform(1.01, 1.5)
            new_value = perturb_float(float(param),
                                      perturb_amount=perturb_amount)
        else:
            raise RuntimeError('Unsupported parameter type')

        log.debug('Param %s changed from %.6f to %.6f', param_name, param,
                  new_value)
        return new_value
Example #14
def register_custom_encoder(custom_encoder_name, encoder_cls):
    assert issubclass(
        encoder_cls,
        EncoderBase), 'Custom encoders must be derived from EncoderBase'
    assert custom_encoder_name not in ENCODER_REGISTRY

    log.debug('Adding model class %r to registry (with name %s)', encoder_cls,
              custom_encoder_name)
    ENCODER_REGISTRY[custom_encoder_name] = encoder_cls
Example #15
    def finish_initialization(self):
        """Wait until policy workers are fully initialized."""
        for policy_id, workers in self.policy_workers.items():
            for w in workers:
                log.debug(
                    'Waiting for policy worker %d-%d to finish initialization...',
                    policy_id, w.worker_idx)
                w.init()
                log.debug('Policy worker %d-%d initialized!', policy_id,
                          w.worker_idx)
Example #16
def register_additional_doom_env(doom_spec):
    try:
        spec = doom_env_by_name(doom_spec.name)
        log.error('Doom env spec %s already exists', spec.name)
        return
    except RuntimeError:
        pass

    log.debug('Registering Doom environment %s...', doom_spec.name)
    DOOM_ENVS.append(doom_spec)
Example #17
    def finalize(self):
        try:
            self.report_queue.get_many_nowait()
        except Empty:
            pass

        log.debug('Joining worker processes...')
        for p in self.processes:
            p.join()
        log.debug('Done joining!')
Example #18
def register_custom_encoder(custom_encoder_name, encoder_cls):
    if custom_encoder_name in ENCODER_REGISTRY:
        log.warning('Encoder %s already registered', custom_encoder_name)

    assert issubclass(
        encoder_cls,
        EncoderBase), 'Custom encoders must be derived from EncoderBase'

    log.debug('Adding model class %r to registry (with name %s)', encoder_cls,
              custom_encoder_name)
    ENCODER_REGISTRY[custom_encoder_name] = encoder_cls
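A hypothetical registration sketch, mirroring the (cfg, obs_space, timing) encoder constructors shown in the other examples here; the flat 1-D observation space and the forward() contract (a dict with the main observation under 'obs') are assumptions, not documented API:

class MyFlatObsEncoder(EncoderBase):
    def __init__(self, cfg, obs_space, timing):
        super().__init__(cfg, timing)
        obs_shape = get_obs_shape(obs_space)
        # assumes a flat 1-D observation vector, e.g. obs_shape.obs == (obs_dim,)
        self.fc = nn.Linear(obs_shape.obs[0], 128)
        self.encoder_out_size = 128

    def forward(self, obs_dict):
        return self.fc(obs_dict['obs'])

register_custom_encoder('my_flat_obs_encoder', MyFlatObsEncoder)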
Example #19
    def dbg_print(self):
        dbg_info = dict(
            entropy=self.entropy().mean(),
            min_logit=self.raw_logits.min(),
            max_logit=self.raw_logits.max(),
            min_prob=self.probs.min(),
            max_prob=self.probs.max(),
        )

        msg = ''
        for key, value in dbg_info.items():
            msg += f'{key}={value.cpu().item():.3f} '
        log.debug(msg)
Example #20
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--env', type=str, default=None, required=True)
    parser.add_argument('--demo_path', type=str, default=None, required=True)
    args = parser.parse_args()

    spec = doom_env_by_name(args.env)
    cfg = default_cfg(env=args.env)
    if spec.num_agents <= 1:
        env = make_doom_env(args.env, cfg=cfg, custom_resolution='1280x720')
    else:
        env = make_doom_env_impl(
            spec,
            cfg=cfg,
            custom_resolution='1280x720',
            player_id=0,
            num_agents=spec.num_agents,
            max_num_players=spec.num_agents,
            num_bots=spec.num_bots,
        )

    mode = 'replay'
    env.unwrapped.mode = mode
    env.unwrapped.initialize()
    game = env.unwrapped.game

    game.replay_episode(args.demo_path)

    frames_dir = args.demo_path + '_frames'
    if os.path.exists(frames_dir):
        shutil.rmtree(frames_dir)
    os.makedirs(frames_dir)

    frame_id = 0
    while not game.is_episode_finished():
        # Use advance_action instead of make_action.
        game.advance_action()
        img = env.render(mode='rgb_array')

        frame_name = f'{frame_id:05d}.png'
        if img is not None:
            img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
            cv2.imwrite(join(frames_dir, frame_name), img)

        frame_id += 1

        r = game.get_last_reward()
        log.debug('Reward %.3f at frame %d', r, frame_id)

    game.close()
Example #21
    def _actors_update_shaping_scheme(self, policy_id):
        log.debug('Sending latest reward scheme to actors for policy %d...',
                  policy_id)
        for actor_worker in self.actor_workers:
            reward_scheme_task = (PbtTask.UPDATE_REWARD_SCHEME,
                                  (policy_id,
                                   self.policy_reward_shaping[policy_id]))
            task = (TaskType.PBT, reward_scheme_task)
            try:
                actor_worker.task_queue.put(task, timeout=0.1)
            except Full:
                log.warning(
                    'Could not add task %r to queue, it is likely that worker died',
                    task)
Example #22
def register_default_envs(env_registry):
    """
    Register default envs.
    For this set of env families we register a function that can later create an actual registry entry when required.
    This allows us to import only Python modules that we use.

    """
    def doom_funcs():
        from sample_factory.envs.doom.doom_utils import make_doom_env
        from sample_factory.envs.doom.doom_params import add_doom_env_args, doom_override_defaults
        return make_doom_env, add_doom_env_args, doom_override_defaults

    def atari_funcs():
        from sample_factory.envs.atari.atari_utils import make_atari_env
        from sample_factory.envs.atari.atari_params import atari_override_defaults
        return make_atari_env, None, atari_override_defaults

    def dmlab_funcs():
        from sample_factory.envs.dmlab.dmlab_env import make_dmlab_env
        from sample_factory.envs.dmlab.dmlab_params import add_dmlab_env_args, dmlab_override_defaults
        return make_dmlab_env, add_dmlab_env_args, dmlab_override_defaults

    def mujoco_funcs():
        from sample_factory.envs.mujoco.mujoco_utils import make_mujoco_env
        from sample_factory.envs.mujoco.mujoco_params import add_mujoco_env_args, mujoco_override_defaults
        return make_mujoco_env, add_mujoco_env_args, mujoco_override_defaults

    def minigrid_funcs():
        from sample_factory.envs.minigrid.minigrid_utils import make_minigrid_env
        from sample_factory.envs.minigrid.minigrid_params import minigrid_override_defaults
        return make_minigrid_env, None, minigrid_override_defaults

    def voxel_env_funcs():
        from sample_factory.envs.voxel_env.voxel_env_utils import make_voxel_env, add_voxel_env_args, voxel_env_override_defaults
        return make_voxel_env, add_voxel_env_args, voxel_env_override_defaults

    default_envs = {
        'doom_': doom_funcs,
        'atari_': atari_funcs,
        'dmlab_': dmlab_funcs,
        'mujoco_': mujoco_funcs,
        'MiniGrid': minigrid_funcs,
        'voxel_env_': voxel_env_funcs,
    }

    for envs_prefix, env_funcs in default_envs.items():
        env_registry.register_env_deferred(envs_prefix, env_funcs)

    log.debug('Default env families supported: %r',
              [f'{k}*' for k in default_envs.keys()])
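The same deferred pattern could be extended with a custom env family; a hypothetical sketch (the package, module, and prefix names below are made up):

def my_env_funcs():
    # deferred import: only executed when an env with the 'myenv_' prefix is actually requested
    from my_package.my_env_utils import make_my_env, add_my_env_args, my_env_override_defaults
    return make_my_env, add_my_env_args, my_env_override_defaults

env_registry.register_env_deferred('myenv_', my_env_funcs)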
Example #23
    def __init__(self, env, initial_difficulty=None):
        super().__init__(env)

        self._min_difficulty = 0
        self._max_difficulty = 150
        self._difficulty_step = 10
        self._curr_difficulty = 20 if initial_difficulty is None else initial_difficulty
        self._difficulty_std = 10

        log.info('Starting with bot difficulty %d', self._curr_difficulty)

        self._adaptive_curriculum = True
        if initial_difficulty == self._max_difficulty:
            log.debug('Starting at max difficulty, disable adaptive skill curriculum')
            self._adaptive_curriculum = False
Example #24
    def register_env(
        self,
        env_name_prefix,
        make_env_func,
        add_extra_params_func=None,
        override_default_params_func=None,
    ):
        """
        A standard thing to do in RL frameworks is to just rely on unique environment names registered in Gym.
        SampleFactory supports a mechanism on top of that, we define "environment families", e.g. "atari", or "doom",
        and certain things can be defined per env family rather than for specific environment or experiment (such as
        default hyperparameters and env command line arguments).

        For every supported family of environments we require four components:

        :param env_name_prefix: name prefix, e.g. atari_. This allows us to register a single entry per env family
        rather than individual env. Prefix can also, of course, be a full name of the environment.

        :param make_env_func: Factory function that creates an environment instance.
        This function is called like:
        make_my_env(full_env_name, cfg=cfg, env_config=env_config)
        Where full_env_name is a name of the environment to be created, cfg is a namespace with all CLI arguments, and
        env_config is an auxiliary dictionary containing information such as worker index on which the environment lives
        (some envs may require this information)

        :param add_extra_params_func: (optional) function that adds additional parameters to the argument parser.
        This is a very easy way to make your envs configurable through command-line interface.

        :param override_default_params_func: (optional) function that can override the default command line arguments in
        the parser. Every environment demands its own unique set of model architectures and hyperparameters, so this
        mechanism allows us to specify these default parameters once per family of envs to avoid typing them every time we
        want to launch an experiment.

        See the sample_factory_examples for the default envs, it's actually very simple.

        If you want to use a Gym env, just create an empty make_env_func that ignores other parameters and
        instantiates a copy of your Gym environment.

        """

        assert callable(make_env_func), 'make_env_func should be callable'

        entry = EnvRegistryEntry(env_name_prefix, make_env_func,
                                 add_extra_params_func,
                                 override_default_params_func)
        self.registry[env_name_prefix] = entry

        log.debug('Env registry entry created: %s', env_name_prefix)
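As the docstring suggests for plain Gym environments, a hypothetical make_env_func can simply ignore the extra parameters and instantiate the env (the registry instance, env name, and Gym id below are illustrative only):

import gym

def make_my_gym_env(full_env_name, cfg=None, env_config=None, **kwargs):
    # extra parameters are ignored; just build the Gym env
    return gym.make('CartPole-v1')

env_registry.register_env('my_cartpole', make_my_gym_env)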
Example #25
    def _ensure_initialized(self):
        if self.initialized:
            return

        self.workers = [
            MultiAgentEnvWorker(i,
                                self.make_env_func,
                                self.env_config,
                                reset_on_init=self.reset_on_init)
            for i in range(self.num_agents)
        ]

        init_attempt = 0
        while True:
            init_attempt += 1
            try:
                port_to_use = udp_port_num(self.env_config)
                port = find_available_port(port_to_use, increment=1000)
                log.debug('Using port %d', port)
                init_info = dict(port=port)

                lock_file = doom_lock_file(max_parallel=20)
                lock = FileLock(lock_file)
                with lock.acquire(timeout=10):
                    for i, worker in enumerate(self.workers):
                        worker.task_queue.put((init_info, TaskType.INIT))
                        if self.safe_init:
                            time.sleep(1.0)  # just in case
                        else:
                            time.sleep(0.05)

                    for i, worker in enumerate(self.workers):
                        worker.result_queue.get(timeout=20)

            except filelock.Timeout:
                continue
            except Exception:
                raise RuntimeError(
                    'Critical error: worker stuck on initialization. Abort!')
            else:
                break

        log.debug('%d agent workers initialized for env %d!',
                  len(self.workers), self.env_config.worker_index)
        self.initialized = True
Example #26
    def init(self, learner_workers, actor_workers):
        self.learner_workers = learner_workers
        self.actor_workers = actor_workers

        for policy_id in range(self.cfg.num_policies):
            # save the policy-specific configs if they don't exist, or else load them from files
            policy_cfg_filename = policy_cfg_file(self.cfg, policy_id)
            if os.path.exists(policy_cfg_filename):
                with open(policy_cfg_filename, 'r') as json_file:
                    log.debug(
                        'Loading initial policy %d configuration from file %s',
                        policy_id, policy_cfg_filename)
                    json_params = json.load(json_file)
                    self.policy_cfg[policy_id] = json_params
            else:
                self.policy_cfg[policy_id] = dict()
                for param_name in HYPERPARAMS_TO_TUNE:
                    self.policy_cfg[policy_id][param_name] = self.cfg[
                        param_name]

                if policy_id > 0:  # keep one policy with default settings in the beginning
                    log.debug('Initial cfg mutation for policy %d', policy_id)
                    self.policy_cfg[policy_id] = self._perturb_cfg(
                        self.policy_cfg[policy_id])

        for policy_id in range(self.cfg.num_policies):
            # save the policy-specific reward shaping if it doesn't exist, or else load from file
            policy_reward_shaping_filename = policy_reward_shaping_file(
                self.cfg, policy_id)

            if os.path.exists(policy_reward_shaping_filename):
                with open(policy_reward_shaping_filename, 'r') as json_file:
                    log.debug(
                        'Loading policy %d reward shaping from file %s',
                        policy_id,
                        policy_reward_shaping_filename,
                    )
                    json_params = json.load(json_file)
                    self.policy_reward_shaping[policy_id] = json_params
            else:
                self.policy_reward_shaping[policy_id] = copy.deepcopy(
                    self.default_reward_shaping)
                if policy_id > 0:  # keep one policy with default settings in the beginning
                    log.debug('Initial rewards mutation for policy %d',
                              policy_id)
                    self.policy_reward_shaping[
                        policy_id] = self._perturb_reward(
                            self.policy_reward_shaping[policy_id])

        # send initial configuration to the system components
        for policy_id in range(self.cfg.num_policies):
            self._save_cfg(policy_id)
            self._save_reward_shaping(policy_id)
            self._learner_update_cfg(policy_id)
            self._actors_update_shaping_scheme(policy_id)
Example #27
def get_gpus_without_triggering_pytorch_cuda_initialization(envvars=None):
    if envvars is None:
        envvars = os.environ

    import subprocess
    out = subprocess.run([sys.executable, '-m', 'sample_factory.utils.get_available_gpus'], capture_output=True, env=envvars)
    text_output = out.stdout.decode()
    err_output = out.stderr.decode()
    returncode = out.returncode

    from sample_factory.utils.utils import log
    if returncode:
        log.error(
            'Querying available GPUs... return code %d, error: %s, stdout: %s', returncode, err_output, text_output,
        )

    log.debug('Queried available GPUs: %s', text_output)
    return text_output
Example #28
    def __init__(self, num_agents, make_env_func, env_config, skip_frames):
        gym.Env.__init__(self)
        RewardShapingInterface.__init__(self)

        self.num_agents = num_agents
        log.debug('Multi agent env, num agents: %d', self.num_agents)
        self.skip_frames = skip_frames  # number of frames to skip (1 = no skip)

        env = make_env_func(
            player_id=-1
        )  # temporary env just to query observation_space and stuff
        self.action_space = env.action_space
        self.observation_space = env.observation_space

        self.default_reward_shaping = get_default_reward_shaping(env)
        env.close()

        self.current_reward_shaping = [
            self.default_reward_shaping for _ in range(self.num_agents)
        ]

        self.make_env_func = make_env_func

        self.safe_init = env_config is not None and env_config.get(
            'safe_init', False)

        if self.safe_init:
            sleep_seconds = env_config.worker_index * 1.0
            log.info(
                'Sleeping %.3f seconds to avoid creating all envs at once',
                sleep_seconds)
            time.sleep(sleep_seconds)
            log.info('Done sleeping at %d', env_config.worker_index)

        self.env_config = env_config
        self.workers = None

        # only needed when rendering
        self.enable_rendering = False
        self.last_obs = None

        self.reset_on_init = True

        self.initialized = False
Example #29
    def record_used_seed(self, level, seed):
        self.num_seeds_used_in_current_run[level].value += 1
        log.debug('Updated number of used seeds for level %s (%d)', level,
                  self.num_seeds_used_in_current_run[level].value)

        used_lvl_seeds_dir = self.get_used_seeds_dir()
        used_seeds_filename = join(used_lvl_seeds_dir,
                                   level_to_filename(level))
        safe_ensure_dir_exists(os.path.dirname(used_seeds_filename))

        with open(used_seeds_filename, 'a') as fobj:
            fobj.write(f'{seed}\n')

        # this data structure is not shared across processes, but we mostly care about the initial
        # seeds anyway, which are initialized before the processes are forked
        if level not in self.used_seeds:
            self.used_seeds[level] = {seed}
        else:
            self.used_seeds[level].add(seed)
Example #30
    def __init__(self, cfg, obs_space, timing):
        super().__init__(cfg, timing)

        self.basic_encoder = create_standard_encoder(cfg, obs_space, timing)
        self.encoder_out_size = self.basic_encoder.encoder_out_size
        obs_shape = get_obs_shape(obs_space)

        self.measurements_head = None
        if 'measurements' in obs_shape:
            self.measurements_head = nn.Sequential(
                nn.Linear(obs_shape.measurements[0], 128),
                nonlinearity(cfg),
                nn.Linear(128, 128),
                nonlinearity(cfg),
            )
            measurements_out_size = calc_num_elements(self.measurements_head, obs_shape.measurements)
            self.encoder_out_size += measurements_out_size

        log.debug('Policy head output size: %r', self.get_encoder_out_size())