Code Example #1
File: fakes.py Project: hyunjay/acme
    def __init__(self,
                 *,
                 num_actions: int = 1,
                 num_observations: int = 1,
                 action_dtype=np.int32,
                 obs_dtype=np.int32,
                 obs_shape: Sequence[int] = (),
                 discount_spec: Optional[types.NestedSpec] = None,
                 reward_spec: Optional[types.NestedSpec] = None,
                 **kwargs):
        """Initialize the environment."""
        if reward_spec is None:
            reward_spec = specs.Array((), np.float32)

        if discount_spec is None:
            discount_spec = specs.BoundedArray((), np.float32, 0.0, 1.0)

        actions = specs.DiscreteArray(num_actions, dtype=action_dtype)
        observations = specs.BoundedArray(shape=obs_shape,
                                          dtype=obs_dtype,
                                          minimum=obs_dtype(0),
                                          maximum=obs_dtype(num_observations -
                                                            1))

        super().__init__(spec=specs.EnvironmentSpec(observations=observations,
                                                    actions=actions,
                                                    rewards=reward_spec,
                                                    discounts=discount_spec),
                         **kwargs)
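A minimal usage sketch, assuming the excerpt above is acme's fakes.DiscreteEnvironment (the class name and import path are not shown in the excerpt and are assumptions):

import numpy as np
from acme.testing import fakes  # assumed import path

env = fakes.DiscreteEnvironment(num_actions=3,
                                num_observations=5,
                                obs_shape=(4,),
                                episode_length=10)  # forwarded via **kwargs
timestep = env.reset()  # first dm_env.TimeStep of the fake episode
timestep = env.step(0)  # any action in [0, num_actions) is valid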
Code Example #2
 def action_spec(self) -> Dict[str, specs.DiscreteArray]:
     action_specs = {}
     for agent in self.possible_agents:
         spec = self._environment.action_spec()
         action_specs[agent] = specs.DiscreteArray(spec["num_actions"],
                                                   np.int64)
     return action_specs
Code Example #3
File: actors_test.py Project: EXYNOS-999/acme
def _make_fake_env() -> dm_env.Environment:
  env_spec = specs.EnvironmentSpec(
      observations=specs.Array(shape=(10, 5), dtype=np.float32),
      actions=specs.DiscreteArray(num_values=3),
      rewards=specs.Array(shape=(), dtype=np.float32),
      discounts=specs.BoundedArray(
          shape=(), dtype=np.float32, minimum=0., maximum=1.),
  )
  return fakes.Environment(env_spec, episode_length=10)
Code Example #4
File: gym_wrapper.py Project: deepmind/acme
def _convert_to_spec(space: gym.Space,
                     name: Optional[str] = None) -> types.NestedSpec:
    """Converts an OpenAI Gym space to a dm_env spec or nested structure of specs.

  Box, MultiBinary and MultiDiscrete Gym spaces are converted to BoundedArray
  specs. Discrete OpenAI spaces are converted to DiscreteArray specs. Tuple and
  Dict spaces are recursively converted to tuples and dictionaries of specs.

  Args:
    space: The Gym space to convert.
    name: Optional name to apply to all return spec(s).

  Returns:
    A dm_env spec or nested structure of specs, corresponding to the input
    space.
  """
    if isinstance(space, spaces.Discrete):
        return specs.DiscreteArray(num_values=space.n,
                                   dtype=space.dtype,
                                   name=name)

    elif isinstance(space, spaces.Box):
        return specs.BoundedArray(shape=space.shape,
                                  dtype=space.dtype,
                                  minimum=space.low,
                                  maximum=space.high,
                                  name=name)

    elif isinstance(space, spaces.MultiBinary):
        return specs.BoundedArray(shape=space.shape,
                                  dtype=space.dtype,
                                  minimum=0.0,
                                  maximum=1.0,
                                  name=name)

    elif isinstance(space, spaces.MultiDiscrete):
        return specs.BoundedArray(shape=space.shape,
                                  dtype=space.dtype,
                                  minimum=np.zeros(space.shape),
                                  maximum=space.nvec - 1,
                                  name=name)

    elif isinstance(space, spaces.Tuple):
        return tuple(_convert_to_spec(s, name) for s in space.spaces)

    elif isinstance(space, spaces.Dict):
        return {
            key: _convert_to_spec(value, key)
            for key, value in space.spaces.items()
        }

    else:
        raise ValueError('Unexpected gym space: {}'.format(space))
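A hedged usage sketch for the converter above, with a hypothetical nested space; it exercises only the Discrete, Box and Dict branches shown:

import numpy as np
from gym import spaces

space = spaces.Dict({
    'position': spaces.Box(low=-1.0, high=1.0, shape=(3,), dtype=np.float32),
    'button': spaces.Discrete(2),
})
spec = _convert_to_spec(space)
# The Dict branch names each sub-spec after its key:
# spec['position'] is a BoundedArray, spec['button'] a DiscreteArray.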
Code Example #5
File: fakes.py Project: wilixx/acme
    def __init__(self,
                 *,
                 num_actions: int = 1,
                 num_observations: int = 1,
                 action_dtype=np.int32,
                 obs_dtype=np.int32,
                 reward_dtype=np.float32,
                 obs_shape: Sequence[int] = (),
                 **kwargs):
        """Initialize the environment."""
        actions = specs.DiscreteArray(num_actions, dtype=action_dtype)
        observations = specs.BoundedArray(shape=obs_shape,
                                          dtype=obs_dtype,
                                          minimum=obs_dtype(0),
                                          maximum=obs_dtype(num_observations -
                                                            1))
        rewards = specs.Array((), reward_dtype)
        discounts = specs.BoundedArray((), reward_dtype, 0.0, 1.0)

        super().__init__(spec=specs.EnvironmentSpec(observations=observations,
                                                    actions=actions,
                                                    rewards=rewards,
                                                    discounts=discounts),
                         **kwargs)
Code Example #6
File: rules.py Project: cristianvasquez/thingies_gym
 def action_spec(self) -> specs.DiscreteArray:
     """Returns the action spec."""
     return specs.DiscreteArray(dtype=np.int32,  # np.int was removed in NumPy 1.24
                                num_values=len(list(Action)),
                                name="action")
Code Example #7
 def action_spec(self) -> specs.DiscreteArray:
     return specs.DiscreteArray(
         self._environment.game.num_distinct_actions())
Code Example #8
def discretize_spec(spec, num_actions):
    """Replaces a continuous (BoundedArray) action spec with num_actions discrete values."""
    assert isinstance(spec.actions, specs.BoundedArray)
    return spec._replace(actions=specs.DiscreteArray(num_actions))
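A short sketch of applying it, assuming spec is an acme EnvironmentSpec with a continuous scalar action (the surrounding values are hypothetical):

import numpy as np
from acme import specs

continuous = specs.EnvironmentSpec(
    observations=specs.Array(shape=(4,), dtype=np.float32),
    actions=specs.BoundedArray(shape=(), dtype=np.float32, minimum=-1.0, maximum=1.0),
    rewards=specs.Array(shape=(), dtype=np.float32),
    discounts=specs.BoundedArray(shape=(), dtype=np.float32, minimum=0.0, maximum=1.0),
)
discrete = discretize_spec(continuous, num_actions=11)
assert isinstance(discrete.actions, specs.DiscreteArray)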
Code Example #9
File: mazeenv.py Project: srsohn/mtsgi
 def action_spec(self) -> types.NestedSpec:
   return specs.DiscreteArray(
       num_values=self.max_task,
       dtype=np.int32
   )
Code Example #10
    def init(self, params):

        if not _TF_USE_GPU:
            tf.config.set_visible_devices([], 'GPU')
        tf.config.threading.set_inter_op_parallelism_threads(_TF_NUM_THREADS)
        tf.config.threading.set_intra_op_parallelism_threads(_TF_NUM_THREADS)

        if params.seed:
            agent_seed = params.seed + sum(ord(c) for c in params.name)
            random.seed(agent_seed)
            np.random.seed(agent_seed)
            tf.random.set_seed(agent_seed)

        # Internalize params.
        self._params = params

        self._name = params.name

        # Whether learning stopped.
        self._stop = False

        # Define specs. Everything needs to be single precision by default.
        observation_spec = specs.Array(shape=(params.states.rank, ),
                                       dtype=np.float32,
                                       name='obs')
        action_spec = specs.DiscreteArray(dtype=np.int32,
                                          num_values=params.actions.depth,
                                          name='action')
        reward_spec = specs.Array(shape=(), dtype=np.float32, name='reward')
        discount_spec = specs.BoundedArray(shape=(),
                                           dtype=np.float32,
                                           minimum=0.,
                                           maximum=1.,
                                           name='discount')

        env_spec = specs.EnvironmentSpec(observations=observation_spec,
                                         actions=action_spec,
                                         rewards=reward_spec,
                                         discounts=discount_spec)

        # Logger.
        dir_path = f'{params.exp_path}/logs/{self._name}'
        self._logger = make_default_logger(directory=dir_path,
                                           label=self._name)
        agent_logger = make_default_logger(directory=dir_path,
                                           label=f'{self._name}-learning')

        network = Network(num_actions=env_spec.actions.num_values,
                          rnn_hidden_size=params.rnn_hidden_size,
                          head_layers=params.head_layers)

        self.agent = acme_agent.R2D2(
            environment_spec=env_spec,
            network=network,
            batch_size=params.batch_size,
            samples_per_insert=params.samples_per_insert,
            burn_in_length=params.burn_in_length,
            trace_length=params.trace_length,
            replay_period=params.replay_period,
            min_replay_size=params.min_replay_size,
            max_replay_size=params.max_replay_size,
            discount=params.discount_factor,
            prefetch_size=params.prefetch_size,
            target_update_period=params.target_update_period,
            importance_sampling_exponent=params.importance_sampling_exponent,
            priority_exponent=params.priority_exponent,
            epsilon_init=params.epsilon_init,
            epsilon_final=params.epsilon_final,
            epsilon_schedule_timesteps=params.epsilon_schedule_timesteps,
            learning_rate=params.learning_rate,
            store_lstm_state=params.store_lstm_state,
            max_priority_weight=params.max_priority_weight,
            logger=agent_logger,
            checkpoint=False,
        )

        # Observations counter.
        self._obs_counter = 0
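The seeding block near the top derives one deterministic seed per named agent from a single base seed; a standalone sketch with hypothetical values:

base_seed = 42
for name in ('agent_a', 'agent_b'):
    agent_seed = base_seed + sum(ord(c) for c in name)
    print(name, agent_seed)  # distinct, reproducible seed per name

Note that summing character codes is permutation-invariant, so anagram names would collide on the same seed.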
Code Example #11
File: minerl_wrapper.py Project: dzorlu/acme
 def action_spec(self):
   return specs.DiscreteArray(num_values=self.num_actions)