Example #1
    def __init__(self,
                 *,
                 num_actions: int = 1,
                 num_observations: int = 1,
                 action_dtype=np.int32,
                 obs_dtype=np.int32,
                 obs_shape: Sequence[int] = (),
                 discount_spec: Optional[types.NestedSpec] = None,
                 reward_spec: Optional[types.NestedSpec] = None,
                 **kwargs):
        """Initialize the environment."""
        # Fall back to a scalar float32 reward spec and a [0, 1] float32
        # discount spec when none are provided.
        if reward_spec is None:
            reward_spec = specs.Array((), np.float32)

        if discount_spec is None:
            discount_spec = specs.BoundedArray((), np.float32, 0.0, 1.0)

        actions = specs.DiscreteArray(num_actions, dtype=action_dtype)
        observations = specs.BoundedArray(shape=obs_shape,
                                          dtype=obs_dtype,
                                          minimum=obs_dtype(0),
                                          maximum=obs_dtype(num_observations -
                                                            1))

        super().__init__(spec=specs.EnvironmentSpec(observations=observations,
                                                    actions=actions,
                                                    rewards=reward_spec,
                                                    discounts=discount_spec),
                         **kwargs)
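A brief usage sketch for this constructor. The class name DiscreteEnvironment and the episode_length keyword are assumptions based on acme's fakes module (Examples #3 and #5 point at the same file):

# Illustrative only: a fake discrete environment with five actions, ten
# integer observations of shape (4,), and a float64 reward spec overriding
# the float32 default.
env = fakes.DiscreteEnvironment(
    num_actions=5,
    num_observations=10,
    obs_shape=(4,),
    reward_spec=specs.Array((), np.float64),
    episode_length=10,
)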
Example #2
 def action_spec(self) -> Dict[str, specs.DiscreteArray]:
     action_specs = {}
     for agent in self.possible_agents:
         spec = self._environment.action_spec()
         action_specs[agent] = specs.DiscreteArray(spec["num_actions"],
                                                   np.int64)
     return action_specs
Example #3
def _make_fake_env() -> dm_env.Environment:
  env_spec = specs.EnvironmentSpec(
      observations=specs.Array(shape=(10, 5), dtype=np.float32),
      actions=specs.DiscreteArray(num_values=3),
      rewards=specs.Array(shape=(), dtype=np.float32),
      discounts=specs.BoundedArray(
          shape=(), dtype=np.float32, minimum=0., maximum=1.),
  )
  return fakes.Environment(env_spec, episode_length=10)
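A minimal sketch of how the fake could be driven in a test. It relies only on the standard dm_env reset/step interface and the spec's generate_value() helper:

# Illustrative only: run one short episode against the fake environment.
env = _make_fake_env()
timestep = env.reset()
while not timestep.last():
    action = env.action_spec().generate_value()  # a valid (zero) action
    timestep = env.step(action)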
Example #4
def _convert_to_spec(space: gym.Space,
                     name: Optional[str] = None) -> types.NestedSpec:
    """Converts an OpenAI Gym space to a dm_env spec or nested structure of specs.

  Box, MultiBinary and MultiDiscrete Gym spaces are converted to BoundedArray
  specs. Discrete OpenAI spaces are converted to DiscreteArray specs. Tuple and
  Dict spaces are recursively converted to tuples and dictionaries of specs.

  Args:
    space: The Gym space to convert.
    name: Optional name to apply to all return spec(s).

  Returns:
    A dm_env spec or nested structure of specs, corresponding to the input
    space.
  """
    if isinstance(space, spaces.Discrete):
        return specs.DiscreteArray(num_values=space.n,
                                   dtype=space.dtype,
                                   name=name)

    elif isinstance(space, spaces.Box):
        return specs.BoundedArray(shape=space.shape,
                                  dtype=space.dtype,
                                  minimum=space.low,
                                  maximum=space.high,
                                  name=name)

    elif isinstance(space, spaces.MultiBinary):
        return specs.BoundedArray(shape=space.shape,
                                  dtype=space.dtype,
                                  minimum=0.0,
                                  maximum=1.0,
                                  name=name)

    elif isinstance(space, spaces.MultiDiscrete):
        return specs.BoundedArray(shape=space.shape,
                                  dtype=space.dtype,
                                  minimum=np.zeros(space.shape),
                                  maximum=space.nvec - 1,
                                  name=name)

    elif isinstance(space, spaces.Tuple):
        return tuple(_convert_to_spec(s, name) for s in space.spaces)

    elif isinstance(space, spaces.Dict):
        return {
            key: _convert_to_spec(value, key)
            for key, value in space.spaces.items()
        }

    else:
        raise ValueError('Unexpected gym space: {}'.format(space))
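To make the recursion concrete, a short usage sketch; the space layout below is invented for illustration:

# Illustrative only: a nested Gym space converts to a matching dict of specs.
space = spaces.Dict({
    'button': spaces.Discrete(2),
    'position': spaces.Box(low=-1.0, high=1.0, shape=(3,)),
})
converted = _convert_to_spec(space)
# converted['button'] is a DiscreteArray with 2 values; converted['position']
# is a BoundedArray of shape (3,) bounded in [-1, 1].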
Example #5
File: fakes.py Project: wilixx/acme
    def __init__(self,
                 *,
                 num_actions: int = 1,
                 num_observations: int = 1,
                 action_dtype=np.int32,
                 obs_dtype=np.int32,
                 reward_dtype=np.float32,
                 obs_shape: Sequence[int] = (),
                 **kwargs):
        """Initialize the environment."""
        actions = specs.DiscreteArray(num_actions, dtype=action_dtype)
        observations = specs.BoundedArray(shape=obs_shape,
                                          dtype=obs_dtype,
                                          minimum=obs_dtype(0),
                                          maximum=obs_dtype(num_observations -
                                                            1))
        rewards = specs.Array((), reward_dtype)
        discounts = specs.BoundedArray((), reward_dtype, 0.0, 1.0)

        super().__init__(spec=specs.EnvironmentSpec(observations=observations,
                                                    actions=actions,
                                                    rewards=rewards,
                                                    discounts=discounts),
                         **kwargs)
Example #6
 def action_spec(self) -> specs.DiscreteArray:
     """Returns the action spec."""
     return specs.DiscreteArray(dtype=int,
                                num_values=len(list(Action)),
                                name="action")
Example #7
 def action_spec(self) -> specs.DiscreteArray:
     return specs.DiscreteArray(
         self._environment.game.num_distinct_actions())
Example #8
def discretize_spec(spec, num_actions):
    assert isinstance(spec.actions, specs.BoundedArray)
    return spec._replace(actions=specs.DiscreteArray(num_actions))
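A hedged usage sketch: assuming specs here is acme.specs (which also exposes make_environment_spec) and environment is any dm_env.Environment whose action spec is a BoundedArray, the continuous spec can be swapped for a discrete one as follows; the bin count is arbitrary:

# Illustrative only: replace a continuous action spec with an 11-way
# DiscreteArray.
env_spec = specs.make_environment_spec(environment)
discrete_spec = discretize_spec(env_spec, num_actions=11)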
Example #9
 def action_spec(self) -> types.NestedSpec:
   return specs.DiscreteArray(
       num_values=self.max_task,
       dtype=np.int32
   )
Example #10
    def init(self, params):

        if not _TF_USE_GPU:
            tf.config.set_visible_devices([], 'GPU')
        tf.config.threading.set_inter_op_parallelism_threads(_TF_NUM_THREADS)
        tf.config.threading.set_intra_op_parallelism_threads(_TF_NUM_THREADS)

        if params.seed:
            agent_seed = params.seed + sum([ord(c) for c in params.name])
            random.seed(agent_seed)
            np.random.seed(agent_seed)
            tf.random.set_seed(agent_seed)

        # Internalize params.
        self._params = params

        self._name = params.name

        # Whether learning stopped.
        self._stop = False

        # Define specs. Everything needs to be single precision by default.
        observation_spec = specs.Array(shape=(params.states.rank, ),
                                       dtype=np.float32,
                                       name='obs')
        action_spec = specs.DiscreteArray(dtype=np.int32,
                                          num_values=params.actions.depth,
                                          name="action")
        reward_spec = specs.Array(shape=(), dtype=np.float32, name='reward')
        discount_spec = specs.BoundedArray(shape=(),
                                           dtype=np.float32,
                                           minimum=0.,
                                           maximum=1.,
                                           name='discount')

        env_spec = specs.EnvironmentSpec(observations=observation_spec,
                                         actions=action_spec,
                                         rewards=reward_spec,
                                         discounts=discount_spec)

        # Logger.
        dir_path = f'{params.exp_path}/logs/{self._name}'
        self._logger = make_default_logger(directory=dir_path,
                                           label=self._name)
        agent_logger = make_default_logger(directory=dir_path,
                                           label=f'{self._name}-learning')

        network = Network(num_actions=env_spec.actions.num_values,
                          rnn_hidden_size=params.rnn_hidden_size,
                          head_layers=params.head_layers)

        self.agent = acme_agent.R2D2(
            environment_spec=env_spec,
            network=network,
            batch_size=params.batch_size,
            samples_per_insert=params.samples_per_insert,
            burn_in_length=params.burn_in_length,
            trace_length=params.trace_length,
            replay_period=params.replay_period,
            min_replay_size=params.min_replay_size,
            max_replay_size=params.max_replay_size,
            discount=params.discount_factor,
            prefetch_size=params.prefetch_size,
            target_update_period=params.target_update_period,
            importance_sampling_exponent=params.importance_sampling_exponent,
            priority_exponent=params.priority_exponent,
            epsilon_init=params.epsilon_init,
            epsilon_final=params.epsilon_final,
            epsilon_schedule_timesteps=params.epsilon_schedule_timesteps,
            learning_rate=params.learning_rate,
            store_lstm_state=params.store_lstm_state,
            max_priority_weight=params.max_priority_weight,
            logger=agent_logger,
            checkpoint=False,
        )

        # Observations counter.
        self._obs_counter = 0
Example #11
 def action_spec(self):
   return specs.DiscreteArray(num_values=self.num_actions)
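Across these examples the resulting DiscreteArray supports the same handful of operations; a minimal sketch, with values chosen only for illustration:

# Illustrative only: basic operations on a DiscreteArray spec.
spec = specs.DiscreteArray(num_values=4, dtype=np.int32, name='action')
spec.validate(np.int32(2))           # raises ValueError for out-of-range values
placeholder = spec.generate_value()  # a conforming value; 0 for a DiscreteArray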