Exemple #1
0
        def true_fn():
            """Produce random actions for every leaf of the action space.

            Uses ``self`` and ``action_dist`` from the enclosing scope.

            Returns:
                The result of mapping the per-component sampler over
                ``self.action_space_struct``.
            """
            required_shape = force_tuple(
                action_dist.required_model_output_shape(
                    self.action_space,
                    getattr(self.model, "model_config", None)))
            # The inputs carry a batch dimension iff they have exactly one
            # more dimension than the required model output shape.
            has_batch_dim = \
                len(action_dist.inputs.shape) == len(required_shape) + 1
            n_rows = tf.shape(action_dist.inputs)[0] if has_batch_dim else 1

            def random_component(space):
                """Return a random sample tensor for one primitive space."""
                if isinstance(space, Discrete):
                    return tf.random.uniform(
                        shape=(n_rows, ) + space.shape,
                        maxval=space.n,
                        dtype=space.dtype)

                if isinstance(space, MultiDiscrete):
                    # One column per sub-space, each with its own range.
                    columns = [
                        tf.random.uniform(
                            shape=(n_rows, 1), maxval=n, dtype=space.dtype)
                        for n in space.nvec
                    ]
                    return tf.concat(columns, axis=1)

                if isinstance(space, Box):
                    fully_bounded = space.bounded_above.all() and \
                        space.bounded_below.all()
                    # Spaces lacking a bound in some dimension are sampled
                    # from a normal distribution instead.
                    if not fully_bounded:
                        return tf.random.normal(
                            shape=(n_rows, ) + space.shape,
                            dtype=space.dtype)
                    if space.dtype.name.startswith("int"):
                        # Integer dtypes: the bounds of the first element
                        # are used as scalar limits.
                        lower = space.low.flat[0]
                        upper = space.high.flat[0]
                    else:
                        lower = space.low
                        upper = space.high
                    return tf.random.uniform(
                        shape=(n_rows, ) + space.shape,
                        minval=lower,
                        maxval=upper,
                        dtype=space.dtype)

                # Only Simplex is accepted beyond this point.
                assert isinstance(space, Simplex), \
                    "Unsupported distribution component '{}' for random " \
                    "sampling!".format(space)
                raw = tf.random.uniform(
                    shape=(n_rows, ) + space.shape,
                    minval=0.0,
                    maxval=1.0,
                    dtype=space.dtype)
                return tf.nn.softmax(raw)

            return tree.map_structure(random_component,
                                      self.action_space_struct)
Exemple #2
0
 def get_torch_exploration_action(self, action_dist, explore):
     """Return a random (explore) or deterministic action plus zero logp.

     Args:
         action_dist: Distribution object providing ``inputs``,
             ``required_model_output_shape()`` and
             ``deterministic_sample()``.
         explore: If truthy, sample from ``self.action_space``; otherwise
             use the distribution's deterministic sample.

     Returns:
         Tuple ``(action, logp)`` where ``logp`` is a float32 zero tensor
         whose length equals the first dimension of ``action``.
     """
     if explore:
         req = force_tuple(
             action_dist.required_model_output_shape(
                 self.action_space, self.model.model_config))
         # Inputs are batched iff they have one more dimension than the
         # required model output shape.
         if len(action_dist.inputs.shape) == len(req) + 1:
             # Draw one independent sample per batch row. A single sample
             # expanded to batch size 1 would not match a batch of size > 1.
             batch_size = action_dist.inputs.shape[0]
             a = np.stack(
                 [self.action_space.sample() for _ in range(batch_size)])
         else:
             a = self.action_space.sample()
         action = torch.from_numpy(a).to(self.device)
     else:
         action = action_dist.deterministic_sample()
     # Log-probs are reported as zeros, one per entry along dim 0.
     logp = torch.zeros(
         (action.size()[0], ), dtype=torch.float32, device=self.device)
     return action, logp
Exemple #3
0
 def get_torch_exploration_action(self, action_dist, explore):
     """Pick an action (random when exploring) and an all-zero logp.

     Args:
         action_dist: Distribution object exposing ``inputs``,
             ``required_model_output_shape()`` and
             ``deterministic_sample()``.
         explore: Truthy to sample from ``self.action_space``; falsy to
             take the distribution's deterministic sample.

     Returns:
         ``(action, logp)``; ``logp`` is a float32 tensor of zeros sized by
         the first dimension of ``action``.
     """
     if not explore:
         action = action_dist.deterministic_sample()
     else:
         required_shape = force_tuple(
             action_dist.required_model_output_shape(
                 self.action_space, self.model.model_config))
         # One extra dimension on the inputs means they are batched.
         if len(action_dist.inputs.shape) != len(required_shape) + 1:
             sampled = self.action_space.sample()
         else:
             n_rows = action_dist.inputs.shape[0]
             per_row = [self.action_space.sample() for _ in range(n_rows)]
             sampled = np.stack(per_row)
         # Move the numpy sample onto this object's device.
         action = torch.from_numpy(sampled).to(self.device)

     leading_dim = action.size()[0]
     logp = torch.zeros(
         (leading_dim, ), dtype=torch.float32, device=self.device)
     return action, logp
Exemple #4
0
        def true_fn():
            """Draw random actions for every component of the action space.

            Reads ``self`` and ``action_dist`` from the enclosing scope.

            Returns:
                The result of mapping the per-component sampler over
                ``self.action_space_struct``.
            """
            batch_size = 1
            req = force_tuple(
                action_dist.required_model_output_shape(
                    self.action_space, self.model.model_config))
            # Add a batch dimension? Inputs are batched iff they have one
            # more dimension than the required model output shape.
            if len(action_dist.inputs.shape) == len(req) + 1:
                batch_size = tf.shape(action_dist.inputs)[0]

            # Function to produce random samples from primitive space
            # components: (Multi)Discrete or Box.
            def random_component(component):
                if isinstance(component, Discrete):
                    return tf.random.uniform(
                        shape=(batch_size, ) + component.shape,
                        maxval=component.n,
                        dtype=component.dtype)
                elif isinstance(component, MultiDiscrete):
                    # One column per sub-space, each with its own range.
                    return tf.concat(
                        [
                            tf.random.uniform(
                                shape=(batch_size, 1),
                                maxval=n,
                                dtype=component.dtype)
                            for n in component.nvec
                        ],
                        axis=1)
                elif isinstance(component, Box):
                    if component.bounded_above.all() and \
                            component.bounded_below.all():
                        if component.dtype.name.startswith("int"):
                            # tf.random.uniform does not broadcast bounds
                            # for integer dtypes, so pass scalar limits.
                            # NOTE(review): this uses the bounds of the
                            # first element for all dimensions -- assumes
                            # the Box has uniform bounds; confirm.
                            return tf.random.uniform(
                                shape=(batch_size, ) + component.shape,
                                minval=component.low.flat[0],
                                maxval=component.high.flat[0],
                                dtype=component.dtype)
                        return tf.random.uniform(
                            shape=(batch_size, ) + component.shape,
                            minval=component.low,
                            maxval=component.high,
                            dtype=component.dtype)
                    else:
                        # Not bounded in every dimension: sample from a
                        # normal distribution instead.
                        return tf.random.normal(
                            shape=(batch_size, ) + component.shape,
                            dtype=component.dtype)
                # NOTE(review): any other space type falls through and
                # yields None, exactly as before.

            actions = tree.map_structure(random_component,
                                         self.action_space_struct)
            return actions