def testClipToBounds(self):
    """Check that `common.clip_to_spec` clamps values to the spec's bounds.

    The input [1, 2, 4, -3] is clipped against a 4-element spec whose
    per-element minimum is 0 and maximum is 3; the expected result is
    [1, 2, 3, 0].
    """
    lower_bounds = [0, 0, 0, 0]
    upper_bounds = [3, 3, 3, 3]
    bounded_spec = tensor_spec.BoundedTensorSpec(
        (4,), tf.float32, lower_bounds, upper_bounds
    )
    raw_value = tf.constant([1, 2, 4, -3])

    # Values above the maximum collapse to 3, values below the minimum to 0.
    want = np.array([1, 2, 3, 0])
    got = self.evaluate(common.clip_to_spec(raw_value, bounded_spec))

    self.assertAllClose(want, got)
def testScaledDistribution(self):
    """Check that samples from a scaled distribution survive clipping unchanged.

    Builds a `NormalProjectionNetwork` with `scale_distribution=True` for an
    output spec bounded to [-2, 4], draws one sample per batch entry, and
    asserts that passing the sample through `common.clip_to_spec` yields
    (almost) the same values.
    """
    bounded_spec = tensor_spec.BoundedTensorSpec([1], tf.float32, -2, 4)
    projection = normal_projection_network.NormalProjectionNetwork(
        bounded_spec,
        init_means_output_factor=10,
        state_dependent_std=True,
        scale_distribution=True,
    )

    batch = _get_inputs(batch_size=100, num_input_dims=5)
    dist, _ = projection(batch, outer_rank=1)

    # Variables are created by the call above, so initialise them before
    # evaluating any sample.
    self.evaluate(tf.compat.v1.global_variables_initializer())

    drawn = self.evaluate(dist.sample())
    bounded = self.evaluate(common.clip_to_spec(drawn, bounded_spec))

    np.testing.assert_almost_equal(bounded, drawn)
def clip_action(action, action_spec):
    """Clip `action` to `action_spec` when the spec is bounded.

    Args:
        action: The action value to (possibly) clip.
        action_spec: The spec describing `action`.

    Returns:
        `common.clip_to_spec(action, action_spec)` if `action_spec` is a
        `tensor_spec.BoundedTensorSpec`; otherwise `action` unchanged.
    """
    # Only bounded specs are handed to clip_to_spec; everything else
    # passes through untouched.
    if not isinstance(action_spec, tensor_spec.BoundedTensorSpec):
        return action
    return common.clip_to_spec(action, action_spec)
def _sample(dist, action_spec):
    """Draw a sample from `dist`, clipping it to `action_spec` if enabled.

    Relies on `self` and `seed_stream` from the enclosing scope: each call
    pulls a fresh seed via `seed_stream()`, and `self._clip` decides
    whether the sample is passed through `common_utils.clip_to_spec`.

    Args:
        dist: Object exposing `sample(seed=...)`.
        action_spec: Spec passed to `clip_to_spec` when clipping is on.

    Returns:
        The sampled action, clipped when `self._clip` is truthy.
    """
    drawn = dist.sample(seed=seed_stream())
    return (
        common_utils.clip_to_spec(drawn, action_spec) if self._clip else drawn
    )
def _add_ou_noise(action, ou_process, action_spec):
    """Add the output of `ou_process()` to `action`, optionally clipping.

    Relies on `self` from the enclosing scope: when `self._clip` is truthy
    the perturbed action is passed through `common.clip_to_spec`.

    Args:
        action: The action to perturb.
        ou_process: Zero-argument callable whose result is added to `action`.
        action_spec: Spec passed to `clip_to_spec` when clipping is on.

    Returns:
        `action + ou_process()`, clipped when `self._clip` is truthy.
    """
    perturbed = action + ou_process()
    if not self._clip:
        return perturbed
    return common.clip_to_spec(perturbed, action_spec)
def _sample(dist, action_spec):
    """Draw a sample from `dist`, clipping it to `action_spec` if enabled.

    Relies on `self` and `seed` from the enclosing scope: `seed` is passed
    to `dist.sample`, and `self._clip` decides whether the sample is passed
    through `common.clip_to_spec`.

    Args:
        dist: Object exposing `sample(seed=...)`.
        action_spec: Spec passed to `clip_to_spec` when clipping is on.

    Returns:
        The sampled action, clipped when `self._clip` is truthy.
    """
    drawn = dist.sample(seed=seed)
    if not self._clip:
        return drawn
    return common.clip_to_spec(drawn, action_spec)