def __init__(self, distribution, spec, name="SquashToSpecDistribution"):
  """Constructs a SquashToSpecDistribution.

  The wrapped distribution is squashed with a tanh bijector and then scaled
  and shifted by the magnitudes and means computed from `spec`.

  Args:
    distribution: The distribution to be transformed.
    spec: Spec handed to `common.spec_means_and_magnitudes` to obtain the
      shift (means) and scale (magnitudes) of the affine rescaling.
    name: Python `str` name forwarded to the parent constructor.
  """
  self.action_means, self.action_magnitudes = (
      common.spec_means_and_magnitudes(spec))
  # `AffineScalar` is deprecated in TensorFlow Probability; `Shift` and
  # `Scale` express the same transform: y = shift + scale * x.
  # Chain applies bijectors in reverse order, so the tanh squash happens
  # first, followed by the scale and finally the shift.
  bijector_chain = tfp.bijectors.Chain([
      tfp.bijectors.Shift(shift=self.action_means),
      tfp.bijectors.Scale(scale=self.action_magnitudes),
      tanh_bijector_stable.Tanh(),
  ])
  super(SquashToSpecDistribution, self).__init__(
      distribution, bijector_chain, name=name)
def __init__(self,
             distribution,
             spec,
             validate_args=False,
             name="SquashToSpecNormal"):
  """Constructs a SquashToSpecNormal distribution.

  Args:
    distribution: input normal distribution with normalized mean and std dev
    spec: bounded action spec from which to compute action ranges
    validate_args: Python `bool`, default `False`. When `True` distribution
      parameters are checked for validity despite possibly degrading runtime
      performance. When `False` invalid inputs may silently render incorrect
      outputs.
    name: Python `str` name prefixed to Ops created by this class.

  Raises:
    ValueError: If `distribution` is neither a `tfp.distributions.Normal`
      nor a `tfp.distributions.MultivariateNormalDiag`.
  """
  accepted_types = (tfp.distributions.Normal,
                    tfp.distributions.MultivariateNormalDiag)
  if not isinstance(distribution, accepted_types):
    raise ValueError(
        "Input distribution must be a normal distribution, "
        "got {} instead".format(distribution))

  means, magnitudes = common.spec_means_and_magnitudes(spec)
  self.action_means = means
  self.action_magnitudes = magnitudes

  # The reported parameters are those of the un-squashed input distribution
  # (the actor network's normalized output). That is what policy info needs
  # in order to compare an old policy against a new one.
  input_params = dict(loc=distribution.loc, scale=distribution.scale)

  # Keep a handle on the raw (pre-squash) action distribution.
  self.input_distribution = distribution

  # Affine map onto the spec's range; applied after the tanh squash because
  # Chain runs its bijectors last-to-first.
  rescale_to_spec = tfp.bijectors.Shift(self.action_means)(
      tfp.bijectors.Scale(self.action_magnitudes))
  squash = tanh_bijector_stable.Tanh()
  squash_then_rescale = tfp.bijectors.Chain([rescale_to_spec, squash])

  self._squashed_distribution = tfp.distributions.TransformedDistribution(
      distribution=distribution, bijector=squash_then_rescale)

  # We let TransformedDistribution access _graph_parents since this class
  # is more like a baseclass than derived.
  parents = (
      distribution._graph_parents +  # pylint: disable=protected-access
      squash_then_rescale.graph_parents)

  super(SquashToSpecNormal, self).__init__(
      dtype=distribution.dtype,
      reparameterization_type=distribution.reparameterization_type,
      validate_args=validate_args,
      allow_nan_stats=distribution.allow_nan_stats,
      parameters=input_params,
      graph_parents=parents,
      name=name)
def testSpecMeansAndMagnitudes(self):
  """Checks means and magnitudes computed from a symmetric tensor spec.

  With bounds that are symmetric around zero, the means are expected to be
  all zeros and the magnitudes are expected to equal the upper bounds.
  """
  lower_bounds = [[-5, -5], [-4, -4], [-2, -6]]
  upper_bounds = [[5, 5], [4, 4], [2, 6]]
  spec = tensor_spec.BoundedTensorSpec(
      (3, 2), tf.float32, lower_bounds, upper_bounds)

  means, magnitudes = self.evaluate(common.spec_means_and_magnitudes(spec))

  self.assertAllClose(np.zeros((3, 2), dtype=np.float32), means)
  self.assertAllClose(
      np.array([[5.0, 5.0], [4.0, 4.0], [2.0, 6.0]], dtype=np.float32),
      magnitudes)
def testSpecMeansAndMagnitudes(self):
  """Checks means and magnitudes computed from a symmetric array spec.

  With bounds that are symmetric around zero, the means are expected to be
  all zeros and the magnitudes are expected to equal the upper bounds.
  """
  lower_bounds = np.array([[-5, -5], [-4, -4], [-2, -6]])
  upper_bounds = np.array([[5, 5], [4, 4], [2, 6]])
  spec = array_spec.BoundedArraySpec(
      (3, 2), np.float32, lower_bounds, upper_bounds)

  means, magnitudes = common.spec_means_and_magnitudes(spec)

  self.assertAllClose(np.zeros((3, 2), dtype=np.float32), means)
  self.assertAllClose(
      np.array([[5.0, 5.0], [4.0, 4.0], [2.0, 6.0]], dtype=np.float32),
      magnitudes)
def scale_distribution_to_spec(distribution, spec):
  """Scales the given distribution to the bounds of the given spec.

  The distribution is first squashed to the range (-1.0, +1.0) with a tanh
  bijector and then scaled and shifted by the magnitudes and means computed
  from `spec`.

  Args:
    distribution: The distribution to transform.
    spec: Spec handed to `common.spec_means_and_magnitudes` to obtain the
      shift (means) and scale (magnitudes) of the affine rescaling.

  Returns:
    A `tfp.distributions.TransformedDistribution` wrapping `distribution`.
  """
  action_means, action_magnitudes = common.spec_means_and_magnitudes(spec)

  # `AffineScalar` is deprecated in TensorFlow Probability; `Shift` and
  # `Scale` express the same transform: y = shift + scale * x.
  # Chain applies bijectors in reverse order, so the squash to (-1.0, +1.0)
  # happens before rescaling to the action spec.
  bijector_chain = tfp.bijectors.Chain([
      tfp.bijectors.Shift(shift=action_means),
      tfp.bijectors.Scale(scale=action_magnitudes),
      tanh_bijector_stable.Tanh(),
  ])
  return tfp.distributions.TransformedDistribution(
      distribution=distribution, bijector=bijector_chain)