def __init__(self,
             sample_spec,
             activation_fn=None,
             init_means_output_factor=0.1,
             std_initializer_value=0.0,
             mean_transform=tanh_squash_to_spec,
             std_transform=tf.nn.softplus,
             state_dependent_std=False,
             name='NormalProjectionNetwork'):
  """Creates an instance of NormalProjectionNetwork.

  Args:
    sample_spec: A spec (either BoundedArraySpec or BoundedTensorSpec)
      detailing the shape and dtypes of samples pulled from the output
      distribution.
    activation_fn: Activation function to use in the dense layer.
    init_means_output_factor: Output factor for initializing action means
      weights.
    std_initializer_value: Initial value for std variables.
    mean_transform: Transform to apply to the calculated means.
    std_transform: Transform to apply to the stddevs.
    state_dependent_std: If true, stddevs will be produced by an MLP from the
      state; otherwise, stddevs will be an independent variable.
    name: A string representing the name of the network.
  """
  output_spec = self._output_distribution_spec(sample_spec)
  super(NormalProjectionNetwork, self).__init__(
      # We don't need these, but the base class requires them.
      observation_spec=None,
      action_spec=None,
      state_spec=(),
      output_spec=output_spec,
      name=name)

  self._sample_spec = sample_spec
  self._mean_transform = mean_transform
  self._std_transform = std_transform
  self._state_dependent_std = state_dependent_std

  self._means_projection_layer = tf.keras.layers.Dense(
      sample_spec.shape.num_elements(),
      activation=activation_fn,
      kernel_initializer=tf.keras.initializers.VarianceScaling(
          scale=init_means_output_factor),
      bias_initializer=tf.keras.initializers.Zeros(),
      name='means_projection_layer')

  self._stddev_projection_layer = None
  if self._state_dependent_std:
    self._stddev_projection_layer = tf.keras.layers.Dense(
        sample_spec.shape.num_elements(),
        activation=activation_fn,
        kernel_initializer=tf.keras.initializers.VarianceScaling(
            scale=init_means_output_factor),
        bias_initializer=tf.keras.initializers.Zeros(),
        name='stddev_projection_layer')

  self._bias = bias_layer.BiasLayer(
      bias_initializer=tf.keras.initializers.Constant(
          value=std_initializer_value))
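# A hedged usage sketch (not part of the original source): assuming the
# constructor above belongs to
# tf_agents.networks.normal_projection_network.NormalProjectionNetwork, it
# could be exercised as follows; the bounded action spec is illustrative.
import tensorflow as tf
from tf_agents.networks import normal_projection_network
from tf_agents.specs import tensor_spec

# Means are squashed into [-1, 1] by tanh_squash_to_spec (the default
# mean_transform); with state_dependent_std=True the stddevs come from a
# second Dense layer applied to the state instead of a free bias variable.
action_spec = tensor_spec.BoundedTensorSpec(
    shape=(2,), dtype=tf.float32, minimum=-1.0, maximum=1.0)
projection_net = normal_projection_network.NormalProjectionNetwork(
    action_spec, state_dependent_std=True)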
def testTrainableVariables(self):
  bias = bias_layer.BiasLayer(
      bias_initializer=tf.keras.initializers.Constant(value=1.0))
  states = tf.zeros((2, 3))
  _ = bias(states)
  self.evaluate(tf.compat.v1.global_variables_initializer())
  variables = bias.trainable_variables
  np.testing.assert_almost_equal([[1.0] * 3], self.evaluate(variables))
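# A minimal sketch (not the actual tf_agents implementation) of what the
# bias_layer.BiasLayer exercised by these tests plausibly looks like: a Keras
# layer with no kernel that learns only an additive bias over the last input
# dimension. Details of the real implementation may differ.
import tensorflow as tf

class BiasLayer(tf.keras.layers.Layer):
  """Keras layer that only adds a trainable bias to its input."""

  def __init__(self, bias_initializer='zeros', **kwargs):
    super(BiasLayer, self).__init__(**kwargs)
    self._bias_initializer = tf.keras.initializers.get(bias_initializer)

  def build(self, input_shape):
    # One bias entry per feature in the last dimension, e.g. shape (3,) for
    # the (2, 3) `states` tensors used in the tests above and below.
    self.bias = self.add_weight(
        name='bias',
        shape=[int(input_shape[-1])],
        initializer=self._bias_initializer,
        trainable=True)
    self.built = True

  def call(self, inputs):
    return tf.nn.bias_add(inputs, self.bias)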
def __init__(self,
             output_spec,
             activation_fn=None,
             init_means_output_factor=0.1,
             std_initializer_value=0.0,
             mean_transform=tanh_squash_to_spec,
             std_transform=tf.nn.softplus,
             name='NormalProjectionNetwork'):
  """Creates an instance of NormalProjectionNetwork.

  Args:
    output_spec: An output spec (either BoundedArraySpec or
      BoundedTensorSpec).
    activation_fn: Activation function to use in the dense layer.
    init_means_output_factor: Output factor for initializing action means
      weights.
    std_initializer_value: Initial value for std variables.
    mean_transform: Transform to apply to the calculated means.
    std_transform: Transform to apply to the stddevs.
    name: A string representing the name of the network.
  """
  super(NormalProjectionNetwork, self).__init__(
      # We don't need these, but the base class requires them.
      observation_spec=None,
      action_spec=None,
      state_spec=(),
      name=name)

  self._output_spec = output_spec
  self._mean_transform = mean_transform
  self._std_transform = std_transform

  self._projection_layer = tf.keras.layers.Dense(
      output_spec.shape.num_elements(),
      activation=activation_fn,
      kernel_initializer=tf.keras.initializers.VarianceScaling(
          scale=init_means_output_factor),
      bias_initializer=tf.keras.initializers.Zeros(),
      name='normal_projection_layer')

  self._bias = bias_layer.BiasLayer(
      bias_initializer=tf.keras.initializers.Constant(
          value=std_initializer_value))
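# A hedged numeric illustration (not part of the original source): with the
# default std_initializer_value=0.0 and std_transform=tf.nn.softplus, the
# initial stddev of the projected distribution is softplus(0) = ln(2),
# about 0.6931, rather than 0.
import numpy as np
import tensorflow as tf

initial_std = tf.nn.softplus(0.0)
np.testing.assert_allclose(float(initial_std), np.log(2.0), rtol=1e-6)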
def __init__(self,
             sample_spec,
             activation_fn=None,
             init_means_output_factor=0.1,
             std_bias_initializer_value=0.0,
             mean_transform=tanh_squash_to_spec,
             std_transform=tf.nn.softplus,
             state_dependent_std=False,
             scale_distribution=False,
             name='NormalProjectionNetwork'):
  """Creates an instance of NormalProjectionNetwork.

  Args:
    sample_spec: A `tensor_spec.BoundedTensorSpec` detailing the shape and
      dtypes of samples pulled from the output distribution.
    activation_fn: Activation function to use in the dense layer.
    init_means_output_factor: Output factor for initializing action means
      weights.
    std_bias_initializer_value: Initial value for the bias of the
      stddev_projection_layer or the direct bias_layer, depending on the
      state_dependent_std flag.
    mean_transform: Transform to apply to the calculated means. Uses
      `tanh_squash_to_spec` by default.
    std_transform: Transform to apply to the stddevs.
    state_dependent_std: If true, stddevs will be produced by an MLP from the
      state; otherwise, stddevs will be an independent variable.
    scale_distribution: Whether or not to use a bijector chain to scale
      distributions to match the sample spec. Note that the
      TransformedDistribution does not support certain operations required by
      some agents or policies, such as KL divergence calculations or mode.
    name: A string representing the name of the network.
  """
  if len(tf.nest.flatten(sample_spec)) != 1:
    raise ValueError('Normal Projection network only supports single spec '
                     'samples.')
  self._scale_distribution = scale_distribution
  output_spec = self._output_distribution_spec(sample_spec, name)
  super(NormalProjectionNetwork, self).__init__(
      # We don't need these, but the base class requires them.
      input_tensor_spec=None,
      state_spec=(),
      output_spec=output_spec,
      name=name)

  self._sample_spec = sample_spec
  self._mean_transform = mean_transform
  self._std_transform = std_transform
  self._state_dependent_std = state_dependent_std

  self._means_projection_layer = tf.keras.layers.Dense(
      sample_spec.shape.num_elements(),
      activation=activation_fn,
      kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(
          scale=init_means_output_factor),
      bias_initializer=tf.keras.initializers.Zeros(),
      name='means_projection_layer')

  self._stddev_projection_layer = None
  if self._state_dependent_std:
    self._stddev_projection_layer = tf.keras.layers.Dense(
        sample_spec.shape.num_elements(),
        activation=activation_fn,
        kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(
            scale=init_means_output_factor),
        bias_initializer=tf.keras.initializers.Constant(
            value=std_bias_initializer_value),
        name='stddev_projection_layer')
  else:
    self._bias = bias_layer.BiasLayer(
        bias_initializer=tf.keras.initializers.Constant(
            value=std_bias_initializer_value))
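# A hedged illustration (not part of the original source): the guard at the
# top of the constructor above rejects nested sample specs, so passing a
# tuple of two specs should raise the ValueError. Assumes the installed
# tf_agents exposes this constructor as
# normal_projection_network.NormalProjectionNetwork.
import tensorflow as tf
from tf_agents.networks import normal_projection_network
from tf_agents.specs import tensor_spec

spec = tensor_spec.BoundedTensorSpec((1,), tf.float32, -1.0, 1.0)
try:
  normal_projection_network.NormalProjectionNetwork((spec, spec))
except ValueError:
  pass  # 'Normal Projection network only supports single spec samples.'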
def testBuild(self):
  bias = bias_layer.BiasLayer()
  states = tf.ones((2, 3))
  out = bias(states)
  self.evaluate(tf.compat.v1.global_variables_initializer())
  np.testing.assert_almost_equal([[1.0] * 3] * 2, self.evaluate(out))