Beispiel #1
0
 def projection_net_factory(action_spec):
     """Build the normal projection network used for `action_spec`.

     The network is created with a state-dependent standard deviation, no
     mean transform, `sac_agent.std_clip_transform` as the std transform and
     `scale_distribution=True`.

     Args:
         action_spec: Spec passed as the first positional argument to
             `NormalProjectionNetwork`.

     Returns:
         The constructed `normal_projection_network.NormalProjectionNetwork`.
     """
     projection_kwargs = dict(
         mean_transform=None,
         state_dependent_std=True,
         std_transform=sac_agent.std_clip_transform,
         scale_distribution=True,
     )
     return normal_projection_network.NormalProjectionNetwork(
         action_spec, **projection_kwargs)
Beispiel #2
0
 def _normal_projection_net(action_spec, init_means_output_factor=0.1):
     """Build a state-dependent-std normal projection network.

     Args:
         action_spec: Spec passed as the first positional argument to
             `NormalProjectionNetwork`.
         init_means_output_factor: Forwarded unchanged to the network's
             `init_means_output_factor` argument.

     Returns:
         The constructed `normal_projection_network.NormalProjectionNetwork`,
         configured with no mean transform, `sac_agent.std_clip_transform` as
         the std transform and `scale_distribution=True`.
     """
     network_cls = normal_projection_network.NormalProjectionNetwork
     network = network_cls(
         action_spec,
         mean_transform=None,
         state_dependent_std=True,
         init_means_output_factor=init_means_output_factor,
         std_transform=sac_agent.std_clip_transform,
         scale_distribution=True,
     )
     return network
def _normal_projection_net(action_spec,
                           init_action_stddev=0.35,
                           init_means_output_factor=0.1):
    """Build a normal projection network with a derived std bias initializer.

    Args:
        action_spec: Spec passed as the first positional argument to
            `NormalProjectionNetwork`.
        init_action_stddev: Positive value whose inverse softplus,
            ``log(exp(x) - 1)``, is used as `std_bias_initializer_value`.
            Zero yields ``-inf`` and negative values yield ``nan``.
        init_means_output_factor: Forwarded unchanged to the network's
            `init_means_output_factor` argument.

    Returns:
        The constructed `normal_projection_network.NormalProjectionNetwork`.
    """
    # Inverse softplus. `expm1` computes exp(x) - 1 without the cancellation
    # error that the explicit subtraction suffers when x is close to zero.
    std_bias_initializer_value = np.log(np.expm1(init_action_stddev))

    return normal_projection_network.NormalProjectionNetwork(
        action_spec,
        init_means_output_factor=init_means_output_factor,
        std_bias_initializer_value=std_bias_initializer_value)
Beispiel #4
0
def normal_projection_net(action_spec,
                          init_action_stddev=0.35,
                          init_means_output_factor=0.1):
    """Build a state-dependent-std normal projection network.

    Args:
        action_spec: Spec passed as the first positional argument to
            `NormalProjectionNetwork`.
        init_action_stddev: Accepted for signature compatibility but not
            used; it is discarded immediately.
        init_means_output_factor: Forwarded unchanged to the network's
            `init_means_output_factor` argument.

    Returns:
        The constructed `normal_projection_network.NormalProjectionNetwork`,
        configured with no mean transform and `sac_agent.std_clip_transform`
        as the std transform.
    """
    del init_action_stddev  # Deliberately unused.

    network_kwargs = {
        'mean_transform': None,
        'state_dependent_std': True,
        'init_means_output_factor': init_means_output_factor,
        'std_transform': sac_agent.std_clip_transform,
    }
    return normal_projection_network.NormalProjectionNetwork(
        action_spec, **network_kwargs)
Beispiel #5
0
def _critic_normal_projection_net(output_spec,
                                  init_stddev=0.35,
                                  init_means_output_factor=0.1):
    """Build an unscaled normal projection network for `output_spec`.

    Args:
        output_spec: Spec passed as the first positional argument to
            `NormalProjectionNetwork`.
        init_stddev: Positive value whose inverse softplus,
            ``log(exp(x) - 1)``, is used as `std_bias_initializer_value`.
            Zero yields ``-inf`` and negative values yield ``nan``.
        init_means_output_factor: Forwarded unchanged to the network's
            `init_means_output_factor` argument.

    Returns:
        The constructed `normal_projection_network.NormalProjectionNetwork`,
        created with `scale_distribution=False`.
    """
    # Inverse softplus. `expm1` computes exp(x) - 1 without the cancellation
    # error that the explicit subtraction suffers when x is close to zero.
    std_bias_initializer_value = np.log(np.expm1(init_stddev))

    return normal_projection_network.NormalProjectionNetwork(
        output_spec,
        init_means_output_factor=init_means_output_factor,
        std_bias_initializer_value=std_bias_initializer_value,
        scale_distribution=False)
Beispiel #6
0
def normal_projection_net(action_spec,
                          init_action_stddev=0.35,
                          init_means_output_factor=0.1,
                          scale_distribution=True):
    """Build a state-dependent-std normal projection network.

    Args:
        action_spec: Spec passed as the first positional argument to
            `NormalProjectionNetwork`.
        init_action_stddev: Accepted for signature compatibility but not
            used; it is discarded immediately.
        init_means_output_factor: Forwarded unchanged to the network's
            `init_means_output_factor` argument.
        scale_distribution: Forwarded unchanged to the network's
            `scale_distribution` argument.

    Returns:
        The constructed `normal_projection_network.NormalProjectionNetwork`,
        using `sac_agent.std_clip_transform` as the std transform.
    """
    del init_action_stddev  # Deliberately unused.

    build_network = normal_projection_network.NormalProjectionNetwork
    return build_network(
        action_spec,
        state_dependent_std=True,
        init_means_output_factor=init_means_output_factor,
        std_transform=sac_agent.std_clip_transform,
        scale_distribution=scale_distribution,
    )
Beispiel #7
0
def normal_projection_net(action_spec,
                          init_action_stddev=0.35,
                          init_means_output_factor=0.1):
  """Build a state-dependent-std normal projection network.

  Args:
    action_spec: Spec passed as the first positional argument to
      `NormalProjectionNetwork`.
    init_action_stddev: Positive value whose inverse softplus,
      ``log(exp(x) - 1)``, is used as `std_bias_initializer_value`. Zero
      yields ``-inf`` and negative values yield ``nan``.
    init_means_output_factor: Forwarded unchanged to the network's
      `init_means_output_factor` argument.

  Returns:
    The constructed `normal_projection_network.NormalProjectionNetwork`,
    configured with no mean transform, `sac_agent.std_clip_transform` as the
    std transform and `scale_distribution=True`.
  """
  # Inverse softplus. `expm1` computes exp(x) - 1 without the cancellation
  # error that the explicit subtraction suffers when x is close to zero.
  # NOTE(review): the bias is an inverse softplus, but the std transform
  # below is `sac_agent.std_clip_transform`; if that transform is not
  # softplus-based the initial stddev will differ from `init_action_stddev`
  # -- confirm which initial value is intended.
  std_bias_initializer_value = np.log(np.expm1(init_action_stddev))
  return normal_projection_network.NormalProjectionNetwork(
      action_spec,
      mean_transform=None,
      state_dependent_std=True,
      init_means_output_factor=init_means_output_factor,
      std_transform=sac_agent.std_clip_transform,
      std_bias_initializer_value=std_bias_initializer_value,
      scale_distribution=True,
  )
def _normal_projection_net(action_spec,
                           init_action_stddev=0.35,
                           init_means_output_factor=0.1,
                           seed_stream_class=tfp.util.SeedStream,
                           seed=None):
    """Build an unscaled, seedable normal projection network.

    Args:
        action_spec: Spec passed as the first positional argument to
            `NormalProjectionNetwork`.
        init_action_stddev: Positive value whose inverse softplus,
            ``log(exp(x) - 1)``, is used as `std_bias_initializer_value`.
            Zero yields ``-inf`` and negative values yield ``nan``.
        init_means_output_factor: Forwarded unchanged to the network's
            `init_means_output_factor` argument.
        seed_stream_class: Forwarded unchanged to the network's
            `seed_stream_class` argument.
        seed: Forwarded unchanged to the network's `seed` argument.

    Returns:
        The constructed `normal_projection_network.NormalProjectionNetwork`,
        created with `scale_distribution=False`.
    """
    # Inverse softplus. `expm1` computes exp(x) - 1 without the cancellation
    # error that the explicit subtraction suffers when x is close to zero.
    std_bias_initializer_value = np.log(np.expm1(init_action_stddev))

    return normal_projection_network.NormalProjectionNetwork(
        action_spec,
        init_means_output_factor=init_means_output_factor,
        std_bias_initializer_value=std_bias_initializer_value,
        scale_distribution=False,
        seed_stream_class=seed_stream_class,
        seed=seed)
Beispiel #9
0
  def testTrainableVariables(self):
    """Checks the number and shapes of the network's trainable variables."""
    spec = tensor_spec.BoundedTensorSpec([2], tf.float32, 0, 1)
    net = normal_projection_network.NormalProjectionNetwork(spec)

    # Run one forward pass, then initialize the global variables.
    net(_get_inputs(batch_size=3, num_input_dims=5), outer_rank=1)
    self.evaluate(tf.compat.v1.global_variables_initializer())

    # Dense kernel, dense bias, std bias.
    expected_shapes = [(5, 2), (2,), (2,)]
    self.assertEqual(len(expected_shapes), len(net.trainable_variables))
    for index, expected_shape in enumerate(expected_shapes):
      self.assertEqual(expected_shape, net.trainable_variables[index].shape)
Beispiel #10
0
  def testScaledDistribution(self):
    """Checks that samples are unchanged by clipping them to the spec."""
    spec = tensor_spec.BoundedTensorSpec([1], tf.float32, -2, 4)
    net = normal_projection_network.NormalProjectionNetwork(
        spec,
        init_means_output_factor=10,
        state_dependent_std=True,
        scale_distribution=True)

    batch = _get_inputs(batch_size=100, num_input_dims=5)
    dist, _ = net(batch, outer_rank=1)
    self.evaluate(tf.compat.v1.global_variables_initializer())

    # If clipping to the spec changes nothing, every sample was already
    # within the spec's bounds.
    drawn = self.evaluate(dist.sample())
    clipped_to_spec = self.evaluate(common.clip_to_spec(drawn, spec))
    np.testing.assert_almost_equal(clipped_to_spec, drawn)
Beispiel #11
0
def _critic_normal_projection_net(output_spec,
                                  init_stddev=0.35,
                                  init_means_output_factor=0.1):
    """Build an unscaled, state-dependent-std normal projection network.

    Args:
        output_spec: Spec passed as the first positional argument to
            `NormalProjectionNetwork`.
        init_stddev: Accepted for signature compatibility but not used; no
            `std_bias_initializer_value` is passed to the network.
        init_means_output_factor: Forwarded unchanged to the network's
            `init_means_output_factor` argument.

    Returns:
        The constructed `normal_projection_network.NormalProjectionNetwork`,
        configured with no mean transform, `sac_agent.std_clip_transform` as
        the std transform and `scale_distribution=False`.
    """
    del init_stddev  # Deliberately unused.

    network_kwargs = dict(
        init_means_output_factor=init_means_output_factor,
        mean_transform=None,
        std_transform=sac_agent.std_clip_transform,
        state_dependent_std=True,
        scale_distribution=False,
    )
    return normal_projection_network.NormalProjectionNetwork(
        output_spec, **network_kwargs)
  def testBuildStateDepStddev(self):
    """Checks distribution type and parameter shapes with state-dependent std."""
    output_spec = tensor_spec.BoundedTensorSpec([2], tf.float32, 0, 1)
    network = normal_projection_network.NormalProjectionNetwork(
        output_spec, state_dependent_std=True, scale_distribution=False)

    inputs = _get_inputs(batch_size=3, num_input_dims=5)

    distribution, _ = network(inputs, outer_rank=1)
    self.evaluate(tf.compat.v1.global_variables_initializer())
    # isinstance-based check: reports the offending object on failure and
    # avoids comparing exact types.
    self.assertIsInstance(distribution, tfp.distributions.Normal)

    means, stds = distribution.loc, distribution.scale

    # Batch dimension of 3 followed by the spec's own shape.
    expected_shape = [3] + output_spec.shape.as_list()
    self.assertAllEqual(means.shape.as_list(), expected_shape)
    self.assertAllEqual(stds.shape.as_list(), expected_shape)
Beispiel #13
0
  def testBuild(self):
    """Checks the distribution type and the shapes of its mean and stddev."""
    spec = tensor_spec.BoundedTensorSpec([2], tf.float32, 0, 1)
    net = normal_projection_network.NormalProjectionNetwork(
        spec, scale_distribution=False)

    dist, _ = net(_get_inputs(batch_size=3, num_input_dims=5), outer_rank=1)
    self.evaluate(tf.compat.v1.global_variables_initializer())
    self.assertIsInstance(dist, tfp.distributions.MultivariateNormalDiag)

    mean_tensor = dist.mean()
    stddev_tensor = dist.stddev()

    # Batch dimension of 3 followed by the spec's own shape.
    expected_shape = [3] + spec.shape.as_list()
    self.assertAllEqual(mean_tensor.shape.as_list(), expected_shape)
    self.assertAllEqual(stddev_tensor.shape.as_list(), expected_shape)