def projection_net_factory(action_spec):
    """Create a NormalProjectionNetwork for ``action_spec``.

    The network is configured with no mean transform, a state-dependent
    std, ``sac_agent.std_clip_transform`` as the std transform and
    ``scale_distribution=True``.

    Args:
        action_spec: Spec passed through as the first positional argument
            of ``NormalProjectionNetwork``.

    Returns:
        The constructed ``NormalProjectionNetwork`` instance.
    """
    network_cls = normal_projection_network.NormalProjectionNetwork
    return network_cls(
        action_spec,
        mean_transform=None,
        state_dependent_std=True,
        std_transform=sac_agent.std_clip_transform,
        scale_distribution=True,
    )
def _normal_projection_net(action_spec, init_means_output_factor=0.1):
    """Create a NormalProjectionNetwork with a state-dependent std.

    Args:
        action_spec: Spec passed through as the first positional argument
            of ``NormalProjectionNetwork``.
        init_means_output_factor: Forwarded unchanged to the network
            constructor.

    Returns:
        The constructed ``NormalProjectionNetwork`` instance.
    """
    network_cls = normal_projection_network.NormalProjectionNetwork
    return network_cls(
        action_spec,
        mean_transform=None,
        state_dependent_std=True,
        init_means_output_factor=init_means_output_factor,
        std_transform=sac_agent.std_clip_transform,
        scale_distribution=True,
    )
def _normal_projection_net(action_spec,
                           init_action_stddev=0.35,
                           init_means_output_factor=0.1):
    """Create a NormalProjectionNetwork with a chosen initial std bias.

    Args:
        action_spec: Spec passed through as the first positional argument
            of ``NormalProjectionNetwork``.
        init_action_stddev: Positive number; its softplus inverse,
            ``log(exp(x) - 1)``, is used as ``std_bias_initializer_value``.
        init_means_output_factor: Forwarded unchanged to the network
            constructor.

    Returns:
        The constructed ``NormalProjectionNetwork`` instance.
    """
    # Softplus inverse. ``expm1`` avoids the cancellation in ``exp(x) - 1``
    # for small x, which otherwise loses precision and reaches log(0) once
    # exp(x) rounds to exactly 1.
    std_bias_initializer_value = np.log(np.expm1(init_action_stddev))
    return normal_projection_network.NormalProjectionNetwork(
        action_spec,
        init_means_output_factor=init_means_output_factor,
        std_bias_initializer_value=std_bias_initializer_value)
def normal_projection_net(action_spec,
                          init_action_stddev=0.35,
                          init_means_output_factor=0.1):
    """Create a NormalProjectionNetwork with a state-dependent std.

    Args:
        action_spec: Spec passed through as the first positional argument
            of ``NormalProjectionNetwork``.
        init_action_stddev: Accepted but not used; it is deleted
            immediately.
        init_means_output_factor: Forwarded unchanged to the network
            constructor.

    Returns:
        The constructed ``NormalProjectionNetwork`` instance.
    """
    # The parameter stays in the signature but plays no role here.
    del init_action_stddev

    network_cls = normal_projection_network.NormalProjectionNetwork
    return network_cls(
        action_spec,
        mean_transform=None,
        state_dependent_std=True,
        init_means_output_factor=init_means_output_factor,
        std_transform=sac_agent.std_clip_transform,
    )
def _critic_normal_projection_net(output_spec,
                                  init_stddev=0.35,
                                  init_means_output_factor=0.1):
    """Create an unscaled NormalProjectionNetwork for ``output_spec``.

    Args:
        output_spec: Spec passed through as the first positional argument
            of ``NormalProjectionNetwork``.
        init_stddev: Positive number; its softplus inverse,
            ``log(exp(x) - 1)``, is used as ``std_bias_initializer_value``.
        init_means_output_factor: Forwarded unchanged to the network
            constructor.

    Returns:
        The constructed ``NormalProjectionNetwork`` instance, built with
        ``scale_distribution=False``.
    """
    # Softplus inverse. ``expm1`` avoids the cancellation in ``exp(x) - 1``
    # for small x, which otherwise loses precision and reaches log(0) once
    # exp(x) rounds to exactly 1.
    std_bias_initializer_value = np.log(np.expm1(init_stddev))
    return normal_projection_network.NormalProjectionNetwork(
        output_spec,
        init_means_output_factor=init_means_output_factor,
        std_bias_initializer_value=std_bias_initializer_value,
        scale_distribution=False)
def normal_projection_net(action_spec,
                          init_action_stddev=0.35,
                          init_means_output_factor=0.1,
                          scale_distribution=True):
    """Create a NormalProjectionNetwork with a state-dependent std.

    Args:
        action_spec: Spec passed through as the first positional argument
            of ``NormalProjectionNetwork``.
        init_action_stddev: Accepted but not used; it is deleted
            immediately.
        init_means_output_factor: Forwarded unchanged to the network
            constructor.
        scale_distribution: Forwarded unchanged to the network
            constructor.

    Returns:
        The constructed ``NormalProjectionNetwork`` instance.
    """
    # The parameter stays in the signature but plays no role here.
    del init_action_stddev

    network_cls = normal_projection_network.NormalProjectionNetwork
    return network_cls(
        action_spec,
        state_dependent_std=True,
        init_means_output_factor=init_means_output_factor,
        std_transform=sac_agent.std_clip_transform,
        scale_distribution=scale_distribution,
    )
def normal_projection_net(action_spec,
                          init_action_stddev=0.35,
                          init_means_output_factor=0.1):
    """Create a scaled, state-dependent-std NormalProjectionNetwork.

    Args:
        action_spec: Spec passed through as the first positional argument
            of ``NormalProjectionNetwork``.
        init_action_stddev: Positive number; its softplus inverse,
            ``log(exp(x) - 1)``, is used as ``std_bias_initializer_value``.
        init_means_output_factor: Forwarded unchanged to the network
            constructor.

    Returns:
        The constructed ``NormalProjectionNetwork`` instance.
    """
    # Softplus inverse. ``expm1`` avoids the cancellation in ``exp(x) - 1``
    # for small x, which otherwise loses precision and reaches log(0) once
    # exp(x) rounds to exactly 1.
    # NOTE(review): the bias is a softplus inverse, but the std transform
    # passed below is sac_agent.std_clip_transform -- confirm that this
    # combination yields the intended initial stddev.
    std_bias_initializer_value = np.log(np.expm1(init_action_stddev))
    return normal_projection_network.NormalProjectionNetwork(
        action_spec,
        mean_transform=None,
        state_dependent_std=True,
        init_means_output_factor=init_means_output_factor,
        std_transform=sac_agent.std_clip_transform,
        std_bias_initializer_value=std_bias_initializer_value,
        scale_distribution=True,
    )
def _normal_projection_net(action_spec,
                           init_action_stddev=0.35,
                           init_means_output_factor=0.1,
                           seed_stream_class=tfp.util.SeedStream,
                           seed=None):
    """Create an unscaled NormalProjectionNetwork with seeding options.

    Args:
        action_spec: Spec passed through as the first positional argument
            of ``NormalProjectionNetwork``.
        init_action_stddev: Positive number; its softplus inverse,
            ``log(exp(x) - 1)``, is used as ``std_bias_initializer_value``.
        init_means_output_factor: Forwarded unchanged to the network
            constructor.
        seed_stream_class: Forwarded unchanged to the network constructor.
        seed: Forwarded unchanged to the network constructor.

    Returns:
        The constructed ``NormalProjectionNetwork`` instance, built with
        ``scale_distribution=False``.
    """
    # Softplus inverse. ``expm1`` avoids the cancellation in ``exp(x) - 1``
    # for small x, which otherwise loses precision and reaches log(0) once
    # exp(x) rounds to exactly 1.
    std_bias_initializer_value = np.log(np.expm1(init_action_stddev))
    return normal_projection_network.NormalProjectionNetwork(
        action_spec,
        init_means_output_factor=init_means_output_factor,
        std_bias_initializer_value=std_bias_initializer_value,
        scale_distribution=False,
        seed_stream_class=seed_stream_class,
        seed=seed)
def testTrainableVariables(self):
    """Check the count and shapes of the network's trainable variables."""
    spec = tensor_spec.BoundedTensorSpec([2], tf.float32, 0, 1)
    net = normal_projection_network.NormalProjectionNetwork(spec)

    # Call the network once on a batch before inspecting its variables.
    net(_get_inputs(batch_size=3, num_input_dims=5), outer_rank=1)
    self.evaluate(tf.compat.v1.global_variables_initializer())

    # Dense kernel, dense bias, std bias.
    expected_shapes = [(5, 2), (2,), (2,)]
    self.assertEqual(3, len(net.trainable_variables))
    for index, shape in enumerate(expected_shapes):
        self.assertEqual(shape, net.trainable_variables[index].shape)
def testScaledDistribution(self):
    """Check that samples are unchanged by clipping them to the spec."""
    spec = tensor_spec.BoundedTensorSpec([1], tf.float32, -2, 4)
    net = normal_projection_network.NormalProjectionNetwork(
        spec,
        init_means_output_factor=10,
        state_dependent_std=True,
        scale_distribution=True,
    )

    batch = _get_inputs(batch_size=100, num_input_dims=5)
    distributions, _ = net(batch, outer_rank=1)
    self.evaluate(tf.compat.v1.global_variables_initializer())

    drawn = self.evaluate(distributions.sample())
    clipped = self.evaluate(common.clip_to_spec(drawn, spec))
    # If clipping changes nothing, every sample was already within bounds.
    np.testing.assert_almost_equal(clipped, drawn)
def _critic_normal_projection_net(output_spec,
                                  init_stddev=0.35,
                                  init_means_output_factor=0.1):
    """Create an unscaled, state-dependent-std NormalProjectionNetwork.

    Args:
        output_spec: Spec passed through as the first positional argument
            of ``NormalProjectionNetwork``.
        init_stddev: Accepted but not used; it is deleted immediately.
        init_means_output_factor: Forwarded unchanged to the network
            constructor.

    Returns:
        The constructed ``NormalProjectionNetwork`` instance.
    """
    # No std_bias_initializer_value is derived from init_stddev or passed
    # to the network; the parameter only remains in the signature.
    del init_stddev

    network_cls = normal_projection_network.NormalProjectionNetwork
    return network_cls(
        output_spec,
        init_means_output_factor=init_means_output_factor,
        mean_transform=None,
        std_transform=sac_agent.std_clip_transform,
        state_dependent_std=True,
        scale_distribution=False,
    )
def testBuildStateDepStddev(self):
    """Check the output type and shapes with a state-dependent std."""
    spec = tensor_spec.BoundedTensorSpec([2], tf.float32, 0, 1)
    net = normal_projection_network.NormalProjectionNetwork(
        spec,
        state_dependent_std=True,
        scale_distribution=False,
    )

    batch = _get_inputs(batch_size=3, num_input_dims=5)
    distribution, _ = net(batch, outer_rank=1)
    self.evaluate(tf.compat.v1.global_variables_initializer())

    # The exact type is compared, not merely an isinstance relationship.
    self.assertEqual(tfp.distributions.Normal, type(distribution))

    # Both parameters carry the batch dimension followed by the spec shape.
    for param in (distribution.loc, distribution.scale):
        self.assertAllEqual(param.shape.as_list(),
                            [3] + spec.shape.as_list())
def testBuild(self):
    """Check the output type and mean/stddev shapes of a default build."""
    spec = tensor_spec.BoundedTensorSpec([2], tf.float32, 0, 1)
    net = normal_projection_network.NormalProjectionNetwork(
        spec, scale_distribution=False)

    batch = _get_inputs(batch_size=3, num_input_dims=5)
    distribution, _ = net(batch, outer_rank=1)
    self.evaluate(tf.compat.v1.global_variables_initializer())

    self.assertIsInstance(
        distribution, tfp.distributions.MultivariateNormalDiag)

    # Both statistics carry the batch dimension followed by the spec shape.
    for stat in (distribution.mean(), distribution.stddev()):
        self.assertAllEqual(stat.shape.as_list(),
                            [3] + spec.shape.as_list())