Example #1
def create_sequential_critic_network(obs_fc_layer_units, action_fc_layer_units,
                                     joint_fc_layer_units):
    """Create a sequential critic network."""

    # Split the inputs into observations and actions.
    def split_inputs(inputs):
        return {'observation': inputs[0], 'action': inputs[1]}

    # Create an observation network.
    obs_network = (create_fc_network(obs_fc_layer_units)
                   if obs_fc_layer_units else create_identity_layer())

    # Create an action network.
    action_network = (create_fc_network(action_fc_layer_units)
                      if action_fc_layer_units else create_identity_layer())

    # Create a joint network.
    joint_network = (create_fc_network(joint_fc_layer_units)
                     if joint_fc_layer_units else create_identity_layer())

    # Final layer.
    value_layer = tf.keras.layers.Dense(1, kernel_initializer='glorot_uniform')

    return sequential.Sequential(
        [
            tf.keras.layers.Lambda(split_inputs),
            nest_map.NestMap({
                'observation': obs_network,
                'action': action_network
            }),
            nest_map.NestFlatten(),
            tf.keras.layers.Concatenate(),
            joint_network,
            value_layer,
            inner_reshape.InnerReshape(current_shape=[1], new_shape=[])
        ],
        name='sequential_critic')
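
The examples on this page rely on TF-Agents imports along the lines of "from tf_agents.networks import nest_map, sequential" and "from tf_agents.keras_layers import inner_reshape". They also call create_fc_network and create_identity_layer helpers that are not shown; a minimal sketch of what those helpers might look like (the ReLU activation is an assumption, not taken from the example):

import tensorflow as tf
from tf_agents.keras_layers import inner_reshape
from tf_agents.networks import nest_map
from tf_agents.networks import sequential


def create_fc_network(layer_units):
    # Assumed helper: a stack of fully connected layers, one Dense per entry.
    return sequential.Sequential(
        [tf.keras.layers.Dense(num_units, activation=tf.nn.relu)
         for num_units in layer_units])


def create_identity_layer():
    # Assumed helper: a pass-through used when a sub-network is not configured.
    return tf.keras.layers.Lambda(lambda x: x)
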
Example #2
def create_critic_network(obs_fc_layer_units, action_fc_layer_units,
                          joint_fc_layer_units):
    """Create a critic network for DDPG."""
    def split_inputs(inputs):
        return {'observation': inputs[0], 'action': inputs[1]}

    # Observation, action, and joint sub-networks (identity when not configured).
    obs_network = (create_fc_network(obs_fc_layer_units)
                   if obs_fc_layer_units else create_identity_layer())
    action_network = (create_fc_network(action_fc_layer_units)
                      if action_fc_layer_units else create_identity_layer())
    joint_network = (create_fc_network(joint_fc_layer_units)
                     if joint_fc_layer_units else create_identity_layer())
    # Final Q-value layer with the small uniform initializer typical for DDPG.
    value_fc_layer = tf.keras.layers.Dense(
        1,
        activation=None,
        kernel_initializer=tf.keras.initializers.RandomUniform(
            minval=-0.003, maxval=0.003))

    return sequential.Sequential([
        tf.keras.layers.Lambda(split_inputs),
        nest_map.NestMap({
            'observation': obs_network,
            'action': action_network
        }),
        nest_map.NestFlatten(),
        tf.keras.layers.Concatenate(),
        joint_network,
        value_fc_layer,
        inner_reshape.InnerReshape([1], [])
    ])
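
A hypothetical call of this critic network, assuming the helper sketch above; the layer sizes and tensor shapes below are illustrative, not taken from the example:

critic_net = create_critic_network(
    obs_fc_layer_units=(400,),
    action_fc_layer_units=None,
    joint_fc_layer_units=(300,))

observations = tf.ones((5, 4))  # batch of 5 observations with 4 features each
actions = tf.ones((5, 2))       # batch of 5 two-dimensional actions
q_values, _ = critic_net((observations, actions))
# q_values has shape [5]; InnerReshape([1], []) removes the trailing unit axis.
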
Example #3
  def testCreateAndCall(self):
    net = sequential.Sequential([
        nest_map.NestMap(
            {'inp1': tf.keras.layers.Dense(8),
             'inp2': sequential.Sequential([
                 tf.keras.layers.Conv2D(2, 3),
                 # Convert 3 inner dimensions to [8] for RNN.
                 inner_reshape.InnerReshape([None] * 3, [8]),
             ]),
             'inp3': tf.keras.layers.LSTM(
                 8, return_state=True, return_sequences=True)}),
        nest_map.NestFlatten(),
        tf.keras.layers.Add()])
    self.assertEqual(
        net.state_spec,
        ({
            'inp1': (),
            'inp2': (),
            'inp3': (2 * [tf.TensorSpec(shape=(8,), dtype=tf.float32)],),
        },))
    output_spec = net.create_variables(
        {
            'inp1': tf.TensorSpec(shape=(3,), dtype=tf.float32),
            'inp2': tf.TensorSpec(shape=(4, 4, 2,), dtype=tf.float32),
            'inp3': tf.TensorSpec(shape=(3,), dtype=tf.float32),
        })
    self.assertEqual(output_spec, tf.TensorSpec(shape=(8,), dtype=tf.float32))

    inputs = {
        'inp1': tf.ones((8, 10, 3), dtype=tf.float32),
        'inp2': tf.ones((8, 10, 4, 4, 2), dtype=tf.float32),
        'inp3': tf.ones((8, 10, 3), dtype=tf.float32)
    }
    output, next_state = net(inputs)
    self.assertEqual(output.shape, tf.TensorShape([8, 10, 8]))
    self.assertEqual(
        tf.nest.map_structure(lambda t: t.shape, next_state),
        ({
            'inp1': (),
            'inp2': (),
            'inp3': (2 * [tf.TensorShape([8, 8])],),
        },))

    # Test passing in a state.
    output, next_state = net(inputs, next_state)
    self.assertEqual(output.shape, tf.TensorShape([8, 10, 8]))
    self.assertEqual(
        tf.nest.map_structure(lambda t: t.shape, next_state),
        ({
            'inp1': (),
            'inp2': (),
            'inp3': (2 * [tf.TensorShape([8, 8])],),
        },))
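
A side note on the test above: instead of threading next_state from an earlier call, a correctly structured zero state can be requested from the network. A short sketch reusing the net and inputs defined in the test (only the LSTM branch actually carries state):

state = net.get_initial_state(batch_size=8)
output, next_state = net(inputs, state)
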
Example #4
def create_sequential_actor_net(fc_layer_units, action_tensor_spec):
    """Helper function for creating the actor network."""
    def create_dist(loc_and_scale):
        ndims = action_tensor_spec.shape.num_elements()
        return tfp.distributions.MultivariateNormalDiag(
            loc=loc_and_scale[..., :ndims],
            scale_diag=tf.math.softplus(loc_and_scale[..., ndims:]),
            validate_args=True)

    def means_layers():
        # TODO(b/179510447): align these parameters with Schulman 17.
        return tf.keras.layers.Dense(
            action_tensor_spec.shape.num_elements(),
            kernel_initializer=tf.keras.initializers.VarianceScaling(
                scale=0.1),
            name='means_projection_layer')

    def std_layers():
        # TODO(b/179510447): align these parameters with Schulman 17.
        std_kernel_initializer_scale = 0.1
        std_bias_initializer_value = np.log(np.exp(0.35) - 1)
        return tf.keras.layers.Dense(
            action_tensor_spec.shape.num_elements(),
            kernel_initializer=tf.keras.initializers.VarianceScaling(
                scale=std_kernel_initializer_scale),
            bias_initializer=tf.keras.initializers.Constant(
                value=std_bias_initializer_value))

    dense = functools.partial(
        tf.keras.layers.Dense,
        activation=tf.nn.tanh,
        kernel_initializer=tf.keras.initializers.Orthogonal())

    return sequential.Sequential(
        [dense(num_units) for num_units in fc_layer_units] +
        [tf.keras.layers.Lambda(lambda x: {
            'loc': x,
            'scale': x
        })] +
        [nest_map.NestMap({
            'loc': means_layers(),
            'scale': std_layers()
        })] + [nest_map.NestFlatten()] +
        # Concatenate the mean and standard deviation output to feed into the
        # distribution layer.
        [tf.keras.layers.Concatenate(axis=-1)] +
        # Create the output distribution from the mean and standard deviation.
        [tf.keras.layers.Lambda(create_dist)])
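
A hypothetical call of this actor network (the function itself additionally needs functools, numpy as np, and tensorflow_probability as tfp imported); the action spec and layer sizes below are illustrative assumptions:

import tensorflow as tf
from tf_agents.specs import tensor_spec

action_spec = tensor_spec.BoundedTensorSpec((2,), tf.float32, minimum=-1.0,
                                             maximum=1.0)
actor_net = create_sequential_actor_net(fc_layer_units=(64, 64),
                                         action_tensor_spec=action_spec)

observations = tf.ones((3, 5))   # batch of 3 observations with 5 features each
distribution, _ = actor_net(observations)
actions = distribution.sample()  # MultivariateNormalDiag sample of shape [3, 2]
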
Example #5
  def testNestFlatten(self):
    layer = nest_map.NestFlatten()
    outputs = layer({'a': 1, 'b': 2})
    self.assertEqual(self.evaluate(outputs), [1, 2])
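
The recurring pattern in all of these examples is NestMap followed by NestFlatten: NestMap applies one layer per branch of a nested input, and NestFlatten collapses the resulting nest into a flat list that layers such as Add or Concatenate can consume. A minimal standalone sketch (layer sizes are illustrative):

import tensorflow as tf
from tf_agents.networks import nest_map
from tf_agents.networks import sequential

net = sequential.Sequential([
    nest_map.NestMap({
        'x': tf.keras.layers.Dense(4),
        'y': tf.keras.layers.Dense(4)
    }),
    nest_map.NestFlatten(),         # {'x': ..., 'y': ...} -> [x_out, y_out]
    tf.keras.layers.Concatenate(),  # -> a single tensor of width 8
])
outputs, _ = net({'x': tf.ones((2, 3)), 'y': tf.ones((2, 5))})
# outputs.shape == (2, 8)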