Beispiel #1
0
    def testIncompatibleStructureInputs(self):
        """Checks that structure mismatches are reported as errors.

        Four situations are exercised: a mismatching `input_spec` passed to
        the constructor, a mismatching spec passed to `create_variables`,
        mismatching call inputs, and a mismatching `network_state`.
        """
        spec_mismatch = (
            r'`nested_layers` and `input_spec` do not have matching structures')
        inputs_mismatch = (
            r'`self.nested_layers` and `inputs` do not have matching structures')
        state_mismatch = (
            r'`network_state` and `state_spec` do not have matching structures')

        # Constructor: list of layers vs. dict-shaped `input_spec`.
        with self.assertRaisesRegex(TypeError, spec_mismatch):
            nest_map.NestMap(
                [tf.keras.layers.Dense(8)],
                input_spec={'ick': tf.TensorSpec(8, tf.float32)})

        # `create_variables`: list of layers vs. dict-shaped spec.
        with self.assertRaisesRegex(TypeError, inputs_mismatch):
            list_net = nest_map.NestMap([tf.keras.layers.Dense(8)])
            list_net.create_variables(
                {'ick': tf.TensorSpec((1, ), dtype=tf.float32)})

        # Call: list of layers vs. dict-shaped inputs.
        with self.assertRaisesRegex(TypeError, inputs_mismatch):
            list_net = nest_map.NestMap([tf.keras.layers.Dense(8)])
            list_net({'ick': tf.constant([[1.0]])})

        # Call: the supplied `network_state` does not match the state spec.
        with self.assertRaisesRegex(ValueError, state_mismatch):
            lstm = tf.keras.layers.LSTM(
                8, return_state=True, return_sequences=True)
            recurrent_net = nest_map.NestMap(lstm)
            recurrent_net(tf.ones((1, 2)),
                          network_state=(tf.ones((1, 1)), ()))
Beispiel #2
0
 def testNestedNest(self):
     """Creates variables for a NestMap that wraps another NestMap."""
     # Layer structure: {'a': {'b': Dense}}.
     inner_map = nest_map.NestMap({'b': tf.keras.layers.Dense(8)})
     outer_map = nest_map.NestMap({'a': inner_map})

     # The input spec mirrors the nested layer structure.
     nested_spec = {'a': {'b': tf.TensorSpec((1, ), dtype=tf.float32)}}
     outer_map.create_variables(nested_spec)
Beispiel #3
0
def create_sequential_critic_network(obs_fc_layer_units, action_fc_layer_units,
                                     joint_fc_layer_units):
    """Assemble a critic network as a `sequential.Sequential` stack.

    The stack relabels the indexable input as an observation/action dict,
    runs each entry through its own branch, flattens and concatenates the
    branch outputs, applies the joint network and a single-unit Dense layer,
    and finally reshapes the trailing `[1]` dimension away.

    Args:
        obs_fc_layer_units: Passed to `create_fc_network` for the observation
            branch; a falsy value selects `create_identity_layer()` instead.
        action_fc_layer_units: Same as above, for the action branch.
        joint_fc_layer_units: Same as above, for the joint network.

    Returns:
        A `sequential.Sequential` named 'sequential_critic'.
    """

    def _fc_or_identity(layer_units):
        # A falsy unit spec falls back to `create_identity_layer()`.
        if layer_units:
            return create_fc_network(layer_units)
        return create_identity_layer()

    def split_inputs(inputs):
        # Label the first two entries of `inputs` for the NestMap below.
        observation, action = inputs[0], inputs[1]
        return {'observation': observation, 'action': action}

    # Branch networks are created in observation, action, joint order.
    branch_networks = {
        'observation': _fc_or_identity(obs_fc_layer_units),
        'action': _fc_or_identity(action_fc_layer_units),
    }
    joint_network = _fc_or_identity(joint_fc_layer_units)

    # Final single-unit projection.
    value_layer = tf.keras.layers.Dense(1, kernel_initializer='glorot_uniform')

    layers = [
        tf.keras.layers.Lambda(split_inputs),
        nest_map.NestMap(branch_networks),
        nest_map.NestFlatten(),
        tf.keras.layers.Concatenate(),
        joint_network,
        value_layer,
        inner_reshape.InnerReshape(current_shape=[1], new_shape=[]),
    ]
    return sequential.Sequential(layers, name='sequential_critic')
Beispiel #4
0
    def testPolicySaverCompatibility(self):
        """Runs a NestMap-backed policy and saves it with `PolicySaver`."""
        observation_spec = {
            'a': tf.TensorSpec(4, tf.float32),
            'b': tf.TensorSpec(3, tf.float32),
        }
        time_step_tensor_spec = ts.time_step_spec(observation_spec)

        # One recurrent branch and one feed-forward branch.
        recurrent_layer = tf.keras.layers.LSTM(
            8, return_state=True, return_sequences=True)
        dense_layer = tf.keras.layers.Dense(8)
        net = nest_map.NestMap({'a': recurrent_layer, 'b': dense_layer})
        net.create_variables(observation_spec)
        policy = MyPolicy(time_step_tensor_spec, net)

        # Sample a time step with an outer dimension of 5 and take an action.
        batched_time_step = tensor_spec.sample_spec_nest(
            time_step_tensor_spec, outer_dims=(5, ))
        policy_step = policy.action(batched_time_step)
        self.assertEqual(policy_step.action.shape.as_list(), [5, 8])

        saver = policy_saver.PolicySaver(
            policy, train_step=common.create_variable('train_step'))
        self.initialize_v1_variables()

        export_dir = os.path.join(FLAGS.test_tmpdir, 'nest_map_model')
        with self.cached_session():
            saver.save(export_dir)
Beispiel #5
0
  def create_sequential_actor_net(self,
                                  fc_layer_units,
                                  action_tensor_spec,
                                  seed=None):
    """Helper method for creating the actor network.

    Builds a `sequential.Sequential` made of one tanh-activated Dense layer
    per entry of `fc_layer_units`, a Dense projection to the action size, a
    `NestMap` with separate 'loc' and 'scale' branches, and a final Lambda
    that turns the result into a `MultivariateNormalDiag` distribution.

    As a side effect, `self._seed_stream` is (re)created from `seed`.

    Args:
      fc_layer_units: Iterable of unit counts, one hidden Dense layer each.
      action_tensor_spec: Spec of the action; its `shape.num_elements()` sets
        the width of the means projection, and it is forwarded to
        `tanh_and_scale_to_spec`.
      seed: Seed forwarded to `self.seed_stream_class`.

    Returns:
      The assembled `sequential.Sequential` network.
    """

    self._seed_stream = self.seed_stream_class(
        seed=seed, salt='tf_agents_sequential_layers')

    def _get_seed():
      # Draw the next value from the stream; non-None values are reduced
      # modulo `sys.maxsize`.
      seed = self._seed_stream()
      if seed is not None:
        seed = seed % sys.maxsize
      return seed

    def create_dist(loc_and_scale):
      # NOTE(review): `tanh_and_scale_to_spec` is defined elsewhere;
      # presumably it squashes `loc` into the bounds of the action spec --
      # confirm.
      loc = loc_and_scale['loc']
      loc = tanh_and_scale_to_spec(loc, action_tensor_spec)

      # Softplus keeps the diagonal scale strictly positive.
      scale = loc_and_scale['scale']
      scale = tf.math.softplus(scale)

      return tfp.distributions.MultivariateNormalDiag(
          loc=loc, scale_diag=scale, validate_args=True)

    def means_layers():
      # TODO(b/179510447): align these parameters with Schulman 17.
      return tf.keras.layers.Dense(
          action_tensor_spec.shape.num_elements(),
          kernel_initializer=tf.keras.initializers.VarianceScaling(
              scale=0.1, seed=_get_seed()),
          name='means_projection_layer')

    def std_layers():
      # TODO(b/179510447): align these parameters with Schulman 17.
      # log(exp(0.35) - 1) is the inverse softplus of 0.35, i.e. softplus of
      # this bias value is 0.35.
      std_bias_initializer_value = np.log(np.exp(0.35) - 1)
      return bias_layer.BiasLayer(
          bias_initializer=tf.constant_initializer(
              value=std_bias_initializer_value))

    def no_op_layers():
      # Identity layer for the 'loc' branch.
      return tf.keras.layers.Lambda(lambda x: x)

    # NOTE: `_get_seed()` is evaluated once, here, so every hidden layer built
    # from this partial receives the same Orthogonal initializer instance.
    # This call also happens before the one inside `means_layers()`, which
    # fixes the order in which seeds are drawn from the stream.
    dense = functools.partial(
        tf.keras.layers.Dense,
        activation=tf.nn.tanh,
        kernel_initializer=tf.keras.initializers.Orthogonal(
            seed=_get_seed()))

    return sequential.Sequential(
        [dense(num_units) for num_units in fc_layer_units] +
        [means_layers()] +
        # The 'scale' branch is fed zeros shaped like the means; presumably
        # `BiasLayer` adds its bias to them so the scale does not depend on
        # the input -- confirm against `bias_layer.BiasLayer`.
        [tf.keras.layers.Lambda(
            lambda x: {'loc': x, 'scale': tf.zeros_like(x)})] +
        [nest_map.NestMap({
            'loc': no_op_layers(),
            'scale': std_layers(),
        })] +
        # Create the output distribution from the mean and standard deviation.
        [tf.keras.layers.Lambda(create_dist)])
Beispiel #6
0
def create_sequential_critic_net():
    """Build a critic from module-level pre-layers plus a Dense(81) branch.

    The network expects a `(value_inputs, action_input)` pair: the first
    entry is a dict keyed by "patch", "color" and "motion" that is routed
    through the matching pre-layers, the second goes through the Dense(81)
    branch. Both results are summed, reshaped to `[1, -1]` and projected to
    a single unit.

    Returns:
        The assembled `sequential.Sequential` network.
    """
    pre_layers_by_feature = {
        "patch": patch_pre_layer,
        "color": color_pre_layer,
        "motion": motion_pre_layer,
    }
    action_layer = tf.keras.layers.Dense(81)

    def sum_value_and_action_out(value_and_action_out):
        value_out_dict, action_out = value_and_action_out
        # Join the per-feature outputs along the last axis before summing.
        flat_values = tf.nest.flatten(value_out_dict)
        value_out = tf.concat(flat_values, axis=-1)
        combined = value_out + action_out
        return tf.reshape(combined, [1, -1])

    stacked_layers = [
        nest_map.NestMap((pre_layers_by_feature, action_layer)),
        tf.keras.layers.Lambda(sum_value_and_action_out),
        tf.keras.layers.Dense(1),
    ]
    return sequential.Sequential(stacked_layers)
Beispiel #7
0
def create_critic_network(obs_fc_layer_units, action_fc_layer_units,
                          joint_fc_layer_units):
    """Build the DDPG critic as a `sequential.Sequential` stack.

    Args:
        obs_fc_layer_units: Passed to `create_fc_network` for the observation
            branch; a falsy value selects `create_identity_layer()` instead.
        action_fc_layer_units: Same as above, for the action branch.
        joint_fc_layer_units: Same as above, for the joint network applied to
            the concatenated branch outputs.

    Returns:
        A `sequential.Sequential` ending in a single-unit Dense layer followed
        by an `InnerReshape([1], [])`.
    """
    def _build_branch(layer_units):
        # A falsy unit spec falls back to `create_identity_layer()`.
        if not layer_units:
            return create_identity_layer()
        return create_fc_network(layer_units)

    def split_inputs(inputs):
        # Label the first two entries of `inputs` for the NestMap below.
        return {'observation': inputs[0], 'action': inputs[1]}

    obs_network = _build_branch(obs_fc_layer_units)
    action_network = _build_branch(action_fc_layer_units)
    joint_network = _build_branch(joint_fc_layer_units)

    # Final projection, initialised uniformly in [-0.003, 0.003].
    small_uniform = tf.keras.initializers.RandomUniform(minval=-0.003,
                                                        maxval=0.003)
    value_fc_layer = tf.keras.layers.Dense(1,
                                           activation=None,
                                           kernel_initializer=small_uniform)

    critic_layers = [
        tf.keras.layers.Lambda(split_inputs),
        nest_map.NestMap({
            'observation': obs_network,
            'action': action_network,
        }),
        nest_map.NestFlatten(),
        tf.keras.layers.Concatenate(),
    ]
    critic_layers += [
        joint_network,
        value_fc_layer,
        inner_reshape.InnerReshape([1], []),
    ]
    return sequential.Sequential(critic_layers)
Beispiel #8
0
 def testAllZeroLengthStateSpecsShowAsEmptyState(self):
     """A Sequential wrapping a Dense-only NestMap reports `()` as state spec."""
     dense_layers = {
         'a': tf.keras.layers.Dense(2),
         'b': tf.keras.layers.Dense(3),
     }
     net = sequential_lib.Sequential([nest_map.NestMap(dense_layers)])
     self.assertEqual(net.state_spec, ())
Beispiel #9
0
 def testNestedNestWithNestedState(self):
     """Calls a nested NestMap with explicit state and checks output specs."""

     def _float_spec(size):
         return tf.TensorSpec(dtype=tf.float32, shape=(size, ))

     # Layer structure: (Dense, {'a': {'b': LSTM}}).
     dense = tf.keras.layers.Dense(7)
     lstm = tf.keras.layers.LSTM(8, return_state=True, return_sequences=True)
     net = nest_map.NestMap((dense, {'a': nest_map.NestMap({'b': lstm})}))

     inputs = (tf.ones((1, 2)), {'a': {'b': tf.ones((1, 2))}})
     # TODO(b/177337002): remove the forced tuple wrapping the LSTM
     # state once we make a generic KerasWrapper network and clean up
     # Sequential and NestMap to use that instead of singleton Sequential.
     lstm_state = (tf.ones((1, 8)), tf.ones((1, 8)))
     network_state = ((), {'a': {'b': (lstm_state, )}})
     out, state = net(inputs, network_state=network_state)

     expected_out = (_float_spec(7), {'a': {'b': _float_spec(8)}})
     nest_utils.assert_matching_dtypes_and_inner_shapes(
         out,
         expected_out,
         caller=self,
         tensors_name='out',
         specs_name='out_expected')

     expected_state = ((), {
         'a': {
             'b': ((_float_spec(8), _float_spec(8)), )
         }
     })
     nest_utils.assert_matching_dtypes_and_inner_shapes(
         state,
         expected_state,
         caller=self,
         tensors_name='state',
         specs_name='state_expected')
Beispiel #10
0
  def testCreateAndCall(self):
    """Builds a three-branch NestMap network and calls it with and without state."""

    def _expected_state(leaf):
      # Only the 'inp3' (LSTM) branch carries state: a pair of `leaf` entries.
      return ({'inp1': (), 'inp2': (), 'inp3': (2 * [leaf],)},)

    def _shape_of(tensor):
      return tensor.shape

    dense_branch = tf.keras.layers.Dense(8)
    conv_branch = sequential.Sequential([
        tf.keras.layers.Conv2D(2, 3),
        # Convert 3 inner dimensions to [8] for RNN.
        inner_reshape.InnerReshape([None] * 3, [8]),
    ])
    lstm_branch = tf.keras.layers.LSTM(
        8, return_state=True, return_sequences=True)
    net = sequential.Sequential([
        nest_map.NestMap({
            'inp1': dense_branch,
            'inp2': conv_branch,
            'inp3': lstm_branch,
        }),
        nest_map.NestFlatten(),
        tf.keras.layers.Add(),
    ])
    self.assertEqual(
        net.state_spec,
        _expected_state(tf.TensorSpec(shape=(8,), dtype=tf.float32)))

    input_spec = {
        'inp1': tf.TensorSpec(shape=(3,), dtype=tf.float32),
        'inp2': tf.TensorSpec(shape=(4, 4, 2,), dtype=tf.float32),
        'inp3': tf.TensorSpec(shape=(3,), dtype=tf.float32),
    }
    output_spec = net.create_variables(input_spec)
    self.assertEqual(output_spec, tf.TensorSpec(shape=(8,), dtype=tf.float32))

    inputs = {
        'inp1': tf.ones((8, 10, 3), dtype=tf.float32),
        'inp2': tf.ones((8, 10, 4, 4, 2), dtype=tf.float32),
        'inp3': tf.ones((8, 10, 3), dtype=tf.float32),
    }
    expected_output_shape = tf.TensorShape([8, 10, 8])
    expected_state_shapes = _expected_state(tf.TensorShape([8, 8]))

    # First call: no state supplied.
    output, next_state = net(inputs)
    self.assertEqual(output.shape, expected_output_shape)
    self.assertEqual(
        tf.nest.map_structure(_shape_of, next_state), expected_state_shapes)

    # Test passing in a state.
    output, next_state = net(inputs, next_state)
    self.assertEqual(output.shape, expected_output_shape)
    self.assertEqual(
        tf.nest.map_structure(_shape_of, next_state), expected_state_shapes)
Beispiel #11
0
def create_sequential_actor_network(actor_fc_layers, action_tensor_spec):
    """Build an actor network with one projection per action-spec entry.

    Args:
        actor_fc_layers: Iterable of unit counts; each is passed to the
            module-level `dense` factory to create one hidden layer.
        action_tensor_spec: Nest of action specs. The hidden output is copied
            into this structure, and each spec is wrapped by
            `_TanhNormalProjectionNetworkWrapper`.

    Returns:
        The assembled `sequential.Sequential` network.
    """
    def tile_as_nest(non_nested_output):
        # Place the same tensor at every leaf of the action-spec structure.
        def _same_output(_):
            return non_nested_output

        return tf.nest.map_structure(_same_output, action_tensor_spec)

    layers = [dense(num_units) for num_units in actor_fc_layers]
    layers.append(tf.keras.layers.Lambda(tile_as_nest))

    projection_nest = tf.nest.map_structure(
        _TanhNormalProjectionNetworkWrapper, action_tensor_spec)
    layers.append(nest_map.NestMap(projection_nest))

    return sequential.Sequential(layers)
Beispiel #12
0
def create_sequential_actor_net(fc_layer_units, action_tensor_spec):
    """Build an actor network that outputs a `MultivariateNormalDiag`.

    Args:
        fc_layer_units: Iterable of unit counts, one tanh-activated Dense
            layer each.
        action_tensor_spec: Action spec; its `shape.num_elements()` sets the
            width of both the mean and the standard-deviation projections.

    Returns:
        A `sequential.Sequential` whose final layer creates the distribution.
    """
    action_dims = action_tensor_spec.shape.num_elements()

    def create_dist(loc_and_scale):
        # The first `action_dims` entries of the last axis hold the means,
        # the rest hold the pre-softplus scale.
        loc = loc_and_scale[..., :action_dims]
        raw_scale = loc_and_scale[..., action_dims:]
        return tfp.distributions.MultivariateNormalDiag(
            loc=loc,
            scale_diag=tf.math.softplus(raw_scale),
            validate_args=True)

    dense = functools.partial(
        tf.keras.layers.Dense,
        activation=tf.nn.tanh,
        kernel_initializer=tf.keras.initializers.Orthogonal())

    hidden_layers = [dense(num_units) for num_units in fc_layer_units]

    # Feed the same hidden output to both projection branches.
    duplicate_input = tf.keras.layers.Lambda(lambda x: {'loc': x, 'scale': x})

    # TODO(b/179510447): align these parameters with Schulman 17.
    means_projection = tf.keras.layers.Dense(
        action_dims,
        kernel_initializer=tf.keras.initializers.VarianceScaling(scale=0.1),
        name='means_projection_layer')

    # TODO(b/179510447): align these parameters with Schulman 17.
    std_kernel_initializer_scale = 0.1
    std_bias_initializer_value = np.log(np.exp(0.35) - 1)
    std_projection = tf.keras.layers.Dense(
        action_dims,
        kernel_initializer=tf.keras.initializers.VarianceScaling(
            scale=std_kernel_initializer_scale),
        bias_initializer=tf.keras.initializers.Constant(
            value=std_bias_initializer_value))

    head_layers = [
        duplicate_input,
        nest_map.NestMap({
            'loc': means_projection,
            'scale': std_projection,
        }),
        nest_map.NestFlatten(),
        # Concatenate the mean and standard deviation output to feed into the
        # distribution layer.
        tf.keras.layers.Concatenate(axis=-1),
        # Create the output distribution from the mean and standard deviation.
        tf.keras.layers.Lambda(create_dist),
    ]
    return sequential.Sequential(hidden_layers + head_layers)
Beispiel #13
0
def create_sequential_critic_net(l2_regularization_weight=0.0,
                                 shared_layer=None):
  """Build a critic with constant-initialised value and action branches.

  The network expects a `(value_input, action_input)` pair, applies one
  single-unit Dense layer to each entry, sums the two results and flattens
  the sum to rank 1.

  Args:
    l2_regularization_weight: L2 weight applied to both Dense kernels.
    shared_layer: Optional layer; when truthy, it is chained after the value
      Dense layer inside a `sequential.Sequential`.

  Returns:
    The assembled `sequential.Sequential` network.
  """

  def _constant_dense(kernel_values):
    # Single-unit Dense layer with a constant kernel and a zero bias.
    return tf.keras.layers.Dense(
        1,
        kernel_regularizer=tf.keras.regularizers.l2(l2_regularization_weight),
        kernel_initializer=tf.initializers.constant(kernel_values),
        bias_initializer=tf.initializers.constant([[0]]))

  value_layer = _constant_dense([[0], [1]])
  if shared_layer:
    value_layer = sequential.Sequential([value_layer, shared_layer])

  action_layer = _constant_dense([[1]])

  def sum_value_and_action_out(value_and_action_out):
    value_out, action_out = value_and_action_out
    total = value_out + action_out
    return tf.reshape(total, [-1])

  critic_layers = [
      nest_map.NestMap((value_layer, action_layer)),
      tf.keras.layers.Lambda(sum_value_and_action_out),
  ]
  return sequential.Sequential(critic_layers)