Example #1
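All of the snippets below are TF-Agents code (or user code built on it) and assume roughly the following imports; exact module paths may vary slightly across TF-Agents versions:

import numpy as np
import tensorflow as tf

from tf_agents.networks import encoding_network
from tf_agents.networks import q_network
from tf_agents.specs import tensor_spec

# Some examples additionally assume, e.g.:
# from tf_agents.bandits.specs import utils as bandit_spec_utils
# keras_preprocessing = tf.keras.layers.experimental.preprocessing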
  def testNumericKerasInput(self):
    key = 'feature_key'
    batch_size = 3
    state_dims = 5
    input_shape = (batch_size, state_dims)
    keras_input = tf.keras.Input(shape=[state_dims], dtype=tf.int32, name=key)
    state = {key: tf.ones(input_shape, tf.int32)}
    input_spec = {key: tensor_spec.TensorSpec([state_dims], tf.int32)}

    network = encoding_network.EncodingNetwork(
        input_spec, preprocessing_combiner=tf.keras.Sequential([keras_input]))

    output, _ = network(state)
    self.assertEqual(input_shape, output.shape)
Example #2
    def test_empty_layers(self):
        input_spec = tensor_spec.TensorSpec((2, 3), tf.float32)
        network = encoding_network.EncodingNetwork(input_spec)

        variables = network.variables
        self.assertEqual(0, len(variables))

        # Only one layer to flatten input.
        self.assertEqual(1, len(network.layers))
        config = network.layers[0].get_config()
        self.assertEqual('flatten', config['name'])

        out, _ = network(tf.ones((1, 2, 3)))
        self.assertAllEqual(out, [[1, 1, 1, 1, 1, 1]])
Example #3
def create_feed_forward_common_tower_network(observation_spec, global_layers,
                                             arm_layers, common_layers):
    """Creates a common tower network with feedforward towers.

  Args:
    observation_spec: A nested tensor spec containing the specs for global as
      well as per-arm observations.
    global_layers: Iterable of ints. Specifies the layers of the global tower.
    arm_layers: Iterable of ints. Specifies the layers of the arm tower.
    common_layers: Iterable of ints. Specifies the layers of the common tower.

  Returns:
    A network that takes observations adhering to observation_spec and outputs
    reward estimates for every action.
  """
    global_network = encoding_network.EncodingNetwork(
        input_tensor_spec=observation_spec[
            bandit_spec_utils.GLOBAL_FEATURE_KEY],
        fc_layer_params=global_layers)

    one_dim_per_arm_obs = tensor_spec.TensorSpec(
        shape=observation_spec[bandit_spec_utils.PER_ARM_FEATURE_KEY].shape[1:],
        dtype=tf.float32)
    arm_network = encoding_network.EncodingNetwork(
        input_tensor_spec=one_dim_per_arm_obs, fc_layer_params=arm_layers)
    common_input_dim = global_layers[-1] + arm_layers[-1]
    common_input_spec = tensor_spec.TensorSpec(shape=(common_input_dim, ),
                                               dtype=tf.float32)
    common_network = q_network.QNetwork(
        input_tensor_spec=common_input_spec,
        action_spec=tensor_spec.BoundedTensorSpec(shape=(),
                                                  minimum=0,
                                                  maximum=0,
                                                  dtype=tf.int32),
        fc_layer_params=common_layers)
    return GlobalAndArmCommonTowerNetwork(observation_spec, global_network,
                                          arm_network, common_network)
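A minimal usage sketch, assuming bandit_spec_utils.create_per_arm_observation_spec (which builds the nested global/per-arm observation spec this function expects):

# Hypothetical sizes: 7 global features, 4 per-arm features, 10 arms.
observation_spec = bandit_spec_utils.create_per_arm_observation_spec(7, 4, 10)
network = create_feed_forward_common_tower_network(
    observation_spec,
    global_layers=(16, 8),
    arm_layers=(12, 8),
    common_layers=(8,))
# The common tower consumes global_layers[-1] + arm_layers[-1] = 16 features.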
Example #4
    def test_empty_layers(self):
        input_spec = tensor_spec.TensorSpec((2, 3), tf.float32)
        network = encoding_network.EncodingNetwork(input_spec)

        with self.assertRaises(ValueError):
            network.variables  # pylint: disable=pointless-statement

        # Only one layer to flatten input.
        self.assertLen(network.layers, 1)
        config = network.layers[0].get_config()
        self.assertEqual('flatten', config['name'])

        out, _ = network(tf.ones((1, 2, 3)))
        self.assertAllEqual(out, [[1, 1, 1, 1, 1, 1]])
        self.assertLen(network.variables, 0)
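Note: this appears to be a later revision of the test in Example #2. In newer TF-Agents releases, accessing network.variables before the network has been built raises a ValueError, so the variables assertions moved after the first call.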
Example #5
    def testNumericFeatureColumnInput(self):
        key = 'feature_key'
        batch_size = 3
        state_dims = 5
        input_shape = (batch_size, state_dims)
        column = tf.feature_column.numeric_column(key, [state_dims])
        state = {key: tf.ones(input_shape, tf.int32)}
        input_spec = {key: tensor_spec.TensorSpec([state_dims], tf.int32)}

        dense_features = tf.compat.v2.keras.layers.DenseFeatures([column])
        network = encoding_network.EncodingNetwork(
            input_spec, preprocessing_combiner=dense_features)

        output, _ = network(state)
        self.assertEqual(input_shape, output.shape)
Example #6
    def testCombinedFeatureColumnInput(self):
        columns = {}
        tensors = {}
        specs = {}
        expected_dim = 0

        indicator_key = 'indicator_key'
        vocab_list = [2, 3, 4]
        column1 = tf.feature_column.categorical_column_with_vocabulary_list(
            indicator_key, vocab_list)
        columns[indicator_key] = tf.feature_column.indicator_column(column1)
        state_input = [3, 2, 2, 4, 3]
        tensors[indicator_key] = tf.expand_dims(state_input, -1)
        specs[indicator_key] = tensor_spec.TensorSpec([1], tf.int32)
        expected_dim += len(vocab_list)

        # TODO(b/134950354): Test embedding column for non-eager mode only for now.
        if not tf.executing_eagerly():
            embedding_key = 'embedding_key'
            embedding_dim = 3
            vocab_list = [2, 3, 4]
            column2 = tf.feature_column.categorical_column_with_vocabulary_list(
                embedding_key, vocab_list)
            columns[embedding_key] = tf.feature_column.embedding_column(
                column2, embedding_dim)
            state_input = [3, 2, 2, 4, 3]
            tensors[embedding_key] = tf.expand_dims(state_input, -1)
            specs[embedding_key] = tensor_spec.TensorSpec([1], tf.int32)
            expected_dim += embedding_dim

        numeric_key = 'numeric_key'
        batch_size = 5
        state_dims = 3
        input_shape = (batch_size, state_dims)
        columns[numeric_key] = tf.feature_column.numeric_column(
            numeric_key, [state_dims])
        tensors[numeric_key] = tf.ones(input_shape, tf.int32)
        specs[numeric_key] = tensor_spec.TensorSpec([state_dims], tf.int32)
        expected_dim += state_dims

        dense_features = tf.compat.v2.keras.layers.DenseFeatures(
            columns.values())
        network = encoding_network.EncodingNetwork(
            specs, preprocessing_combiner=dense_features)

        output, _ = network(tensors)
        expected_shape = (batch_size, expected_dim)
        self.assertEqual(expected_shape, output.shape)
Example #7
 def testDropoutFCLayers(self, training):
     batch_size = 3
     num_obs_dims = 5
     obs_spec = tensor_spec.TensorSpec([num_obs_dims], tf.float32)
     network = encoding_network.EncodingNetwork(obs_spec,
                                                fc_layer_params=[20],
                                                dropout_layer_params=[0.5])
     obs = tf.random.uniform([batch_size, num_obs_dims])
     output1, _ = network(obs, training=training)
     output2, _ = network(obs, training=training)
     self.evaluate(tf.compat.v1.global_variables_initializer())
     output1, output2 = self.evaluate([output1, output2])
     if training:
         self.assertGreater(np.linalg.norm(output1 - output2), 0)
     else:
         self.assertAllEqual(output1, output2)
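Here dropout_layer_params=[0.5] interleaves a Dropout(0.5) layer after the single 20-unit dense layer, so the two forward passes differ only when training=True; with training=False, dropout is a no-op and the outputs match exactly.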
Example #8
    def __init__(self,
                 observation_spec,
                 action_spec,
                 preprocessing_layers=None,
                 preprocessing_combiner=None,
                 conv_layer_params=None,
                 fc_layer_params=(75, 40),
                 dropout_layer_params=None,
                 activation_fn=tf.keras.activations.relu,
                 enable_last_layer_zero_initializer=False,
                 name='ActorNetwork'):
        super(ActorNetwork, self).__init__(input_tensor_spec=observation_spec,
                                           state_spec=(),
                                           name=name)

        # For simplicity we will only support a single action float output.
        self._action_spec = action_spec
        flat_action_spec = tf.nest.flatten(action_spec)
        if len(flat_action_spec) > 1:
            raise ValueError(
                'Only a single action is supported by this network')
        self._single_action_spec = flat_action_spec[0]
        if self._single_action_spec.dtype not in [tf.float32, tf.float64]:
            raise ValueError(
                'Only float actions are supported by this network.')

        kernel_initializer = tf.keras.initializers.VarianceScaling(
            scale=1. / 3., mode='fan_in', distribution='uniform')
        self._encoder = encoding_network.EncodingNetwork(
            observation_spec,
            preprocessing_layers=preprocessing_layers,
            preprocessing_combiner=preprocessing_combiner,
            conv_layer_params=conv_layer_params,
            fc_layer_params=fc_layer_params,
            dropout_layer_params=dropout_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=kernel_initializer,
            batch_squash=False)

        initializer = tf.keras.initializers.RandomUniform(minval=-0.003,
                                                          maxval=0.003)

        self._action_projection_layer = tf.keras.layers.Dense(
            flat_action_spec[0].shape.num_elements(),
            activation=tf.keras.activations.tanh,
            kernel_initializer=initializer,
            name='action')
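The constructor above is usually paired with a call method along these lines (a sketch based on TF-Agents' ActorNetwork; scale_to_spec lives in tf_agents.utils.common, imported here as common):

    def call(self, observations, step_type=(), network_state=()):
        # Encode observations, then project to a single flat action vector.
        state, network_state = self._encoder(
            observations, step_type=step_type, network_state=network_state)
        actions = self._action_projection_layer(state)
        # Rescale the tanh output from [-1, 1] into the action spec's bounds.
        actions = common.scale_to_spec(actions, self._single_action_spec)
        return tf.nest.pack_sequence_as(self._action_spec,
                                        [actions]), network_state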
Example #9
    def testIndicatorFeatureColumnInput(self):
        key = 'feature_key'
        vocab_list = [2, 3, 4]
        column = tf.feature_column.categorical_column_with_vocabulary_list(
            key, vocab_list)
        column = tf.feature_column.indicator_column(column)

        state_input = [3, 2, 2, 4, 3]
        state = {key: tf.expand_dims(state_input, -1)}
        input_spec = {key: tensor_spec.TensorSpec([1], tf.int32)}

        dense_features = tf.compat.v2.keras.layers.DenseFeatures([column])
        network = encoding_network.EncodingNetwork(
            input_spec, preprocessing_combiner=dense_features)

        output, _ = network(state)
        expected_shape = (len(state_input), len(vocab_list))
        self.assertEqual(expected_shape, output.shape)
Example #10
 def __init__(
         self,
         observation_spec,
         action_spec,
         preprocessing_layers=None,
         preprocessing_combiner=None,
         conv_layer_params=None,
         fc_layer_params=(75, 40),
         dropout_layer_params=None,
         # enable_last_layer_zero_initializer=False,
         name='ActorNetwork'):
     # call super
     super(CustomActorNetwork,
           self).__init__(input_tensor_spec=observation_spec,
                          state_spec=(),
                          name=name)
     # check action_spec
     self._action_spec = action_spec
     flat_action_spec = tf.nest.flatten(action_spec)
     if len(flat_action_spec) != 1:
         raise ValueError(
             'flattened action_spec should have length 1, but got len={}'.format(
                 len(flat_action_spec)))
     self._single_action_spec = flat_action_spec[0]
     # set up kernel_initializer
     # kernel_initializer = tf.keras.initializers.VarianceScaling(scale=1. / 3., mode='fan_in', distribution='uniform')
     # set up encoder_network
     self._encoder = encoding_network.EncodingNetwork(
         observation_spec,
         preprocessing_layers=preprocessing_layers,
         preprocessing_combiner=preprocessing_combiner,
         conv_layer_params=conv_layer_params,
         fc_layer_params=fc_layer_params,
         dropout_layer_params=dropout_layer_params,
         activation_fn=tf.keras.activations.relu,
         # kernel_initializer=kernel_initializer,
         batch_squash=False)
     # set up action_projection layer
     # initializer = tf.keras.initializers.RandomUniform(minval=-0.003, maxval=0.003)
     self._action_projection_layer = tf.keras.layers.Dense(
         flat_action_spec[0].shape.num_elements(),
         activation=tf.keras.activations.tanh,
         # kernel_initializer=initializer,
         name='action_projection_layer')
Example #11
    def __init__(self,
                 input_tensor_spec,
                 action_spec,
                 preprocessing_layers=None,
                 preprocessing_combiner=None,
                 conv_layer_params=None,
                 fc_layer_params=(75, 40),
                 dropout_layer_params=None,
                 activation_fn=tf.keras.activations.relu,
                 kernel_initializer=None,
                 batch_squash=True,
                 dtype=tf.float32,
                 name='Q4Network'):

        num_actions = action_spec.maximum - action_spec.minimum + 1
        encoder_input_tensor_spec = input_tensor_spec

        encoder = encoding_network.EncodingNetwork(
            encoder_input_tensor_spec,
            preprocessing_layers=preprocessing_layers,
            preprocessing_combiner=preprocessing_combiner,
            conv_layer_params=conv_layer_params,
            fc_layer_params=fc_layer_params,
            dropout_layer_params=dropout_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=kernel_initializer,
            batch_squash=batch_squash,
            dtype=dtype)

        q_value_layer = tf.keras.layers.Dense(
            num_actions * 4,
            activation=None,
            kernel_initializer=tf.compat.v1.initializers.random_uniform(
                minval=-0.03, maxval=0.03),
            bias_initializer=tf.compat.v1.initializers.constant(-0.2),
            dtype=dtype)

        super(Q4Network, self).__init__(input_tensor_spec=input_tensor_spec,
                                        state_spec=(),
                                        name=name)

        self._encoder = encoder
        self._q_value_layer = q_value_layer
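A matching call method would mirror QNetwork.call (a sketch; how the num_actions * 4 outputs are grouped downstream is up to the caller):

    def call(self, observation, step_type=None, network_state=()):
        # Encode the observation, then emit num_actions * 4 values.
        state, network_state = self._encoder(
            observation, step_type=step_type, network_state=network_state)
        q_values = self._q_value_layer(state)
        return q_values, network_state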
Example #12
    def test_dict_spec_and_pre_processing(self):
        input_spec = {
            'a': tensor_spec.TensorSpec((32, 32, 3), tf.float32),
            'b': tensor_spec.TensorSpec((32, 32, 3), tf.float32)
        }
        network = encoding_network.EncodingNetwork(
            input_spec,
            preprocessing_layers={
                'a': tf.keras.layers.Flatten(),
                'b': tf.keras.layers.Flatten()
            },
            fc_layer_params=(),
            preprocessing_combiner=tf.keras.layers.Concatenate(axis=-1),
            activation_fn=tf.keras.activations.tanh,
        )

        sample_input = tensor_spec.sample_spec_nest(input_spec)
        output, _ = network(sample_input)
        # 6144 is the shape from a concat of flat (32, 32, 3) x2.
        self.assertEqual((6144, ), output.shape)
Example #13
    def __init__(self,
                 observation_spec,
                 action_spec,
                 preprocessing_layers=None,
                 preprocessing_combiner=None,
                 conv_layer_params=None,
                 fc_layer_params=(64, 64),
                 dropout_layer_params=None,
                 activation_fn=tf.keras.activations.relu,
                 enable_last_layer_zero_initializer=False,
                 name='CustomActorNetwork'):
        super().__init__(input_tensor_spec=observation_spec,
                         state_spec=(),
                         name=name)

        self._action_spec = action_spec
        flat_action_spec = tf.nest.flatten(action_spec)
        self._single_action_spec = flat_action_spec[0]
        kernel_initializer = tf.compat.v1.keras.initializers.glorot_uniform()

        # kernel_initializer=tf.keras.initializers.VarianceScaling(
        # scale=1./3., mode='fan_in', distribution='uniform')

        self._encoder = encoding_network.EncodingNetwork(
            observation_spec,
            preprocessing_layers=preprocessing_layers,
            preprocessing_combiner=preprocessing_combiner,
            conv_layer_params=conv_layer_params,
            fc_layer_params=fc_layer_params,
            dropout_layer_params=dropout_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=kernel_initializer,
            batch_squash=False)

        initializer = tf.keras.initializers.RandomUniform(-0.001, 0.001)

        self._action_projection_layer = tf.keras.layers.Dense(
            flat_action_spec[0].shape.num_elements(),
            activation=tf.keras.activations.tanh,
            kernel_initializer=initializer,
            name='action')
Example #14
    def test_non_preprocessing_layers_2d(self):
        input_spec = tensor_spec.TensorSpec((32, 32, 3), tf.float32)
        network = encoding_network.EncodingNetwork(
            input_spec,
            conv_layer_params=((16, 2, 1), (15, 2, 1)),
            fc_layer_params=(10, 5, 2),
            activation_fn=tf.keras.activations.tanh,
        )

        network.create_variables()

        variables = network.variables
        self.assertLen(variables, 10)
        self.assertLen(network.layers, 6)

        # Validate first conv layer.
        config = network.layers[0].get_config()
        self.assertEqual('tanh', config['activation'])
        self.assertEqual((2, 2), config['kernel_size'])
        self.assertEqual(16, config['filters'])
        self.assertEqual((1, 1), config['strides'])
        self.assertTrue(config['trainable'])

        # Validate second conv layer.
        config = network.layers[1].get_config()
        self.assertEqual('tanh', config['activation'])
        self.assertEqual((2, 2), config['kernel_size'])
        self.assertEqual(15, config['filters'])
        self.assertEqual((1, 1), config['strides'])
        self.assertTrue(config['trainable'])

        # Validate flatten layer.
        config = network.layers[2].get_config()
        self.assertEqual('flatten', config['name'])

        # Validate dense layers.
        self.assertEqual(10, network.layers[3].get_config()['units'])
        self.assertEqual(5, network.layers[4].get_config()['units'])
        self.assertEqual(2, network.layers[5].get_config()['units'])
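The 10 variables are a kernel and a bias for each of the two conv layers and the three dense layers (2 * 2 + 3 * 2 = 10).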
Example #15
 def test_layers_buildable(self):
     input_spec = {
         'a': tensor_spec.TensorSpec((32, 32, 3), tf.float32),
         'b': tensor_spec.TensorSpec((32, 32, 3), tf.float32)
     }
     network = encoding_network.EncodingNetwork(
         input_spec,
         preprocessing_layers={
             'a':
             tf.keras.Sequential([
                 tf.keras.layers.Dense(4, activation='tanh'),
                 tf.keras.layers.Flatten()
             ]),
             'b':
             tf.keras.layers.Flatten()
         },
         fc_layer_params=(),
         preprocessing_combiner=tf.keras.layers.Concatenate(axis=-1),
         activation_fn=tf.keras.activations.tanh,
     )
     network.create_variables()
     self.assertNotEmpty(network.variables)
Example #16
    def __init__(self,
                 observation_spec,
                 action_spec,
                 preprocessing_layers=None,
                 preprocessing_combiner=None,
                 conv_layer_params=None,
                 fc_layer_params=(75, 40),
                 dropout_layer_params=None,
                 activation_fn=tf.keras.activations.relu,
                 enable_last_layer_zero_initializer=False,
                 name='ActorNetwork'):
        super().__init__(input_tensor_spec=observation_spec,
                         state_spec=(),
                         name=name)
        self._action_spec = action_spec
        flat_action_spec = tf.nest.flatten(action_spec)
        self._single_action_spec = flat_action_spec[0]

        kernel_initializer = tf.keras.initializers.VarianceScaling(
            scale=1.0 / 3.0, mode='fan_in', distribution='uniform')
        self._encoder = \
            encoding_network.EncodingNetwork(observation_spec,
                                             preprocessing_layers=preprocessing_layers,
                                             preprocessing_combiner=preprocessing_combiner,
                                             conv_layer_params=conv_layer_params,
                                             fc_layer_params=fc_layer_params,
                                             dropout_layer_params=dropout_layer_params,
                                             activation_fn=activation_fn,
                                             kernel_initializer=kernel_initializer,
                                             batch_squash=False)

        initializer = tf.keras.initializers.RandomUniform(minval=-0.003,
                                                          maxval=0.003)
        # Note: the action dimension (9) is hardcoded for this particular
        # environment, rather than derived from
        # flat_action_spec[0].shape.num_elements() as in the other examples.
        self._action_projection_layer = tf.keras.layers.Dense(
            9,
            activation=tf.keras.activations.tanh,
            kernel_initializer=initializer,
            name='action')
Example #17
  def testKerasIntegerLookup(self):
    if not tf.executing_eagerly():
      self.skipTest('This test is TF2 only.')

    key = 'feature_key'
    vocab_list = [2, 3, 4]

    keras_input = tf.keras.Input(shape=(1,), name=key, dtype=tf.dtypes.int32)
    id_input = keras_preprocessing.IntegerLookup(vocabulary=vocab_list)
    encoded_input = keras_preprocessing.CategoryEncoding(
        max_tokens=len(vocab_list))

    state_input = [3, 2, 2, 4, 3]
    state = {key: tf.expand_dims(state_input, -1)}
    input_spec = {key: tensor_spec.TensorSpec([1], tf.int32)}

    network = encoding_network.EncodingNetwork(
        input_spec,
        preprocessing_combiner=tf.keras.Sequential(
            [keras_input, id_input, encoded_input]))

    output, _ = network(state)
    expected_shape = (len(state_input), len(vocab_list))
    self.assertEqual(expected_shape, output.shape)
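Note: the Keras preprocessing-layer argument names changed across TF releases (e.g. CategoryEncoding's max_tokens was later renamed num_tokens), so this snippet is tied to the TF version it was written against.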
Example #18
def create_feed_forward_common_tower_network(
    observation_spec: types.NestedTensorSpec,
    global_layers: Sequence[int],
    arm_layers: Sequence[int],
    common_layers: Sequence[int],
    output_dim: int = 1,
    global_preprocessing_combiner: Optional[Callable[..., types.Tensor]] = None,
    arm_preprocessing_combiner: Optional[Callable[..., types.Tensor]] = None,
    activation_fn: Callable[[types.Tensor],
                            types.Tensor] = tf.keras.activations.relu
) -> types.Network:
  """Creates a common tower network with feedforward towers.

  The network produced by this function can be used either in
  `GreedyRewardPredictionPolicy`, or `NeuralLinUCBPolicy`.
  In the former case, the network must have `output_dim=1`; it will be an
  instance of `QNetwork` and will be used in the policy as a reward prediction
  network.
  In the latter case, the network will be an encoding network with its output
  consumed by a reward layer or a LinUCB method. The specified `output_dim` will
  be the encoding dimension.

  Args:
    observation_spec: A nested tensor spec containing the specs for global as
      well as per-arm observations.
    global_layers: Iterable of ints. Specifies the layers of the global tower.
    arm_layers: Iterable of ints. Specifies the layers of the arm tower.
    common_layers: Iterable of ints. Specifies the layers of the common tower.
    output_dim: The output dimension of the network. If 1, the common tower will
      be a QNetwork. Otherwise, the common tower will be an encoding network
      with the specified output dimension.
    global_preprocessing_combiner: Preprocessing combiner for global features.
    arm_preprocessing_combiner: Preprocessing combiner for the arm features.
    activation_fn: A keras activation, specifying the activation function used
      in all layers. Defaults to relu.

  Returns:
    A network that takes observations adhering to observation_spec and outputs
    reward estimates for every action.
  """
  obs_spec_no_num_actions = _remove_num_actions_dim_from_spec(observation_spec)
  global_network = encoding_network.EncodingNetwork(
      input_tensor_spec=obs_spec_no_num_actions[
          bandit_spec_utils.GLOBAL_FEATURE_KEY],
      fc_layer_params=global_layers,
      activation_fn=activation_fn,
      preprocessing_combiner=global_preprocessing_combiner)

  arm_network = encoding_network.EncodingNetwork(
      input_tensor_spec=obs_spec_no_num_actions[
          bandit_spec_utils.PER_ARM_FEATURE_KEY],
      fc_layer_params=arm_layers,
      activation_fn=activation_fn,
      preprocessing_combiner=arm_preprocessing_combiner)
  common_input_dim = global_layers[-1] + arm_layers[-1]
  common_input_spec = tensor_spec.TensorSpec(
      shape=(common_input_dim,), dtype=tf.float32)
  if output_dim == 1:
    common_network = q_network.QNetwork(
        input_tensor_spec=common_input_spec,
        action_spec=tensor_spec.BoundedTensorSpec(
            shape=(), minimum=0, maximum=0, dtype=tf.int32),
        fc_layer_params=common_layers,
        activation_fn=activation_fn)
  else:
    common_network = encoding_network.EncodingNetwork(
        input_tensor_spec=common_input_spec,
        fc_layer_params=list(common_layers) + [output_dim],
        activation_fn=activation_fn)
  return GlobalAndArmCommonTowerNetwork(obs_spec_no_num_actions, global_network,
                                        arm_network, common_network)
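For the NeuralLinUCBPolicy case described above, one would pass output_dim > 1, e.g. (a sketch reusing the observation_spec from the earlier example):

# Hypothetical: a 20-dimensional encoding consumed by a reward layer or LinUCB.
network = create_feed_forward_common_tower_network(
    observation_spec,
    global_layers=(16, 8),
    arm_layers=(12, 8),
    common_layers=(8,),
    output_dim=20)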
Example #19
  def __init__(
      self,
      input_tensor_spec,
      preprocessing_layers=None,
      preprocessing_combiner=None,
      conv_layer_params=None,
      input_fc_layer_params=(75, 40),
      lstm_size=(40,),
      output_fc_layer_params=(75, 40),
      activation_fn=tf.keras.activations.relu,
      dtype=tf.float32,
      name='LSTMEncodingNetwork',
  ):
    """Creates an instance of `LSTMEncodingNetwork`.

    Input preprocessing is possible via `preprocessing_layers` and
    `preprocessing_combiner` Layers.  If the `preprocessing_layers` nest is
    shallower than `input_tensor_spec`, then the layers will get the subnests.
    For example, if:

    ```python
    input_tensor_spec = ([TensorSpec(3)] * 2, [TensorSpec(3)] * 5)
    preprocessing_layers = (Layer1(), Layer2())
    ```

    then preprocessing will call:

    ```python
    preprocessed = [preprocessing_layers[0](observations[0]),
                    preprocessing_layers[1](observations[1])]
    ```

    However if

    ```python
    preprocessing_layers = ([Layer1() for _ in range(2)],
                            [Layer2() for _ in range(5)])
    ```

    then preprocessing will call:
    ```python
    preprocessed = [
      layer(obs) for layer, obs in zip(flatten(preprocessing_layers),
                                       flatten(observations))
    ]
    ```

    Args:
      input_tensor_spec: A nest of `tensor_spec.TensorSpec` representing the
        observations.
      preprocessing_layers: (Optional.) A nest of `tf.keras.layers.Layer`
        representing preprocessing for the different observations.
        All of these layers must not be already built.
      preprocessing_combiner: (Optional.) A keras layer that takes a flat list
        of tensors and combines them.  Good options include
        `tf.keras.layers.Add` and `tf.keras.layers.Concatenate(axis=-1)`.
        This layer must not be already built.
      conv_layer_params: Optional list of convolution layers parameters, where
        each item is a length-three tuple indicating (filters, kernel_size,
        stride).
      input_fc_layer_params: Optional list of fully connected parameters, where
        each item is the number of units in the layer. These feed into the
        recurrent layer.
      lstm_size: An iterable of ints specifying the LSTM cell sizes to use.
      output_fc_layer_params: Optional list of fully connected parameters, where
        each item is the number of units in the layer. These are applied on top
        of the recurrent layer.
      activation_fn: Activation function, e.g. tf.keras.activations.relu.
      dtype: The dtype to use by the convolution, LSTM, and fully connected
        layers.
      name: A string representing the name of the network.

    Raises:
      ValueError: If any of `preprocessing_layers` is already built.
      ValueError: If `preprocessing_combiner` is already built.
    """
    kernel_initializer = tf.compat.v1.variance_scaling_initializer(
        scale=2.0, mode='fan_in', distribution='truncated_normal')

    input_encoder = encoding_network.EncodingNetwork(
        input_tensor_spec,
        preprocessing_layers=preprocessing_layers,
        preprocessing_combiner=preprocessing_combiner,
        conv_layer_params=conv_layer_params,
        fc_layer_params=input_fc_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=kernel_initializer,
        dtype=dtype)

    # Create RNN cell
    if len(lstm_size) == 1:
      cell = tf.keras.layers.LSTMCell(
          lstm_size[0],
          dtype=dtype,
          implementation=KERAS_LSTM_FUSED_IMPLEMENTATION)
    else:
      cell = tf.keras.layers.StackedRNNCells([
          tf.keras.layers.LSTMCell(  # pylint: disable=g-complex-comprehension
              size,
              dtype=dtype,
              implementation=KERAS_LSTM_FUSED_IMPLEMENTATION)
          for size in lstm_size
      ])

    output_encoder = []
    if output_fc_layer_params:
      output_encoder = [
          tf.keras.layers.Dense(
              num_units,
              activation=activation_fn,
              kernel_initializer=kernel_initializer,
              dtype=dtype,
              name='/'.join([name, 'dense']))
          for num_units in output_fc_layer_params
      ]

    counter = [-1]
    def create_spec(size):
      counter[0] += 1
      return tensor_spec.TensorSpec(
          size, dtype=dtype, name='network_state_%d' % counter[0])
    state_spec = tf.nest.map_structure(create_spec, cell.state_size)

    super(LSTMEncodingNetwork, self).__init__(
        input_tensor_spec=input_tensor_spec,
        state_spec=state_spec,
        name=name)

    self._conv_layer_params = conv_layer_params
    self._input_encoder = input_encoder
    self._dynamic_unroll = dynamic_unroll_layer.DynamicUnroll(cell)
    self._output_encoder = output_encoder
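A minimal instantiation sketch (assuming this class is the one exposed as tf_agents.networks.lstm_encoding_network.LSTMEncodingNetwork):

input_tensor_spec = (tensor_spec.TensorSpec([3], tf.float32),
                     tensor_spec.TensorSpec([5], tf.float32))
net = LSTMEncodingNetwork(
    input_tensor_spec,
    # One (unbuilt) preprocessing layer per observation in the nest.
    preprocessing_layers=(tf.keras.layers.Dense(4), tf.keras.layers.Dense(4)),
    preprocessing_combiner=tf.keras.layers.Concatenate(axis=-1),
    input_fc_layer_params=(32,),
    lstm_size=(16,))
# Calling net(...) returns (output, next_network_state); state_spec above
# defines the LSTM state entries.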
Example #20
    def __init__(
            self,
            input_tensor_spec,
            preprocessing_combiner=None,
            joint_fc_layer_params=None,
            joint_dropout_layer_params=None,
            kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(
                scale=1. / 3., mode='fan_in', distribution='uniform'),
            activation_fn=tf.nn.relu,
            name='CriticNetwork'):
        """Creates an instance of `CriticNetwork`.

    Args:
      input_tensor_spec: A tuple of (observation, action) each a nest of
        `tensor_spec.TensorSpec` representing the inputs.
      preprocessing_combiner: Combiner layer for the observation and action
        inputs.
      joint_fc_layer_params: Optional list of fully connected parameters after
        merging observations and actions, where each item is the number of
        units in the layer.
      joint_dropout_layer_params: Optional list of dropout layer parameters;
        each item is the fraction of input units to drop, or a dictionary of
        parameters according to the keras.Dropout documentation. The
        additional parameter `permanent`, if set to True, applies dropout at
        inference time as well, for approximate Bayesian inference. The
        dropout layers are interleaved with the fully connected layers; there
        is a dropout layer after each fully connected layer, except where the
        entry in the list is None. This list must have the same length as
        joint_fc_layer_params, or be None.
      kernel_initializer: Initializer to use for the kernels of the conv and
        dense layers. If none is provided, a default glorot_uniform
        initializer is used.
      activation_fn: Activation function, e.g. tf.nn.relu or tf.nn.leaky_relu.
      name: A string representing the name of the network.

    Raises:
      ValueError: If `observation_spec` or `action_spec` contains more than one
        observation.
    """
        observation_spec, action_spec = input_tensor_spec

        if (len(tf.nest.flatten(observation_spec)) > 1
                and preprocessing_combiner is None):
            raise ValueError(
                'Only a single observation is supported by this network')

        flat_action_spec = tf.nest.flatten(action_spec)
        if len(flat_action_spec) > 1:
            raise ValueError(
                'Only a single action is supported by this network')
        self._single_action_spec = flat_action_spec[0]

        preprocessing_layers = None
        # combiner assumes a single batch dimension, without time

        super(CriticNetwork,
              self).__init__(input_tensor_spec=input_tensor_spec,
                             state_spec=(),
                             name=name)

        self._encoder = encoding_network.EncodingNetwork(
            input_tensor_spec,
            preprocessing_layers=preprocessing_layers,
            preprocessing_combiner=preprocessing_combiner,
            fc_layer_params=joint_fc_layer_params,
            dropout_layer_params=joint_dropout_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=kernel_initializer,
            batch_squash=False)
        self._value_layer = tf.keras.layers.Dense(
            1,
            activation=None,
            kernel_initializer=tf.keras.initializers.RandomUniform(
                minval=-0.003, maxval=0.003),
            name='value')
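The corresponding call method (a sketch; note that input_tensor_spec here is the whole (observation, action) tuple, so the encoder's combiner merges both):

    def call(self, inputs, step_type=(), network_state=(), training=False):
        # inputs is the (observation, action) tuple; the combiner merges it.
        state, network_state = self._encoder(
            inputs, step_type=step_type, network_state=network_state,
            training=training)
        value = self._value_layer(state)
        # Squeeze the value output to shape [batch_size].
        return tf.reshape(value, [-1]), network_state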
Example #21
    def testCombinedKerasPreprocessingLayers(self):
        if not tf.executing_eagerly():
            self.skipTest('This test is TF2 only.')

        inputs = {}
        features = {}
        tensors = {}
        specs = {}
        expected_dim = 0

        indicator_key = 'indicator_key'
        vocab_list = [2, 3, 4]
        inputs[indicator_key] = tf.keras.Input(shape=(1, ),
                                               dtype=tf.dtypes.int32,
                                               name=indicator_key)
        features[indicator_key] = keras_preprocessing.IntegerLookup(
            vocabulary=vocab_list, num_oov_indices=0,
            output_mode='multi_hot')(inputs[indicator_key])
        state_input = [3, 2, 2, 4, 3]
        tensors[indicator_key] = tf.expand_dims(state_input, -1)
        specs[indicator_key] = tensor_spec.TensorSpec([1], tf.int32)
        expected_dim += len(vocab_list)

        embedding_key = 'embedding_key'
        embedding_dim = 3
        vocab_list = [2, 3, 4]
        inputs[embedding_key] = tf.keras.Input(shape=(1, ),
                                               dtype=tf.dtypes.int32,
                                               name=embedding_key)
        id_input = keras_preprocessing.IntegerLookup(vocabulary=vocab_list,
                                                     num_oov_indices=0)(
                                                         inputs[embedding_key])
        embedding_input = tf.keras.layers.Embedding(
            input_dim=len(vocab_list), output_dim=embedding_dim)(id_input)
        features[embedding_key] = tf.reduce_sum(embedding_input, axis=-2)
        state_input = [3, 2, 2, 4, 3]
        tensors[embedding_key] = tf.expand_dims(state_input, -1)
        specs[embedding_key] = tensor_spec.TensorSpec([1], tf.int32)
        expected_dim += embedding_dim

        numeric_key = 'numeric_key'
        batch_size = 5
        state_dims = 3
        input_shape = (batch_size, state_dims)
        inputs[numeric_key] = tf.keras.Input(shape=[state_dims],
                                             dtype=tf.float32,
                                             name=numeric_key)
        features[numeric_key] = inputs[numeric_key]
        tensors[numeric_key] = tf.ones(input_shape, tf.float32)
        specs[numeric_key] = tensor_spec.TensorSpec([state_dims], tf.float32)
        expected_dim += state_dims
        # Concatenate expects a list, not a dict view.
        features = tf.keras.layers.concatenate(
            list(features.values()), axis=-1)

        # TODO(b/170645185): Replace Model with FunctionalPreprocessingStage.
        network = encoding_network.EncodingNetwork(
            specs,
            preprocessing_combiner=tf.keras.Model(inputs=inputs,
                                                  outputs=features))
        output, _ = network(tensors)
        expected_shape = (batch_size, expected_dim)
        self.assertEqual(expected_shape, output.shape)
Example #22
    def __init__(self,
                 input_tensor_spec,
                 action_spec,
                 preprocessing_layers=None,
                 preprocessing_combiner=None,
                 conv_layer_params=None,
                 fc_layer_params=None,
                 dropout_layer_params=None,
                 a_fc_layer_params=None,
                 a_weight_decay_params=None,
                 a_dropout_layer_params=None,
                 v_fc_layer_params=None,
                 v_weight_decay_params=None,
                 v_dropout_layer_params=None,
                 activation_fn=tf.keras.activations.relu,
                 av_combine_fn=None,
                 kernel_initializer=None,
                 batch_squash=True,
                 dtype=tf.float32,
                 name='DuelQNetwork'):
        """Creates an instance of `DuelQNetwork`.

    Args:
      input_tensor_spec: A nest of `tensor_spec.TensorSpec` representing the
        input observations.
      action_spec: A nest of `tensor_spec.BoundedTensorSpec` representing the
        actions.
      preprocessing_layers: (Optional.) A nest of `tf.keras.layers.Layer`
        representing preprocessing for the different observations.
        All of these layers must not be already built. For more details see
        the documentation of `networks.EncodingNetwork`.
      preprocessing_combiner: (Optional.) A keras layer that takes a flat list
        of tensors and combines them. Good options include
        `tf.keras.layers.Add` and `tf.keras.layers.Concatenate(axis=-1)`.
        This layer must not be already built. For more details see
        the documentation of `networks.EncodingNetwork`.
      conv_layer_params: Optional list of convolution layers parameters, where
        each item is a length-three tuple indicating (filters, kernel_size,
        stride), used in shared encoder.
      fc_layer_params: Optional list of fully_connected parameters, where each
        item is the number of units in the layer, used in shared encoder
      *_fc_layer_params: Optional list of fully connected parameters, where
        each item is the number of units in the layer, corresponding to each
        branch: a_fc_layer_params for the advantage branch, v_fc_layer_params
        for the state-value branch.
      *_weight_decay_params: Optional list of L2 weight decay parameters, where
        each item is the L2-regularization strength applied to the
        corresponding fully connected layer. The weight decay parameters are
        interleaved with the fully connected layers, except if the list is
        None. a_weight_decay_params (same length as a_fc_layer_params) applies
        to the advantage branch; v_weight_decay_params (same length as
        v_fc_layer_params) applies to the state-value branch.
      *_dropout_layer_params: Optional list of dropout layer parameters, where
        each item is the fraction of input units to drop. The dropout layers
        are interleaved with the fully connected layers; there is a dropout
        layer after each fully connected layer, except where the entry in the
        list is None. a_dropout_layer_params (same length as a_fc_layer_params)
        applies to the advantage branch; v_dropout_layer_params (same length
        as v_fc_layer_params) applies to the state-value branch.
      activation_fn: Activation function, e.g. tf.keras.activations.relu.
      av_combine_fn: Function that produces the q-value from the advantage and
        state value.
      kernel_initializer: Initializer to use for the kernels of the conv and
        dense layers. If none is provided, a default
        variance_scaling_initializer is used.
      batch_squash: If True, the outer ranks of the observation are squashed
        into the batch dimension. This allows encoding networks to be used
        with observations of shape [B x T x ...].
      dtype: The dtype to use by the convolution and fully connected layers.
      name: A string representing the name of the network.

    Raises:
      ValueError: If `input_tensor_spec` contains more than one observation. Or
        if `action_spec` contains more than one action.
    """
        q_network.validate_specs(action_spec, input_tensor_spec)
        action_spec = tf.nest.flatten(action_spec)[0]
        num_actions = action_spec.maximum - action_spec.minimum + 1
        encoder_input_tensor_spec = input_tensor_spec

        # Shared encoder to convert observation to shared state tensor
        # which is fed to advantage branch and state branch
        encoder = encoding_network.EncodingNetwork(
            encoder_input_tensor_spec,
            preprocessing_layers=preprocessing_layers,
            preprocessing_combiner=preprocessing_combiner,
            conv_layer_params=conv_layer_params,
            fc_layer_params=fc_layer_params,
            dropout_layer_params=dropout_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=kernel_initializer,
            batch_squash=batch_squash,
            dtype=dtype,
            name='shared_encoder')

        # Advantage branch

        # Advantage intermediate fully connected layers
        a_encode_layers = self.create_branch_layers(a_fc_layer_params,
                                                    a_dropout_layer_params,
                                                    a_weight_decay_params,
                                                    activation_fn,
                                                    kernel_initializer,
                                                    dtype,
                                                    name='a_branch_layer')

        # Advantage dense layer to project to action space
        a_value_layer = tf.keras.layers.Dense(
            num_actions,
            activation=None,
            kernel_initializer=tf.compat.v1.initializers.random_uniform(
                minval=-0.03, maxval=0.03),
            bias_initializer=tf.compat.v1.initializers.constant(-0.2),
            dtype=dtype,
            name='a_value_layer')

        # State branch

        # State intermediate fully connected layers
        v_encoder_layers = self.create_branch_layers(v_fc_layer_params,
                                                     v_dropout_layer_params,
                                                     v_weight_decay_params,
                                                     activation_fn,
                                                     kernel_initializer,
                                                     dtype,
                                                     name='v_branch_layer')

        # State dense layer to project to a single scalar state value
        v_value_layer = tf.keras.layers.Dense(
            1,
            activation=None,
            kernel_initializer=tf.compat.v1.initializers.random_uniform(
                minval=-0.03, maxval=0.03),
            bias_initializer=tf.compat.v1.initializers.constant(-0.2),
            dtype=dtype,
            name='v_value_layer')

        super().__init__(input_tensor_spec=input_tensor_spec,
                         state_spec=(),
                         name=name)

        self._encoder = encoder
        self._a_encode_layers = a_encode_layers
        self._a_value_layer = a_value_layer
        self._v_encode_layers = v_encoder_layers
        self._v_value_layer = v_value_layer

        self._av_combine_fn = av_combine_fn or self.av_combine_fn
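The snippet truncates the default combiner; a sketch of what it would implement (the standard dueling aggregation, with the method name assumed) is:

    def av_combine_fn(self, a_value, v_value):
        # Dueling aggregation: Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a)).
        # Subtracting the mean advantage keeps the value/advantage split
        # identifiable.
        return v_value + (
            a_value - tf.reduce_mean(a_value, axis=-1, keepdims=True))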
Example #23
    def __init__(self,
                 input_tensor_spec,
                 output_tensor_spec,
                 gnn,
                 fc_layer_params=None,
                 dropout_layer_params=None,
                 conv_layer_params=None,
                 activation_fn=tf.nn.relu,
                 name='ActorNetwork',
                 params=ParameterServer()):
        """
    Creates an instance of `ActorNetwork`.

    Args:
      input_tensor_spec: A nest of `tensor_spec.TensorSpec` representing the
        inputs.
      output_tensor_spec: A nest of `tensor_spec.BoundedTensorSpec` representing
        the outputs.
      gnn: The function that initializes a graph neural network that
        accepts the input observations and computes node embeddings.
      fc_layer_params: Optional list of fully_connected parameters, where each
        item is the number of units in the layer.
      dropout_layer_params: Optional list of dropout layer parameters; each
        item is the fraction of input units to drop, or a dictionary of
        parameters according to the keras.Dropout documentation. The
        additional parameter `permanent`, if set to True, applies dropout at
        inference time as well, for approximate Bayesian inference. The
        dropout layers are interleaved with the fully connected layers; there
        is a dropout layer after each fully connected layer, except where the
        entry in the list is None. This list must have the same length as
        fc_layer_params, or be None.
      conv_layer_params: Optional list of convolution layers parameters, where
        each item is a length-three tuple indicating (filters, kernel_size,
        stride).
      activation_fn: Activation function, e.g. tf.nn.relu or tf.nn.leaky_relu.
      name: A string representing the name of the network.
    Raises:
      ValueError: If `input_tensor_spec` or `action_spec` contains more than one
        item, or if the action data type is not `float`.
    """
        super(GNNActorNetwork,
              self).__init__(input_tensor_spec=input_tensor_spec,
                             state_spec=(),
                             name=name)

        if len(tf.nest.flatten(input_tensor_spec)) > 1:
            raise ValueError(
                'Only a single observation is supported by this network')

        flat_action_spec = tf.nest.flatten(output_tensor_spec)

        if len(flat_action_spec) > 1:
            raise ValueError(
                'Only a single action is supported by this network')

        if flat_action_spec[0].dtype not in [tf.float32, tf.float64]:
            raise ValueError(
                'Only float actions are supported by this network.')

        if gnn is None:
            raise ValueError('`gnn` must not be `None`.')

        self._gnn = gnn(name=name, params=params)
        self._latent_trace = None
        self._encoder = encoding_network.EncodingNetwork(
            input_tensor_spec=tf.TensorSpec([None, self._gnn._embedding_size]),
            preprocessing_layers=None,
            preprocessing_combiner=None,
            conv_layer_params=conv_layer_params,
            fc_layer_params=fc_layer_params,
            dropout_layer_params=dropout_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=tf.compat.v1.keras.initializers.glorot_uniform(
            ),
            batch_squash=False,
            dtype=tf.float32)

        self._projection_nets = tf.nest.map_structure(projection_net,
                                                      output_tensor_spec)
        self._output_tensor_spec = tf.nest.map_structure(
            lambda proj_net: proj_net.output_spec, self._projection_nets)
Example #24
    def __init__(
        self,
        observation_spec,
        conv_layer_params=None,
        input_fc_layer_params=(75, 40),
        lstm_size=(40, ),
        output_fc_layer_params=(75, 40),
        activation_fn=tf.keras.activations.relu,
        name='LSTMEncodingNetwork',
    ):
        """Creates an instance of `LSTMEncodingNetwork`.

    Args:
      observation_spec: A nest of `tensor_spec.TensorSpec` representing the
        observations.
      conv_layer_params: Optional list of convolution layers parameters, where
        each item is a length-three tuple indicating (filters, kernel_size,
        stride).
      input_fc_layer_params: Optional list of fully connected parameters, where
        each item is the number of units in the layer. These feed into the
        recurrent layer.
      lstm_size: An iterable of ints specifying the LSTM cell sizes to use.
      output_fc_layer_params: Optional list of fully connected parameters, where
        each item is the number of units in the layer. These are applied on top
        of the recurrent layer.
      activation_fn: Activation function, e.g. tf.keras.activations.relu.
      name: A string representing the name of the network.
    """
        kernel_initializer = tf.variance_scaling_initializer(
            scale=2.0, mode='fan_in', distribution='truncated_normal')

        input_encoder = encoding_network.EncodingNetwork(
            observation_spec,
            conv_layer_params=conv_layer_params,
            fc_layer_params=input_fc_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=kernel_initializer)

        # Create RNN cell
        if len(lstm_size) == 1:
            cell = tf.keras.layers.LSTMCell(lstm_size[0])
        else:
            cell = tf.keras.layers.StackedRNNCells(
                [tf.keras.layers.LSTMCell(size) for size in lstm_size])

        output_encoder = ([
            tf.keras.layers.Dense(num_units,
                                  activation=activation_fn,
                                  kernel_initializer=kernel_initializer,
                                  name='/'.join([name, 'dense']))
            for num_units in output_fc_layer_params
        ])

        state_spec = nest.map_structure(
            functools.partial(tensor_spec.TensorSpec,
                              dtype=tf.float32,
                              name='network_state_spec'), cell.state_size)

        super(LSTMEncodingNetwork,
              self).__init__(observation_spec=observation_spec,
                             action_spec=None,
                             state_spec=state_spec,
                             name=name)

        self._conv_layer_params = conv_layer_params
        self._input_encoder = input_encoder
        self._cell = cell
        self._output_encoder = output_encoder
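Note: this variant predates the current TF-Agents API: tf.variance_scaling_initializer and the bare nest module are TF1-era names, and the super().__init__(observation_spec=..., action_spec=...) signature belongs to an older Network base class.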
Example #25
    def __init__(
            self,
            input_tensor_spec,
            # observation_conv_layer_params=None,
            # observation_fc_layer_params=None,
            # observation_dropout_layer_params=None,
            # action_fc_layer_params=None,
            # action_dropout_layer_params=None,
            preprocessing_layers,
            preprocessing_combiner,
            joint_fc_layer_params=None,
            joint_dropout_layer_params=None,
            joint_activation_fn=tf.nn.relu,
            output_activation_fn=None,
            kernel_initializer=None,
            last_kernel_initializer=None,
            name='CriticNetwork'):
        """Creates an instance of `CriticNetwork`.
    Args:
      input_tensor_spec: A tuple of (observation, action) each a nest of
        `tensor_spec.TensorSpec` representing the inputs.
      observation_conv_layer_params: Optional list of convolution layer
        parameters for observations, where each item is a length-three tuple
        indicating (num_units, kernel_size, stride).
      observation_fc_layer_params: Optional list of fully connected parameters
        for observations, where each item is the number of units in the layer.
      observation_dropout_layer_params: Optional list of dropout layer
        parameters; each item is the fraction of input units to drop, or a
        dictionary of parameters according to the keras.Dropout documentation.
        The additional parameter `permanent`, if set to True, applies dropout
        at inference time as well, for approximate Bayesian inference. The
        dropout layers are interleaved with the fully connected layers; there
        is a dropout layer after each fully connected layer, except where the
        entry in the list is None. This list must have the same length as
        observation_fc_layer_params, or be None.
      action_fc_layer_params: Optional list of fully connected parameters for
        actions, where each item is the number of units in the layer.
      action_dropout_layer_params: Optional list of dropout layer parameters;
        the same conventions as observation_dropout_layer_params apply. This
        list must have the same length as action_fc_layer_params, or be None.
      joint_fc_layer_params: Optional list of fully connected parameters after
        merging observations and actions, where each item is the number of
        units in the layer.
      joint_dropout_layer_params: Optional list of dropout layer parameters;
        the same conventions as observation_dropout_layer_params apply. This
        list must have the same length as joint_fc_layer_params, or be None.
      joint_activation_fn: Activation function for the joint layers, e.g.
        tf.nn.relu or tf.nn.leaky_relu.
      output_activation_fn: Activation function for the last layer. This can be
        used to restrict the range of the output. For example, one can pass
        tf.keras.activations.sigmoid here to restrict the output to be bounded
        between 0 and 1.
      kernel_initializer: kernel initializer for all layers except for the value
        regression layer. If None, a VarianceScaling initializer will be used.
      last_kernel_initializer: kernel initializer for the value regression
         layer. If None, a RandomUniform initializer will be used.
      name: A string representing the name of the network.
    Raises:
      ValueError: If `observation_spec` or `action_spec` contains more than one
        observation.
    """
        super(MultiObservationCriticNetwork,
              self).__init__(input_tensor_spec=input_tensor_spec,
                             state_spec=(),
                             name=name)

        observation_spec, action_spec = input_tensor_spec

        flat_action_spec = tf.nest.flatten(action_spec)
        self._single_action_spec = flat_action_spec[0]
        # set up kernel_initializer
        if kernel_initializer is None:
            kernel_initializer = tf.compat.v1.keras.initializers.VarianceScaling(
                scale=1. / 3., mode='fan_in', distribution='uniform')
        if last_kernel_initializer is None:
            last_kernel_initializer = tf.keras.initializers.RandomUniform(
                minval=-0.003, maxval=0.003)
        # set up encoder_network
        self._encoder = encoding_network.EncodingNetwork(
            observation_spec,
            preprocessing_layers=preprocessing_layers,
            preprocessing_combiner=preprocessing_combiner,
            conv_layer_params=None,
            fc_layer_params=None,
            dropout_layer_params=None,
            activation_fn=tf.keras.activations.relu,
            kernel_initializer=kernel_initializer,
            batch_squash=False)

        # TODO(kbanoop): Replace mlp_layers with encoding networks.
        # self._observation_layers = utils.mlp_layers(
        #     observation_conv_layer_params,
        #     observation_fc_layer_params,
        #     observation_dropout_layer_params,
        #     activation_fn=activation_fn,
        #     kernel_initializer=kernel_initializer,
        #     name='observation_encoding')

        # self._action_layers = utils.mlp_layers(
        #     None,
        #     action_fc_layer_params,
        #     action_dropout_layer_params,
        #     activation_fn=activation_fn,
        #     kernel_initializer=kernel_initializer,
        #     name='action_encoding')

        self._joint_layers = utils.mlp_layers(
            None,
            joint_fc_layer_params,
            joint_dropout_layer_params,
            activation_fn=joint_activation_fn,
            kernel_initializer=kernel_initializer,
            name='joint_mlp')

        self._joint_layers.append(
            tf.keras.layers.Dense(1,
                                  activation=output_activation_fn,
                                  kernel_initializer=last_kernel_initializer,
                                  name='value'))
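To make the call convention concrete, here is a minimal usage sketch (mine, not from the source listing): the dict observation spec, the layer sizes, and the (observations, actions) call tuple are assumptions, mirroring how other TF-Agents critic networks are invoked.

# Hedged usage sketch; the spec shapes and the (observations, actions) call
# convention below are assumptions, not part of the source.
import tensorflow as tf
from tf_agents.specs import tensor_spec

obs_spec = {
    'camera': tensor_spec.TensorSpec([8], tf.float32),
    'proprio': tensor_spec.TensorSpec([2], tf.float32),
}
act_spec = tensor_spec.BoundedTensorSpec([3], tf.float32, minimum=-1.0,
                                         maximum=1.0)

critic = MultiObservationCriticNetwork(
    (obs_spec, act_spec),
    preprocessing_combiner=tf.keras.layers.Concatenate(axis=-1),
    joint_fc_layer_params=(64, 32))

observations = {'camera': tf.ones([5, 8]), 'proprio': tf.ones([5, 2])}
actions = tf.ones([5, 3])
q_values, _ = critic((observations, actions))  # one Q estimate per batch item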
Example #26
    def __init__(self,
                 input_tensor_spec,
                 output_tensor_spec,
                 preprocessing_layers=None,
                 preprocessing_combiner=None,
                 batch_squash=True,
                 fc_layer_params=None,
                 dropout_layer_params=None,
                 conv_layer_params=None,
                 activation_fn=tf.keras.activations.relu,
                 kernel_initializer=None,
                 last_kernel_initializer=None,
                 name='ActorNetwork'):
        """Creates an instance of `ActorNetwork`.
    Args:
      input_tensor_spec: A nest of `tensor_spec.TensorSpec` representing the
        inputs.
      output_tensor_spec: A nest of `tensor_spec.BoundedTensorSpec` representing
        the outputs.
      preprocessing_layers: (Optional.) A nest of `tf.keras.layers.Layer`
        representing preprocessing for the different observations.
        All of these layers must not be already built. For more details see
        the documentation of `networks.EncodingNetwork`.
      preprocessing_combiner: (Optional.) A keras layer that takes a flat list
        of tensors and combines them. Good options include
        `tf.keras.layers.Add` and `tf.keras.layers.Concatenate(axis=-1)`.
        This layer must not be already built. For more details see
        the documentation of `networks.EncodingNetwork`.
      batch_squash: If True, the outer_ranks of the observation are squashed
        into the batch dimension. This allows encoding networks to be used with
        observations with shape [BxTx...].
      fc_layer_params: Optional list of fully_connected parameters, where each
        item is the number of units in the layer.
      dropout_layer_params: Optional list of dropout layer parameters, each
        item is the fraction of input units to drop or a dictionary of
        parameters according to the keras.Dropout documentation. The additional
        parameter `permanent`, if set to True, applies dropout at inference
        time as well, for approximate Bayesian inference. The dropout layers
        are interleaved with the fully connected layers; there is a dropout
        layer after each fully connected layer, except if the entry in the list
        is None. This list must have the same length as fc_layer_params, or be
        None.
      conv_layer_params: Optional list of convolution layers parameters, where
        each item is a length-three tuple indicating (filters, kernel_size,
        stride).
      activation_fn: Activation function, e.g. tf.nn.relu, slim.leaky_relu, ...
      kernel_initializer: kernel initializer for all layers except for the
        final action layer. If None, a VarianceScaling initializer will be
        used.
      last_kernel_initializer: kernel initializer for the final action layer.
        If None, a RandomUniform initializer will be used.
      name: A string representing the name of the network.
    Raises:
      ValueError: If `output_tensor_spec` contains more than one action, or if
        the action data type is not `float`.
    """

        super(PaintingActorNetwork,
              self).__init__(input_tensor_spec=input_tensor_spec,
                             state_spec=(),
                             name=name)

        # if len(tf.nest.flatten(input_tensor_spec)) > 1:
        #   raise ValueError('Only a single observation is supported by this network')

        flat_action_spec = tf.nest.flatten(output_tensor_spec)
        if len(flat_action_spec) > 1:
            raise ValueError(
                'Only a single action is supported by this network')
        self._single_action_spec = flat_action_spec[0]

        if self._single_action_spec.dtype not in [tf.float32, tf.float64]:
            raise ValueError(
                'Only float actions are supported by this network.')

        if kernel_initializer is None:
            kernel_initializer = tf.compat.v1.keras.initializers.VarianceScaling(
                scale=1. / 3., mode='fan_in', distribution='uniform')
        if last_kernel_initializer is None:
            last_kernel_initializer = tf.keras.initializers.RandomUniform(
                minval=-0.003, maxval=0.003)

        encoder = encoding_network.EncodingNetwork(
            input_tensor_spec,
            preprocessing_layers=preprocessing_layers,
            preprocessing_combiner=preprocessing_combiner,
            conv_layer_params=conv_layer_params,
            fc_layer_params=fc_layer_params,
            dropout_layer_params=dropout_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=kernel_initializer,
            batch_squash=batch_squash,
            name='input_encoding')
        self._encoder = encoder

        self._action_layer = tf.keras.layers.Dense(
            flat_action_spec[0].shape.num_elements(),
            activation=tf.keras.activations.tanh,
            kernel_initializer=last_kernel_initializer,
            name='action')

        self._output_tensor_spec = output_tensor_spec
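A short usage sketch may help; it is not from the source, and it assumes PaintingActorNetwork is invoked like a standard TF-Agents actor network, on a batch of image observations. The spec shapes below are invented for illustration.

# Hedged sketch; the observation and action shapes are assumptions.
import tensorflow as tf
from tf_agents.specs import tensor_spec

obs_spec = tensor_spec.TensorSpec([28, 28, 3], tf.float32)
act_spec = tensor_spec.BoundedTensorSpec([4], tf.float32, minimum=-1.0,
                                         maximum=1.0)

actor = PaintingActorNetwork(
    obs_spec,
    act_spec,
    conv_layer_params=[(16, 3, 2)],
    fc_layer_params=(64,))

actions, _ = actor(tf.zeros([2, 28, 28, 3]))
# The tanh head keeps raw outputs in [-1, 1]; any rescaling to act_spec's
# bounds would happen in the call method, which is not shown above.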
Example #27
    def __init__(self,
                 input_tensor_spec,
                 output_tensor_spec,
                 gnn,
                 preprocessing_layers=None,
                 preprocessing_combiner=None,
                 conv_layer_params=None,
                 fc_layer_params=(200, 100),
                 dropout_layer_params=None,
                 activation_fn=tf.keras.activations.relu,
                 kernel_initializer=None,
                 batch_squash=False,
                 dtype=tf.float32,
                 discrete_projection_net=_categorical_projection_net,
                 continuous_projection_net=_normal_projection_net,
                 name='ActorDistributionNetwork',
                 params=ParameterServer()):
        """
    Creates an instance of `ActorDistributionNetwork`.

    Args:
      input_tensor_spec: A nest of `tensor_spec.TensorSpec` representing the
        input.
      output_tensor_spec: A nest of `tensor_spec.BoundedTensorSpec` representing
        the output.
      gnn: A callable that constructs the graph neural network used to embed
        the observations; it is called as `gnn(name=..., params=params)` and
        must not be `None`.
      preprocessing_layers: (Optional.) A nest of `tf.keras.layers.Layer`
        representing preprocessing for the different observations.
        All of these layers must not be already built. For more details see
        the documentation of `networks.EncodingNetwork`.
      preprocessing_combiner: (Optional.) A keras layer that takes a flat list
        of tensors and combines them. Good options include
        `tf.keras.layers.Add` and `tf.keras.layers.Concatenate(axis=-1)`.
        This layer must not be already built. For more details see
        the documentation of `networks.EncodingNetwork`.
      conv_layer_params: Optional list of convolution layers parameters, where
        each item is a length-three tuple indicating (filters, kernel_size,
        stride).
      fc_layer_params: Optional list of fully_connected parameters, where each
        item is the number of units in the layer.
      dropout_layer_params: Optional list of dropout layer parameters, each
        item is the fraction of input units to drop or a dictionary of
        parameters according to the keras.Dropout documentation. The additional
        parameter `permanent`, if set to True, applies dropout at inference
        time as well, for approximate Bayesian inference. The dropout layers
        are interleaved with the fully connected layers; there is a dropout
        layer after each fully connected layer, except if the entry in the list
        is None. This list must have the same length as fc_layer_params, or be
        None.
      activation_fn: Activation function, e.g. tf.nn.relu, slim.leaky_relu, ...
      kernel_initializer: Initializer to use for the kernels of the conv and
        dense layers. If none is provided, a default glorot_uniform initializer
        is used.
      batch_squash: If True, the outer_ranks of the observation are squashed
        into the batch dimension. This allows encoding networks to be used with
        observations with shape [BxTx...].
      dtype: The dtype to use by the convolution and fully connected layers.
      discrete_projection_net: Callable that generates a discrete projection
        network to be called with some hidden state and the outer_rank of the
        state.
      continuous_projection_net: Callable that generates a continuous projection
        network to be called with some hidden state and the outer_rank of the
        state.
      name: A string representing the name of the network.
      params: A `ParameterServer` instance forwarded to the `gnn` constructor.

    Raises:
      ValueError: If `gnn` is `None`.
    """

        if not kernel_initializer:
            kernel_initializer = tf.compat.v1.keras.initializers.glorot_uniform()
        if gnn is None:
            raise ValueError('`gnn` must not be `None`.')

        self._gnn = gnn(name=name + "_GNN", params=params)

        encoder = encoding_network.EncodingNetwork(
            input_tensor_spec=tf.TensorSpec([None, self._gnn._embedding_size]),
            preprocessing_layers=preprocessing_layers,
            preprocessing_combiner=preprocessing_combiner,
            conv_layer_params=conv_layer_params,
            fc_layer_params=fc_layer_params,
            dropout_layer_params=dropout_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=kernel_initializer,
            batch_squash=batch_squash,
            dtype=dtype)

        def map_proj(spec):
            if tensor_spec.is_discrete(spec):
                return discrete_projection_net(spec)
            else:
                return continuous_projection_net(spec)

        projection_networks = tf.nest.map_structure(map_proj,
                                                    output_tensor_spec)
        output_spec = tf.nest.map_structure(
            lambda proj_net: proj_net.output_spec, projection_networks)

        super(GNNActorDistributionNetwork,
              self).__init__(input_tensor_spec=input_tensor_spec,
                             state_spec=(),
                             output_spec=output_spec,
                             name=name)

        self._encoder = encoder
        self._projection_networks = projection_networks
        self._output_tensor_spec = output_tensor_spec
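Since neither the gnn callable nor ParameterServer is shown in this listing, the sketch below stubs both out; StubGNN and its _embedding_size attribute are hypothetical stand-ins for the project's real graph network, and only the attributes this constructor actually reads are stubbed.

# Hypothetical sketch: StubGNN stands in for the project's real GNN class.
import tensorflow as tf
from tf_agents.specs import tensor_spec

class StubGNN(tf.keras.layers.Layer):
    """Placeholder graph network (hypothetical, for illustration only)."""

    def __init__(self, name, params):
        super(StubGNN, self).__init__(name=name)
        self._embedding_size = 32  # read when building the encoder input spec

obs_spec = tensor_spec.TensorSpec([None, 6], tf.float32)  # per-node features
act_spec = tensor_spec.BoundedTensorSpec([2], tf.float32, minimum=-1.0,
                                         maximum=1.0)

net = GNNActorDistributionNetwork(
    input_tensor_spec=obs_spec,
    output_tensor_spec=act_spec,
    gnn=StubGNN)
# A continuous act_spec is routed to _normal_projection_net by map_proj; a
# discrete BoundedTensorSpec would get _categorical_projection_net instead.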
Example #28
    def __init__(self,
                 input_tensor_spec,
                 preprocessing_layers=None,
                 preprocessing_combiner=None,
                 conv_layer_params=None,
                 fc_layer_params=(75, 40),
                 dropout_layer_params=None,
                 activation_fn=tf.keras.activations.relu,
                 kernel_initializer=None,
                 batch_squash=True,
                 dtype=tf.float32,
                 name='ValueNetwork'):
        """Creates an instance of `ValueNetwork`.

    Network supports calls with shape outer_rank + observation_spec.shape. Note
    outer_rank must be at least 1.

    Args:
      input_tensor_spec: A `tensor_spec.TensorSpec` or a tuple of specs
        representing the input observations.
      preprocessing_layers: (Optional.) A nest of `tf.keras.layers.Layer`
        representing preprocessing for the different observations.
        All of these layers must not be already built. For more details see
        the documentation of `networks.EncodingNetwork`.
      preprocessing_combiner: (Optional.) A keras layer that takes a flat list
        of tensors and combines them. Good options include
        `tf.keras.layers.Add` and `tf.keras.layers.Concatenate(axis=-1)`.
        This layer must not be already built. For more details see
        the documentation of `networks.EncodingNetwork`.
      conv_layer_params: Optional list of convolution layers parameters, where
        each item is a length-three tuple indicating (filters, kernel_size,
        stride).
      fc_layer_params: Optional list of fully_connected parameters, where each
        item is the number of units in the layer.
      dropout_layer_params: Optional list of dropout layer parameters, each
        item is the fraction of input units to drop or a dictionary of
        parameters according to the keras.Dropout documentation. The additional
        parameter `permanent`, if set to True, applies dropout at inference
        time as well, for approximate Bayesian inference. The dropout layers
        are interleaved with the fully connected layers; there is a dropout
        layer after each fully connected layer, except if the entry in the list
        is None. This list must have the same length as fc_layer_params, or be
        None.
      activation_fn: Activation function, e.g. tf.keras.activations.relu.
      kernel_initializer: Initializer to use for the kernels of the conv and
        dense layers. If none is provided, a default glorot_uniform initializer
        is used.
      batch_squash: If True, the outer_ranks of the observation are squashed
        into the batch dimension. This allows encoding networks to be used with
        observations with shape [BxTx...].
      dtype: The dtype to use by the convolution and fully connected layers.
      name: A string representing name of the network.

    Raises:
      ValueError: If input_tensor_spec is not an instance of network.InputSpec.
    """
        super(ValueNetwork, self).__init__(input_tensor_spec=input_tensor_spec,
                                           state_spec=(),
                                           name=name)

        if not kernel_initializer:
            kernel_initializer = tf.compat.v1.keras.initializers.glorot_uniform()

        self._encoder = encoding_network.EncodingNetwork(
            input_tensor_spec,
            preprocessing_layers=preprocessing_layers,
            preprocessing_combiner=preprocessing_combiner,
            conv_layer_params=conv_layer_params,
            fc_layer_params=fc_layer_params,
            dropout_layer_params=dropout_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=kernel_initializer,
            batch_squash=batch_squash,
            dtype=dtype)

        self._postprocessing_layers = tf.keras.layers.Dense(
            1,
            activation=None,
            kernel_initializer=tf.random_uniform_initializer(minval=-0.03,
                                                             maxval=0.03))
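For reference, a minimal usage sketch (mine, not from the source); it assumes this ValueNetwork's call behaves like the stock TF-Agents one, returning one scalar value per batch element.

# Hedged sketch; the observation shape and layer sizes are assumptions.
import tensorflow as tf
from tf_agents.specs import tensor_spec

obs_spec = tensor_spec.TensorSpec([4], tf.float32)
value_net = ValueNetwork(obs_spec, fc_layer_params=(75, 40))

values, _ = value_net(tf.ones([3, 4]))
# Assuming the call method squeezes the final Dense(1) output, `values` has
# shape [3]: one value estimate per observation in the batch.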
Example #29
    def __init__(self,
                 input_tensor_spec,
                 action_spec,
                 preprocessing_layers=None,
                 preprocessing_combiner=None,
                 conv_layer_params=None,
                 fc_layer_params=(75, 40),
                 dropout_layer_params=None,
                 activation_fn=tf.keras.activations.relu,
                 kernel_initializer=None,
                 batch_squash=True,
                 dtype=tf.float32,
                 name='QNetwork'):
        """Creates an instance of `QNetwork`.

    Args:
      input_tensor_spec: A nest of `tensor_spec.TensorSpec` representing the
        input observations.
      action_spec: A nest of `tensor_spec.BoundedTensorSpec` representing the
        actions.
      preprocessing_layers: (Optional.) A nest of `tf.keras.layers.Layer`
        representing preprocessing for the different observations.
        All of these layers must not be already built. For more details see
        the documentation of `networks.EncodingNetwork`.
      preprocessing_combiner: (Optional.) A keras layer that takes a flat list
        of tensors and combines them. Good options include
        `tf.keras.layers.Add` and `tf.keras.layers.Concatenate(axis=-1)`.
        This layer must not be already built. For more details see
        the documentation of `networks.EncodingNetwork`.
      conv_layer_params: Optional list of convolution layers parameters, where
        each item is a length-three tuple indicating (filters, kernel_size,
        stride).
      fc_layer_params: Optional list of fully_connected parameters, where each
        item is the number of units in the layer.
      dropout_layer_params: Optional list of dropout layer parameters, where
        each item is the fraction of input units to drop. The dropout layers are
        interleaved with the fully connected layers; there is a dropout layer
        after each fully connected layer, except if the entry in the list is
        None. This list must have the same length as fc_layer_params, or be
        None.
      activation_fn: Activation function, e.g. tf.keras.activations.relu.
      kernel_initializer: Initializer to use for the kernels of the conv and
        dense layers. If none is provided, a default variance_scaling
        initializer is used.
      batch_squash: If True, the outer_ranks of the observation are squashed
        into the batch dimension. This allows encoding networks to be used with
        observations with shape [BxTx...].
      dtype: The dtype to use by the convolution and fully connected layers.
      name: A string representing the name of the network.

    Raises:
      ValueError: If `input_tensor_spec` contains more than one observation, or
        if `action_spec` contains more than one action.
    """
        validate_specs(action_spec, input_tensor_spec)
        action_spec = tf.nest.flatten(action_spec)[0]
        num_actions = action_spec.maximum - action_spec.minimum + 1
        encoder_input_tensor_spec = input_tensor_spec

        encoder = encoding_network.EncodingNetwork(
            encoder_input_tensor_spec,
            preprocessing_layers=preprocessing_layers,
            preprocessing_combiner=preprocessing_combiner,
            conv_layer_params=conv_layer_params,
            fc_layer_params=fc_layer_params,
            dropout_layer_params=dropout_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=kernel_initializer,
            batch_squash=batch_squash,
            dtype=dtype)

        q_value_layer = tf.keras.layers.Dense(
            num_actions,
            activation=None,
            kernel_initializer=tf.random_uniform_initializer(minval=-0.03,
                                                             maxval=0.03),
            bias_initializer=tf.constant_initializer(-0.2),
            dtype=dtype)

        super(QNetwork, self).__init__(input_tensor_spec=input_tensor_spec,
                                       state_spec=(),
                                       name=name)

        self._encoder = encoder
        self._q_value_layer = q_value_layer
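The head size follows directly from the action spec (num_actions = maximum - minimum + 1), as the small sketch below illustrates; the observation shape and layer sizes are assumptions, not from the source.

# Hedged sketch of how the Q head size follows from the action spec.
import tensorflow as tf
from tf_agents.specs import tensor_spec

obs_spec = tensor_spec.TensorSpec([4], tf.float32)
act_spec = tensor_spec.BoundedTensorSpec((), tf.int32, minimum=0, maximum=2)

q_net = QNetwork(obs_spec, act_spec, fc_layer_params=(75, 40))

q_values, _ = q_net(tf.ones([2, 4]))
# num_actions = 2 - 0 + 1 = 3, so q_values should have shape [2, 3]:
# one estimate per action for each of the two observations in the batch.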
Example #30
  def __init__(self,
               input_tensor_spec,
               preprocessing_layers=None,
               preprocessing_combiner=None,
               batch_squash=True,
               observation_conv_layer_params=None,
               observation_fc_layer_params=None,
               observation_dropout_layer_params=None,
               action_fc_layer_params=None,
               action_dropout_layer_params=None,
               joint_fc_layer_params=None,
               joint_dropout_layer_params=None,
               activation_fn=tf.nn.relu,
               output_activation_fn=None,
               kernel_initializer=None,
               last_kernel_initializer=None,
               name='CriticNetwork'):
    """Creates an instance of `CriticNetwork`.
    Args:
      input_tensor_spec: A tuple of (observation, action) each a nest of
        `tensor_spec.TensorSpec` representing the inputs.
      preprocessing_layers: (Optional.) A nest of `tf.keras.layers.Layer`
        representing preprocessing for the different observations.
        All of these layers must not be already built. For more details see
        the documentation of `networks.EncodingNetwork`.
      preprocessing_combiner: (Optional.) A keras layer that takes a flat list
        of tensors and combines them. Good options include
        `tf.keras.layers.Add` and `tf.keras.layers.Concatenate(axis=-1)`.
        This layer must not be already built. For more details see
        the documentation of `networks.EncodingNetwork`.
      batch_squash: If True, the outer_ranks of the observation are squashed
        into the batch dimension. This allows encoding networks to be used with
        observations with shape [BxTx...].
      observation_conv_layer_params: Optional list of convolution layer
        parameters for observations, where each item is a length-three tuple
        indicating (num_units, kernel_size, stride).
      observation_fc_layer_params: Optional list of fully connected parameters
        for observations, where each item is the number of units in the layer.
      observation_dropout_layer_params: Optional list of dropout layer
        parameters, each item is the fraction of input units to drop or a
        dictionary of parameters according to the keras.Dropout documentation.
        The additional parameter `permanent`, if set to True, applies dropout
        at inference time as well, for approximate Bayesian inference. The
        dropout layers are interleaved with the fully connected layers; there
        is a dropout layer after each fully connected layer, except if the
        entry in the list is None. This list must have the same length as
        observation_fc_layer_params, or be None.
      action_fc_layer_params: Optional list of fully connected parameters for
        actions, where each item is the number of units in the layer.
      action_dropout_layer_params: Optional list of dropout layer parameters,
        each item is the fraction of input units to drop or a dictionary of
        parameters according to the keras.Dropout documentation. The additional
        parameter `permanent`, if set to True, applies dropout at inference
        time as well, for approximate Bayesian inference. The dropout layers
        are interleaved with the fully connected layers; there is a dropout
        layer after each fully connected layer, except if the entry in the list
        is None. This list must have the same length as action_fc_layer_params,
        or be None.
      joint_fc_layer_params: Optional list of fully connected parameters after
        merging observations and actions, where each item is the number of units
        in the layer.
      joint_dropout_layer_params: Optional list of dropout layer parameters,
        each item is the fraction of input units to drop or a dictionary of
        parameters according to the keras.Dropout documentation. The additional
        parameter `permanent`, if set to True, applies dropout at inference
        time as well, for approximate Bayesian inference. The dropout layers
        are interleaved with the fully connected layers; there is a dropout
        layer after each fully connected layer, except if the entry in the list
        is None. This list must have the same length as joint_fc_layer_params,
        or be None.
      activation_fn: Activation function, e.g. tf.nn.relu, slim.leaky_relu, ...
      output_activation_fn: Activation function for the last layer. This can be
        used to restrict the range of the output. For example, one can pass
        tf.keras.activations.sigmoid here to restrict the output to be bounded
        between 0 and 1.
      kernel_initializer: kernel initializer for all layers except for the value
        regression layer. If None, a VarianceScaling initializer will be used.
      last_kernel_initializer: kernel initializer for the value regression
         layer. If None, a RandomUniform initializer will be used.
      name: A string representing name of the network.
    Raises:
      ValueError: If `observation_spec` or `action_spec` contains more than
        one item.
    """
    super(PaintingCriticNetwork, self).__init__(
        input_tensor_spec=input_tensor_spec,
        state_spec=(),
        name=name)

    observation_spec, action_spec = input_tensor_spec

    # if len(tf.nest.flatten(observation_spec)) > 1:
    #   raise ValueError('Only a single observation is supported by this network')

    flat_action_spec = tf.nest.flatten(action_spec)
    if len(flat_action_spec) > 1:
      raise ValueError('Only a single action is supported by this network')
    self._single_action_spec = flat_action_spec[0]

    if kernel_initializer is None:
      kernel_initializer = tf.compat.v1.keras.initializers.VarianceScaling(
          scale=1. / 3., mode='fan_in', distribution='uniform')
    if last_kernel_initializer is None:
      last_kernel_initializer = tf.keras.initializers.RandomUniform(
          minval=-0.003, maxval=0.003)

    encoder = encoding_network.EncodingNetwork(
        observation_spec,
        preprocessing_layers=preprocessing_layers,
        preprocessing_combiner=preprocessing_combiner,
        conv_layer_params=observation_conv_layer_params,
        fc_layer_params=observation_fc_layer_params,
        dropout_layer_params=observation_dropout_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=kernel_initializer,
        batch_squash=batch_squash,
        name='observation_encoding') 
    self._encoder = encoder

    self._action_layers = utils.mlp_layers(
        None,
        action_fc_layer_params,
        action_dropout_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=kernel_initializer,
        name='action_encoding')

    self._joint_layers = utils.mlp_layers(
        None,
        joint_fc_layer_params,
        joint_dropout_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=kernel_initializer,
        name='joint_mlp')

    self._joint_layers.append(
        tf.keras.layers.Dense(
            1,
            activation=output_activation_fn,
            kernel_initializer=last_kernel_initializer,
            name='value'))
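Finally, a hedged usage sketch for the critic above; the spec shapes and the (observation, action) call tuple are my assumptions, modeled on the standard TF-Agents CriticNetwork rather than taken from the source.

# Hedged sketch; shapes and call convention are assumptions.
import tensorflow as tf
from tf_agents.specs import tensor_spec

obs_spec = tensor_spec.TensorSpec([64, 64, 3], tf.float32)
act_spec = tensor_spec.BoundedTensorSpec([5], tf.float32, minimum=-1.0,
                                         maximum=1.0)

critic = PaintingCriticNetwork(
    (obs_spec, act_spec),
    observation_conv_layer_params=[(16, 3, 2)],
    observation_fc_layer_params=(64,),
    action_fc_layer_params=(32,),
    joint_fc_layer_params=(64,))

q_value, _ = critic((tf.zeros([2, 64, 64, 3]), tf.zeros([2, 5])))
# Expected: one scalar Q estimate per batch element from the final 'value'
# Dense(1) layer.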