Beispiel #1
0
    def rnn_logit_fn(features, mode):
        """Recurrent Neural Network logit_fn.

    Args:
      features: This is the first item returned from the `input_fn`
                passed to `train`, `evaluate`, and `predict`. This should be a
                single `Tensor` or `dict` of same.
      mode: Optional. Specifies if this training, evaluation or prediction. See
            `ModeKeys`.

    Returns:
      A tuple of `Tensor` objects representing the logits and the sequence
      length mask.
    """
        with ops.name_scope('sequence_input_layer'):
            sequence_input, sequence_length = fc.SequenceFeatures(
                sequence_feature_columns)(features)
            summary.histogram('sequence_length', sequence_length)

            if context_feature_columns:
                context_input = fc.DenseFeatures(context_feature_columns)(
                    features)
                sequence_input = fc.concatenate_context_input(
                    context_input, sequence_input=sequence_input)

        # Ignore output state.
        sequence_length_mask = array_ops.sequence_mask(sequence_length)
        rnn_layer = rnn_layer_fn()
        rnn_outputs = rnn_layer(sequence_input,
                                mask=sequence_length_mask,
                                training=(mode == model_fn.ModeKeys.TRAIN))

        logits = keras_layers.Dense(units=output_units,
                                    name='logits')(rnn_outputs)
        return logits, sequence_length_mask
  def test_serialization_sequence_features(self):
    rating = fc.sequence_numeric_column('rating')
    sequence_feature = fc.SequenceFeatures([rating])
    config = keras.layers.serialize(sequence_feature)

    revived = keras.layers.deserialize(config)
    self.assertIsInstance(revived, fc.SequenceFeatures)
Beispiel #3
0
    def test_saving_with_sequence_features(self):
        cols = [
            feature_column_lib.sequence_numeric_column('a'),
            feature_column_lib.indicator_column(
                feature_column_lib.
                sequence_categorical_column_with_vocabulary_list(
                    'b', ['one', 'two']))
        ]
        input_layers = {
            'a':
            keras.layers.Input(shape=(None, 1), sparse=True, name='a'),
            'b':
            keras.layers.Input(shape=(None, 1),
                               sparse=True,
                               name='b',
                               dtype='string')
        }

        fc_layer, _ = feature_column_lib.SequenceFeatures(cols)(input_layers)
        # TODO(tibell): Figure out the right dtype and apply masking.
        # sequence_length_mask = array_ops.sequence_mask(sequence_length)
        # x = keras.layers.GRU(32)(fc_layer, mask=sequence_length_mask)
        x = keras.layers.GRU(32)(fc_layer)
        output = keras.layers.Dense(10)(x)

        model = keras.models.Model(input_layers, output)

        model.compile(loss=keras.losses.MSE,
                      optimizer='rmsprop',
                      metrics=[keras.metrics.categorical_accuracy])

        config = model.to_json()
        loaded_model = model_config.model_from_json(config)

        batch_size = 10
        timesteps = 1

        values_a = np.arange(10, dtype=np.float32)
        indices_a = np.zeros((10, 3), dtype=np.int64)
        indices_a[:, 0] = np.arange(10)
        inputs_a = sparse_tensor.SparseTensor(indices_a, values_a,
                                              (batch_size, timesteps, 1))

        values_b = np.zeros(10, dtype=np.str)
        indices_b = np.zeros((10, 3), dtype=np.int64)
        indices_b[:, 0] = np.arange(10)
        inputs_b = sparse_tensor.SparseTensor(indices_b, values_b,
                                              (batch_size, timesteps, 1))

        with self.cached_session():
            # Initialize tables for V1 lookup.
            if not context.executing_eagerly():
                self.evaluate(lookup_ops.tables_initializer())

            self.assertLen(
                loaded_model.predict({
                    'a': inputs_a,
                    'b': inputs_b
                }, steps=1), batch_size)
Beispiel #4
0
  def __init__(
      self,
      rnn_layer,
      units,
      sequence_feature_columns,
      context_feature_columns=None,
      activation=None,
      return_sequences=False,
      **kwargs):
    """Initializes a RNNModel instance.

    Args:
      rnn_layer: A Keras RNN layer.
      units: An int indicating the dimension of the logit layer, and of the
        model output.
      sequence_feature_columns: An iterable containing the `FeatureColumn`s
        that represent sequential input. All items in the set should either be
        sequence columns (e.g. `sequence_numeric_column`) or constructed from
        one (e.g. `embedding_column` with `sequence_categorical_column_*` as
        input).
      context_feature_columns: An iterable containing the `FeatureColumn`s
        for contextual input. The data represented by these columns will be
        replicated and given to the RNN at each timestep. These columns must be
        instances of classes derived from `DenseColumn` such as
        `numeric_column`, not the sequential variants.
      activation: Activation function to apply to the logit layer (for instance
        `tf.keras.activations.sigmoid`). If you don't specify anything, no
        activation is applied.
      return_sequences: A boolean indicating whether to return the last output
        in the output sequence, or the full sequence.
      **kwargs: Additional arguments.

    Raises:
      ValueError: If `units` is not an int.
    """
    super(RNNModel, self).__init__(**kwargs)
    if not isinstance(units, int):
      raise ValueError('units must be an int.  Given type: {}'.format(
          type(units)))
    self._return_sequences = return_sequences
    self._sequence_feature_columns = sequence_feature_columns
    self._context_feature_columns = context_feature_columns
    self._sequence_features_layer = fc.SequenceFeatures(
        sequence_feature_columns)
    self._dense_features_layer = None
    if context_feature_columns:
      self._dense_features_layer = dense_features.DenseFeatures(
          context_feature_columns)
    self._rnn_layer = rnn_layer
    self._logits_layer = keras_layers.Dense(
        units=units, activation=activation, name='logits')
Beispiel #5
0
    def test_feature_layer_cpu(self):
        # Inputs.
        vocabulary_size = 3
        sparse_input = sparse_tensor.SparseTensorValue(
            # example 0, ids [2]
            # example 1, ids [0, 1]
            # example 2, ids []
            # example 3, ids [1]
            indices=((0, 0), (1, 0), (1, 1), (3, 0)),
            values=(2, 0, 1, 1),
            dense_shape=(4, 2))

        # Embedding variable.
        embedding_dimension = 2
        embedding_values = (
            (1., 2.),  # id 0
            (3., 5.),  # id 1
            (7., 11.)  # id 2
        )

        def _initializer(shape, dtype, partition_info=None):
            self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
            self.assertEqual(dtypes.float32, dtype)
            self.assertIsNone(partition_info)
            return embedding_values

        # Expected lookup result, using combiner='mean'.
        expected_lookups = (
            # example 0, ids [2], embedding = [7, 11]
            (7., 11.),
            # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
            (2., 3.5),
            # example 2, ids [], embedding = [0, 0]
            (0., 0.),
            # example 3, ids [1], embedding = [3, 5]
            (3., 5.),
        )
        expected_lookups_sequence = (
            # example 0, ids [2], embedding = [[7, 11], [0, 0]]
            (
                (7., 11.),
                (0., 0.),
            ),
            # example 1, ids [0, 1], embedding = [[1, 2], [3. 5]]
            (
                (1., 2.),
                (3., 5.),
            ),
            # example 2, ids [], embedding = [0, 0]
            (
                (0., 0.),
                (0., 0.),
            ),
            # example 3, ids [1], embedding = [3, 5]
            (
                (3., 5.),
                (0., 0.),
            ),
        )

        # Build columns.
        categorical_column = fc_lib.categorical_column_with_identity(
            key='aaa', num_buckets=vocabulary_size)
        sequence_categorical_column = (
            fc_lib.sequence_categorical_column_with_identity(
                key='bbb', num_buckets=vocabulary_size))
        embedding_column = tpu_fc.embedding_column_v2(
            categorical_column,
            dimension=embedding_dimension,
            initializer=_initializer)
        sequence_embedding_column = tpu_fc.embedding_column_v2(
            sequence_categorical_column,
            dimension=embedding_dimension,
            initializer=_initializer,
            max_sequence_length=2)

        # Provide sparse input and get dense result.
        features = {'aaa': sparse_input, 'bbb': sparse_input}
        dense_features = fc_lib.DenseFeatures([embedding_column])
        sequence_features = fc_lib.SequenceFeatures(
            [sequence_embedding_column])
        embedding_lookup = dense_features(features)
        sequence_embedding_lookup = sequence_features(features)

        # Assert expected embedding variable and lookups.
        global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
        self.assertItemsEqual((
            'dense_features/aaa_embedding/embedding_weights:0',
            'sequence_features/bbb_embedding/embedding_weights:0',
        ), tuple([v.name for v in global_vars]))
        with _initialized_session():
            self.assertAllEqual(embedding_values, global_vars[0].eval())
            self.assertAllEqual(expected_lookups, embedding_lookup.eval())
            self.assertAllEqual(expected_lookups_sequence,
                                sequence_embedding_lookup[0].eval())
Beispiel #6
0
    def test_feature_layer_cpu(self):
        # Inputs.
        vocabulary_size = 3
        input_a = sparse_tensor.SparseTensorValue(
            # example 0, ids [2]
            # example 1, ids [0, 1]
            indices=((0, 0), (1, 0), (1, 1)),
            values=(2, 0, 1),
            dense_shape=(2, 2))
        input_b = sparse_tensor.SparseTensorValue(
            # example 0, ids [2]
            # example 1, ids [0, 1]
            # example 2, ids []
            indices=((0, 0), (1, 0), (1, 1)),
            values=(2, 0, 1),
            dense_shape=(3, 2))
        input_features = {'aaa': input_a, 'bbb': input_b}

        # Embedding variable.
        embedding_dimension = 2
        embedding_values = (
            (1., 2.),  # id 0
            (3., 5.),  # id 1
            (7., 11.)  # id 2
        )

        def _initializer(shape, dtype, partition_info=None):
            self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
            self.assertEqual(dtypes.float32, dtype)
            self.assertIsNone(partition_info)
            return embedding_values

        # Expected lookup result, using combiner='mean'.
        expected_lookups_a = (
            # example 0:
            (7., 11.),  # ids [2], embedding = [7, 11]
            # example 1:
            (2., 3.5
             ),  # ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
        )
        expected_lookups_b = (
            # example 0:
            (
                (7., 11.),
                (0., 0.),
            ),  # ids [2], embedding = [[7, 11], [0, 0]]
            # example 1:
            (
                (1., 2.),
                (3., 5.),
            ),  # ids [0, 1], embedding = [[1, 2], [3, 5]]
            # example 2:
            (
                (0., 0.),
                (0., 0.),
            ),  # ids [], embedding = [[0, 0], [0, 0]]
        )

        # Build columns.
        categorical_column_a = fc_lib.categorical_column_with_identity(
            key='aaa', num_buckets=vocabulary_size)
        categorical_column_b = fc_lib.sequence_categorical_column_with_identity(
            key='bbb', num_buckets=vocabulary_size)
        embedding_column_a, embedding_column_b = tpu_fc.shared_embedding_columns_v2(
            [categorical_column_a, categorical_column_b],
            dimension=embedding_dimension,
            initializer=_initializer,
            max_sequence_lengths=[0, 2])

        # Provide sparse input and get dense result.
        dense_features = fc_lib.DenseFeatures([embedding_column_a])
        sequence_features = fc_lib.SequenceFeatures([embedding_column_b])
        embedding_lookup_a = dense_features(input_features)
        embedding_lookup_b = sequence_features(input_features)

        # Assert expected embedding variable and lookups.
        global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
        self.assertItemsEqual(('aaa_bbb_shared_embedding:0', ),
                              tuple([v.name for v in global_vars]))
        embedding_var = global_vars[0]
        with _initialized_session():
            self.assertAllEqual(embedding_values, embedding_var.eval())
            self.assertAllEqual(expected_lookups_a, embedding_lookup_a.eval())
            self.assertAllEqual(expected_lookups_b,
                                embedding_lookup_b[0].eval())