def test_shared_embedding_column_with_non_sequence_categorical(self):
        """Tests that non-sequence shared embedding columns are rejected.

        The shared embedding columns are built on plain identity columns
        rather than sequence categorical columns, so calling the
        `SequenceFeatures` layer on them must raise a `ValueError`.
        """
        num_buckets = 3
        # Both features carry the same ids:
        #   example 0, ids [2]
        #   example 1, ids [0, 1]
        sparse_kwargs = dict(indices=((0, 0), (1, 0), (1, 1)),
                             values=(2, 0, 1),
                             dense_shape=(2, 2))
        features = {
            'aaa': sparse_tensor.SparseTensorValue(**sparse_kwargs),
            'bbb': sparse_tensor.SparseTensorValue(**sparse_kwargs),
        }

        non_sequence_columns = [
            fc.categorical_column_with_identity(key=key,
                                                num_buckets=num_buckets)
            for key in ('aaa', 'bbb')
        ]
        shared_columns = fc.shared_embedding_columns_v2(non_sequence_columns,
                                                        dimension=2)

        layer = ksfc.SequenceFeatures(shared_columns)
        expected_message = (
            r'In embedding_column: aaa_shared_embedding\. categorical_column must '
            r'be of type SequenceCategoricalColumn to use SequenceFeatures\.')
        with self.assertRaisesRegex(ValueError, expected_message):
            _, _ = layer(features)
    def test_indicator_column(self, sparse_input_args_a, sparse_input_args_b,
                              expected_input_layer, expected_sequence_length):
        """Tests SequenceFeatures output for two sequence indicator columns.

        Args:
            sparse_input_args_a: Keyword arguments used to build the
                `SparseTensorValue` fed as feature 'aaa'.
            sparse_input_args_b: Keyword arguments used to build the
                `SparseTensorValue` fed as feature 'bbb'.
            expected_input_layer: Expected dense output of the layer.
            expected_sequence_length: Expected sequence lengths.
        """
        features = {
            'aaa': sparse_tensor.SparseTensorValue(**sparse_input_args_a),
            'bbb': sparse_tensor.SparseTensorValue(**sparse_input_args_b),
        }

        num_buckets_a = 3
        num_buckets_b = 2

        indicator_a = fc.indicator_column(
            sfc.sequence_categorical_column_with_identity(
                key='aaa', num_buckets=num_buckets_a))
        indicator_b = fc.indicator_column(
            sfc.sequence_categorical_column_with_identity(
                key='bbb', num_buckets=num_buckets_b))

        # The columns are passed in reverse order on purpose: this tests that
        # they are reordered alphabetically.
        layer = ksfc.SequenceFeatures([indicator_b, indicator_a])
        dense_output, lengths = layer(features)

        self.assertAllEqual(expected_input_layer, self.evaluate(dense_output))
        self.assertAllEqual(expected_sequence_length, self.evaluate(lengths))
    def test_sequence_length_not_equal(self):
        """Tests that mismatched per-feature sequence lengths raise an error."""
        features = {
            # Feature 'aaa' has sequence_length = [2, 1].
            'aaa': sparse_tensor.SparseTensorValue(
                indices=((0, 0), (0, 1), (1, 0)),
                values=(0., 1., 10.),
                dense_shape=(2, 2)),
            # Feature 'bbb' has sequence_length = [1, 1].
            'bbb': sparse_tensor.SparseTensorValue(
                indices=((0, 0), (1, 0)),
                values=(1., 10.),
                dense_shape=(2, 2)),
        }
        column_a = sfc.sequence_numeric_column('aaa')
        column_b = sfc.sequence_numeric_column('bbb')

        layer = ksfc.SequenceFeatures([column_a, column_b])

        # Both the layer call and the evaluation stay inside the context
        # manager so the error is caught at whichever step raises it.
        with self.assertRaisesRegex(errors.InvalidArgumentError,
                                    r'Condition x == y did not hold.*'):
            _, lengths = layer(features)
            self.evaluate(lengths)
    def test_shared_sequence_non_sequence_into_input_layer(self):
        """Tests one shared embedding used by sequence and dense layers.

        A non-sequence and a sequence identity column share a single
        all-ones embedding. The sequence half goes through
        `SequenceFeatures`, the non-sequence half through `DenseFeatures`.
        """
        non_seq_column = fc.categorical_column_with_identity('non_seq',
                                                             num_buckets=10)
        seq_column = sfc.sequence_categorical_column_with_identity(
            'seq', num_buckets=10)
        shared_non_seq, shared_seq = fc.shared_embedding_columns_v2(
            [non_seq_column, seq_column],
            dimension=4,
            combiner='sum',
            initializer=init_ops_v2.Ones(),
            shared_embedding_collection_name='shared')

        def _make_ids():
            # Example 0 holds two ids, example 1 holds a single id.
            return sparse_tensor.SparseTensor(
                indices=[[0, 0], [0, 1], [1, 0]],
                values=[0, 1, 2],
                dense_shape=[2, 2])

        features = {'seq': _make_ids(), 'non_seq': _make_ids()}

        seq_input, seq_length = ksfc.SequenceFeatures([shared_seq])(features)
        non_seq_input = dense_features.DenseFeatures([shared_non_seq])(
            features)

        with self.cached_session() as sess:
            sess.run(variables.global_variables_initializer())
            seq_out, seq_length_out, non_seq_out = sess.run(
                [seq_input, seq_length, non_seq_input])
            # Every embedding row is all ones; the absent second step of
            # example 1 is expected to be zeros.
            self.assertAllEqual(
                seq_out,
                [[[1, 1, 1, 1], [1, 1, 1, 1]], [[1, 1, 1, 1], [0, 0, 0, 0]]])
            self.assertAllEqual(seq_length_out, [2, 1])
            # The 'sum' combiner adds the two ids of example 0 together.
            self.assertAllEqual(non_seq_out, [[2, 2, 2, 2], [1, 1, 1, 1]])
    def test_compute_output_shape(self):
        """Tests `compute_output_shape` and the output of two numeric columns.

        'price1' is built with shape=2 and 'price2' with the default shape;
        the layer is expected to report a last dimension of 3.
        """
        price1_column = sfc.sequence_numeric_column('price1', shape=2)
        price2_column = sfc.sequence_numeric_column('price2')

        price1_values = sparse_tensor.SparseTensor(
            indices=[
                [0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 1, 1],
                [1, 0, 0], [1, 0, 1],
                [2, 0, 0], [2, 0, 1],
                [3, 0, 0], [3, 0, 1],
            ],
            values=[0., 1., 10., 11., 100., 101., 200., 201., 300., 301.],
            dense_shape=(4, 3, 2))
        price2_values = sparse_tensor.SparseTensor(
            indices=[[0, 0], [0, 1], [1, 0], [2, 0], [3, 0]],
            values=[10., 11., 20., 30., 40.],
            dense_shape=(4, 3))
        features = {'price1': price1_values, 'price2': price2_values}

        layer = ksfc.SequenceFeatures([price1_column, price2_column])
        seq_input, seq_len = layer(features)
        self.assertEqual(layer.compute_output_shape((None, None)),
                         (None, None, 3))
        self.evaluate(variables_lib.global_variables_initializer())
        self.evaluate(lookup_ops.tables_initializer())

        expected_seq_input = [
            [[0., 1., 10.], [10., 11., 11.], [0., 0., 0.]],
            [[100., 101., 20.], [0., 0., 0.], [0., 0., 0.]],
            [[200., 201., 30.], [0., 0., 0.], [0., 0., 0.]],
            [[300., 301., 40.], [0., 0., 0.], [0., 0., 0.]],
        ]
        self.assertAllClose(expected_seq_input, self.evaluate(seq_input))
        self.assertAllClose([2, 1, 1, 1], self.evaluate(seq_len))
    def test_serialization_sequence_features(self):
        """Tests a serialize/deserialize round trip of SequenceFeatures."""
        original_layer = ksfc.SequenceFeatures(
            [sfc.sequence_numeric_column('rating')])
        serialized = keras.layers.serialize(original_layer)

        restored_layer = keras.layers.deserialize(serialized)
        self.assertIsInstance(restored_layer, ksfc.SequenceFeatures)
예제 #7
0
    def test_saving_with_sequence_features(self):
        """Tests a JSON round trip of a model built on SequenceFeatures.

        The model takes a numeric sequence feature 'a' and an indicator
        over a vocabulary-list sequence feature 'b'. It is serialized with
        `to_json`, rebuilt with `model_from_json`, and the rebuilt model
        must produce one prediction per example in the batch.
        """
        cols = [
            feature_column_lib.sequence_numeric_column('a'),
            feature_column_lib.indicator_column(
                feature_column_lib.
                sequence_categorical_column_with_vocabulary_list(
                    'b', ['one', 'two']))
        ]
        input_layers = {
            'a':
            keras.layers.Input(shape=(None, 1), sparse=True, name='a'),
            'b':
            keras.layers.Input(shape=(None, 1),
                               sparse=True,
                               name='b',
                               dtype='string')
        }

        fc_layer, _ = ksfc.SequenceFeatures(cols)(input_layers)
        # TODO(tibell): Figure out the right dtype and apply masking.
        # sequence_length_mask = array_ops.sequence_mask(sequence_length)
        # x = keras.layers.GRU(32)(fc_layer, mask=sequence_length_mask)
        x = keras.layers.GRU(32)(fc_layer)
        output = keras.layers.Dense(10)(x)

        model = keras.models.Model(input_layers, output)

        model.compile(loss=keras.losses.MSE,
                      optimizer='rmsprop',
                      metrics=[keras.metrics.categorical_accuracy])

        config = model.to_json()
        loaded_model = model_config.model_from_json(config)

        batch_size = 10
        timesteps = 1

        # One value per example, each at position [i, 0, 0].
        values_a = np.arange(batch_size, dtype=np.float32)
        indices_a = np.zeros((batch_size, 3), dtype=np.int64)
        indices_a[:, 0] = np.arange(batch_size)
        inputs_a = sparse_tensor.SparseTensor(indices_a, values_a,
                                              (batch_size, timesteps, 1))

        # `np.str` was only an alias of the builtin `str` and has been
        # removed from NumPy; the builtin yields the same dtype.
        values_b = np.zeros(batch_size, dtype=str)
        indices_b = np.zeros((batch_size, 3), dtype=np.int64)
        indices_b[:, 0] = np.arange(batch_size)
        inputs_b = sparse_tensor.SparseTensor(indices_b, values_b,
                                              (batch_size, timesteps, 1))

        with self.cached_session():
            # Initialize tables for V1 lookup.
            if not context.executing_eagerly():
                self.evaluate(lookup_ops.tables_initializer())

            self.assertLen(
                loaded_model.predict({
                    'a': inputs_a,
                    'b': inputs_b
                }, steps=1), batch_size)
예제 #8
0
  def test_embedding_column(
      self, sparse_input_args_a, sparse_input_args_b, expected_input_layer,
      expected_sequence_length):
    """Tests SequenceFeatures output for two sequence embedding columns.

    Args:
      sparse_input_args_a: Keyword arguments used to build the
        `SparseTensorValue` fed as feature 'aaa'.
      sparse_input_args_b: Keyword arguments used to build the
        `SparseTensorValue` fed as feature 'bbb'.
      expected_input_layer: Expected dense output of the layer.
      expected_sequence_length: Expected sequence lengths.
    """
    features = {
        'aaa': sparse_tensor.SparseTensorValue(**sparse_input_args_a),
        'bbb': sparse_tensor.SparseTensorValue(**sparse_input_args_b),
    }
    vocabulary_size = 3
    dimension_a = 2
    table_a = (
        (1., 2.),  # id 0
        (3., 4.),  # id 1
        (5., 6.)  # id 2
    )
    dimension_b = 3
    table_b = (
        (11., 12., 13.),  # id 0
        (14., 15., 16.),  # id 1
        (17., 18., 19.)  # id 2
    )

    def _make_initializer(expected_dimension, table):
      """Returns an initializer that checks its arguments and yields `table`."""

      def _checked_initializer(shape, dtype, partition_info=None):
        self.assertAllEqual((vocabulary_size, expected_dimension), shape)
        self.assertEqual(dtypes.float32, dtype)
        self.assertIsNone(partition_info)
        return table

      return _checked_initializer

    embedding_a = fc.embedding_column(
        sfc.sequence_categorical_column_with_identity(
            key='aaa', num_buckets=vocabulary_size),
        dimension=dimension_a,
        initializer=_make_initializer(dimension_a, table_a))
    embedding_b = fc.embedding_column(
        sfc.sequence_categorical_column_with_identity(
            key='bbb', num_buckets=vocabulary_size),
        dimension=dimension_b,
        initializer=_make_initializer(dimension_b, table_b))

    # The columns are passed in reverse order on purpose: this tests that
    # they are reordered alphabetically.
    layer = ksfc.SequenceFeatures([embedding_b, embedding_a])
    dense_output, lengths = layer(features)

    self.evaluate(variables_lib.global_variables_initializer())
    layer_weights = layer.weights
    expected_names = (
        'sequence_features/aaa_embedding/embedding_weights:0',
        'sequence_features/bbb_embedding/embedding_weights:0',
    )
    self.assertCountEqual(expected_names,
                          tuple(v.name for v in layer_weights))
    self.assertAllEqual(table_a, self.evaluate(layer_weights[0]))
    self.assertAllEqual(table_b, self.evaluate(layer_weights[1]))
    self.assertAllEqual(expected_input_layer, self.evaluate(dense_output))
    self.assertAllEqual(expected_sequence_length, self.evaluate(lengths))
    def test_static_shape_from_tensors_numeric(self, sparse_input_args,
                                               expected_shape):
        """Tests the static output shape for a (2, 2) sequence numeric column.

        Args:
            sparse_input_args: Keyword arguments used to build the
                `SparseTensorValue` fed as feature 'aaa'.
            expected_shape: Static shape the layer output must report.
        """
        features = {
            'aaa': sparse_tensor.SparseTensorValue(**sparse_input_args)
        }
        column = sfc.sequence_numeric_column('aaa', shape=(2, 2))

        layer = ksfc.SequenceFeatures([column])
        dense_output, _ = layer(features)
        self.assertEqual(dense_output.get_shape(), expected_shape)
예제 #10
0
  def test_from_config(self, trainable, name):
    """Tests that `from_config` rebuilds an equivalent SequenceFeatures layer.

    Args:
      trainable: Value passed as the layer's `trainable` argument.
      name: Value passed as the layer's `name` argument.
    """
    original = ksfc.SequenceFeatures(
        [sfc.sequence_numeric_column('a')], trainable=trainable, name=name)

    rebuilt = ksfc.SequenceFeatures.from_config(original.get_config())

    self.assertEqual(rebuilt.name, original.name)
    self.assertEqual(rebuilt.trainable, trainable)
    self.assertLen(rebuilt._feature_columns, 1)
    self.assertEqual(rebuilt._feature_columns[0].name, 'a')
예제 #11
0
  def test_get_config(self, trainable, name):
    """Tests the contents of the config returned by `get_config`.

    Args:
      trainable: Value passed as the layer's `trainable` argument.
      name: Value passed as the layer's `name` argument.
    """
    layer = ksfc.SequenceFeatures(
        [sfc.sequence_numeric_column('a')], trainable=trainable, name=name)
    layer_config = layer.get_config()

    self.assertEqual(layer_config['name'], layer.name)
    self.assertEqual(layer_config['trainable'], trainable)
    self.assertLen(layer_config['feature_columns'], 1)
    # Only index into the list after its length has been asserted.
    column_config = layer_config['feature_columns'][0]
    self.assertEqual(column_config['class_name'], 'SequenceNumericColumn')
    self.assertEqual(column_config['config']['shape'], (1,))
    def test_static_shape_from_tensors_indicator(self, sparse_input_args,
                                                 expected_shape):
        """Tests the static output shape for a sequence indicator column.

        Args:
            sparse_input_args: Keyword arguments used to build the
                `SparseTensorValue` fed as feature 'aaa'.
            expected_shape: Static shape the layer output must report.
        """
        features = {
            'aaa': sparse_tensor.SparseTensorValue(**sparse_input_args)
        }
        indicator = fc.indicator_column(
            sfc.sequence_categorical_column_with_identity(key='aaa',
                                                          num_buckets=3))

        layer = ksfc.SequenceFeatures([indicator])
        dense_output, _ = layer(features)
        self.assertEqual(dense_output.get_shape(), expected_shape)
예제 #13
0
  def test_numeric_column(
      self, sparse_input_args, expected_input_layer, expected_sequence_length):
    """Tests SequenceFeatures output for a single sequence numeric column.

    Args:
      sparse_input_args: Keyword arguments used to build the
        `SparseTensorValue` fed as feature 'aaa'.
      expected_input_layer: Expected dense output of the layer.
      expected_sequence_length: Expected sequence lengths.
    """
    features = {'aaa': sparse_tensor.SparseTensorValue(**sparse_input_args)}

    column = sfc.sequence_numeric_column('aaa')

    layer = ksfc.SequenceFeatures([column])
    dense_output, lengths = layer(features)

    self.assertAllEqual(expected_input_layer, self.evaluate(dense_output))
    self.assertAllEqual(expected_sequence_length, self.evaluate(lengths))
예제 #14
0
  def test_numeric_column_multi_dim(
      self, sparse_input_args, expected_input_layer, expected_sequence_length):
    """Tests SequenceFeatures for multi-dimensional numeric_column.

    Args:
      sparse_input_args: Keyword arguments used to build the
        `SparseTensorValue` fed as feature 'aaa'.
      expected_input_layer: Expected dense output of the layer.
      expected_sequence_length: Expected sequence lengths.
    """
    features = {'aaa': sparse_tensor.SparseTensorValue(**sparse_input_args)}

    column = sfc.sequence_numeric_column('aaa', shape=(2, 2))

    layer = ksfc.SequenceFeatures([column])
    dense_output, lengths = layer(features)

    self.assertAllEqual(expected_input_layer, self.evaluate(dense_output))
    self.assertAllEqual(expected_sequence_length, self.evaluate(lengths))
예제 #15
0
def make_model(feature_config: FeatureConfig):
    """Build a Keras model over context and sequence feature columns.

    Context features go through `DenseFeatures` and sequence features
    through `SequenceFeatures`. The two results are merged by
    `ContextSequenceConcat` and passed through Dense(16), an LSTM that
    returns sequences, Dense(8), and a final Dense layer named "logits"
    whose width is `policy_vector_size()`.

    Args:
        feature_config: Provides `make_model_input_configs()` together with
            the `context_features` and `sequence_features` column lists.

    Returns:
        A `tf.keras.Model` whose inputs are the context inputs followed by
        the sequence inputs and whose output is the "logits" layer.
    """
    context_inputs, sequence_inputs = (
        feature_config.make_model_input_configs())

    context_layer = tf.keras.layers.DenseFeatures(
        feature_config.context_features)
    context_dense = context_layer(context_inputs)

    sequence_layer = ksfc.SequenceFeatures(feature_config.sequence_features)
    sequence_dense, _ = sequence_layer(sequence_inputs)

    hidden = ContextSequenceConcat()((context_dense, sequence_dense))
    hidden = tf.keras.layers.Dense(16)(hidden)
    hidden = tf.keras.layers.LSTM(units=16, return_sequences=True)(hidden)
    hidden = tf.keras.layers.Dense(8)(hidden)
    logits = tf.keras.layers.Dense(policy_vector_size(),
                                   name="logits")(hidden)

    model_inputs = [*context_inputs.values(), *sequence_inputs.values()]
    return tf.keras.Model(inputs=model_inputs, outputs=logits)
    def test_indicator_column_with_non_sequence_categorical(self):
        """Tests that a non-sequence indicator column is rejected.

        The indicator column wraps a plain identity column rather than a
        sequence categorical column, so calling the `SequenceFeatures`
        layer on it must raise a `ValueError`.
        """
        num_buckets = 3
        features = {
            'aaa': sparse_tensor.SparseTensorValue(
                # example 0, ids [2]
                # example 1, ids [0, 1]
                indices=((0, 0), (1, 0), (1, 1)),
                values=(2, 0, 1),
                dense_shape=(2, 2))
        }

        indicator = fc.indicator_column(
            fc.categorical_column_with_identity(key='aaa',
                                                num_buckets=num_buckets))

        layer = ksfc.SequenceFeatures([indicator])
        expected_message = (
            r'In indicator_column: aaa_indicator\. categorical_column must be of '
            r'type SequenceCategoricalColumn to use SequenceFeatures\.')
        with self.assertRaisesRegex(ValueError, expected_message):
            _, _ = layer(features)
예제 #17
0
  def test_sequence_example_into_input_layer(self):
    """Tests feeding parsed SequenceExamples through feature layers to an RNN.

    Serialized examples are parsed into context and sequence features,
    batched in groups of 20, converted by `SequenceFeatures` and
    `DenseFeatures`, concatenated, and passed to a SimpleRNN with 10 units.
    """
    serialized = [_make_sequence_example().SerializeToString()] * 100
    context_columns, sequence_columns = self._build_feature_columns()

    def _parse(serialized_example):
      """Parses one example and merges sequence features into the context."""
      context, sequence = parsing_ops.parse_single_sequence_example(
          serialized_example,
          context_features=fc.make_parse_example_spec_v2(context_columns),
          sequence_features=fc.make_parse_example_spec_v2(sequence_columns))
      context.update(sequence)
      return context

    dataset = (
        dataset_ops.Dataset.from_tensor_slices(serialized)
        .map(_parse)
        .batch(20))

    # Only the first batch is used by this test.
    features = dataset_ops.make_one_shot_iterator(dataset).get_next()

    sequence_layer = ksfc.SequenceFeatures(sequence_columns)
    sequence_dense, _ = sequence_layer(features)
    context_layer = dense_features.DenseFeatures(context_columns)
    context_dense = context_layer(features)
    # Repeat the context features once per step of the sequence input so
    # the two can be concatenated.
    num_steps = array_ops.shape(sequence_dense)[1]
    context_dense = core.RepeatVector(num_steps)(context_dense)
    rnn_input = merge.concatenate([sequence_dense, context_dense])

    rnn = recurrent.RNN(recurrent.SimpleRNNCell(10))
    rnn_output = rnn(rnn_input)

    with self.cached_session() as sess:
      sess.run(variables.global_variables_initializer())
      features_value = sess.run(features)
      self.assertAllEqual(features_value['int_list'].dense_shape, [20, 3, 6])

      rnn_output_value = sess.run(rnn_output)
      self.assertAllEqual(rnn_output_value.shape, [20, 10])
예제 #18
0
    def test_save_load_with_sequence_features(self):
        """Tests a TileDB save/load round trip of a SequenceFeatures model.

        The compiled model is saved with its optimizer through
        `TensorflowTileDB` and loaded back. The optimizer weights are
        compared pairwise, and both models must produce identical
        predictions for the same sparse inputs.
        """
        cols = [
            feature_column_lib.sequence_numeric_column("a"),
            feature_column_lib.indicator_column(
                feature_column_lib.
                sequence_categorical_column_with_vocabulary_list(
                    "b", ["one", "two"])),
        ]
        input_layers = {
            "a":
            keras.layers.Input(shape=(None, 1), sparse=True, name="a"),
            "b":
            keras.layers.Input(shape=(None, 1),
                               sparse=True,
                               name="b",
                               dtype="string"),
        }

        fc_layer, _ = ksfc.SequenceFeatures(cols)(input_layers)
        x = keras.layers.GRU(32)(fc_layer)
        output = keras.layers.Dense(10)(x)

        model = keras.models.Model(input_layers, output)

        model.compile(
            loss=keras.losses.MSE,
            optimizer="rmsprop",
            metrics=[keras.metrics.categorical_accuracy],
        )

        tiledb_uri = os.path.join(self.get_temp_dir(), "model_array")
        tiledb_model_obj = TensorflowTileDB(uri=tiledb_uri)
        tiledb_model_obj.save(model=model, include_optimizer=True)
        loaded_model = tiledb_model_obj.load(compile_model=True)

        model_opt_weights = batch_get_value(model.optimizer.weights)
        loaded_opt_weights = batch_get_value(loaded_model.optimizer.weights)

        # Assert optimizer weights are equal
        for weight_model, weight_loaded_model in zip(model_opt_weights,
                                                     loaded_opt_weights):
            np.testing.assert_array_equal(weight_model, weight_loaded_model)

        batch_size = 10
        timesteps = 1

        # One value per example, each at position [i, 0, 0].
        values_a = np.arange(batch_size, dtype=np.float32)
        indices_a = np.zeros((batch_size, 3), dtype=np.int64)
        indices_a[:, 0] = np.arange(batch_size)
        inputs_a = sparse_tensor.SparseTensor(indices_a, values_a,
                                              (batch_size, timesteps, 1))

        # `np.str` was only an alias of the builtin `str` and has been
        # removed from NumPy; the builtin yields the same dtype.
        values_b = np.zeros(batch_size, dtype=str)
        indices_b = np.zeros((batch_size, 3), dtype=np.int64)
        indices_b[:, 0] = np.arange(batch_size)
        inputs_b = sparse_tensor.SparseTensor(indices_b, values_b,
                                              (batch_size, timesteps, 1))

        # Assert model predictions are equal
        np.testing.assert_array_equal(
            loaded_model.predict({
                "a": inputs_a,
                "b": inputs_b
            }, steps=1),
            model.predict({
                "a": inputs_a,
                "b": inputs_b
            }, steps=1),
        )
    def test_shared_embedding_column(self):
        """Tests SequenceFeatures with two columns sharing one embedding.

        Runs in a fresh graph: builds shared embedding columns over two
        sequence identity columns, then checks the single shared variable,
        the concatenated embedding output, and the sequence lengths.
        """
        with ops.Graph().as_default():
            vocabulary_size = 3
            features = {
                'aaa': sparse_tensor.SparseTensorValue(
                    # example 0, ids [2]
                    # example 1, ids [0, 1]
                    indices=((0, 0), (1, 0), (1, 1)),
                    values=(2, 0, 1),
                    dense_shape=(2, 2)),
                'bbb': sparse_tensor.SparseTensorValue(
                    # example 0, ids [1]
                    # example 1, ids [2, 0]
                    indices=((0, 0), (1, 0), (1, 1)),
                    values=(1, 2, 0),
                    dense_shape=(2, 2)),
            }

            embedding_dimension = 2
            embedding_table = (
                (1., 2.),  # id 0
                (3., 4.),  # id 1
                (5., 6.)  # id 2
            )

            def _make_initializer(expected_dimension, table):
                """Returns an initializer that checks its args, yields `table`."""

                def _checked_initializer(shape, dtype, partition_info=None):
                    self.assertAllEqual((vocabulary_size, expected_dimension),
                                        shape)
                    self.assertEqual(dtypes.float32, dtype)
                    self.assertIsNone(partition_info)
                    return table

                return _checked_initializer

            expected_output = [
                # example 0, ids_a [2], ids_b [1]
                [[5., 6., 3., 4.], [0., 0., 0., 0.]],
                # example 1, ids_a [0, 1], ids_b [2, 0]
                [[1., 2., 5., 6.], [3., 4., 1., 2.]],
            ]
            expected_lengths = [1, 2]

            column_a = sfc.sequence_categorical_column_with_identity(
                key='aaa', num_buckets=vocabulary_size)
            column_b = sfc.sequence_categorical_column_with_identity(
                key='bbb', num_buckets=vocabulary_size)
            # The columns are passed in reverse order on purpose: this tests
            # that they are reordered alphabetically.
            shared_columns = fc.shared_embedding_columns_v2(
                [column_b, column_a],
                dimension=embedding_dimension,
                initializer=_make_initializer(embedding_dimension,
                                              embedding_table))

            layer = ksfc.SequenceFeatures(shared_columns)
            dense_output, lengths = layer(features)

            graph_variables = ops.get_collection(
                ops.GraphKeys.GLOBAL_VARIABLES)
            self.assertCountEqual(('aaa_bbb_shared_embedding:0', ),
                                  tuple(v.name for v in graph_variables))
            with _initialized_session() as sess:
                self.assertAllEqual(embedding_table,
                                    graph_variables[0].eval(session=sess))
                self.assertAllEqual(expected_output,
                                    dense_output.eval(session=sess))
                self.assertAllEqual(expected_lengths,
                                    lengths.eval(session=sess))