Example #1
    def test_multiple_layers_with_same_shared_embedding_column(self):
        categorical_column_a = tf.feature_column.categorical_column_with_identity(
            key='aaa', num_buckets=3)
        categorical_column_b = tf.feature_column.categorical_column_with_identity(
            key='bbb', num_buckets=3)
        embedding_dimension = 2

        # feature_column.shared_embeddings is not supported in eager.
        with tf.Graph().as_default():
            embedding_column_b, embedding_column_a = tf.feature_column.shared_embeddings(
                [categorical_column_b, categorical_column_a],
                dimension=embedding_dimension)
            features = {
                'aaa':
                tf.SparseTensor(indices=((0, 0), (1, 0), (1, 1)),
                                values=(0, 1, 0),
                                dense_shape=(2, 2)),
                'bbb':
                tf.SparseTensor(indices=((0, 0), (1, 0), (1, 1)),
                                values=(1, 2, 1),
                                dense_shape=(2, 2)),
            }
            all_cols = [embedding_column_a, embedding_column_b]
            df.DenseFeatures(all_cols)(features)
            df.DenseFeatures(all_cols)(features)
            # Make sure that only 1 variable gets created in this case.
            self.assertEqual(
                1,
                len(
                    tf.compat.v1.get_collection(
                        tf.compat.v1.GraphKeys.GLOBAL_VARIABLES)))
            self.assertCountEqual(['aaa_bbb_shared_embedding:0'], [
                v.name for v in tf.compat.v1.get_collection(
                    tf.compat.v1.GraphKeys.GLOBAL_VARIABLES)
            ])
Example #2
    def test_shared_sequence_non_sequence_into_input_layer(self):
        non_seq = tf.feature_column.categorical_column_with_identity(
            'non_seq', num_buckets=10)
        seq = tf.feature_column.sequence_categorical_column_with_identity(
            'seq', num_buckets=10)
        shared_non_seq, shared_seq = tf.feature_column.shared_embeddings(
            [non_seq, seq],
            dimension=4,
            combiner='sum',
            initializer=tf.ones_initializer(),
            shared_embedding_collection_name='shared')

        seq = tf.SparseTensor(indices=[[0, 0], [0, 1], [1, 0]],
                              values=[0, 1, 2],
                              dense_shape=[2, 2])
        non_seq = tf.SparseTensor(indices=[[0, 0], [0, 1], [1, 0]],
                                  values=[0, 1, 2],
                                  dense_shape=[2, 2])
        features = {'seq': seq, 'non_seq': non_seq}

        # Tile the context features across the sequence features
        seq_input, seq_length = ksfc.SequenceFeatures([shared_seq])(features)
        non_seq_input = dense_features.DenseFeatures([shared_non_seq
                                                      ])(features)

        with self.cached_session() as sess:
            sess.run(tf.compat.v1.global_variables_initializer())
            output_seq, output_seq_length, output_non_seq = sess.run(
                [seq_input, seq_length, non_seq_input])
            self.assertAllEqual(
                output_seq,
                [[[1, 1, 1, 1], [1, 1, 1, 1]], [[1, 1, 1, 1], [0, 0, 0, 0]]])
            self.assertAllEqual(output_seq_length, [2, 1])
            self.assertAllEqual(output_non_seq, [[2, 2, 2, 2], [1, 1, 1, 1]])
Example #3
 def test_crossing_sparse_inputs_depth_tuple(self):
   layer = category_crossing.CategoryCrossing(depth=(2, 3))
   inputs_0 = tf.SparseTensor(
       indices=[[0, 0], [1, 0], [2, 0]],
       values=['a', 'b', 'c'],
       dense_shape=[3, 1])
   inputs_1 = tf.SparseTensor(
       indices=[[0, 0], [1, 0], [2, 0]],
       values=['d', 'e', 'f'],
       dense_shape=[3, 1])
   inputs_2 = tf.SparseTensor(
       indices=[[0, 0], [1, 0], [2, 0]],
       values=['g', 'h', 'i'],
       dense_shape=[3, 1])
   inp_0_t = input_layer.Input(shape=(1,), sparse=True, dtype=tf.string)
   inp_1_t = input_layer.Input(shape=(1,), sparse=True, dtype=tf.string)
   inp_2_t = input_layer.Input(shape=(1,), sparse=True, dtype=tf.string)
   out_t = layer([inp_0_t, inp_1_t, inp_2_t])
   model = training.Model([inp_0_t, inp_1_t, inp_2_t], out_t)
   output = model.predict([inputs_0, inputs_1, inputs_2])
   self.assertIsInstance(output, tf.SparseTensor)
   output = tf.sparse.to_dense(output)
   expected_outputs_0 = [[b'a_X_d', b'a_X_g', b'd_X_g', b'a_X_d_X_g']]
   expected_outputs_1 = [[b'b_X_e', b'b_X_h', b'e_X_h', b'b_X_e_X_h']]
   expected_outputs_2 = [[b'c_X_f', b'c_X_i', b'f_X_i', b'c_X_f_X_i']]
   expected_out = tf.concat(
       [expected_outputs_0, expected_outputs_1, expected_outputs_2], axis=0)
   self.assertAllEqual(expected_out, output)
Example #4
  def test_works_with_registered(self):

    class CustomClass:

      def value(self):
        return tf.convert_to_tensor(42.)

    tf.register_tensor_conversion_function(
        CustomClass, lambda value, **_: value.value())

    tf_utils.register_symbolic_tensor_type(CustomClass)

    if tf.executing_eagerly():
      self.assertFalse(tf_utils.is_symbolic_tensor(
          tf.Variable(name='blah', initial_value=0.)))
      self.assertFalse(
          tf_utils.is_symbolic_tensor(
              tf.convert_to_tensor(0.)))
      self.assertFalse(tf_utils.is_symbolic_tensor(
          tf.SparseTensor(
              indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4])))
      self.assertFalse(tf_utils.is_symbolic_tensor(CustomClass()))
    else:
      self.assertTrue(tf_utils.is_symbolic_tensor(
          tf.Variable(name='blah', initial_value=0.)))
      self.assertTrue(
          tf_utils.is_symbolic_tensor(
              tf.convert_to_tensor(0.)))
      self.assertTrue(tf_utils.is_symbolic_tensor(
          tf.SparseTensor(
              indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4])))
      self.assertTrue(tf_utils.is_symbolic_tensor(CustomClass()))
Example #5
    def test_compute_output_shape(self):
        price1 = tf.feature_column.sequence_numeric_column('price1', shape=2)
        price2 = tf.feature_column.sequence_numeric_column('price2')
        features = {
            'price1':
            tf.SparseTensor(indices=[[0, 0, 0], [0, 0, 1], [0, 1,
                                                            0], [0, 1, 1],
                                     [1, 0, 0], [1, 0, 1], [2, 0, 0],
                                     [2, 0, 1], [3, 0, 0], [3, 0, 1]],
                            values=[
                                0., 1., 10., 11., 100., 101., 200., 201., 300.,
                                301.
                            ],
                            dense_shape=(4, 3, 2)),
            'price2':
            tf.SparseTensor(indices=[[0, 0], [0, 1], [1, 0], [2, 0], [3, 0]],
                            values=[10., 11., 20., 30., 40.],
                            dense_shape=(4, 3))
        }
        sequence_features = ksfc.SequenceFeatures([price1, price2])
        seq_input, seq_len = sequence_features(features)
        self.assertEqual(sequence_features.compute_output_shape((None, None)),
                         (None, None, 3))
        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.evaluate(tf.compat.v1.tables_initializer())

        self.assertAllClose([[[0., 1., 10.], [10., 11., 11.], [0., 0., 0.]],
                             [[100., 101., 20.], [0., 0., 0.], [0., 0., 0.]],
                             [[200., 201., 30.], [0., 0., 0.], [0., 0., 0.]],
                             [[300., 301., 40.], [0., 0., 0.], [0., 0., 0.]]],
                            self.evaluate(seq_input))
        self.assertAllClose([2, 1, 1, 1], self.evaluate(seq_len))
Example #6
  def test_sparse_input_sparse_output_with_weights(self):
    indices = [[0, 0], [1, 1], [2, 0], [2, 1], [3, 1]]
    sp_inp = tf.SparseTensor(
        indices=indices, values=[0, 2, 1, 1, 0], dense_shape=[4, 2])
    input_data = keras.Input(shape=(None,), dtype=tf.int64, sparse=True)
    sp_weight = tf.SparseTensor(
        indices=indices, values=[.1, .2, .4, .3, .2], dense_shape=[4, 2])
    weight_data = keras.Input(shape=(None,), dtype=tf.float32, sparse=True)

    # The expected output, as unweighted counts (X for missing value):
    # [[1, X, X, X, X, X]
    #  [X, X, 1, X, X, X]
    #  [X, 2, X, X, X, X]
    #  [1, X, X, X, X, X]]
    # With count_weights applied, the values become [.1, .2, .7, .2].
    expected_indices = [[0, 0], [1, 2], [2, 1], [3, 0]]
    expected_values = [.1, .2, .7, .2]
    num_tokens = 6

    layer = category_encoding.CategoryEncoding(
        num_tokens=num_tokens, output_mode=category_encoding.COUNT, sparse=True)
    int_data = layer(input_data, count_weights=weight_data)

    model = keras.Model(inputs=[input_data, weight_data], outputs=int_data)
    sp_output_dataset = model.predict([sp_inp, sp_weight], steps=1)
    self.assertAllClose(expected_values, sp_output_dataset.values)
    self.assertAllEqual(expected_indices, sp_output_dataset.indices)
Example #7
 def test_default_behavior(self):
     if tf.executing_eagerly():
         self.assertFalse(
             tf_utils.is_symbolic_tensor(
                 tf.Variable(name="blah", initial_value=0.0)))
         self.assertFalse(
             tf_utils.is_symbolic_tensor(tf.convert_to_tensor(0.0)))
         self.assertFalse(
             tf_utils.is_symbolic_tensor(
                 tf.SparseTensor(
                     indices=[[0, 0], [1, 2]],
                     values=[1, 2],
                     dense_shape=[3, 4],
                 )))
     else:
         self.assertTrue(
             tf_utils.is_symbolic_tensor(
                 tf.Variable(name="blah", initial_value=0.0)))
         self.assertTrue(
             tf_utils.is_symbolic_tensor(tf.convert_to_tensor(0.0)))
         self.assertTrue(
             tf_utils.is_symbolic_tensor(
                 tf.SparseTensor(
                     indices=[[0, 0], [1, 2]],
                     values=[1, 2],
                     dense_shape=[3, 4],
                 )))
Example #8
    def test_saving_with_sequence_features(self):
        cols = [
            tf.feature_column.sequence_numeric_column('a'),
            tf.feature_column.indicator_column(
                tf.feature_column.
                sequence_categorical_column_with_vocabulary_list(
                    'b', ['one', 'two']))
        ]
        input_layers = {
            'a':
            keras.layers.Input(shape=(None, 1), sparse=True, name='a'),
            'b':
            keras.layers.Input(shape=(None, 1),
                               sparse=True,
                               name='b',
                               dtype='string')
        }

        fc_layer, _ = ksfc.SequenceFeatures(cols)(input_layers)
        # TODO(tibell): Figure out the right dtype and apply masking.
        # sequence_length_mask = array_ops.sequence_mask(sequence_length)
        # x = keras.layers.GRU(32)(fc_layer, mask=sequence_length_mask)
        x = keras.layers.GRU(32)(fc_layer)
        output = keras.layers.Dense(10)(x)

        model = keras.models.Model(input_layers, output)

        model.compile(loss=keras.losses.MSE,
                      optimizer='rmsprop',
                      metrics=[keras.metrics.categorical_accuracy])

        config = model.to_json()
        loaded_model = model_config.model_from_json(config)

        batch_size = 10
        timesteps = 1

        values_a = np.arange(10, dtype=np.float32)
        indices_a = np.zeros((10, 3), dtype=np.int64)
        indices_a[:, 0] = np.arange(10)
        inputs_a = tf.SparseTensor(indices_a, values_a,
                                   (batch_size, timesteps, 1))

        values_b = np.zeros(10, dtype=str)
        indices_b = np.zeros((10, 3), dtype=np.int64)
        indices_b[:, 0] = np.arange(10)
        inputs_b = tf.SparseTensor(indices_b, values_b,
                                   (batch_size, timesteps, 1))

        with self.cached_session():
            # Initialize tables for V1 lookup.
            if not tf.executing_eagerly():
                self.evaluate(tf.compat.v1.tables_initializer())

            self.assertLen(
                loaded_model.predict({
                    'a': inputs_a,
                    'b': inputs_b
                }, steps=1), batch_size)
Example #9
 def test_sparse_concatenation(self):
     tensor_1 = tf.SparseTensor([[0, 0]], [1], [1, 1])
     tensor_2 = tf.SparseTensor([[0, 0]], [2], [1, 1])
     concatenated_tensor = training_utils_v1._append_composite_tensor(
         tensor_1, tensor_2)
     evaluated_tensor = self.evaluate(concatenated_tensor)
     self.assertAllEqual(evaluated_tensor.indices, [[0, 0], [1, 0]])
     self.assertAllEqual(evaluated_tensor.values, [1, 2])
     self.assertAllEqual(evaluated_tensor.dense_shape, [2, 1])
Example #10
def _make_sparse_tensor_dict():
  rel_name1 = 'real_stuff'
  # Note, these matrices are transposed.
  sparse_tensor1 = tf.SparseTensor(
      indices=[[0, 0], [99, 1]], values=[1., 2.], dense_shape=[100, 2])
  rel_name2 = 'other_stuff'
  sparse_tensor2 = tf.SparseTensor(
      indices=[[100, 0]], values=[3.], dense_shape=[1000, 2])
  return {rel_name1: sparse_tensor1, rel_name2: sparse_tensor2}
Example #11
 def test_crossing_sparse_inputs_empty_sep(self):
   layer = category_crossing.CategoryCrossing(separator='')
   inputs_0 = tf.SparseTensor(
       indices=[[0, 0], [1, 0], [1, 1]],
       values=['a', 'b', 'c'],
       dense_shape=[2, 2])
   inputs_1 = tf.SparseTensor(
       indices=[[0, 1], [1, 2]], values=['d', 'e'], dense_shape=[2, 3])
   output = layer([inputs_0, inputs_1])
   self.assertAllClose(np.asarray([[0, 0], [1, 0], [1, 1]]), output.indices)
   self.assertAllEqual([b'ad', b'be', b'ce'], output.values)
Example #12
 def test_crossing_sparse_inputs_depth_int(self):
     layer = category_crossing.CategoryCrossing(depth=1)
     inputs_0 = tf.SparseTensor(indices=[[0, 0], [1, 0], [2, 0]],
                                values=['a', 'b', 'c'],
                                dense_shape=[3, 1])
     inputs_1 = tf.SparseTensor(indices=[[0, 0], [1, 0], [2, 0]],
                                values=['d', 'e', 'f'],
                                dense_shape=[3, 1])
     output = layer([inputs_0, inputs_1])
     self.assertIsInstance(output, tf.SparseTensor)
     output = tf.sparse.to_dense(output)
     expected_out = [[b'a', b'd'], [b'b', b'e'], [b'c', b'f']]
     self.assertAllEqual(expected_out, output)
Example #13
    def test_sparse_tensors(self, use_dict, use_dataset, action):
        data = [
            (
                tf.SparseTensor([[0, 0, 0], [1, 0, 0], [1, 0, 1]], [1, 2, 3],
                                [2, 1, 3]),
                np.array([[[1, -1, -1]], [[2, 3, -1]]]),
            ),
            (
                tf.SparseTensor(
                    [[0, 0, 0], [1, 0, 0], [1, 0, 1], [2, 0, 1]],
                    [5, 6, 7, 8],
                    [3, 1, 4],
                ),
                np.array([[[5, -1, -1, -1]], [[6, 7, -1, -1]],
                          [[-1, 8, -1, -1]]]),
            ),
        ]
        # Prepare the model to test.
        input_name = get_input_name(use_dict)
        model_input = input_layer.Input(shape=(1, None),
                                        sparse=True,
                                        name=input_name,
                                        dtype=tf.int32)
        layers = [ToDense(default_value=-1)]
        model = get_model_from_layers_with_input(layers,
                                                 model_input=model_input)
        model.compile(optimizer="sgd",
                      loss="mse",
                      metrics=["accuracy"],
                      **get_test_mode_kwargs())
        kwargs = get_kwargs(use_dataset, action)

        # Prepare the input data
        for data_element in data:
            input_data, expected_output = prepare_inputs(
                data_element, use_dict, use_dataset, action, input_name)
            # Perform the action.
            if action == "predict":
                result = model.predict(input_data, **kwargs)
                self.assertAllEqual(expected_output, result)
            if action == "evaluate":
                result = model.evaluate(input_data, expected_output, **kwargs)
                self.assertAllEqual(1.0, result[-1])
            if action == "fit":
                # TODO(momernick): What's the best way of validating that fit
                # happened?
                _ = model.fit(input_data,
                              expected_output,
                              shuffle=False,
                              **kwargs)
Example #14
    def test_multiple_layers_with_same_shared_embedding_column(self):
        categorical_column_a = (
            tf.feature_column.categorical_column_with_identity(key="aaa",
                                                               num_buckets=3))
        categorical_column_b = (
            tf.feature_column.categorical_column_with_identity(key="bbb",
                                                               num_buckets=3))
        embedding_dimension = 2
        (
            embedding_column_b,
            embedding_column_a,
        ) = tf.feature_column.shared_embeddings(
            [categorical_column_b, categorical_column_a],
            dimension=embedding_dimension,
        )

        with tf.Graph().as_default():
            features = {
                "aaa":
                tf.SparseTensor(
                    indices=((0, 0), (1, 0), (1, 1)),
                    values=(0, 1, 0),
                    dense_shape=(2, 2),
                ),
                "bbb":
                tf.SparseTensor(
                    indices=((0, 0), (1, 0), (1, 1)),
                    values=(1, 2, 1),
                    dense_shape=(2, 2),
                ),
            }
            all_cols = [embedding_column_a, embedding_column_b]
            df.DenseFeatures(all_cols)(features)
            df.DenseFeatures(all_cols)(features)
            # Make sure that only 1 variable gets created in this case.
            self.assertEqual(
                1,
                len(
                    tf.compat.v1.get_collection(
                        tf.compat.v1.GraphKeys.GLOBAL_VARIABLES)),
            )
            self.assertCountEqual(
                ["aaa_bbb_shared_embedding:0"],
                [
                    v.name for v in tf.compat.v1.get_collection(
                        tf.compat.v1.GraphKeys.GLOBAL_VARIABLES)
                ],
            )
Example #15
  def test_sparse_int_input_multi_bucket(self):
    vocab_data = np.array([10, 11, 12, 13], dtype=np.int64)
    input_array = tf.SparseTensor(
        indices=[[0, 0], [1, 2]],
        values=np.array([13, 133], dtype=np.int64),
        dense_shape=[3, 4])

    expected_indices = [[0, 0], [1, 2]]
    expected_values = [6, 2]
    expected_dense_shape = [3, 4]

    input_data = keras.Input(shape=(None,), dtype=tf.int64, sparse=True)
    layer = get_layer_class()(
        max_values=None,
        dtype=tf.int64,
        num_oov_indices=2,
        mask_value=0,
        oov_value=-1)
    layer.set_vocabulary(vocab_data)
    int_data = layer(input_data)
    model = keras.Model(inputs=input_data, outputs=int_data)
    output_data = model.predict(input_array, steps=1)
    self.assertAllEqual(expected_indices, output_data.indices)
    self.assertAllEqual(expected_values, output_data.values)
    self.assertAllEqual(expected_dense_shape, output_data.dense_shape)
Example #16
    def call(self, inputs):
        self._maybe_freeze_vocab_size()

        inputs = self._standardize_inputs(inputs, self._key_dtype)
        original_shape = inputs.shape
        # Some ops will not handle scalar input, so uprank to rank 1.
        if inputs.shape.rank == 0:
            inputs = self._expand_dims(inputs, -1)

        if tf_utils.is_sparse(inputs):
            lookups = tf.SparseTensor(inputs.indices,
                                      self._lookup_dense(inputs.values),
                                      inputs.dense_shape)
        elif tf_utils.is_ragged(inputs):
            lookups = tf.ragged.map_flat_values(self._lookup_dense, inputs)
        else:
            lookups = self._lookup_dense(inputs)

        if self.output_mode == INT:
            # If we received a scalar input, downrank back to a scalar.
            if original_shape.rank == 0:
                lookups = tf.squeeze(lookups, -1)
            return lookups

        depth = (self.max_tokens
                 if self.pad_to_max_tokens else self._frozen_vocab_size)
        idf_weights = self.idf_weights_const if self.output_mode == TF_IDF else None
        return utils.encode_categorical_inputs(lookups,
                                               output_mode=self.output_mode,
                                               depth=depth,
                                               dtype=self.compute_dtype,
                                               sparse=self.sparse,
                                               idf_weights=idf_weights)
Example #17
    def test_with_1d_sparse_tensor(self):
        embedding_values = (
            (1.0, 2.0, 3.0, 4.0, 5.0),  # id 0
            (6.0, 7.0, 8.0, 9.0, 10.0),  # id 1
            (11.0, 12.0, 13.0, 14.0, 15.0),  # id 2
        )

        def _initializer(shape, dtype, partition_info=None):
            del shape, dtype, partition_info
            return embedding_values

        # price has 1 dimension in dense_features
        price = tf.feature_column.numeric_column("price")

        # one_hot_body_style has 3 dims in dense_features.
        body_style = tf.feature_column.categorical_column_with_vocabulary_list(
            "body-style", vocabulary_list=["hardtop", "wagon", "sedan"])
        one_hot_body_style = tf.feature_column.indicator_column(body_style)

        # embedded_country has 5 dims in dense_features.
        country = tf.feature_column.categorical_column_with_vocabulary_list(
            "country", vocabulary_list=["US", "JP", "CA"])
        embedded_country = tf.feature_column.embedding_column(
            country, dimension=5, initializer=_initializer)

        # Provide 1-dim dense and sparse tensors.
        features = {
            "price":
            tf.constant([
                11.0,
                12.0,
            ]),
            "body-style":
            tf.SparseTensor(
                indices=((0, ), (1, )),
                values=("sedan", "hardtop"),
                dense_shape=(2, ),
            ),
            # This is a dense tensor for the categorical_column.
            "country":
            tf.constant(["CA", "US"]),
        }
        self.assertEqual(1, features["price"].shape.ndims)
        self.assertEqual(1, features["body-style"].dense_shape.get_shape()[0])
        self.assertEqual(1, features["country"].shape.ndims)

        net = df.DenseFeatures([price, one_hot_body_style,
                                embedded_country])(features)
        self.assertEqual(1 + 3 + 5, net.shape[1])
        with _initialized_session() as sess:

            # Each row is formed by concatenating `one_hot_body_style`,
            # `embedded_country`, and `price`, in that order.
            self.assertAllEqual(
                [
                    [0.0, 0.0, 1.0, 11.0, 12.0, 13.0, 14.0, 15.0, 11.0],
                    [1.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 12.0],
                ],
                sess.run(net),
            )
Example #18
  def test_dense_output(self):
    dense_inputs = tf.convert_to_tensor(
        np.random.uniform(size=(10, 10)).astype('f'))
    # Create some sparse data where multiple rows and columns are missing.
    sparse_inputs = tf.SparseTensor(
        indices=np.random.randint(low=0, high=10, size=(5, 2)),
        values=np.random.uniform(size=(5,)).astype('f'),
        dense_shape=[10, 10])
    sparse_inputs = tf.sparse.reorder(sparse_inputs)

    layer = keras.layers.Dense(
        5,
        kernel_initializer=keras.initializers.RandomUniform(),
        bias_initializer=keras.initializers.RandomUniform(),
        dtype='float32')
    dense_outputs = layer(dense_inputs)
    sparse_outputs = layer(sparse_inputs)

    expected_dense = tf.add(
        tf.matmul(dense_inputs, keras.backend.get_value(layer.kernel)),
        keras.backend.get_value(layer.bias))
    expected_sparse = tf.add(
        tf.matmul(
            tf.sparse.to_dense(sparse_inputs),
            keras.backend.get_value(layer.kernel)),
        keras.backend.get_value(layer.bias))

    self.assertAllClose(dense_outputs, expected_dense)
    self.assertAllClose(sparse_outputs, expected_sparse)
Example #19
    def test_sparse_tensor_model_predict(self):
        # Create a model that accepts a sparse input and runs a "Dense" layer on
        # it.
        model_input = input_layer.Input(shape=(3, ),
                                        sparse=True,
                                        dtype=tf.float32)

        self.assertEqual([None, 3], model_input.shape.as_list())

        layers = [Dense(2)]
        model = get_model_from_layers_with_input(layers,
                                                 model_input=model_input)

        sparse_input = tf.SparseTensor(
            # A 6x3 matrix in which only rows 0 and 5 are populated
            indices=[(0, 0), (0, 1), (0, 2), (5, 0), (5, 1), (5, 2)],
            values=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
            dense_shape=(6, 3),
        )

        shape = model(sparse_input).shape
        self.assertEqual((6, 2), self._normalize_shape(shape))

        shape = model.predict(sparse_input, steps=1).shape
        self.assertEqual((6, 2), self._normalize_shape(shape))
Example #20
def dense_to_sparse(x, ignore_value=None, name=None):
    """Converts dense `Tensor` to `SparseTensor`, dropping `ignore_value` cells.

  Args:
    x: A `Tensor`.
    ignore_value: Entries in `x` equal to this value will be
      absent from the returned `SparseTensor`. If `None`, the default value
      of `x`'s dtype is used (e.g. '' for `str`, 0 for `int`).
    name: Python `str` prefix for ops created by this function.

  Returns:
    sparse_x: A `tf.SparseTensor` with the same shape as `x`.

  Raises:
    ValueError: when `x`'s rank is `None`.
  """
    # Copied (with modifications) from:
    # tensorflow/contrib/layers/python/ops/sparse_ops.py.
    with tf.name_scope(name or 'dense_to_sparse'):
        x = tf.convert_to_tensor(x, name='x')
        if ignore_value is None:
            if dtype_util.base_dtype(x.dtype) == tf.string:
                # Special case: TF strings are converted to numpy objects by
                # default.
                ignore_value = ''
            else:
                ignore_value = dtype_util.as_numpy_dtype(x.dtype)(0)
        ignore_value = tf.cast(ignore_value, x.dtype, name='ignore_value')
        indices = tf.where(tf.not_equal(x, ignore_value), name='indices')
        return tf.SparseTensor(indices=indices,
                               values=tf.gather_nd(x, indices, name='values'),
                               dense_shape=tf.shape(x,
                                                    out_type=tf.int64,
                                                    name='dense_shape'))
Example #21
def dense(inputs, kernel, bias=None, activation=None, dtype=None):
  """Densely connected NN layer op.

  Args:
    inputs: `tf.Tensor` or `tf.SparseTensor`. Inputs to operation.
    kernel: `tf.Variable`. Matrix kernel.
    bias: (Optional) `tf.Variable`. Bias to add to outputs.
    activation: (Optional) 1-argument callable. Activation function to apply to
      outputs.
    dtype: (Optional) `tf.DType`. Dtype to cast `inputs` to.

  Returns:
    `tf.Tensor`. Output of dense connection.
  """
  if dtype:
    if inputs.dtype.base_dtype != dtype.base_dtype:
      inputs = tf.cast(inputs, dtype=dtype)

  rank = inputs.shape.rank
  if rank == 2 or rank is None:
    # We use embedding_lookup_sparse as a more efficient matmul operation for
    # large sparse input tensors. The op will result in a sparse gradient, as
    # opposed to sparse_ops.sparse_tensor_dense_matmul, which results in dense
    # gradients. This can lead to significant speedups, see b/171762937.
    if isinstance(inputs, tf.SparseTensor):
      # We need to fill empty rows, as the op assumes at least one id per row.
      inputs, _ = tf.sparse.fill_empty_rows(inputs, 0)
      # We need to do some munging of our input to use the embedding lookup as
      # a matrix multiply. We split our input matrix into separate ids and
      # weights tensors. The values of the ids tensor should be the column
      # indices of our input matrix, and the values of the weights tensor
      # should be the actual matrix weights. The column arrangement of ids and
      # weights will be summed over and does not matter. See the documentation
      # of sparse_ops.sparse_tensor_dense_matmul for a more detailed
      # explanation of the inputs to both ops.
      ids = tf.SparseTensor(
          indices=inputs.indices,
          values=inputs.indices[:, 1],
          dense_shape=inputs.dense_shape)
      weights = inputs
      outputs = tf.nn.embedding_lookup_sparse(
          kernel, ids, weights, combiner="sum")
    else:
      outputs = tf.raw_ops.MatMul(a=inputs, b=kernel)
  # Broadcast kernel to inputs.
  else:
    outputs = tf.tensordot(inputs, kernel, [[rank - 1], [0]])
    # Reshape the output back to the original ndim of the input.
    if not tf.executing_eagerly():
      shape = inputs.shape.as_list()
      output_shape = shape[:-1] + [kernel.shape[-1]]
      outputs.set_shape(output_shape)

  if bias is not None:
    outputs = tf.nn.bias_add(outputs, bias)

  if activation is not None:
    outputs = activation(outputs)

  return outputs
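For reference, a hedged sketch of calling `dense` above with a rank-2 sparse input (made-up kernel values); the sparse path gives the same result as densifying first:

kernel = tf.Variable(tf.ones([3, 2]))
sp = tf.SparseTensor(indices=[[0, 0], [1, 2]],
                     values=[1., 2.],
                     dense_shape=[2, 3])
outputs = dense(sp, kernel)
# Equivalent to tf.matmul(tf.sparse.to_dense(sp), kernel):
# outputs -> [[1., 1.], [2., 2.]]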
Example #22
  def call(self, inputs):
    bins = [tf.cast(tf.compat.v1.squeeze(self.bins), tf.float32)]

    def _bucketize_fn(inputs):
      return tf.raw_ops.BoostedTreesBucketize(
          float_values=[tf.cast(inputs, tf.float32)],
          bucket_boundaries=bins)[0]

    if tf_utils.is_ragged(inputs):
      integer_buckets = tf.ragged.map_flat_values(
          _bucketize_fn, inputs)
      # Ragged map_flat_values doesn't touch the non-values tensors in the
      # ragged composite tensor. If this op is the only op in a Keras model,
      # this can cause errors in graph mode, so wrap the tensor in an identity.
      return tf.identity(integer_buckets)
    elif isinstance(inputs, tf.SparseTensor):
      return tf.SparseTensor(
          indices=tf.identity(inputs.indices),
          values=_bucketize_fn(inputs.values),
          dense_shape=tf.identity(inputs.dense_shape))
    else:
      static_shape = inputs.get_shape()
      if any(dim is None for dim in static_shape.as_list()[1:]):
        raise NotImplementedError(
            "Discretization Layer requires known non-batch shape, "
            "found {}".format(static_shape))

      dynamic_shape = tf.shape(inputs)
      # BoostedTreesBucketize only handles rank-1 inputs, so we flatten all
      # dimensions after the batch dimension and vectorized_map over each
      # sample.
      reshaped = tf.reshape(inputs, [dynamic_shape[0], -1])
      return tf.reshape(
          tf.vectorized_map(_bucketize_fn, reshaped),
          dynamic_shape)
Example #23
  def __call__(self, sentences):
    token_ids, token_values, token_dense_shape = self._tokenize(sentences)

    return tf.nn.safe_embedding_lookup_sparse(
        embedding_weights=self.embeddings,
        sparse_ids=tf.SparseTensor(token_ids, token_values, token_dense_shape),
        sparse_weights=None,
        combiner="sqrtn")
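`self._tokenize` and `self.embeddings` are defined elsewhere in that class; a self-contained sketch of the underlying lookup with hypothetical values:

embeddings = tf.constant([[1., 1.], [2., 2.], [3., 3.]])
ids = tf.SparseTensor(indices=[[0, 0], [0, 1], [1, 0]],
                      values=tf.constant([0, 2, 1], dtype=tf.int64),
                      dense_shape=[2, 2])
out = tf.nn.safe_embedding_lookup_sparse(
    embedding_weights=embeddings, sparse_ids=ids, combiner="sqrtn")
# Row 0: (embeddings[0] + embeddings[2]) / sqrt(2); row 1: embeddings[1].
# out -> [[2.828, 2.828], [2., 2.]]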
Example #24
 def call(self, inputs):
   inputs = self._preprocess_inputs(inputs)
   if isinstance(inputs, tf.SparseTensor):
     return tf.SparseTensor(
         indices=inputs.indices,
         values=self._hash_values_to_bins(inputs.values),
         dense_shape=inputs.dense_shape)
   return self._hash_values_to_bins(inputs)
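The pattern above, rebuilding the `SparseTensor` around transformed values, keeps the sparsity structure intact. `_hash_values_to_bins` is internal to the layer; a hedged stand-in using `tf.strings.to_hash_bucket_fast`:

sp = tf.SparseTensor(indices=[[0, 0], [1, 1]],
                     values=['a', 'b'],
                     dense_shape=[2, 2])
hashed = tf.SparseTensor(indices=sp.indices,
                         values=tf.strings.to_hash_bucket_fast(sp.values, 3),
                         dense_shape=sp.dense_shape)
# Indices and dense_shape are unchanged; only values are hashed into [0, 3).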
Example #25
 def fn():
     layer = MyLayer()
     layer(
         tf.SparseTensor(indices=[[0, 0]],
                         values=[1],
                         dense_shape=[3, 5]),
         training=False,
     )
Example #26
 def test_hash_sparse_int_input_siphash(self):
   layer = hashing.Hashing(num_bins=3, salt=[133, 137])
   indices = [[0, 0], [1, 0], [1, 1], [2, 0], [2, 1]]
   inp = tf.SparseTensor(
       indices=indices, values=[0, 1, 2, 3, 4], dense_shape=[3, 2])
   output = layer(inp)
   self.assertAllClose(indices, output.indices)
   self.assertAllClose([1, 1, 2, 0, 1], output.values)
Example #27
 def call(self, inputs):
     if isinstance(inputs, (list, tuple, np.ndarray)):
         inputs = tf.convert_to_tensor(inputs)
     if isinstance(inputs, tf.SparseTensor):
         return tf.SparseTensor(indices=inputs.indices,
                                values=self._hash_values_to_bins(
                                    inputs.values),
                                dense_shape=inputs.dense_shape)
     return self._hash_values_to_bins(inputs)
Example #28
def for_with_composite_tensor_shape_invariant(l):
    v = tf.SparseTensor(indices=[[0, 0], [1, 1]],
                        values=[1, 2],
                        dense_shape=[3, 3])
    for _ in l:
        tf.autograph.experimental.set_loop_options(
            shape_invariants=[(v, tf.TensorShape(None))])
        v = tf.sparse.expand_dims(v)
    return v
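Under AutoGraph the loop converts to a `tf.while_loop`, and the declared shape invariant lets `v` change rank across iterations; a hedged way to exercise it:

result = tf.function(for_with_composite_tensor_shape_invariant)(tf.range(2))
# tf.sparse.expand_dims appends a trailing size-1 dim on each iteration:
# result.dense_shape -> [3, 3, 1, 1]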
Example #29
    def test_with_1d_sparse_tensor(self):
        embedding_values = (
            (1., 2., 3., 4., 5.),  # id 0
            (6., 7., 8., 9., 10.),  # id 1
            (11., 12., 13., 14., 15.)  # id 2
        )

        def _initializer(shape, dtype, partition_info=None):
            del shape, dtype, partition_info
            return embedding_values

        # price has 1 dimension in dense_features
        price = tf.feature_column.numeric_column('price')

        # one_hot_body_style has 3 dims in dense_features.
        body_style = tf.feature_column.categorical_column_with_vocabulary_list(
            'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan'])
        one_hot_body_style = tf.feature_column.indicator_column(body_style)

        # embedded_country has 5 dims in dense_features.
        country = tf.feature_column.categorical_column_with_vocabulary_list(
            'country', vocabulary_list=['US', 'JP', 'CA'])
        embedded_country = tf.feature_column.embedding_column(
            country, dimension=5, initializer=_initializer)

        with tf.Graph().as_default():
            # Provide 1-dim dense and sparse tensors.
            features = {
                'price':
                tf.constant([
                    11.,
                    12.,
                ]),
                'body-style':
                tf.SparseTensor(indices=((0, ), (1, )),
                                values=('sedan', 'hardtop'),
                                dense_shape=(2, )),
                # This is a dense tensor for the categorical_column.
                'country':
                tf.constant(['CA', 'US']),
            }
            self.assertEqual(1, features['price'].shape.ndims)
            self.assertEqual(1,
                             features['body-style'].dense_shape.get_shape()[0])
            self.assertEqual(1, features['country'].shape.ndims)

            net = df.DenseFeatures(
                [price, one_hot_body_style, embedded_country])(features)
            self.assertEqual(1 + 3 + 5, net.shape[1])
            with _initialized_session() as sess:

                # Each row is formed by concatenating `one_hot_body_style`,
                # `embedded_country`, and `price`, in that order.
                self.assertAllEqual(
                    [[0., 0., 1., 11., 12., 13., 14., 15., 11.],
                     [1., 0., 0., 1., 2., 3., 4., 5., 12.]], sess.run(net))
Example #30
def encode_categorical_inputs(
    inputs,
    output_mode,
    depth,
    dtype="float32",
    sparse=False,
    count_weights=None,
    idf_weights=None,
):
    """Encodes categoical inputs according to output_mode."""
    if output_mode == INT:
        return tf.identity(tf.cast(inputs, dtype))

    original_shape = inputs.shape
    # In all cases, we should uprank scalar input to a single sample.
    if inputs.shape.rank == 0:
        inputs = expand_dims(inputs, -1)
    # One-hot encoding upranks only if the final output dimension is not
    # already 1.
    if output_mode == ONE_HOT:
        if inputs.shape[-1] != 1:
            inputs = expand_dims(inputs, -1)

    # TODO(b/190445202): remove output rank restriction.
    if inputs.shape.rank > 2:
        raise ValueError(
            f"When output_mode is not `'int'`, maximum supported output rank "
            f"is 2. Received output_mode {output_mode} and input shape "
            f"{original_shape}, "
            f"which would result in output rank {inputs.shape.rank}.")

    binary_output = output_mode in (MULTI_HOT, ONE_HOT)
    if sparse:
        bincounts = sparse_bincount(inputs, depth, binary_output, dtype,
                                    count_weights)
    else:
        bincounts = dense_bincount(inputs, depth, binary_output, dtype,
                                   count_weights)

    if output_mode != TF_IDF:
        return bincounts

    if idf_weights is None:
        raise ValueError(
            f"When output mode is `'tf_idf'`, idf_weights must be provided. "
            f"Received: output_mode={output_mode} and idf_weights={idf_weights}"
        )

    if sparse:
        value_weights = tf.gather(idf_weights, bincounts.indices[:, -1])
        return tf.SparseTensor(
            bincounts.indices,
            value_weights * bincounts.values,
            bincounts.dense_shape,
        )
    else:
        return tf.multiply(bincounts, idf_weights)
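`sparse_bincount` and `dense_bincount` are helpers not shown here; assuming they wrap `tf.sparse.bincount` and `tf.math.bincount`, a minimal sketch of what the count path computes per row:

inputs = tf.constant([[0, 1, 1], [2, 2, 2]], dtype=tf.int64)
counts = tf.sparse.bincount(inputs, minlength=4, axis=-1)
# tf.sparse.to_dense(counts) -> [[1, 2, 0, 0],
#                                [0, 0, 3, 0]]
# With binary_output=True the counts clip to a 0/1 multi-hot encoding.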