Example No. 1
    def test_invalid_cases(self, shared):
        """Checks that unsupported embedding_lookup_device settings raise.

        Parameterized over `shared`: builds either shared
        (`shared_embedding_columns_v2`) or unshared (`embedding_column_v2`)
        columns, and expects a ValueError for (a) device 'cpu' outside an
        inference context (i.e. training) and (b) device
        'tpu_embedding_core' inside a TPU inference context.
        """

        # Inputs.
        input_sparse_tensor = sparse_tensor.SparseTensorValue(
            indices=((0, 0), (1, 0), (1, 1), (1, 4)),
            values=(2, 0, 1, 3),
            dense_shape=(2, 5))
        input_features = {'inp': input_sparse_tensor}

        # Build columns.
        categorical_column_input = fc_lib.categorical_column_with_identity(
            key='inp', num_buckets=3)

        # Training on TPU with cpu embedding lookups is not supported.
        if shared:
            embedding_column = tpu_fc.shared_embedding_columns_v2(
                [categorical_column_input],
                dimension=2,
                embedding_lookup_device='cpu',
                tensor_core_shape=[None, 3])
        else:
            embedding_column = tpu_fc.embedding_column_v2(
                categorical_column_input,
                dimension=2,
                embedding_lookup_device='cpu',
                tensor_core_shape=[None, 3])
        dense_features = fc_lib.DenseFeatures(embedding_column)
        # No inference context is entered here, so the layer call counts as
        # training and must reject the 'cpu' lookup device.
        with self.assertRaisesRegexp(
                ValueError,
                r'.*embedding_lookup_device=\"cpu\" during training is not'):
            dense_features(input_features)

        # Inference with the TPU embedding hardware is not supported.
        if shared:
            embedding_column = tpu_fc.shared_embedding_columns_v2(
                [categorical_column_input],
                dimension=2,
                embedding_lookup_device='tpu_embedding_core',
                tensor_core_shape=[None, 3])
        else:
            embedding_column = tpu_fc.embedding_column_v2(
                categorical_column_input,
                dimension=2,
                embedding_lookup_device='tpu_embedding_core',
                tensor_core_shape=[None, 3])
        # Enter a TPU inference context so the layer call is treated as
        # inference; exited after the expected failure below.
        context = tpu._TPUInferenceContext('tpu_inference')
        context.Enter()
        dense_features = fc_lib.DenseFeatures(embedding_column)
        with self.assertRaisesRegexp(
                ValueError,
                r'Using embedding_lookup_device=tpu_embedding_core during inference is '
        ):
            dense_features(input_features)
        context.Exit()
Example No. 2
 def test_deepcopy(self):
     """A deep copy of an embedding column preserves its configuration."""
     source_column = tpu_fc.embedding_column_v2(
         fc_lib.categorical_column_with_identity(key='aaa', num_buckets=3),
         dimension=2)
     clone = copy.deepcopy(source_column)
     # The clone must agree with the original on dimension and the
     # (private) max sequence length.
     self.assertEqual(source_column.dimension, clone.dimension)
     self.assertEqual(source_column._max_sequence_length,
                      clone._max_sequence_length)
Example No. 3
 def test_defaults(self):
   """Constructing with only required args yields the documented defaults."""
   dim = 2
   base_column = fc_lib.categorical_column_with_identity(
       key='aaa', num_buckets=3)
   column = tpu_fc.embedding_column_v2(base_column, dimension=dim)
   # The default initializer is a random function, so it is not asserted.
   self.assertIs(base_column, column.categorical_column)
   self.assertEqual(dim, column.dimension)
   self.assertEqual('mean', column.combiner)
   self.assertEqual('aaa_embedding', column.name)
   self.assertEqual((dim,), column.variable_shape)
def get_feature_columns():
    """Builds the feature columns used by the model.

    Returns:
        A single-element list holding a zero-initialized TPU embedding
        column over an identity categorical column keyed by ``KEY_NAME``.
    """
    identity_column = fc_lib.categorical_column_with_identity(
        key=KEY_NAME, num_buckets=BUCKET_SIZE)
    embedding = tpu_fc.embedding_column_v2(
        identity_column,
        dimension=EMBEDDING_DIM,
        combiner='mean',
        initializer=tf.zeros_initializer())
    return [embedding]
Example No. 5
 def test_all_constructor_args(self):
     """Every constructor argument is reflected in the column's properties."""
     dim = 2
     base_column = fc_lib.categorical_column_with_identity(
         key='aaa', num_buckets=3)
     column = tpu_fc.embedding_column_v2(
         base_column,
         dimension=dim,
         combiner='my_combiner',
         initializer=lambda: 'my_initializer')
     # Identity of the wrapped column, then each configured attribute.
     self.assertIs(base_column, column.categorical_column)
     self.assertEqual(dim, column.dimension)
     self.assertEqual('my_combiner', column.combiner)
     self.assertEqual('my_initializer', column.initializer())
     self.assertEqual('aaa_embedding', column.name)
     self.assertEqual((dim, ), column.variable_shape)
     self.assertEqual({'aaa': parsing_ops.VarLenFeature(dtypes.int64)},
                      column._parse_example_spec)
Example No. 6
    def test_feature_layer_cpu(self):
        """CPU-path lookup for plain and sequence embedding columns.

        Builds one plain and one sequence embedding column over the same
        sparse input, then checks the embedding variables created and the
        mean-combined / per-step lookup results.
        """
        # Inputs.
        vocabulary_size = 3
        sparse_input = sparse_tensor.SparseTensorValue(
            # example 0, ids [2]
            # example 1, ids [0, 1]
            # example 2, ids []
            # example 3, ids [1]
            indices=((0, 0), (1, 0), (1, 1), (3, 0)),
            values=(2, 0, 1, 1),
            dense_shape=(4, 2))

        # Embedding variable.
        embedding_dimension = 2
        embedding_values = (
            (1., 2.),  # id 0
            (3., 5.),  # id 1
            (7., 11.)  # id 2
        )

        def _initializer(shape, dtype, partition_info=None):
            # Deterministic initializer that also validates the requested
            # variable shape and dtype before returning the fixed table.
            self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
            self.assertEqual(dtypes.float32, dtype)
            self.assertIsNone(partition_info)
            return embedding_values

        # Expected lookup result, using combiner='mean'.
        expected_lookups = (
            # example 0, ids [2], embedding = [7, 11]
            (7., 11.),
            # example 1, ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
            (2., 3.5),
            # example 2, ids [], embedding = [0, 0]
            (0., 0.),
            # example 3, ids [1], embedding = [3, 5]
            (3., 5.),
        )
        # Sequence lookups keep one embedding per step, zero-padded out to
        # max_sequence_length=2.
        expected_lookups_sequence = (
            # example 0, ids [2], embedding = [[7, 11], [0, 0]]
            (
                (7., 11.),
                (0., 0.),
            ),
            # example 1, ids [0, 1], embedding = [[1, 2], [3, 5]]
            (
                (1., 2.),
                (3., 5.),
            ),
            # example 2, ids [], embedding = [0, 0]
            (
                (0., 0.),
                (0., 0.),
            ),
            # example 3, ids [1], embedding = [3, 5]
            (
                (3., 5.),
                (0., 0.),
            ),
        )

        # Build columns.
        categorical_column = fc_lib.categorical_column_with_identity(
            key='aaa', num_buckets=vocabulary_size)
        sequence_categorical_column = (
            fc_lib.sequence_categorical_column_with_identity(
                key='bbb', num_buckets=vocabulary_size))
        embedding_column = tpu_fc.embedding_column_v2(
            categorical_column,
            dimension=embedding_dimension,
            initializer=_initializer)
        sequence_embedding_column = tpu_fc.embedding_column_v2(
            sequence_categorical_column,
            dimension=embedding_dimension,
            initializer=_initializer,
            max_sequence_length=2)

        # Provide sparse input and get dense result.
        features = {'aaa': sparse_input, 'bbb': sparse_input}
        dense_features = fc_lib.DenseFeatures([embedding_column])
        sequence_features = fc_lib.SequenceFeatures(
            [sequence_embedding_column])
        embedding_lookup = dense_features(features)
        sequence_embedding_lookup = sequence_features(features)

        # Assert expected embedding variable and lookups.
        global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
        self.assertItemsEqual((
            'dense_features/aaa_embedding/embedding_weights:0',
            'sequence_features/bbb_embedding/embedding_weights:0',
        ), tuple([v.name for v in global_vars]))
        with _initialized_session():
            self.assertAllEqual(embedding_values, global_vars[0].eval())
            self.assertAllEqual(expected_lookups, embedding_lookup.eval())
            # SequenceFeatures returns a pair; element [0] is the embedding
            # output tensor compared here.
            self.assertAllEqual(expected_lookups_sequence,
                                sequence_embedding_lookup[0].eval())
Example No. 7
    def test_dense_embedding_lookup(self, shared, combiner):
        """Densified embedding lookup under a TPU inference context.

        Parameterized over `shared` (shared_embedding_columns_v2 vs
        embedding_column_v2) and `combiner`; the branches below handle
        'mean', 'sum', and 'sqrtn', with 'sqrtn' expected to be rejected.
        """
        # Inputs.
        vocabulary_size = 3
        input_sparse_tensor = sparse_tensor.SparseTensorValue(
            # example 0, ids [2]
            # example 1, ids [0, 1, 3]
            indices=((0, 0), (1, 0), (1, 1), (1, 4)),
            values=(2, 0, 1, 3),
            dense_shape=(2, 5))
        input_features = {'inp': input_sparse_tensor}

        # Embedding variable.
        embedding_dimension = 2
        embedding_values = (
            (1., 2.),  # id 0
            (3., 5.),  # id 1
            (7., 11.),  # id 2
            (13., 17.)  # id 3
        )

        def _initializer(shape, dtype, partition_info=None):
            # Deterministic initializer that also validates the requested
            # variable shape and dtype before returning the fixed table.
            self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
            self.assertEqual(dtypes.float32, dtype)
            self.assertIsNone(partition_info)
            return embedding_values

        # Build columns.
        categorical_column_input = fc_lib.categorical_column_with_identity(
            key='inp', num_buckets=vocabulary_size)

        # Set tensor_core_shape to [None, 3] to ensure some padding and a
        # dynamic batch size.
        if shared:
            embedding_column = tpu_fc.shared_embedding_columns_v2(
                [categorical_column_input],
                dimension=embedding_dimension,
                initializer=_initializer,
                combiner=combiner,
                embedding_lookup_device='tpu_tensor_core',
                tensor_core_shape=[None, 3])
        else:
            embedding_column = tpu_fc.embedding_column_v2(
                categorical_column_input,
                dimension=embedding_dimension,
                initializer=_initializer,
                combiner=combiner,
                embedding_lookup_device='tpu_tensor_core',
                tensor_core_shape=[None, 3])

        # Run in TPUInferenceContext so that we hit the intended densification case.
        context = tpu._TPUInferenceContext('tpu_inference')
        context.Enter()

        dense_features = fc_lib.DenseFeatures(embedding_column)
        # Sqrtn combiner not supported for now.
        if combiner == 'sqrtn':
            with self.assertRaisesRegexp(
                    ValueError,
                    'Dense TPU Embedding does not support combiner'):
                embedding_lookup = dense_features(input_features)
            return
        if combiner == 'mean':
            expected_lookups = (
                # example 0:
                (7., 11.),  # ids [2], embedding = [7, 11]
                # example 1:
                (2., 3.5
                 ),  # ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
            )
        elif combiner == 'sum':
            expected_lookups = (
                # example 0:
                (7., 11.),  # ids [2], embedding = [7, 11]
                # example 1:
                (4., 7
                 ),  # ids [0, 1], embedding = sum([1, 2] + [3, 5]) = [4, 7]
            )

        embedding_lookup = dense_features(input_features)

        # Assert expected embedding variable and lookups.
        global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
        if shared:
            self.assertCountEqual(('inp_shared_embedding:0', ),
                                  tuple([v.name for v in global_vars]))
        else:
            self.assertCountEqual(
                ('dense_features/inp_embedding/embedding_weights:0', ),
                tuple([v.name for v in global_vars]))

        embedding_var = global_vars[0]
        with _initialized_session():
            self.assertAllEqual(embedding_values, embedding_var.eval())
            eval_res = embedding_lookup.eval()
            self.assertAllEqual(expected_lookups, eval_res)
        context.Exit()
Example No. 8
    def test_empty_row(self):
        """Densified TPU lookup returns a zero vector for an empty example.

        Example 0 has no ids; with combiner='mean' its embedding must be all
        zeros, while example 1 averages the embeddings of ids 0 and 1.
        """
        # Inputs.
        vocabulary_size = 3
        input_sparse_tensor = sparse_tensor.SparseTensorValue(
            # example 0, ids []
            # example 1, ids [0, 1, 3]
            indices=((1, 0), (1, 1), (1, 4)),
            values=(0, 1, 3),
            dense_shape=(2, 5))
        input_features = {'inp': input_sparse_tensor}

        # Embedding variable.
        embedding_dimension = 2
        embedding_values = (
            (1., 2.),  # id 0
            (3., 5.),  # id 1
            (7., 11.),  # id 2
            (13., 17.)  # id 3
        )

        def _initializer(shape, dtype, partition_info=None):
            # Deterministic initializer that also validates the requested
            # variable shape and dtype before returning the fixed table.
            self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
            self.assertEqual(dtypes.float32, dtype)
            self.assertIsNone(partition_info)
            return embedding_values

        # Build columns.
        categorical_column_input = fc_lib.categorical_column_with_identity(
            key='inp', num_buckets=vocabulary_size)

        # Set tensor_core_shape to [None, 3] to ensure some padding and a
        # dynamic batch size.  (The original comment claimed [None, 20].)
        embedding_column = tpu_fc.embedding_column_v2(
            categorical_column_input,
            dimension=embedding_dimension,
            initializer=_initializer,
            combiner='mean',
            embedding_lookup_device='tpu_tensor_core',
            tensor_core_shape=[None, 3])

        # Run in TPU contexts so that we hit the intended densification case.
        context = tpu._TPUInferenceContext('tpu_inference')
        context.Enter()
        try:
            with tpu_function.tpu_shard_context(1):
                dense_features = fc_lib.DenseFeatures(embedding_column)
                expected_lookups = (
                    # example 0: ids [], embedding = [0, 0]
                    (0., 0.),
                    # example 1: ids [0, 1],
                    # embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
                    (2., 3.5),
                )

                embedding_lookup = dense_features(input_features)

                # Assert expected embedding variable and lookups.
                global_vars = ops.get_collection(
                    ops.GraphKeys.GLOBAL_VARIABLES)
                self.assertCountEqual(
                    ('dense_features/inp_embedding/embedding_weights:0', ),
                    tuple([v.name for v in global_vars]))

                embedding_var = global_vars[0]
                with _initialized_session():
                    # Use .eval() on the variable for consistency with the
                    # sibling tests in this file.
                    self.assertAllEqual(embedding_values,
                                        embedding_var.eval())
                    eval_res = embedding_lookup.eval()
                    self.assertAllEqual(expected_lookups, eval_res)
        finally:
            # Exit after the shard context closes so Enter/Exit nest in LIFO
            # order, and even if an assertion above fails.
            context.Exit()