Code example #1
0
 def test_error_dense_shape_invalid(self):
   """A tensor_core_shape that is not rank 2 must be rejected up front."""
   column = fc_lib.categorical_column_with_identity(key='inp', num_buckets=5)
   with self.assertRaisesRegexp(ValueError,
                                'tensor_core_shape must be size 2'):
     tpu_fc.shared_embedding_columns_v2(
         [column], dimension=20, tensor_core_shape=[None, 20, 15])
Code example #2
0
    def test_invalid_cases(self, shared):
        """Checks that unsupported embedding_lookup_device setups raise.

        Args:
          shared: if True, builds the column via shared_embedding_columns_v2;
            otherwise via embedding_column_v2. Both paths must behave the same.
        """

        # Inputs.
        input_sparse_tensor = sparse_tensor.SparseTensorValue(
            indices=((0, 0), (1, 0), (1, 1), (1, 4)),
            values=(2, 0, 1, 3),
            dense_shape=(2, 5))
        input_features = {'inp': input_sparse_tensor}

        # Build columns.
        categorical_column_input = fc_lib.categorical_column_with_identity(
            key='inp', num_buckets=3)

        # Training on TPU with cpu embedding lookups is not supported.
        if shared:
            embedding_column = tpu_fc.shared_embedding_columns_v2(
                [categorical_column_input],
                dimension=2,
                embedding_lookup_device='cpu',
                tensor_core_shape=[None, 3])
        else:
            embedding_column = tpu_fc.embedding_column_v2(
                categorical_column_input,
                dimension=2,
                embedding_lookup_device='cpu',
                tensor_core_shape=[None, 3])
        dense_features = fc_lib.DenseFeatures(embedding_column)
        # No inference context is active here, so the lookup is treated as
        # training and must fail when pinned to 'cpu'.
        with self.assertRaisesRegexp(
                ValueError,
                r'.*embedding_lookup_device=\"cpu\" during training is not'):
            dense_features(input_features)

        # Inference on with TPU Embedding Hardware is not supported.
        if shared:
            embedding_column = tpu_fc.shared_embedding_columns_v2(
                [categorical_column_input],
                dimension=2,
                embedding_lookup_device='tpu_embedding_core',
                tensor_core_shape=[None, 3])
        else:
            embedding_column = tpu_fc.embedding_column_v2(
                categorical_column_input,
                dimension=2,
                embedding_lookup_device='tpu_embedding_core',
                tensor_core_shape=[None, 3])
        # Enter a TPU inference context so the lookup below is classified as
        # inference; Exit() restores normal graph state afterwards.
        context = tpu._TPUInferenceContext('tpu_inference')
        context.Enter()
        dense_features = fc_lib.DenseFeatures(embedding_column)
        with self.assertRaisesRegexp(
                ValueError,
                r'Using embedding_lookup_device=tpu_embedding_core during inference is '
        ):
            dense_features(input_features)
        context.Exit()
Code example #3
0
 def test_defaults(self):
     """Verifies default attributes produced by shared_embedding_columns_v2.

     Covers: categorical-column wiring, table size, default 'mean' combiner,
     default initializer, shared var name, per-column name, variable shape.
     """
     vocabulary_size = 3
     categorical_column_a = fc_lib.categorical_column_with_identity(
         key='aaa', num_buckets=vocabulary_size)
     categorical_column_b = fc_lib.categorical_column_with_identity(
         key='bbb', num_buckets=vocabulary_size)
     embedding_dimension = 2
     # Input order is (b, a); returned columns follow input order.
     embedding_column_b, embedding_column_a = tpu_fc.shared_embedding_columns_v2(
         [categorical_column_b, categorical_column_a],
         dimension=embedding_dimension)
     self.assertIs(categorical_column_a,
                   embedding_column_a.categorical_column)
     self.assertIs(categorical_column_b,
                   embedding_column_b.categorical_column)
     self.assertEqual((vocabulary_size, embedding_dimension),
                      embedding_column_a.get_embedding_table_size())
     # Fixed copy-paste bug: the original asserted column_a's table size
     # twice and never checked column_b.
     self.assertEqual((vocabulary_size, embedding_dimension),
                      embedding_column_b.get_embedding_table_size())
     self.assertEqual('mean', embedding_column_a.combiner)
     self.assertEqual('mean', embedding_column_b.combiner)
     self.assertIsNotNone(embedding_column_a.get_initializer())
     self.assertIsNotNone(embedding_column_b.get_initializer())
     # The shared variable name is derived from the sorted column keys.
     self.assertEqual('aaa_bbb_shared_embedding',
                      embedding_column_a.get_embedding_var_name())
     self.assertEqual('aaa_bbb_shared_embedding',
                      embedding_column_b.get_embedding_var_name())
     self.assertEqual('aaa_shared_embedding', embedding_column_a.name)
     self.assertEqual('bbb_shared_embedding', embedding_column_b.name)
     self.assertEqual((embedding_dimension, ),
                      embedding_column_a.variable_shape)
     self.assertEqual((embedding_dimension, ),
                      embedding_column_b.variable_shape)
Code example #4
0
 def test_all_constructor_args(self):
   """Verifies that every explicit constructor argument is honored.

   Passes a custom combiner, initializer and shared collection name and
   checks they surface on both returned columns.
   """
   vocabulary_size = 3
   categorical_column_a = fc_lib.categorical_column_with_identity(
       key='aaa', num_buckets=vocabulary_size)
   categorical_column_b = fc_lib.categorical_column_with_identity(
       key='bbb', num_buckets=vocabulary_size)
   embedding_dimension = 2
   embedding_column_a, embedding_column_b = tpu_fc.shared_embedding_columns_v2(
       [categorical_column_a, categorical_column_b],
       dimension=embedding_dimension,
       combiner='my_combiner',
       initializer=lambda: 'my_initializer',
       shared_embedding_collection_name='var_scope_name')
   self.assertIs(categorical_column_a, embedding_column_a.categorical_column)
   self.assertIs(categorical_column_b, embedding_column_b.categorical_column)
   self.assertEqual((vocabulary_size, embedding_dimension),
                    embedding_column_a.get_embedding_table_size())
   # Fixed copy-paste bug: the original asserted column_a's table size twice
   # and never checked column_b.
   self.assertEqual((vocabulary_size, embedding_dimension),
                    embedding_column_b.get_embedding_table_size())
   self.assertEqual('my_combiner', embedding_column_a.combiner)
   self.assertEqual('my_combiner', embedding_column_b.combiner)
   self.assertEqual('my_initializer', embedding_column_a.get_initializer()())
   self.assertEqual('my_initializer', embedding_column_b.get_initializer()())
   # Explicit collection name overrides the derived 'aaa_bbb_shared_embedding'.
   self.assertEqual('var_scope_name',
                    embedding_column_a.get_embedding_var_name())
   self.assertEqual('var_scope_name',
                    embedding_column_b.get_embedding_var_name())
   self.assertEqual('aaa_shared_embedding', embedding_column_a.name)
   self.assertEqual('bbb_shared_embedding', embedding_column_b.name)
   self.assertEqual((embedding_dimension,), embedding_column_a.variable_shape)
   self.assertEqual((embedding_dimension,), embedding_column_b.variable_shape)
Code example #5
0
 def test_deepcopy(self):
   """Deep-copied shared columns keep the same shared collection name."""
   bucket_count = 3
   column_a = fc_lib.categorical_column_with_identity(
       key='aaa', num_buckets=bucket_count)
   column_b = fc_lib.categorical_column_with_identity(
       key='bbb', num_buckets=bucket_count)
   originals = tpu_fc.shared_embedding_columns_v2(
       [column_b, column_a], dimension=2)
   duplicates = copy.deepcopy(originals)
   original_names = [c._shared_embedding_collection_name for c in originals]
   duplicate_names = [c._shared_embedding_collection_name for c in duplicates]
   self.assertEqual(original_names, duplicate_names)
Code example #6
0
    def test_dense_embedding_lookup(self, shared, combiner):
        """Checks densified TPU-tensor-core embedding lookups end to end.

        Args:
          shared: if True, builds the column via shared_embedding_columns_v2;
            otherwise via embedding_column_v2.
          combiner: one of 'mean', 'sum', or 'sqrtn' ('sqrtn' must raise).
        """
        # Inputs.
        vocabulary_size = 3
        input_sparse_tensor = sparse_tensor.SparseTensorValue(
            # example 0, ids [2]
            # example 1, ids [0, 1, 3]
            indices=((0, 0), (1, 0), (1, 1), (1, 4)),
            values=(2, 0, 1, 3),
            dense_shape=(2, 5))
        input_features = {'inp': input_sparse_tensor}

        # Embedding variable.
        embedding_dimension = 2
        embedding_values = (
            (1., 2.),  # id 0
            (3., 5.),  # id 1
            (7., 11.),  # id 2
            (13., 17.)  # id 3
        )

        def _initializer(shape, dtype, partition_info=None):
            # Doubles as an assertion that the variable is created with the
            # expected shape/dtype and no partitioning.
            self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
            self.assertEqual(dtypes.float32, dtype)
            self.assertIsNone(partition_info)
            return embedding_values

        # Build columns.
        categorical_column_input = fc_lib.categorical_column_with_identity(
            key='inp', num_buckets=vocabulary_size)

        # Set tensor_core_shape to be [None, 20] to ensure some padding and
        # dynamic batch size.
        if shared:
            embedding_column = tpu_fc.shared_embedding_columns_v2(
                [categorical_column_input],
                dimension=embedding_dimension,
                initializer=_initializer,
                combiner=combiner,
                embedding_lookup_device='tpu_tensor_core',
                tensor_core_shape=[None, 3])
        else:
            embedding_column = tpu_fc.embedding_column_v2(
                categorical_column_input,
                dimension=embedding_dimension,
                initializer=_initializer,
                combiner=combiner,
                embedding_lookup_device='tpu_tensor_core',
                tensor_core_shape=[None, 3])

        # Run in TPUInferenceContext so that we hit the intended densification case.
        context = tpu._TPUInferenceContext('tpu_inference')
        context.Enter()

        dense_features = fc_lib.DenseFeatures(embedding_column)
        # Sqrtn combiner not supported for now.
        if combiner == 'sqrtn':
            with self.assertRaisesRegexp(
                    ValueError,
                    'Dense TPU Embedding does not support combiner'):
                embedding_lookup = dense_features(input_features)
            return
        # NOTE: tensor_core_shape=[None, 3] truncates the sparse input's
        # width-5 rows to 3 columns, so example 1 keeps only ids [0, 1]
        # (id 3 at column 4 is dropped) — presumably intentional; the
        # expected values below reflect that.
        if combiner == 'mean':
            expected_lookups = (
                # example 0:
                (7., 11.),  # ids [2], embedding = [7, 11]
                # example 1:
                (2., 3.5
                 ),  # ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
            )
        elif combiner == 'sum':
            expected_lookups = (
                # example 0:
                (7., 11.),  # ids [2], embedding = [7, 11]
                # example 1:
                (4., 7
                 ),  # ids [0, 1], embedding = sum([1, 2] + [3, 5]) = [4, 7]
            )

        embedding_lookup = dense_features(input_features)

        # Assert expected embedding variable and lookups.
        global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
        if shared:
            self.assertCountEqual(('inp_shared_embedding:0', ),
                                  tuple([v.name for v in global_vars]))
        else:
            self.assertCountEqual(
                ('dense_features/inp_embedding/embedding_weights:0', ),
                tuple([v.name for v in global_vars]))

        embedding_var = global_vars[0]
        with _initialized_session():
            self.assertAllEqual(embedding_values, embedding_var.eval())
            eval_res = embedding_lookup.eval()
            self.assertAllEqual(expected_lookups, eval_res)
        context.Exit()
Code example #7
0
    def test_feature_layer_cpu(self):
        """Checks CPU lookups for a shared dense + sequence embedding pair.

        Column 'aaa' is a plain categorical column read through DenseFeatures
        (combined per example); column 'bbb' is a sequence column read through
        SequenceFeatures (one embedding per step, zero-padded). Both share one
        embedding table.
        """
        # Inputs.
        vocabulary_size = 3
        input_a = sparse_tensor.SparseTensorValue(
            # example 0, ids [2]
            # example 1, ids [0, 1]
            indices=((0, 0), (1, 0), (1, 1)),
            values=(2, 0, 1),
            dense_shape=(2, 2))
        input_b = sparse_tensor.SparseTensorValue(
            # example 0, ids [2]
            # example 1, ids [0, 1]
            # example 2, ids []
            indices=((0, 0), (1, 0), (1, 1)),
            values=(2, 0, 1),
            dense_shape=(3, 2))
        input_features = {'aaa': input_a, 'bbb': input_b}

        # Embedding variable.
        embedding_dimension = 2
        embedding_values = (
            (1., 2.),  # id 0
            (3., 5.),  # id 1
            (7., 11.)  # id 2
        )

        def _initializer(shape, dtype, partition_info=None):
            # Doubles as an assertion that the variable is created with the
            # expected shape/dtype and no partitioning.
            self.assertAllEqual((vocabulary_size, embedding_dimension), shape)
            self.assertEqual(dtypes.float32, dtype)
            self.assertIsNone(partition_info)
            return embedding_values

        # Expected lookup result, using combiner='mean'.
        expected_lookups_a = (
            # example 0:
            (7., 11.),  # ids [2], embedding = [7, 11]
            # example 1:
            (2., 3.5
             ),  # ids [0, 1], embedding = mean([1, 2] + [3, 5]) = [2, 3.5]
        )
        expected_lookups_b = (
            # example 0:
            (
                (7., 11.),
                (0., 0.),
            ),  # ids [2], embedding = [[7, 11], [0, 0]]
            # example 1:
            (
                (1., 2.),
                (3., 5.),
            ),  # ids [0, 1], embedding = [[1, 2], [3, 5]]
            # example 2:
            (
                (0., 0.),
                (0., 0.),
            ),  # ids [], embedding = [[0, 0], [0, 0]]
        )

        # Build columns.
        categorical_column_a = fc_lib.categorical_column_with_identity(
            key='aaa', num_buckets=vocabulary_size)
        categorical_column_b = fc_lib.sequence_categorical_column_with_identity(
            key='bbb', num_buckets=vocabulary_size)
        # max_sequence_lengths: 0 for the non-sequence column, 2 steps for 'bbb'.
        embedding_column_a, embedding_column_b = tpu_fc.shared_embedding_columns_v2(
            [categorical_column_a, categorical_column_b],
            dimension=embedding_dimension,
            initializer=_initializer,
            max_sequence_lengths=[0, 2])

        # Provide sparse input and get dense result.
        dense_features = fc_lib.DenseFeatures([embedding_column_a])
        sequence_features = fc_lib.SequenceFeatures([embedding_column_b])
        embedding_lookup_a = dense_features(input_features)
        embedding_lookup_b = sequence_features(input_features)

        # Assert expected embedding variable and lookups.
        global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
        self.assertItemsEqual(('aaa_bbb_shared_embedding:0', ),
                              tuple([v.name for v in global_vars]))
        embedding_var = global_vars[0]
        with _initialized_session():
            self.assertAllEqual(embedding_values, embedding_var.eval())
            self.assertAllEqual(expected_lookups_a, embedding_lookup_a.eval())
            # SequenceFeatures returns (dense_tensor, sequence_lengths); only
            # the dense tensor is checked here.
            self.assertAllEqual(expected_lookups_b,
                                embedding_lookup_b[0].eval())