Exemple #1
0
 def testApplyBucketsWithNans(self):
   """Checks apply_buckets on float input containing NaN and -inf values."""
   values = tf.constant([4.0, float('nan'), float('-inf'), 7.5, 10.0])
   boundaries = tf.constant([2, 5, 8])
   # TODO(b/148278398): NaN is mapped to the highest bucket. Determine
   # if this is the right behavior.
   want = tf.constant([1, 3, 0, 2, 3], dtype=tf.int64)
   got = mappers.apply_buckets(values, [boundaries])
   self.assertAllEqual(got, want)
    def test_bucketization_annotation(self):
        """Checks that apply_buckets annotates the schema with bucket boundaries.

        Two features are bucketized with different boundaries, the output
        schema is inferred inside a session, and each feature is expected to
        carry exactly one `BucketBoundaries` annotation matching the
        boundaries that were applied to it.

        The test is skipped when `annotations_pb2` cannot be imported.
        """
        # TODO(b/132098015): Schema annotations aren't yet supported in OSS builds.
        # pylint: disable=g-import-not-at-top
        try:
            from tensorflow_transform import annotations_pb2
        except ImportError:
            # Report a skip instead of returning silently, so the test is not
            # counted as passing when nothing was actually verified.
            self.skipTest('tensorflow_transform.annotations_pb2 is unavailable')
        # pylint: enable=g-import-not-at-top

        # Boundaries each output feature is expected to be annotated with.
        expected_boundaries = {
            'Bucketized_foo': [.5, 1.5],
            'Bucketized_bar': [.1, .2],
        }

        with tf.Graph().as_default() as graph:
            inputs = {
                'foo': tf.convert_to_tensor([0, 1, 2, 3]),
                'bar': tf.convert_to_tensor([0, 2, 0, 2]),
            }
            boundaries_foo = tf.expand_dims(tf.convert_to_tensor([.5, 1.5]),
                                            axis=0)
            boundaries_bar = tf.expand_dims(tf.convert_to_tensor([.1, .2]),
                                            axis=0)
            outputs = {}

            # tft.apply_buckets will annotate the feature in the output schema to
            # indicate the bucket boundaries that were applied.
            outputs['Bucketized_foo'] = mappers.apply_buckets(
                inputs['foo'], boundaries_foo)
            outputs['Bucketized_bar'] = mappers.apply_buckets(
                inputs['bar'], boundaries_bar)
            # Create a session to actually evaluate the annotations and extract
            # the output schema with annotations applied.
            with tf.compat.v1.Session(graph=graph) as session:
                schema = schema_inference.infer_feature_schema(
                    outputs, graph, session)
                self.assertLen(schema.feature, 2)
                for feature in schema.feature:
                    self.assertLen(feature.annotation.extra_metadata, 1)
                    for annotation in feature.annotation.extra_metadata:
                        # Extract the annotated message and validate its contents
                        message = annotations_pb2.BucketBoundaries()
                        annotation.Unpack(message)
                        if feature.name not in expected_boundaries:
                            raise RuntimeError('Unexpected features in schema')
                        self.assertAllClose(list(message.boundaries),
                                            expected_boundaries[feature.name])
 def preprocessing_fn(_):
     """Returns two bucketized features built from constant tensors.

     The single positional argument is ignored; the inputs and bucket
     boundaries are created inside the function.

     Returns:
       A dict with keys 'Bucketized_foo' and 'Bucketized_bar' holding the
       results of `mappers.apply_buckets`.
     """
     foo = tf.convert_to_tensor([0, 1, 2, 3])
     bar = tf.convert_to_tensor([0, 2, 0, 2])
     # Boundaries get a leading axis of size one via expand_dims.
     foo_boundaries = tf.expand_dims(
         tf.convert_to_tensor([.5, 1.5]), axis=0)
     bar_boundaries = tf.expand_dims(
         tf.convert_to_tensor([.1, .2]), axis=0)
     # tft.apply_buckets will annotate the feature in the output schema to
     # indicate the bucket boundaries that were applied.
     return {
         'Bucketized_foo': mappers.apply_buckets(foo, foo_boundaries),
         'Bucketized_bar': mappers.apply_buckets(bar, bar_boundaries),
     }
Exemple #4
0
 def testApplybucketsToSparseTensor(self):
     """Checks apply_buckets on a 3-D SparseTensor with three values."""
     sparse_input = tf.SparseTensor(
         indices=[[0, 0, 0], [0, 1, 1], [2, 2, 2]],
         values=[10, 20, -1],
         dense_shape=[3, 3, 4])
     boundaries = [-10, 0, 13]
     result = mappers.apply_buckets(sparse_input, [boundaries])
     # Indices and dense shape are expected to match the input; only the
     # values are replaced by bucket ids.
     expected_values = tf.constant([2, 3, 1])
     self.assertSparseOutput(
         sparse_input.indices,
         expected_values,
         sparse_input.dense_shape,
         result,
         close_values=False)
Exemple #5
0
 def testApplybucketsToRaggedTensor(self):
   """Checks apply_buckets on a doubly nested RaggedTensor."""
   # Input and expected output share the same ragged structure.
   inner_splits = [0, 1, 1, 2, 2, 3]
   outer_splits = [0, 1, 1, 2, 3, 5]
   ragged_input = tf.RaggedTensor.from_row_splits(
       values=tf.RaggedTensor.from_row_splits(
           values=[10, 20, -1], row_splits=inner_splits),
       row_splits=outer_splits)
   boundaries = [-10, 0, 13]
   want = tf.RaggedTensor.from_row_splits(
       values=tf.RaggedTensor.from_row_splits(
           values=[2, 3, 1], row_splits=inner_splits),
       row_splits=outer_splits)
   got = mappers.apply_buckets(ragged_input, [boundaries])
   self.assertAllEqual(want, got)
Exemple #6
0
 def testApplyBucketsSmall(self):
     """Checks apply_buckets on a scalar input with a single boundary."""
     scalar = tf.constant(4)
     boundary = tf.constant([5])
     want = tf.constant(0, dtype=tf.int64)
     got = mappers.apply_buckets(scalar, [boundary])
     self.assertAllEqual(got, want)
Exemple #7
0
 def testApplyBucketsWithInfBoundary(self):
   """Checks apply_buckets when the first boundary is -inf."""
   values = tf.constant([4.0, float('-inf'), .8, 7.5, 10.0])
   boundaries = tf.constant([float('-inf'), 2, 5, 8])
   want = tf.constant([2, 1, 1, 3, 4], dtype=tf.int64)
   got = mappers.apply_buckets(values, [boundaries])
   self.assertAllEqual(got, want)
Exemple #8
0
 def testApplyBuckets(self, x, bucket_boundaries, expected_buckets):
   """Checks apply_buckets against the supplied expected bucket ids.

   Args:
     x: Input values, converted to a tensor with `tf.constant`.
     bucket_boundaries: Boundaries, converted with `tf.constant` and passed
       to `mappers.apply_buckets` as given (not wrapped in a list here).
     expected_buckets: Expected bucket ids, compared as an int64 tensor.
   """
   x_tensor = tf.constant(x)
   boundaries_tensor = tf.constant(bucket_boundaries)
   want = tf.constant(expected_buckets, dtype=tf.int64)
   got = mappers.apply_buckets(x_tensor, boundaries_tensor)
   self.assertAllEqual(got, want)