def testApplyBucketsWithNans(self):
  """Checks apply_buckets on float input that contains NaN and -inf."""
  values = tf.constant([4.0, float('nan'), float('-inf'), 7.5, 10.0])
  boundaries = tf.constant([2, 5, 8])
  # TODO(b/148278398): NaN is mapped to the highest bucket. Determine
  # if this is the right behavior.
  want = tf.constant([1, 3, 0, 2, 3], dtype=tf.int64)
  got = mappers.apply_buckets(values, [boundaries])
  self.assertAllEqual(got, want)
def test_bucketization_annotation(self):
  """Checks that apply_buckets annotates the schema with its boundaries.

  Two features are bucketized inside a fresh graph; the inferred schema is
  then expected to carry exactly one BucketBoundaries annotation per feature,
  holding the boundaries that were passed to apply_buckets.
  """
  # TODO(b/132098015): Schema annotations aren't yet supported in OSS builds.
  # pylint: disable=g-import-not-at-top
  try:
    from tensorflow_transform import annotations_pb2
  except ImportError:
    # Without the annotations proto there is nothing to verify.
    return
  # pylint: enable=g-import-not-at-top

  # Boundaries each output feature is expected to be annotated with.
  expected_boundaries = {
      'Bucketized_foo': [.5, 1.5],
      'Bucketized_bar': [.1, .2],
  }

  with tf.Graph().as_default() as graph:
    foo = tf.convert_to_tensor([0, 1, 2, 3])
    bar = tf.convert_to_tensor([0, 2, 0, 2])
    boundaries_foo = tf.expand_dims(tf.convert_to_tensor([.5, 1.5]), axis=0)
    boundaries_bar = tf.expand_dims(tf.convert_to_tensor([.1, .2]), axis=0)

    # tft.apply_buckets annotates each output feature in the schema with the
    # bucket boundaries that were applied to it.
    outputs = {
        'Bucketized_foo': mappers.apply_buckets(foo, boundaries_foo),
        'Bucketized_bar': mappers.apply_buckets(bar, boundaries_bar),
    }

    # A session is needed to evaluate the annotations while extracting the
    # output schema.
    with tf.compat.v1.Session(graph=graph) as session:
      schema = schema_inference.infer_feature_schema(outputs, graph, session)
      self.assertLen(schema.feature, 2)
      for feature in schema.feature:
        self.assertLen(feature.annotation.extra_metadata, 1)
        for annotation in feature.annotation.extra_metadata:
          # Unpack the annotated message, then validate its contents.
          message = annotations_pb2.BucketBoundaries()
          annotation.Unpack(message)
          if feature.name not in expected_boundaries:
            raise RuntimeError('Unexpected features in schema')
          self.assertAllClose(
              list(message.boundaries), expected_boundaries[feature.name])
def preprocessing_fn(_):
  """Returns two bucketized constant features; the argument is ignored."""
  foo = tf.convert_to_tensor([0, 1, 2, 3])
  bar = tf.convert_to_tensor([0, 2, 0, 2])
  boundaries_foo = tf.expand_dims(tf.convert_to_tensor([.5, 1.5]), axis=0)
  boundaries_bar = tf.expand_dims(tf.convert_to_tensor([.1, .2]), axis=0)
  # tft.apply_buckets annotates each output feature in the schema with the
  # bucket boundaries that were applied to it.
  return {
      'Bucketized_foo': mappers.apply_buckets(foo, boundaries_foo),
      'Bucketized_bar': mappers.apply_buckets(bar, boundaries_bar),
  }
def testApplybucketsToSparseTensor(self):
  """Checks apply_buckets on a SparseTensor input.

  The output is compared against the input's own indices and dense shape,
  with only the values replaced by bucket indices.
  """
  indices = [[0, 0, 0], [0, 1, 1], [2, 2, 2]]
  sparse_input = tf.SparseTensor(
      indices=indices, values=[10, 20, -1], dense_shape=[3, 3, 4])
  boundaries = [-10, 0, 13]
  result = mappers.apply_buckets(sparse_input, [boundaries])
  expected_values = tf.constant([2, 3, 1])
  self.assertSparseOutput(
      sparse_input.indices,
      expected_values,
      sparse_input.dense_shape,
      result,
      close_values=False)
def testApplybucketsToRaggedTensor(self):
  """Checks apply_buckets on a doubly-nested RaggedTensor input.

  The expected result uses the same row splits as the input, with the flat
  values replaced by bucket indices.
  """
  # Row partitions shared by the input and the expected output.
  inner_splits = [0, 1, 1, 2, 2, 3]
  outer_splits = [0, 1, 1, 2, 3, 5]

  ragged_input = tf.RaggedTensor.from_row_splits(
      values=tf.RaggedTensor.from_row_splits(
          values=[10, 20, -1], row_splits=inner_splits),
      row_splits=outer_splits)
  boundaries = [-10, 0, 13]
  want = tf.RaggedTensor.from_row_splits(
      values=tf.RaggedTensor.from_row_splits(
          values=[2, 3, 1], row_splits=inner_splits),
      row_splits=outer_splits)
  got = mappers.apply_buckets(ragged_input, [boundaries])
  self.assertAllEqual(want, got)
def testApplyBucketsSmall(self):
  """Checks apply_buckets on a scalar input with a single boundary."""
  scalar = tf.constant(4)
  single_boundary = tf.constant([5])
  want = tf.constant(0, dtype=tf.int64)
  got = mappers.apply_buckets(scalar, [single_boundary])
  self.assertAllEqual(got, want)
def testApplyBucketsWithInfBoundary(self):
  """Checks apply_buckets when the lowest boundary is -inf."""
  values = tf.constant([4.0, float('-inf'), .8, 7.5, 10.0])
  boundaries = tf.constant([float('-inf'), 2, 5, 8])
  want = tf.constant([2, 1, 1, 3, 4], dtype=tf.int64)
  got = mappers.apply_buckets(values, [boundaries])
  self.assertAllEqual(got, want)
def testApplyBuckets(self, x, bucket_boundaries, expected_buckets):
  """Checks apply_buckets against the expected bucket indices.

  Args:
    x: Values to bucketize; converted to a constant tensor.
    bucket_boundaries: Boundaries passed to apply_buckets as-is after
      conversion to a constant tensor.
    expected_buckets: Expected bucket indices, compared as int64.
  """
  x_tensor = tf.constant(x)
  boundaries_tensor = tf.constant(bucket_boundaries)
  want = tf.constant(expected_buckets, dtype=tf.int64)
  got = mappers.apply_buckets(x_tensor, boundaries_tensor)
  self.assertAllEqual(got, want)