Example #1
def _ragged_feature_spec_from_batched_tensor(
    name: str, tensor: tf.RaggedTensor
) -> Union[tf.io.VarLenFeature, common_types.RaggedFeature]:
    """Infer `tf.io.RaggedFeature` from a batched `tf.RaggedTensor`."""
    if common_types.is_ragged_feature_available():
        logging.warning(
            'Feature %s is a RaggedTensor; its support is currently '
            'experimental.', name)

        partitions = []
        row_lengths_partition_idx = 1
        # Ignore batch dimension.
        for dim in tensor.values.shape[1:]:
            if dim is not None:  # Known (possibly 0) static dim: uniform row length.
                partitions.append(
                    tf.io.RaggedFeature.UniformRowLength(  # pytype: disable=attribute-error
                        length=dim))
            else:
                partitions.append(
                    tf.io.RaggedFeature.RowLengths(  # pytype: disable=attribute-error
                        key='{}$row_lengths_{}'.format(
                            name, row_lengths_partition_idx)))
                row_lengths_partition_idx += 1

        return tf.io.RaggedFeature(dtype=tensor.dtype,
                                   value_key='{}$ragged_values'.format(name),
                                   partitions=partitions,
                                   row_splits_dtype=tensor.row_splits.dtype)
    else:
        logging.warning(
            'Feature %s was a RaggedTensor. A Schema will be generated, but '
            'the Schema cannot be used with a coder (e.g. to materialize '
            'output data) or to generate a feature spec.', name)
        # Arbitrarily select VarLenFeature.
        return tf.io.VarLenFeature(tensor.dtype)


def _maybe_extend_encode_case_with_ragged(encode_case):
    """Extends an encode test case with ragged fields when they are supported."""
    result = copy.deepcopy(encode_case)
    ragged_ascii_proto = result.pop('ragged_ascii_proto', '}')
    ragged_instance = result.pop('ragged_instance', {})
    if common_types.is_ragged_feature_available():
        result['ascii_proto'] = (encode_case['ascii_proto'][:-1] +
                                 ragged_ascii_proto)
        result['instance'].update(ragged_instance)
    return result
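
For orientation, a minimal sketch (not from the library) of the spec the helper above infers for a batched rank-3 `RaggedTensor`, assuming TF 2.x where `tf.io.RaggedFeature` is available; the feature name `foo` is illustrative:

import tensorflow as tf

# Batched rank-3 RaggedTensor: 2 examples with variable-length inner rows.
rt = tf.RaggedTensor.from_row_splits(
    values=tf.RaggedTensor.from_row_splits(
        values=tf.constant([1., 2., 3.], tf.float32),
        row_splits=[0, 2, 2, 3]),
    row_splits=[0, 1, 3])

# rt.values.shape[1:] is [None], so the helper would emit a single RowLengths
# partition keyed 'foo$row_lengths_1' and store values under
# 'foo$ragged_values':
spec = tf.io.RaggedFeature(
    dtype=tf.float32,
    value_key='foo$ragged_values',
    partitions=[tf.io.RaggedFeature.RowLengths('foo$row_lengths_1')],
    row_splits_dtype=tf.int64)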
Example #3
def _ragged_tensor_representation_as_feature_spec(
    name: str, tensor_representation: schema_pb2.TensorRepresentation,
    feature_by_name: Dict[str, schema_pb2.Feature],
    string_domains: Dict[str, common_types.DomainType]
) -> Tuple[common_types.RaggedFeature, Optional[common_types.DomainType]]:
    """Returns a representation of a RaggedTensor as a feature spec."""
    if not common_types.is_ragged_feature_available():
        raise ValueError('RaggedFeature is not supported in TF 1.x.')

    value_feature = pop_ragged_source_columns(name, tensor_representation,
                                              feature_by_name)
    spec = tensor_representation_util.CreateTfExampleParserConfig(
        tensor_representation, value_feature.type)
    domain = _get_domain(value_feature, string_domains)
    return typing.cast(common_types.RaggedFeature, spec), domain
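
For reference, a hedged sketch of the kind of ragged `TensorRepresentation` this helper consumes; the proto text mirrors the representations inferred in the schema test further below, and the feature names are illustrative:

from google.protobuf import text_format
from tensorflow_metadata.proto.v0 import schema_pb2

# The value column and row-lengths column are the source features that
# pop_ragged_source_columns removes from `feature_by_name`.
tensor_representation = text_format.Parse(
    """
    ragged_tensor {
      feature_path { step: "bar$ragged_values" }
      partition { row_length: "bar$row_lengths_1" }
    }
    """, schema_pb2.TensorRepresentation())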
}""",
         instance={'varlen_string': [b'foo', b'bar']}),
}

_ENCODE_ERROR_CASES = [
    dict(testcase_name='too_few_values',
         feature_spec={
             '2d_vector_feature': tf.io.FixedLenFeature([2, 2], tf.int64),
         },
         instance={'2d_vector_feature': [1, 2, 3]},
         error_msg='got wrong number of values'),
]

# TODO(b/160294509): Move these to the initial definition once TF 1.x support is
# dropped.
if common_types.is_ragged_feature_available():
    _FEATURE_SPEC.update({
        'ragged_feature':
        tf.io.RaggedFeature(
            tf.float32,
            value_key='ragged_val',
            partitions=[tf.io.RaggedFeature.RowLengths('ragged_row_lengths1')
                        ]),
        '2d_ragged_feature':
        tf.io.RaggedFeature(
            tf.string,
            value_key='2d_ragged_val',
            partitions=[
                tf.io.RaggedFeature.RowLengths('2d_ragged_row_lengths1'),
                tf.io.RaggedFeature.RowLengths('2d_ragged_row_lengths2')
            ]),
    })
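
As a hedged illustration (assuming TF 2.x example parsing), the 'ragged_feature' spec above decodes a serialized tf.Example in which 'ragged_val' carries the flat values and 'ragged_row_lengths1' the inner row lengths:

import tensorflow as tf

example = tf.train.Example(features=tf.train.Features(feature={
    'ragged_val': tf.train.Feature(
        float_list=tf.train.FloatList(value=[1.0, 2.0, 3.0])),
    'ragged_row_lengths1': tf.train.Feature(
        int64_list=tf.train.Int64List(value=[2, 1])),
}))
parsed = tf.io.parse_single_example(
    example.SerializeToString(),
    {'ragged_feature': tf.io.RaggedFeature(
        tf.float32,
        value_key='ragged_val',
        partitions=[tf.io.RaggedFeature.RowLengths('ragged_row_lengths1')])})
# parsed['ragged_feature'] is [[1.0, 2.0], [3.0]].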
Example #5
class ImplHelperTest(test_case.TransformTestCase):
    def test_batched_placeholders_from_feature_spec(self):
        feature_spec = {
            'fixed_len_float':
            tf.io.FixedLenFeature([2, 3], tf.float32),
            'fixed_len_string':
            tf.io.FixedLenFeature([], tf.string),
            '_var_len_underscored':
            tf.io.VarLenFeature(tf.string),
            'var_len_int':
            tf.io.VarLenFeature(tf.int64),
            'sparse_1d':
            tf.io.SparseFeature('1d_idx', '1d_value', tf.int64, 7),
            'sparse_2d':
            tf.io.SparseFeature(['2d_idx0', '2d_idx1'], '2d_value', tf.int64,
                                [2, 17]),
        }
        with tf.compat.v1.Graph().as_default():
            features = impl_helper.batched_placeholders_from_specs(
                feature_spec)
        self.assertCountEqual(features.keys(), [
            'fixed_len_float',
            'fixed_len_string',
            'var_len_int',
            '_var_len_underscored',
            'sparse_1d',
            'sparse_2d',
        ])
        self.assertEqual(type(features['fixed_len_float']), tf.Tensor)
        self.assertEqual(features['fixed_len_float'].get_shape().as_list(),
                         [None, 2, 3])
        self.assertEqual(type(features['fixed_len_string']), tf.Tensor)
        self.assertEqual(features['fixed_len_string'].get_shape().as_list(),
                         [None])
        self.assertEqual(type(features['var_len_int']), tf.SparseTensor)
        self.assertEqual(features['var_len_int'].get_shape().as_list(),
                         [None, None])
        self.assertEqual(type(features['_var_len_underscored']),
                         tf.SparseTensor)
        self.assertEqual(
            features['_var_len_underscored'].get_shape().as_list(),
            [None, None])
        self.assertEqual(type(features['sparse_1d']), tf.SparseTensor)
        self.assertEqual(type(features['sparse_2d']), tf.SparseTensor)
        if version.parse(tf.__version__) >= version.parse('2'):
            self.assertEqual(features['sparse_1d'].get_shape().as_list(),
                             [None, 7])
            self.assertEqual(features['sparse_2d'].get_shape().as_list(),
                             [None, 2, 17])
        else:
            self.assertEqual(features['sparse_1d'].get_shape().as_list(),
                             [None, None])
            self.assertEqual(features['sparse_2d'].get_shape().as_list(),
                             [None, None, None])

    def test_batched_placeholders_from_typespecs(self):
        typespecs = {
            'dense_float':
            tf.TensorSpec(dtype=tf.float32, shape=[None, 2, 3]),
            'dense_string':
            tf.TensorSpec(shape=[None], dtype=tf.string),
            '_sparse_underscored':
            tf.SparseTensorSpec(dtype=tf.string, shape=[None, None, 17]),
            'ragged_string':
            tf.RaggedTensorSpec(dtype=tf.string,
                                ragged_rank=1,
                                shape=[None, None]),
            'ragged_multi_dimension':
            tf.RaggedTensorSpec(dtype=tf.int64,
                                ragged_rank=3,
                                shape=[None, None, None, None, 5]),
        }
        with tf.compat.v1.Graph().as_default():
            features = impl_helper.batched_placeholders_from_specs(typespecs)
        self.assertCountEqual(features.keys(), [
            'dense_float',
            'dense_string',
            '_sparse_underscored',
            'ragged_string',
            'ragged_multi_dimension',
        ])
        self.assertEqual(type(features['dense_float']), tf.Tensor)
        self.assertEqual(features['dense_float'].get_shape().as_list(),
                         [None, 2, 3])
        self.assertEqual(features['dense_float'].dtype, tf.float32)

        self.assertEqual(type(features['dense_string']), tf.Tensor)
        self.assertEqual(features['dense_string'].get_shape().as_list(),
                         [None])
        self.assertEqual(features['dense_string'].dtype, tf.string)

        self.assertEqual(type(features['_sparse_underscored']),
                         tf.SparseTensor)
        # TODO(zoyahav): Change last dimension size to 17 once SparseTensors
        # propagate static dense_shape from typespec correctly.
        self.assertEqual(features['_sparse_underscored'].get_shape().as_list(),
                         [None, None, None])
        self.assertEqual(features['_sparse_underscored'].dtype, tf.string)

        self.assertEqual(type(features['ragged_string']), tf.RaggedTensor)
        self.assertEqual(features['ragged_string'].shape.as_list(),
                         [None, None])
        self.assertEqual(features['ragged_string'].ragged_rank, 1)
        self.assertEqual(features['ragged_string'].dtype, tf.string)

        self.assertEqual(type(features['ragged_multi_dimension']),
                         tf.RaggedTensor)
        self.assertEqual(features['ragged_multi_dimension'].shape.as_list(),
                         [None, None, None, None, 5])
        self.assertEqual(features['ragged_multi_dimension'].ragged_rank, 3)
        self.assertEqual(features['ragged_multi_dimension'].dtype, tf.int64)

    def test_batched_placeholders_from_specs_invalid_dtype(self):
        with self.assertRaisesRegexp(ValueError, 'had invalid dtype'):
            impl_helper.batched_placeholders_from_specs(
                {'f': tf.TensorSpec(dtype=tf.int32, shape=[None])})
        with self.assertRaisesRegexp(ValueError, 'had invalid dtype'):
            impl_helper.batched_placeholders_from_specs(
                {'f': tf.io.FixedLenFeature(dtype=tf.int32, shape=[None])})

    def test_batched_placeholders_from_specs_invalid_mixing(self):
        with self.assertRaisesRegexp(TypeError, 'Specs must be all'):
            impl_helper.batched_placeholders_from_specs({
                'f1':
                tf.TensorSpec(dtype=tf.int64, shape=[None]),
                'f2':
                tf.io.FixedLenFeature(dtype=tf.int64, shape=[None]),
            })

    @test_case.named_parameters(*test_case.cross_named_parameters(
        _ROUNDTRIP_CASES, [
            dict(testcase_name='eager_tensors', feed_eager_tensors=True),
            dict(testcase_name='session_run_values', feed_eager_tensors=False)
        ]))
    def test_to_instance_dicts(self, feature_spec, instances, record_batch,
                               feed_dict, feed_eager_tensors):
        del record_batch
        if feed_eager_tensors:
            test_case.skip_if_not_tf2('Tensorflow 2.x required')
        schema = schema_utils.schema_from_feature_spec(feature_spec)
        feed_dict_local = (_eager_tensor_from_values(feed_dict)
                           if feed_eager_tensors else copy.copy(feed_dict))
        result = impl_helper.to_instance_dicts(schema, feed_dict_local)
        np.testing.assert_equal(instances, result)

    @test_case.named_parameters(*_TO_INSTANCE_DICT_ERROR_CASES)
    def test_to_instance_dicts_error(self,
                                     feature_spec,
                                     feed_dict,
                                     error_msg,
                                     error_type=ValueError):
        schema = schema_utils.schema_from_feature_spec(feature_spec)
        with self.assertRaisesRegexp(error_type, error_msg):
            impl_helper.to_instance_dicts(schema, feed_dict)

    @test_case.named_parameters(*test_case.cross_named_parameters(
        _ROUNDTRIP_CASES, [
            dict(testcase_name='eager_tensors', feed_eager_tensors=True),
            dict(testcase_name='session_run_values', feed_eager_tensors=False)
        ]))
    def test_convert_to_arrow(self, feature_spec, instances, record_batch,
                              feed_dict, feed_eager_tensors):
        del instances
        if feed_eager_tensors:
            test_case.skip_if_not_tf2('Tensorflow 2.x required')
        schema = schema_utils.schema_from_feature_spec(feature_spec)
        converter = impl_helper.make_tensor_to_arrow_converter(schema)
        feed_dict_local = (_eager_tensor_from_values(feed_dict)
                           if feed_eager_tensors else copy.copy(feed_dict))
        arrow_columns, arrow_schema = impl_helper.convert_to_arrow(
            schema, converter, feed_dict_local)
        actual = pa.RecordBatch.from_arrays(arrow_columns, schema=arrow_schema)
        expected = pa.RecordBatch.from_arrays(list(record_batch.values()),
                                              names=list(record_batch.keys()))
        np.testing.assert_equal(actual.to_pydict(), expected.to_pydict())

    @test_case.named_parameters(*_CONVERT_TO_ARROW_ERROR_CASES)
    def test_convert_to_arrow_error(self,
                                    feature_spec,
                                    feed_dict,
                                    error_msg,
                                    error_type=ValueError):
        schema = schema_utils.schema_from_feature_spec(feature_spec)
        converter = impl_helper.make_tensor_to_arrow_converter(schema)
        with self.assertRaisesRegexp(error_type, error_msg):
            impl_helper.convert_to_arrow(schema, converter, feed_dict)

    @test_case.named_parameters(
        dict(testcase_name='tf_compat_v1', force_tf_compat_v1=True),
        dict(testcase_name='native_tf2', force_tf_compat_v1=False))
    def test_analyze_in_place(self, force_tf_compat_v1):
        if not force_tf_compat_v1:
            test_case.skip_if_not_tf2('Tensorflow 2.x required')

        def preprocessing_fn(inputs):
            return {'x_add_1': inputs['x'] + 1}

        feature_spec = {'x': tf.io.FixedLenFeature([], tf.int64)}
        type_spec = {'x': tf.TensorSpec(dtype=tf.int64, shape=[None])}
        output_path = os.path.join(self.get_temp_dir(), self._testMethodName)
        impl_helper.analyze_in_place(preprocessing_fn, force_tf_compat_v1,
                                     feature_spec, type_spec, output_path)

        tft_output = TFTransformOutput(output_path)
        expected_value = np.array([2], dtype=np.int64)
        if force_tf_compat_v1:
            with tf.Graph().as_default() as graph:
                with tf.compat.v1.Session(graph=graph).as_default():
                    transformed_features = tft_output.transform_raw_features(
                        {'x': tf.constant([1], dtype=tf.int64)})
                    transformed_value = transformed_features['x_add_1'].eval()
        else:
            transformed_features = tft_output.transform_raw_features(
                {'x': tf.constant([1], dtype=tf.int64)})
            transformed_value = transformed_features['x_add_1'].numpy()
        self.assertEqual(transformed_value, expected_value)

        transformed_feature_spec = tft_output.transformed_feature_spec()
        expected_feature_spec = {
            'x_add_1': tf.io.FixedLenFeature([], tf.int64)
        }
        self.assertEqual(transformed_feature_spec, expected_feature_spec)

    @test_case.named_parameters(
        dict(testcase_name='tf_compat_v1', force_tf_compat_v1=True),
        dict(testcase_name='native_tf2', force_tf_compat_v1=False))
    def test_analyze_in_place_with_analyzers_raises_error(
            self, force_tf_compat_v1):
        if not force_tf_compat_v1:
            test_case.skip_if_not_tf2('Tensorflow 2.x required')

        def preprocessing_fn(inputs):
            return {'x_add_1': analyzers.mean(inputs['x'])}

        feature_spec = {'x': tf.io.FixedLenFeature([], tf.int64)}
        type_spec = {'x': tf.TensorSpec(dtype=tf.int64, shape=[None])}
        output_path = os.path.join(self.get_temp_dir(), self._testMethodName)
        with self.assertRaisesRegexp(RuntimeError,
                                     'analyzers found when tracing'):
            impl_helper.analyze_in_place(preprocessing_fn, force_tf_compat_v1,
                                         feature_spec, type_spec, output_path)

    @test_case.named_parameters(
        dict(testcase_name='_3d',
             sparse_value=tf.compat.v1.SparseTensorValue(
                 indices=np.array([[0, 0, 1], [0, 1, 2], [1, 1, 1]]),
                 values=np.array([0, 1, 2]),
                 dense_shape=np.array([2, 2, 3])),
             expected_indices=[[np.array([0, 1]),
                                np.array([1, 2])],
                               [np.array([1]), np.array([1])]],
             expected_values=[np.array([0, 1]),
                              np.array([2])]),
        dict(testcase_name='_4d',
             sparse_value=tf.compat.v1.SparseTensorValue(
                 indices=np.array([[0, 0, 0, 1], [0, 1, 0, 2], [1, 1, 1, 1]]),
                 values=np.array([0, 1, 2]),
                 dense_shape=np.array([2, 2, 2, 3])),
             expected_indices=[[
                 np.array([0, 1]),
                 np.array([0, 0]),
                 np.array([1, 2])
             ], [np.array([1]), np.array([1]),
                 np.array([1])]],
             expected_values=[np.array([0, 1]),
                              np.array([2])]),
    )
    def test_decompose_sparse_batch(self, sparse_value, expected_indices,
                                    expected_values):
        indices, values = impl_helper._decompose_sparse_batch(sparse_value)
        self.assertLen(indices, len(expected_indices))
        self.assertLen(values, len(expected_values))
        for idx, (a, b) in enumerate(zip(expected_indices, indices)):
            self.assertAllEqual(
                a, b, 'Indices are different at index {}'.format(idx))
        for idx, (a, b) in enumerate(zip(expected_values, values)):
            self.assertAllEqual(a, b,
                                'Values are different at index {}'.format(idx))

    def test_get_num_values_per_instance_in_sparse_batch(self):
        batch_indices = np.array([[idx % 4, 0, 1, 2] for idx in range(100)])
        num_values = impl_helper._get_num_values_per_instance_in_sparse_batch(
            batch_indices, 27)
        expected_num_values = [25, 25, 25, 25] + [0] * 23
        self.assertEqual(expected_num_values, num_values)

    @test_case.named_parameters(
        dict(
            testcase_name='_3d',
            ragged_tensor=tf.compat.v1.ragged.RaggedTensorValue(
                values=tf.compat.v1.ragged.RaggedTensorValue(
                    values=tf.compat.v1.ragged.RaggedTensorValue(
                        values=np.array([10., 20., 30.]),
                        row_splits=np.array([0, 0, 1, 3])),  # row_lengths2
                    row_splits=np.array([0, 1, 1, 3])),  # row_lengths1
                row_splits=np.array([0, 2, 3])),  # batch dimension
            # pytype: disable=attribute-error
            spec=tf.io.RaggedFeature(  # pylint: disable=g-long-ternary
                tf.float32,
                value_key='ragged_3d_val',
                partitions=[
                    tf.io.RaggedFeature.RowLengths('ragged_3d_row_lengths1'),
                    tf.io.RaggedFeature.RowLengths('ragged_3d_row_lengths2'),
                ]) if common_types.is_ragged_feature_available() else None,
            # pytype: enable=attribute-error
            expected_components={
                'ragged_3d_val':
                [np.array([], dtype=np.float32),
                 np.array([10., 20., 30.])],
                'ragged_3d_row_lengths1': [np.array([1, 0]),
                                           np.array([2])],
                'ragged_3d_row_lengths2': [np.array([0]),
                                           np.array([1, 2])],
            },
        ),
        dict(
            testcase_name='_4d',
            ragged_tensor=tf.compat.v1.ragged.RaggedTensorValue(
                values=tf.compat.v1.ragged.RaggedTensorValue(
                    values=tf.compat.v1.ragged.RaggedTensorValue(
                        values=tf.compat.v1.ragged.RaggedTensorValue(
                            values=np.array([b'a', b'b', b'c', b'd']),
                            row_splits=np.array([0, 1, 1, 3,
                                                 4])),  # row_lengths3
                        row_splits=np.array([0, 2, 2, 4])),  # row_lengths2
                    row_splits=np.array([0, 1, 1, 3])),  # row_lengths1
                row_splits=np.array([0, 2, 2, 3])),  # batch dimension
            # pytype: disable=attribute-error
            spec=tf.io.RaggedFeature(  # pylint: disable=g-long-ternary
                tf.float32,
                value_key='ragged_4d_val',
                partitions=[
                    tf.io.RaggedFeature.RowLengths('ragged_4d_row_lengths1'),
                    tf.io.RaggedFeature.RowLengths('ragged_4d_row_lengths2'),
                    tf.io.RaggedFeature.RowLengths('ragged_4d_row_lengths3'),
                ]) if common_types.is_ragged_feature_available() else None,
            # pytype: enable=attribute-error
            expected_components={
                'ragged_4d_val': [
                    np.array([b'a']),
                    np.array([], dtype=object),
                    np.array([b'b', b'c', b'd'])
                ],
                'ragged_4d_row_lengths1':
                [np.array([1, 0]),
                 np.array([]), np.array([2])],
                'ragged_4d_row_lengths2':
                [np.array([2]), np.array([]),
                 np.array([0, 2])],
                'ragged_4d_row_lengths3':
                [np.array([1, 0]),
                 np.array([]),
                 np.array([2, 1])],
            },
        ))
    def test_handle_ragged_batch(self, ragged_tensor, spec,
                                 expected_components):
        test_case.skip_if_not_tf2('RaggedFeature is not available in TF 1.x')
        result = impl_helper._handle_ragged_batch(ragged_tensor,
                                                  spec,
                                                  name='ragged')
        np.testing.assert_equal(result, expected_components)

    @test_case.named_parameters(
        dict(testcase_name='compat_v1', use_compat_v1=True),
        dict(testcase_name='v2', use_compat_v1=False))
    def test_infer_feature_schema_with_ragged_tensor(self, use_compat_v1):
        if not use_compat_v1:
            test_case.skip_if_not_tf2('Tensorflow 2.x required')

        def preprocessing_fn(_):
            return {
                'foo':
                tf.RaggedTensor.from_row_splits(values=tf.constant(
                    [3, 1, 4, 1, 5, 9, 2, 6], tf.int64),
                                                row_splits=[0, 4, 4, 7, 8, 8]),
                'bar':
                tf.RaggedTensor.from_row_splits(
                    values=tf.RaggedTensor.from_row_splits(
                        values=tf.ones([5], tf.float32),
                        row_splits=[0, 2, 3, 5]),
                    row_splits=[0, 0, 0, 2, 2, 4]),
                'baz':
                tf.RaggedTensor.from_row_splits(values=tf.ones([5, 3],
                                                               tf.float32),
                                                row_splits=[0, 2, 3, 5]),
                'qux':
                tf.RaggedTensor.from_row_splits(
                    values=tf.RaggedTensor.from_row_splits(
                        values=tf.ones([5, 7], tf.float32),
                        row_splits=[0, 2, 3, 5]),
                    row_splits=[0, 0, 0, 2, 2, 4]),
            }

        schema = self._get_schema(preprocessing_fn,
                                  use_compat_v1,
                                  create_session=True)
        if common_types.is_ragged_feature_available():
            expected_schema_ascii = """
        feature {
          name: "bar$ragged_values"
          type: FLOAT
        }
        feature {
          name: "bar$row_lengths_1"
          type: INT
        }
        feature {
          name: "baz$ragged_values"
          type: FLOAT
        }
        feature {
          name: "foo$ragged_values"
          type: INT
        }
        feature {
          name: "qux$ragged_values"
          type: FLOAT
        }
        feature {
          name: "qux$row_lengths_1"
          type: INT
        }
        tensor_representation_group {
          key: ""
          value {
            tensor_representation {
              key: "foo"
              value {
                ragged_tensor {
                  feature_path { step: "foo$ragged_values" }
                }
              }
            }
            tensor_representation {
              key: "bar"
              value {
                ragged_tensor {
                  feature_path { step: "bar$ragged_values" }
                  partition { row_length: "bar$row_lengths_1"}
                }
              }
            }
            tensor_representation {
              key: "baz"
              value {
                ragged_tensor {
                  feature_path { step: "baz$ragged_values" }
                  partition { uniform_row_length: 3}
                }
              }
            }
            tensor_representation {
              key: "qux"
              value {
                ragged_tensor {
                  feature_path { step: "qux$ragged_values" }
                  partition { row_length: "qux$row_lengths_1"}
                  partition { uniform_row_length: 7}
                }
              }
            }
          }
        }
        """
        else:
            expected_schema_ascii = """
        feature {
          name: "bar"
          type: FLOAT
          annotation {
            tag: "ragged_tensor"
          }
        }
        feature {
          name: "baz"
          type: FLOAT
          annotation {
            tag: "ragged_tensor"
          }
        }
        feature {
          name: "foo"
          type: INT
          annotation {
            tag: "ragged_tensor"
          }
        }
        feature {
          name: "qux"
          type: FLOAT
          annotation {
            tag: "ragged_tensor"
          }
        }
        """
        expected_schema = text_format.Parse(expected_schema_ascii,
                                            schema_pb2.Schema())
        schema_utils_legacy.set_generate_legacy_feature_spec(
            expected_schema, False)
        self.assertProtoEquals(expected_schema, schema)
        if not common_types.is_ragged_feature_available():
            with self.assertRaisesRegexp(
                    ValueError, 'Feature "bar" had tag "ragged_tensor"'):
                schema_utils.schema_as_feature_spec(schema)
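
A hedged sketch of the round trip the TF 2.x branch of the test above relies on: parsing the inferred schema back into feature specs recovers tf.io.RaggedFeature entries keyed by the logical tensor names.

# Hedged sketch; assumes TF 2.x and this module's text_format/schema_pb2
# imports, reusing the test's expected_schema_ascii.
schema = text_format.Parse(expected_schema_ascii, schema_pb2.Schema())
feature_spec = schema_utils.schema_as_feature_spec(schema).feature_spec
# feature_spec['foo'] is a tf.io.RaggedFeature whose value_key is
# 'foo$ragged_values'.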
Example #7
def _infer_feature_schema_common(
        features: Mapping[str, common_types.TensorType],
        tensor_ranges: Mapping[str, Tuple[int, int]],
        feature_annotations: Mapping[str, List[any_pb2.Any]],
        global_annotations: List[any_pb2.Any],
        is_evaluation_complete: bool) -> schema_pb2.Schema:
    """Given a dict of tensors, creates a `Schema`.

  Args:
    features: A dict mapping column names to `Tensor`, `SparseTensor` or
      `RaggedTensor`. The `Tensor`, `SparseTensor` or `RaggedTensor` should have
      a 0'th dimension which is interpreted as the batch dimension.
    tensor_ranges: A dict mapping a tensor name to a tuple containing its min
      and max values.
    feature_annotations: dictionary from feature name to list of any_pb2.Any
      protos to be added as an annotation for that feature in the schema.
    global_annotations: list of any_pb2.Any protos to be added at the global
      schema level.
    is_evaluation_complete: A boolean indicating whether all analyzers have been
      evaluated or not.

  Returns:
    A `Schema` proto.
  """
    domains = {}
    feature_tags = collections.defaultdict(list)
    for name, tensor in features.items():
        if (isinstance(tensor, tf.RaggedTensor)
                and not common_types.is_ragged_feature_available()):
            # Add the 'ragged_tensor' tag which will cause coder and
            # schema_as_feature_spec to raise an error, as there is no feature spec
            # for ragged tensors in TF 1.x.
            feature_tags[name].append(schema_utils.RAGGED_TENSOR_TAG)
        if name in tensor_ranges:
            min_value, max_value = tensor_ranges[name]
            domains[name] = schema_pb2.IntDomain(min=min_value,
                                                 max=max_value,
                                                 is_categorical=True)
    feature_spec = _feature_spec_from_batched_tensors(features,
                                                      is_evaluation_complete)

    schema_proto = schema_utils.schema_from_feature_spec(feature_spec, domains)

    # Add the annotations to the schema.
    for annotation in global_annotations:
        schema_proto.annotation.extra_metadata.add().CopyFrom(annotation)
    # Build a map from logical feature names to Feature protos
    feature_protos_by_name = {}
    for feature in schema_proto.feature:
        feature_protos_by_name[feature.name] = feature
    for sparse_feature in schema_proto.sparse_feature:
        for index_feature in sparse_feature.index_feature:
            feature_protos_by_name.pop(index_feature.name)
        value_feature = feature_protos_by_name.pop(
            sparse_feature.value_feature.name)
        feature_protos_by_name[sparse_feature.name] = value_feature

    # Handle ragged tensor representations.
    tensor_representations = (
        tensor_representation_util.GetTensorRepresentationsFromSchema(
            schema_proto, schema_utils.TENSOR_REPRESENTATION_GROUP))
    if tensor_representations is not None:
        for name, tensor_representation in tensor_representations.items():
            feature_protos_by_name[
                name] = schema_utils.pop_ragged_source_columns(
                    name, tensor_representation, feature_protos_by_name)

    # Update annotations
    for feature_name, annotations in feature_annotations.items():
        feature_proto = feature_protos_by_name[feature_name]
        for annotation in annotations:
            feature_proto.annotation.extra_metadata.add().CopyFrom(annotation)
    for feature_name, tags in feature_tags.items():
        feature_proto = feature_protos_by_name[feature_name]
        for tag in tags:
            feature_proto.annotation.tag.append(tag)
    return schema_proto
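
A hedged usage sketch of the function above; the inputs are illustrative and the feature name 'age' is hypothetical:

# Hedged usage sketch; assumes the surrounding module's imports.
with tf.compat.v1.Graph().as_default():
    features = {'age': tf.compat.v1.placeholder(tf.int64, shape=[None])}
    schema_proto = _infer_feature_schema_common(
        features,
        tensor_ranges={'age': (0, 120)},  # becomes a categorical IntDomain
        feature_annotations={},
        global_annotations=[],
        is_evaluation_complete=True)
    # schema_proto now describes feature 'age' with type INT and an
    # int_domain of [0, 120].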