def testCreateTfExampleParserConfig(self, tensor_representation,
                                        feature_type, tf_example,
                                        expected_feature,
                                        expected_parsed_results):
        """Checks the parser config created from a TensorRepresentation.

        The text-format `tensor_representation` is parsed into a
        `schema_pb2.TensorRepresentation` and turned into a parser config for
        `feature_type`. The config is compared element by element with
        `expected_feature` and is then used to parse `tf_example` under the
        key 'feat'; the parsed result must equal `expected_parsed_results`.
        """
        representation_proto = text_format.Parse(
            tensor_representation, schema_pb2.TensorRepresentation())
        parser_config = tensor_representation_util.CreateTfExampleParserConfig(
            representation_proto, feature_type)

        # The generated config must match the expected one, pairwise.
        for got, want in zip(parser_config, expected_feature):
            self.assertAllEqual(got, want)

        # The generated config must also be usable with
        # tf.io.parse_single_example().
        parsed = tf.io.parse_single_example(
            tf_example, {'feat': parser_config})['feat']
        sparse_types = (tf.SparseTensor, tf.compat.v1.SparseTensorValue)
        if not isinstance(parsed, sparse_types):
            self.assertAllEqual(parsed, expected_parsed_results)
            return

        # Sparse results are compared component by component.
        for component in ('values', 'indices', 'dense_shape'):
            self.assertAllEqual(getattr(parsed, component),
                                getattr(expected_parsed_results, component))
예제 #2
0
 def testCreateTfExampleParserConfigRagged(self):
   """Expects NotImplementedError for a ragged_tensor representation."""
   ragged_representation = text_format.Parse(
       """
               ragged_tensor {
                 feature_path {
                   step: "ragged_feature"
                 }
               }""", schema_pb2.TensorRepresentation())
   expected_error = self.assertRaisesRegex(
       NotImplementedError, 'TensorRepresentation: .* is not supported.')
   with expected_error:
     tensor_representation_util.CreateTfExampleParserConfig(
         ragged_representation, schema_pb2.INT)
예제 #3
0
def _ragged_tensor_representation_as_feature_spec(
    name: str, tensor_representation: schema_pb2.TensorRepresentation,
    feature_by_name: Dict[str, schema_pb2.Feature],
    string_domains: Dict[str, common_types.DomainType]
) -> Tuple[common_types.RaggedFeature, Optional[common_types.DomainType]]:
    """Builds the feature spec and domain for a RaggedTensor representation.

    Args:
      name: forwarded to `pop_ragged_source_columns` together with the
        representation and `feature_by_name`.
      tensor_representation: the representation to convert into a parser
        config.
      feature_by_name: schema features keyed by name; handed to
        `pop_ragged_source_columns`.
      string_domains: domains keyed by name; handed to `_get_domain`.

    Returns:
      A `(spec, domain)` pair. `spec` is the parser config created for the
      representation, typed as a `RaggedFeature`; `domain` is whatever
      `_get_domain` returns for the value feature.

    Raises:
      ValueError: if `common_types.is_ragged_feature_available()` is false.
    """
    ragged_supported = common_types.is_ragged_feature_available()
    if not ragged_supported:
        raise ValueError('RaggedFeature is not supported in TF 1.x.')

    # The feature returned here supplies both the value type for the parser
    # config and the input to the domain lookup.
    source_feature = pop_ragged_source_columns(
        name, tensor_representation, feature_by_name)
    parser_spec = tensor_representation_util.CreateTfExampleParserConfig(
        tensor_representation, source_feature.type)
    value_domain = _get_domain(source_feature, string_domains)
    # The cast only narrows the static type; the object is returned as is.
    return typing.cast(common_types.RaggedFeature, parser_spec), value_domain
 def testCreateTfExampleParserConfigRagged(self):
   """Expects ValueError for a ragged feature_path with multiple steps."""
   multi_step_representation = text_format.Parse(
       """
               ragged_tensor {
                 feature_path {
                   step: "foo"
                   step: "ragged_feature"
                 }
               }""", schema_pb2.TensorRepresentation())
   expected_message = (
       'Parsing spec from a RaggedTensor with multiple steps in '
       'feature_path is not implemented.')
   with self.assertRaisesRegex(ValueError, expected_message):
     tensor_representation_util.CreateTfExampleParserConfig(
         multi_step_representation, schema_pb2.INT)
예제 #5
0
    def TensorFlowDataset(
            self, options: dataset_options.TensorFlowDatasetOptions
    ) -> tf.data.Dataset:
        """Builds a batched `tf.data.Dataset` of parsed features.

        A parsing config is created for every tensor representation, using
        the feature type that the schema declares under the same name. The
        records matching the file pattern are then read and parsed through
        `tf.data.experimental.make_batched_features_dataset`.

        See base class (tfxio.TFXIO) for more details.

        Args:
          options: an options object for the tf.data.Dataset. See
            `dataset_options.TensorFlowDatasetOptions` for more details.

        Returns:
          A dataset of `dict` elements, (or a tuple of `dict` elements and
          label). Each `dict` maps feature keys to `Tensor`, `SparseTensor`,
          or `RaggedTensor` objects.

        Raises:
          ValueError: if there is something wrong with the
            tensor_representation.
        """
        type_by_feature_name = {
            feature.name: feature.type for feature in self._schema.feature
        }

        # One parsing config per tensor representation, keyed by its name.
        parsing_configs = {
            name: tensor_representation_util.CreateTfExampleParserConfig(
                representation, type_by_feature_name[name])
            for name, representation in self.TensorRepresentations().items()
        }

        pattern_tensor = tf.convert_to_tensor(self._file_pattern)
        return tf.data.experimental.make_batched_features_dataset(
            pattern_tensor,
            features=parsing_configs,
            batch_size=options.batch_size,
            # The detected compression type is the only reader argument.
            reader_args=[
                record_based_tfxio.DetectCompressionType(pattern_tensor)
            ],
            num_epochs=options.num_epochs,
            shuffle=options.shuffle,
            shuffle_buffer_size=options.shuffle_buffer_size,
            shuffle_seed=options.shuffle_seed,
            drop_final_batch=options.drop_final_batch,
            label_key=options.label_key)
 def testCreateTfExampleParserConfigInvalidDefaultValue(self):
   """Expects ValueError for an int default_value with a FLOAT feature."""
   dense_representation = text_format.Parse(
       """
               dense_tensor {
                 column_name: "dense_column"
                 shape {
                   dim {
                     size: 1
                   }
                 }
                 default_value {
                   int_value: -1
                 }
               }""", schema_pb2.TensorRepresentation())
   expected_message = 'FeatureType:.* is incompatible with default_value:.*'
   with self.assertRaisesRegex(ValueError, expected_message):
     tensor_representation_util.CreateTfExampleParserConfig(
         dense_representation, schema_pb2.FLOAT)
예제 #7
0
    def _GetTfExampleParserConfig(
            self) -> Tuple[Dict[Text, Any], Dict[Text, Text]]:
        """Builds the feature spec dict consumed by tf.io.parse_example().

        Terminology used below:
          * 'tensor name': a key of the TensorRepresentations.
          * 'feature name': a key of the tf.Example parser config.
          * 'column name': a feature in the schema.

        Returns:
          Two maps. The first is the parser config that maps from feature
          name to a tf.io Feature. The second is a mapping from feature names
          to tensor names.

        Raises:
          ValueError: if no schema is provided, if two tensor representations
            produce the same feature name, or if the tf.Example parser config
            is invalid.
        """
        if self._schema is None:
            raise ValueError(
                "Unable to create a parsing config because no schema is provided."
            )

        type_by_column = {f.name: f.type for f in self._schema.feature}
        parser_config = {}
        tensor_name_by_feature = {}
        for tensor_name, tensor_rep in self.TensorRepresentations().items():
            source_paths = (
                tensor_rep_util.GetSourceColumnsFromTensorRepresentation(
                    tensor_rep))
            if len(source_paths) != 1:
                # Several tf.Example features feed this tensor, so the parser
                # config key does not matter; the tensor representation key is
                # preserved. The value type comes from the value column.
                column_name = tensor_name
                value_path = (
                    tensor_rep_util.
                    GetSourceValueColumnFromTensorRepresentation(tensor_rep))
                value_type = type_by_column[value_path.initial_step()]
            else:
                # A single tf.Example feature feeds this tensor, so the parser
                # config key needs to be the name of that feature.
                column_name = source_paths[0].initial_step()
                value_type = type_by_column[column_name]
            parse_config = tensor_rep_util.CreateTfExampleParserConfig(
                tensor_rep, value_type)

            # Multi-column configs get an internal name to prevent possible
            # naming collisions between tensor_name and column_name.
            feature_name = (
                _FEATURE_NAME_PREFIX + tensor_name + "_" + column_name
                if _is_multi_column_parser_config(parse_config)
                else column_name)

            if feature_name in tensor_name_by_feature:
                clashing_tensor_name = tensor_name_by_feature[feature_name]
                clashing_rep = self.TensorRepresentations()[
                    clashing_tensor_name]
                raise ValueError(
                    "Unable to create a valid parsing config. "
                    f"Feature name: {feature_name} is a duplicate of tensor "
                    f"representation: {clashing_rep}")
            tensor_name_by_feature[feature_name] = tensor_name
            parser_config[feature_name] = parse_config

        _validate_tf_example_parser_config(parser_config, self._schema)

        return parser_config, tensor_name_by_feature