Esempio n. 1
0
  def __init__(self, input_dataset, features, num_parallel_calls):
    """Builds the parse-example dataset on top of `input_dataset`.

    Args:
      input_dataset: The dataset to parse. Its element structure must be
        compatible with a string `TensorStructure` of shape `[None]`.
      features: Feature specification. It is passed through
        `parsing_ops._prepend_none_dimension` and then split into raw sparse
        and dense parameters. Presumably a dict mapping feature keys to
        `VarLenFeature`, `SparseFeature`, `FixedLenFeature` or
        `FixedLenSequenceFeature` values -- TODO confirm against callers.
      num_parallel_calls: Stored on the instance and forwarded unchanged to
        the `experimental_parse_example_dataset` op.

    Raises:
      TypeError: If the element structure of `input_dataset` is not compatible
        with a vector of strings.
    """
    self._input_dataset = input_dataset
    input_structure = input_dataset._element_structure  # pylint: disable=protected-access
    string_vectors = structure.TensorStructure(dtypes.string, [None])
    if not input_structure.is_compatible_with(string_vectors):
      raise TypeError("Input dataset should be a dataset of vectors of strings")
    self._num_parallel_calls = num_parallel_calls

    # pylint: disable=protected-access
    self._features = parsing_ops._prepend_none_dimension(features)
    supported_feature_types = [
        parsing_ops.VarLenFeature, parsing_ops.SparseFeature,
        parsing_ops.FixedLenFeature, parsing_ops.FixedLenSequenceFeature
    ]
    # sparse_keys and dense_keys come back sorted here.
    raw_params = parsing_ops._features_to_raw_params(
        self._features, supported_feature_types)
    (sparse_keys, sparse_types, dense_keys, dense_types, dense_defaults,
     dense_shapes) = raw_params
    # TODO(b/112859642): Pass sparse_index and sparse_values for SparseFeature.
    processed_params = parsing_ops._process_raw_parameters(
        None, dense_defaults, sparse_keys, sparse_types, dense_keys,
        dense_types, dense_shapes)
    # pylint: enable=protected-access

    # The first returned item (names) is not needed; the rest is kept on the
    # instance, except for the shape objects used only to build the signature.
    (_, self._dense_defaults, self._sparse_keys, self._sparse_types,
     self._dense_keys, self._dense_shapes,
     dense_shape_as_shape) = processed_params
    # Dense types are taken from the raw parameters, since the processing step
    # does not return them.
    self._dense_types = dense_types

    # Output shapes are the input element shape followed by the per-feature
    # shape (dense) or by a single `None` dimension (sparse).
    input_shape = dataset_ops.get_legacy_output_shapes(self._input_dataset)
    dense_output_shapes = [
        input_shape.concatenate(dense_shape)
        for dense_shape in dense_shape_as_shape
    ]
    sparse_output_shapes = [
        input_shape.concatenate([None]) for _ in self._sparse_keys
    ]

    # Dense features are listed first, then sparse ones; all three legacy
    # dictionaries share this key order.
    output_keys = self._dense_keys + self._sparse_keys
    output_shapes = dict(
        zip(output_keys, dense_output_shapes + sparse_output_shapes))
    output_types = dict(
        zip(output_keys, self._dense_types + self._sparse_types))
    dense_classes = [ops.Tensor] * len(self._dense_defaults)
    sparse_classes = [sparse_tensor.SparseTensor] * len(self._sparse_keys)
    output_classes = dict(zip(output_keys, dense_classes + sparse_classes))
    self._structure = structure.convert_legacy_structure(
        output_types, output_shapes, output_classes)

    # `flat_structure(self)` reads the structure assigned just above, so the
    # op is created only after `self._structure` is in place.
    parse_op = gen_experimental_dataset_ops.experimental_parse_example_dataset
    variant_tensor = parse_op(
        self._input_dataset._variant_tensor,  # pylint: disable=protected-access
        self._num_parallel_calls,
        self._dense_defaults,
        self._sparse_keys,
        self._dense_keys,
        self._sparse_types,
        self._dense_shapes,
        **dataset_ops.flat_structure(self))
    super(_ParseExampleDataset, self).__init__(input_dataset, variant_tensor)
Esempio n. 2
0
    def __init__(self, input_dataset, features, num_parallel_calls):
        """Sets up parsing parameters and the output signature.

        Args:
            input_dataset: Dataset to parse; every flattened entry of its
                `output_types` must equal `dtypes.string`.
            features: Feature specification. It is passed through
                `parsing_ops._prepend_none_dimension` and then split into
                raw sparse and dense parameters. Presumably a dict keyed by
                feature name -- TODO confirm against callers.
            num_parallel_calls: Stored on the instance unchanged; it is not
                used further inside this constructor.

        Raises:
            TypeError: If any component type of `input_dataset` is not a
                string. Only dtypes are inspected by this check.
        """
        super(_ParseExampleDataset, self).__init__()
        self._input_dataset = input_dataset
        component_types = nest.flatten(input_dataset.output_types)
        if not all(component_type == dtypes.string
                   for component_type in component_types):
            raise TypeError(
                "Input dataset should be a dataset of vectors of strings")
        self._num_parallel_calls = num_parallel_calls

        # pylint: disable=protected-access
        self._features = parsing_ops._prepend_none_dimension(features)
        supported_feature_types = [
            parsing_ops.VarLenFeature,
            parsing_ops.SparseFeature,
            parsing_ops.FixedLenFeature,
            parsing_ops.FixedLenSequenceFeature,
        ]
        # sparse_keys and dense_keys come back sorted here.
        raw_params = parsing_ops._features_to_raw_params(
            self._features, supported_feature_types)
        (sparse_keys, sparse_types, dense_keys, dense_types, dense_defaults,
         dense_shapes) = raw_params
        # TODO(b/112859642): Pass sparse_index and sparse_values for SparseFeature.
        processed_params = parsing_ops._process_raw_parameters(
            None, dense_defaults, sparse_keys, sparse_types, dense_keys,
            dense_types, dense_shapes)
        # pylint: enable=protected-access

        # The first returned item (names) is not needed here.
        (_, self._dense_defaults, self._sparse_keys, self._sparse_types,
         self._dense_keys, self._dense_shapes,
         dense_shape_as_shape) = processed_params
        # Dense types are taken from the raw parameters, since the processing
        # step does not return them.
        self._dense_types = dense_types

        def _append_to_input_shape(suffix):
            # Output shape = input dataset shape followed by `suffix`.
            return self._input_dataset.output_shapes.concatenate(suffix)

        dense_output_shapes = [
            _append_to_input_shape(dense_shape)
            for dense_shape in dense_shape_as_shape
        ]
        # Each sparse feature gets a single trailing `None` dimension.
        sparse_output_shapes = [
            _append_to_input_shape([None]) for _ in self._sparse_keys
        ]

        # Dense features are listed first, then sparse ones; the three
        # dictionaries below share this key order.
        output_keys = self._dense_keys + self._sparse_keys
        self._output_shapes = dict(
            zip(output_keys, dense_output_shapes + sparse_output_shapes))
        self._output_types = dict(
            zip(output_keys, self._dense_types + self._sparse_types))
        dense_classes = [ops.Tensor] * len(self._dense_defaults)
        sparse_classes = [sparse_tensor.SparseTensor] * len(self._sparse_keys)
        self._output_classes = dict(
            zip(output_keys, dense_classes + sparse_classes))
Esempio n. 3
0
  def __init__(self, input_dataset, features, num_parallel_calls):
    """Initializes parsing parameters and the legacy output signature.

    Args:
      input_dataset: Dataset to parse; it is handed to the base-class
        constructor, and every flattened entry of its `output_types` must
        equal `dtypes.string`.
      features: Feature specification. It is passed through
        `parsing_ops._prepend_none_dimension` and then split into raw sparse
        and dense parameters. Presumably a dict keyed by feature name --
        TODO confirm against callers.
      num_parallel_calls: Stored on the instance unchanged; it is not used
        further inside this constructor.

    Raises:
      TypeError: If any component type of `input_dataset` is not a string.
        Only dtypes are inspected by this check.
    """
    super(_ParseExampleDataset, self).__init__(input_dataset)
    self._input_dataset = input_dataset
    flat_input_types = nest.flatten(input_dataset.output_types)
    if not all(input_type == dtypes.string for input_type in flat_input_types):
      raise TypeError("Input dataset should be a dataset of vectors of strings")
    self._num_parallel_calls = num_parallel_calls

    # pylint: disable=protected-access
    self._features = parsing_ops._prepend_none_dimension(features)
    accepted_feature_types = [
        parsing_ops.VarLenFeature, parsing_ops.SparseFeature,
        parsing_ops.FixedLenFeature, parsing_ops.FixedLenSequenceFeature
    ]
    # sparse_keys and dense_keys come back sorted here.
    raw_params = parsing_ops._features_to_raw_params(
        self._features, accepted_feature_types)
    (sparse_keys, sparse_types, dense_keys, dense_types, dense_defaults,
     dense_shapes) = raw_params
    # TODO(b/112859642): Pass sparse_index and sparse_values for SparseFeature.
    processed_params = parsing_ops._process_raw_parameters(
        None, dense_defaults, sparse_keys, sparse_types, dense_keys,
        dense_types, dense_shapes)
    # pylint: enable=protected-access

    # The first returned item (names) is not needed here.
    (_, self._dense_defaults, self._sparse_keys, self._sparse_types,
     self._dense_keys, self._dense_shapes,
     dense_shape_as_shape) = processed_params
    # Dense types are taken from the raw parameters, since the processing step
    # does not return them.
    self._dense_types = dense_types

    # Output shapes are the input dataset shape followed by the per-feature
    # shape (dense) or by a single `None` dimension (sparse).
    dense_output_shapes = [
        self._input_dataset.output_shapes.concatenate(dense_shape)
        for dense_shape in dense_shape_as_shape
    ]
    sparse_output_shapes = [
        self._input_dataset.output_shapes.concatenate([None])
        for _ in self._sparse_keys
    ]

    # Dense features are listed first, then sparse ones; the three dictionaries
    # below share this key order.
    feature_keys = self._dense_keys + self._sparse_keys
    self._output_shapes = dict(
        zip(feature_keys, dense_output_shapes + sparse_output_shapes))
    self._output_types = dict(
        zip(feature_keys, self._dense_types + self._sparse_types))
    tensor_classes = [ops.Tensor] * len(self._dense_defaults)
    sparse_tensor_classes = (
        [sparse_tensor.SparseTensor] * len(self._sparse_keys))
    self._output_classes = dict(
        zip(feature_keys, tensor_classes + sparse_tensor_classes))
Esempio n. 4
0
    def __init__(self, input_dataset, features, num_parallel_calls):
        """Creates the dataset op that parses `input_dataset` elements.

        Args:
            input_dataset: The dataset to parse. Its element structure must
                be compatible with a string `TensorStructure` of shape
                `[None]`.
            features: Feature specification. It is passed through
                `parsing_ops._prepend_none_dimension` and then split into
                raw sparse and dense parameters. Presumably a dict mapping
                feature keys to `VarLenFeature`, `SparseFeature`,
                `FixedLenFeature` or `FixedLenSequenceFeature` values --
                TODO confirm against callers.
            num_parallel_calls: Stored on the instance and forwarded
                unchanged to the `experimental_parse_example_dataset` op.

        Raises:
            TypeError: If the element structure of `input_dataset` is not
                compatible with a vector of strings.
        """
        self._input_dataset = input_dataset
        element_structure = input_dataset._element_structure  # pylint: disable=protected-access
        required_structure = structure.TensorStructure(dtypes.string, [None])
        if not element_structure.is_compatible_with(required_structure):
            raise TypeError(
                "Input dataset should be a dataset of vectors of strings")
        self._num_parallel_calls = num_parallel_calls

        # pylint: disable=protected-access
        self._features = parsing_ops._prepend_none_dimension(features)
        feature_types = [
            parsing_ops.VarLenFeature,
            parsing_ops.SparseFeature,
            parsing_ops.FixedLenFeature,
            parsing_ops.FixedLenSequenceFeature,
        ]
        # sparse_keys and dense_keys come back sorted here.
        raw_params = parsing_ops._features_to_raw_params(
            self._features, feature_types)
        (sparse_keys, sparse_types, dense_keys, dense_types, dense_defaults,
         dense_shapes) = raw_params
        # TODO(b/112859642): Pass sparse_index and sparse_values for SparseFeature.
        processed_params = parsing_ops._process_raw_parameters(
            None, dense_defaults, sparse_keys, sparse_types, dense_keys,
            dense_types, dense_shapes)
        # pylint: enable=protected-access

        # The first returned item (names) is not needed; the shape objects are
        # used only to build the output signature below.
        (_, self._dense_defaults, self._sparse_keys, self._sparse_types,
         self._dense_keys, self._dense_shapes,
         dense_shape_as_shape) = processed_params
        # Dense types are taken from the raw parameters, since the processing
        # step does not return them.
        self._dense_types = dense_types

        # Output shapes are the input element shape followed by the
        # per-feature shape (dense) or a single `None` dimension (sparse).
        input_shape = dataset_ops.get_legacy_output_shapes(
            self._input_dataset)
        dense_output_shapes = [
            input_shape.concatenate(dense_shape)
            for dense_shape in dense_shape_as_shape
        ]
        sparse_output_shapes = [
            input_shape.concatenate([None]) for _ in self._sparse_keys
        ]

        # Dense features are listed first, then sparse ones; all three legacy
        # dictionaries share this key order.
        output_keys = self._dense_keys + self._sparse_keys
        output_shapes = dict(
            zip(output_keys, dense_output_shapes + sparse_output_shapes))
        output_types = dict(
            zip(output_keys, self._dense_types + self._sparse_types))
        dense_classes = [ops.Tensor] * len(self._dense_defaults)
        sparse_classes = [sparse_tensor.SparseTensor] * len(self._sparse_keys)
        output_classes = dict(
            zip(output_keys, dense_classes + sparse_classes))
        self._structure = structure.convert_legacy_structure(
            output_types, output_shapes, output_classes)

        # `flat_structure(self)` reads the structure assigned just above, so
        # the op is created only after `self._structure` is in place.
        parse_op = (
            gen_experimental_dataset_ops.experimental_parse_example_dataset)
        variant_tensor = parse_op(
            self._input_dataset._variant_tensor,  # pylint: disable=protected-access
            self._num_parallel_calls,
            self._dense_defaults,
            self._sparse_keys,
            self._dense_keys,
            self._sparse_types,
            self._dense_shapes,
            **dataset_ops.flat_structure(self))
        super(_ParseExampleDataset, self).__init__(input_dataset,
                                                   variant_tensor)