def __init__(self, filenames, compression_type=None, buffer_size=None): """Creates a `TextLineDataset`. Args: filenames: A `tf.string` tensor containing one or more filenames. compression_type: (Optional.) A `tf.string` scalar evaluating to one of `""` (no compression), `"ZLIB"`, or `"GZIP"`. buffer_size: (Optional.) A `tf.int64` scalar denoting the number of bytes to buffer. A value of 0 results in the default buffering values chosen based on the compression type. """ self._filenames = filenames self._compression_type = convert.optional_param_to_tensor( "compression_type", compression_type, argument_default="", argument_dtype=dtypes.string) self._buffer_size = convert.optional_param_to_tensor( "buffer_size", buffer_size, argument_default=_DEFAULT_READER_BUFFER_SIZE_BYTES) variant_tensor = gen_dataset_ops.text_line_dataset(self._filenames, self._compression_type, self._buffer_size) super(_TextLineDataset, self).__init__(variant_tensor)
def __init__(self, filenames, compression_type=None, buffer_size=None, name=None): """Creates a `TFRecordDataset`. Args: filenames: A `tf.string` tensor containing one or more filenames. compression_type: (Optional.) A `tf.string` scalar evaluating to one of `""` (no compression), `"ZLIB"`, or `"GZIP"`. buffer_size: (Optional.) A `tf.int64` scalar representing the number of bytes in the read buffer. 0 means no buffering. name: (Optional.) A name for the tf.data operation. """ self._filenames = filenames self._compression_type = convert.optional_param_to_tensor( "compression_type", compression_type, argument_default="", argument_dtype=dtypes.string) self._buffer_size = convert.optional_param_to_tensor( "buffer_size", buffer_size, argument_default=_DEFAULT_READER_BUFFER_SIZE_BYTES) self._name = name variant_tensor = gen_dataset_ops.tf_record_dataset( self._filenames, self._compression_type, self._buffer_size, metadata=self._metadata.SerializeToString()) super(_TFRecordDataset, self).__init__(variant_tensor)
def __init__(self, filenames, record_bytes, header_bytes=None, footer_bytes=None, buffer_size=None): """Creates a `FixedLengthRecordDataset`. Args: filenames: A `tf.string` tensor containing one or more filenames. record_bytes: A `tf.int64` scalar representing the number of bytes in each record. header_bytes: (Optional.) A `tf.int64` scalar representing the number of bytes to skip at the start of a file. footer_bytes: (Optional.) A `tf.int64` scalar representing the number of bytes to ignore at the end of a file. buffer_size: (Optional.) A `tf.int64` scalar representing the number of bytes to buffer when reading. """ super(FixedLengthRecordDataset, self).__init__() self._filenames = ops.convert_to_tensor( filenames, dtype=dtypes.string, name="filenames") self._record_bytes = ops.convert_to_tensor( record_bytes, dtype=dtypes.int64, name="record_bytes") self._header_bytes = convert.optional_param_to_tensor( "header_bytes", header_bytes) self._footer_bytes = convert.optional_param_to_tensor( "footer_bytes", footer_bytes) self._buffer_size = convert.optional_param_to_tensor( "buffer_size", buffer_size, _DEFAULT_READER_BUFFER_SIZE_BYTES)
def __init__(self, filenames, min_buffer_size=None, max_buffer_size=None, buffer_size=None): """Creates a `RiegeliDataset`. Args: filenames: A `tf.string` tensor containing one or more filenames. min_buffer_size: A `tf.int64` scalar which tunes the minimal buffer size, which determines how much data at a time is typically read from the file. The actual buffer size changes between min_buffer_size and max_buffer_size depending on the access pattern. Default: 4K. max_buffer_size: A `tf.int64` scalar which tunes the maximal buffer size, which determines how much data at a time is typically read from the file. The actual buffer size changes between min_buffer_size and max_buffer_size depending on the access pattern. Default: 64K. buffer_size: If not None, a shortcut for setting min_buffer_size and max_buffer_size to the same value. """ if buffer_size is not None: min_buffer_size = buffer_size max_buffer_size = buffer_size self._filenames = tf.convert_to_tensor(filenames, name='filenames') self._min_buffer_size = convert.optional_param_to_tensor( 'min_buffer_size', min_buffer_size, argument_default=_DEFAULT_MIN_BUFFER_SIZE) self._max_buffer_size = convert.optional_param_to_tensor( 'max_buffer_size', max_buffer_size, argument_default=_DEFAULT_MAX_BUFFER_SIZE) variant_tensor = gen_riegeli_dataset_ops.riegeli_dataset( self._filenames, self._min_buffer_size, self._max_buffer_size) super(RiegeliDataset, self).__init__(variant_tensor)
def __init__(self, filenames, compression_type=None, buffer_size=None): """Creates a `TFRecordDataset`. Args: filenames: A `tf.string` tensor containing one or more filenames. compression_type: (Optional.) A `tf.string` scalar evaluating to one of `""` (no compression), `"ZLIB"`, or `"GZIP"`. buffer_size: (Optional.) A `tf.int64` scalar representing the number of bytes in the read buffer. 0 means no buffering. """ # Force the type to string even if filenames is an empty list. self._filenames = ops.convert_to_tensor( filenames, dtypes.string, name="filenames") self._compression_type = convert.optional_param_to_tensor( "compression_type", compression_type, argument_default="", argument_dtype=dtypes.string) self._buffer_size = convert.optional_param_to_tensor( "buffer_size", buffer_size, argument_default=_DEFAULT_READER_BUFFER_SIZE_BYTES) variant_tensor = gen_dataset_ops.tf_record_dataset( self._filenames, self._compression_type, self._buffer_size) super(_TFRecordDataset, self).__init__(variant_tensor)
def __init__(self, input_dataset, map_func, cycle_length, block_length, sloppy, buffer_output_elements, prefetch_input_elements): """See `tf.data.experimental.parallel_interleave()` for details.""" self._input_dataset = input_dataset self._map_func = dataset_ops.StructuredFunctionWrapper( map_func, self._transformation_name(), dataset=input_dataset) if not isinstance(self._map_func.output_structure, dataset_ops.DatasetSpec): raise TypeError("`map_func` must return a `Dataset` object.") self._element_spec = self._map_func.output_structure._element_spec # pylint: disable=protected-access self._cycle_length = ops.convert_to_tensor( cycle_length, dtype=dtypes.int64, name="cycle_length") self._block_length = ops.convert_to_tensor( block_length, dtype=dtypes.int64, name="block_length") self._sloppy = ops.convert_to_tensor( sloppy, dtype=dtypes.bool, name="sloppy") self._buffer_output_elements = convert.optional_param_to_tensor( "buffer_output_elements", buffer_output_elements, argument_default=2 * block_length) self._prefetch_input_elements = convert.optional_param_to_tensor( "prefetch_input_elements", prefetch_input_elements, argument_default=2 * cycle_length) variant_tensor = ged_ops.parallel_interleave_dataset( self._input_dataset._variant_tensor, # pylint: disable=protected-access self._map_func.function.captured_inputs, self._cycle_length, self._block_length, self._sloppy, self._buffer_output_elements, self._prefetch_input_elements, f=self._map_func.function, **self._flat_structure) super(ParallelInterleaveDataset, self).__init__(input_dataset, variant_tensor)
def __init__(self, filenames, data_format_type=None, compression_type=None, block_count=None, block_index=None): """Creates a `OmniFileDataset`. Args: filenames: A `tf.string` tensor containing one or more filenames. data_format_type: (Optional.) A scalar containing either (i) 1(orc). compression_type: (Optional.) A scalar containing either (i) the empty string (no compression), (ii) "snappy", or (iii) "lz4", default snappy. block_count: (Optional.) A scalar containing the number of split file. block_index: (Optional.) A scalar containing the index of split file. """ super(OmniFileDataset, self).__init__() self._filenames = ops.convert_to_tensor( filenames, dtype=dtypes.string, name="filenames") self._data_format_type = convert.optional_param_to_tensor( "data_format_type", data_format_type, 1) self._compression_type = convert.optional_param_to_tensor( "compression_type", compression_type, argument_default="snappy", argument_dtype=dtypes.string) self._block_count = convert.optional_param_to_tensor( "block_count", block_count, 1) self._block_index = convert.optional_param_to_tensor( "block_index", block_index, 0)
def __init__(self, input_dataset, map_func, cycle_length, block_length,
             sloppy, buffer_output_elements, prefetch_input_elements,
             name=None):
  """See `tf.data.experimental.parallel_interleave()` for details."""
  self._input_dataset = input_dataset
  self._map_func = structured_function.StructuredFunctionWrapper(
      map_func, self._transformation_name(), dataset=input_dataset)
  if not isinstance(self._map_func.output_structure,
                    dataset_ops.DatasetSpec):
    raise TypeError(
        "The `map_func` argument must return a `Dataset` object. Got "
        f"{_get_type(self._map_func.output_structure)!r}.")
  self._element_spec = self._map_func.output_structure._element_spec  # pylint: disable=protected-access
  self._cycle_length = ops.convert_to_tensor(
      cycle_length, dtype=dtypes.int64, name="cycle_length")
  self._block_length = ops.convert_to_tensor(
      block_length, dtype=dtypes.int64, name="block_length")
  self._buffer_output_elements = convert.optional_param_to_tensor(
      "buffer_output_elements",
      buffer_output_elements,
      argument_default=2 * block_length)
  self._prefetch_input_elements = convert.optional_param_to_tensor(
      "prefetch_input_elements",
      prefetch_input_elements,
      argument_default=2 * cycle_length)
  if sloppy is None:
    self._deterministic = "default"
  elif sloppy:
    self._deterministic = "false"
  else:
    self._deterministic = "true"
  self._metadata = dataset_metadata_pb2.Metadata()
  if name:
    self._metadata.name = dataset_ops._validate_and_encode(name)
  kwargs = self._flat_structure
  if name or compat.forward_compatible(2021, 9, 30):
    kwargs["metadata"] = self._metadata.SerializeToString()
  variant_tensor = ged_ops.legacy_parallel_interleave_dataset_v2(
      self._input_dataset._variant_tensor,  # pylint: disable=protected-access
      self._map_func.function.captured_inputs,
      self._cycle_length,
      self._block_length,
      self._buffer_output_elements,
      self._prefetch_input_elements,
      f=self._map_func.function,
      deterministic=self._deterministic,
      **kwargs)
  super(ParallelInterleaveDataset, self).__init__(input_dataset,
                                                  variant_tensor)
def __init__(self, input_dataset, map_func, cycle_length, block_length, sloppy, buffer_output_elements, prefetch_input_elements): """See `tf.contrib.data.parallel_interleave()` for details.""" super(ParallelInterleaveDataset, self).__init__() self._input_dataset = input_dataset @function.Defun(*nest.flatten( sparse.as_dense_types(input_dataset.output_types, input_dataset.output_classes))) def tf_map_func(*args): """A wrapper for Defun that facilitates shape inference.""" # Pass in shape information from the input_dataset. dense_shapes = sparse.as_dense_shapes(input_dataset.output_shapes, input_dataset.output_classes) for arg, shape in zip(args, nest.flatten(dense_shapes)): arg.set_shape(shape) nested_args = nest.pack_sequence_as(input_dataset.output_types, args) nested_args = sparse.deserialize_sparse_tensors( nested_args, input_dataset.output_types, input_dataset.output_shapes, input_dataset.output_classes) if dataset_ops._should_unpack_args(nested_args): # pylint: disable=protected-access dataset = map_func(*nested_args) else: dataset = map_func(nested_args) if not isinstance(dataset, dataset_ops.Dataset): raise TypeError("`map_func` must return a `Dataset` object.") self._output_classes = dataset.output_classes self._output_types = dataset.output_types self._output_shapes = dataset.output_shapes return dataset._as_variant_tensor() # pylint: disable=protected-access self._map_func = tf_map_func self._map_func.add_to_graph(ops.get_default_graph()) self._cycle_length = ops.convert_to_tensor(cycle_length, dtype=dtypes.int64, name="cycle_length") self._block_length = ops.convert_to_tensor(block_length, dtype=dtypes.int64, name="block_length") self._sloppy = ops.convert_to_tensor(sloppy, dtype=dtypes.bool, name="sloppy") self._buffer_output_elements = convert.optional_param_to_tensor( "buffer_output_elements", buffer_output_elements, argument_default=2 * block_length) self._prefetch_input_elements = convert.optional_param_to_tensor( "prefetch_input_elements", prefetch_input_elements, argument_default=2 * cycle_length)
def __init__(self, input_dataset, map_func, cycle_length, block_length, sloppy, buffer_output_elements, prefetch_input_elements): """See `tf.contrib.data.parallel_interleave()` for details.""" super(ParallelInterleaveDataset, self).__init__() self._input_dataset = input_dataset @function.Defun(*nest.flatten( sparse.as_dense_types(input_dataset.output_types, input_dataset.output_classes))) def tf_map_func(*args): """A wrapper for Defun that facilitates shape inference.""" # Pass in shape information from the input_dataset. dense_shapes = sparse.as_dense_shapes(input_dataset.output_shapes, input_dataset.output_classes) for arg, shape in zip(args, nest.flatten(dense_shapes)): arg.set_shape(shape) nested_args = nest.pack_sequence_as(input_dataset.output_types, args) nested_args = sparse.deserialize_sparse_tensors( nested_args, input_dataset.output_types, input_dataset.output_shapes, input_dataset.output_classes) if dataset_ops._should_unpack_args(nested_args): # pylint: disable=protected-access dataset = map_func(*nested_args) else: dataset = map_func(nested_args) if not isinstance(dataset, dataset_ops.Dataset): raise TypeError("`map_func` must return a `Dataset` object.") self._output_classes = dataset.output_classes self._output_types = dataset.output_types self._output_shapes = dataset.output_shapes return dataset._as_variant_tensor() # pylint: disable=protected-access self._map_func = tf_map_func self._map_func.add_to_graph(ops.get_default_graph()) self._cycle_length = ops.convert_to_tensor( cycle_length, dtype=dtypes.int64, name="cycle_length") self._block_length = ops.convert_to_tensor( block_length, dtype=dtypes.int64, name="block_length") self._sloppy = ops.convert_to_tensor( sloppy, dtype=dtypes.bool, name="sloppy") self._buffer_output_elements = convert.optional_param_to_tensor( "buffer_output_elements", buffer_output_elements, argument_default=2 * block_length) self._prefetch_input_elements = convert.optional_param_to_tensor( "prefetch_input_elements", prefetch_input_elements, argument_default=2 * cycle_length)
def __init__(self, filenames, record_bytes, header_bytes=None,
             footer_bytes=None, buffer_size=None, compression_type=None,
             name=None):
  """Creates a `FixedLengthRecordDataset`.

  Args:
    filenames: A `tf.string` tensor containing one or more filenames.
    record_bytes: A `tf.int64` scalar representing the number of bytes in
      each record.
    header_bytes: (Optional.) A `tf.int64` scalar representing the number of
      bytes to skip at the start of a file.
    footer_bytes: (Optional.) A `tf.int64` scalar representing the number of
      bytes to ignore at the end of a file.
    buffer_size: (Optional.) A `tf.int64` scalar representing the number of
      bytes to buffer when reading.
    compression_type: (Optional.) A `tf.string` scalar evaluating to one of
      `""` (no compression), `"ZLIB"`, or `"GZIP"`.
    name: (Optional.) A name for the tf.data operation.
  """
  self._filenames = filenames
  self._record_bytes = ops.convert_to_tensor(
      record_bytes, dtype=dtypes.int64, name="record_bytes")
  self._header_bytes = convert.optional_param_to_tensor(
      "header_bytes", header_bytes)
  self._footer_bytes = convert.optional_param_to_tensor(
      "footer_bytes", footer_bytes)
  self._buffer_size = convert.optional_param_to_tensor(
      "buffer_size", buffer_size, _DEFAULT_READER_BUFFER_SIZE_BYTES)
  self._compression_type = convert.optional_param_to_tensor(
      "compression_type",
      compression_type,
      argument_default="",
      argument_dtype=dtypes.string)
  self._metadata = dataset_metadata_pb2.Metadata()
  if name:
    self._metadata.name = dataset_ops._validate_and_encode(name)
  variant_tensor = gen_dataset_ops.fixed_length_record_dataset_v2(
      self._filenames, self._header_bytes, self._record_bytes,
      self._footer_bytes, self._buffer_size, self._compression_type,
      metadata=self._metadata.SerializeToString())
  super(_FixedLengthRecordDataset, self).__init__(variant_tensor)
def __init__(self, filenames, record_bytes, header_bytes=None, footer_bytes=None, buffer_size=None, compression_type=None): """Creates a `FixedLengthRecordDataset`. Args: filenames: A `tf.string` tensor containing one or more filenames. record_bytes: A `tf.int64` scalar representing the number of bytes in each record. header_bytes: (Optional.) A `tf.int64` scalar representing the number of bytes to skip at the start of a file. footer_bytes: (Optional.) A `tf.int64` scalar representing the number of bytes to ignore at the end of a file. buffer_size: (Optional.) A `tf.int64` scalar representing the number of bytes to buffer when reading. compression_type: (Optional.) A `tf.string` scalar evaluating to one of `""` (no compression), `"ZLIB"`, or `"GZIP"`. """ self._filenames = ops.convert_to_tensor(filenames, dtype=dtypes.string, name="filenames") self._record_bytes = ops.convert_to_tensor(record_bytes, dtype=dtypes.int64, name="record_bytes") self._header_bytes = convert.optional_param_to_tensor( "header_bytes", header_bytes) self._footer_bytes = convert.optional_param_to_tensor( "footer_bytes", footer_bytes) self._buffer_size = convert.optional_param_to_tensor( "buffer_size", buffer_size, _DEFAULT_READER_BUFFER_SIZE_BYTES) self._compression_type = convert.optional_param_to_tensor( "compression_type", compression_type, argument_default="", argument_dtype=dtypes.string) if (self._compression_type is not None or compat.forward_compatible(2018, 11, 30)): variant_tensor = gen_dataset_ops.fixed_length_record_dataset_v2( self._filenames, self._header_bytes, self._record_bytes, self._footer_bytes, self._buffer_size, self._compression_type) else: variant_tensor = gen_dataset_ops.fixed_length_record_dataset( self._filenames, self._header_bytes, self._record_bytes, self._footer_bytes, self._buffer_size) super(FixedLengthRecordDatasetV2, self).__init__(variant_tensor)
def __init__(self, input_dataset, map_func, cycle_length, block_length, sloppy, buffer_output_elements, prefetch_input_elements): """See `tf.contrib.data.parallel_interleave()` for details.""" super(ParallelInterleaveDataset, self).__init__(input_dataset, map_func, cycle_length, block_length) self._sloppy = ops.convert_to_tensor( sloppy, dtype=dtypes.bool, name="sloppy") self._buffer_output_elements = convert.optional_param_to_tensor( "buffer_output_elements", buffer_output_elements, argument_default=2 * block_length) self._prefetch_input_elements = convert.optional_param_to_tensor( "prefetch_input_elements", prefetch_input_elements, argument_default=2 * cycle_length)
def __init__(self, filenames, record_bytes, header_bytes=None, footer_bytes=None, buffer_size=None, compression_type=None): """Creates a `FixedLengthRecordDataset`. Args: filenames: A `tf.string` tensor containing one or more filenames. record_bytes: A `tf.int64` scalar representing the number of bytes in each record. header_bytes: (Optional.) A `tf.int64` scalar representing the number of bytes to skip at the start of a file. footer_bytes: (Optional.) A `tf.int64` scalar representing the number of bytes to ignore at the end of a file. buffer_size: (Optional.) A `tf.int64` scalar representing the number of bytes to buffer when reading. compression_type: (Optional.) A `tf.string` scalar evaluating to one of `""` (no compression), `"ZLIB"`, or `"GZIP"`. """ self._filenames = ops.convert_to_tensor( filenames, dtype=dtypes.string, name="filenames") self._record_bytes = ops.convert_to_tensor( record_bytes, dtype=dtypes.int64, name="record_bytes") self._header_bytes = convert.optional_param_to_tensor( "header_bytes", header_bytes) self._footer_bytes = convert.optional_param_to_tensor( "footer_bytes", footer_bytes) self._buffer_size = convert.optional_param_to_tensor( "buffer_size", buffer_size, _DEFAULT_READER_BUFFER_SIZE_BYTES) self._compression_type = convert.optional_param_to_tensor( "compression_type", compression_type, argument_default="", argument_dtype=dtypes.string) if (self._compression_type is not None or compat.forward_compatible(2018, 11, 30)): variant_tensor = gen_dataset_ops.fixed_length_record_dataset_v2( self._filenames, self._header_bytes, self._record_bytes, self._footer_bytes, self._buffer_size, self._compression_type) else: variant_tensor = gen_dataset_ops.fixed_length_record_dataset( self._filenames, self._header_bytes, self._record_bytes, self._footer_bytes, self._buffer_size) super(FixedLengthRecordDatasetV2, self).__init__(variant_tensor)
def __init__(self, filename, compression_type=None):
  self._filename = ops.convert_to_tensor(
      filename, dtypes.string, name="filename")
  self._compression_type = convert.optional_param_to_tensor(
      "compression_type",
      compression_type,
      argument_default="",
      argument_dtype=dtypes.string)
def __init__(self, filename, compression_type=None):
  self._filename = ops.convert_to_tensor(
      filename, dtypes.string, name="filename")
  self._compression_type = convert.optional_param_to_tensor(
      "compression_type",
      compression_type,
      argument_default="",
      argument_dtype=dtypes.string)
def __init__(self, filenames, compression_type=None, buffer_size=None): """Creates a `TextLineDataset`. Args: filenames: A `tf.string` tensor containing one or more filenames. compression_type: (Optional.) A `tf.string` scalar evaluating to one of `""` (no compression), `"ZLIB"`, or `"GZIP"`. buffer_size: (Optional.) A `tf.int64` scalar denoting the number of bytes to buffer. A value of 0 results in the default buffering values chosen based on the compression type. """ super(TextLineDataset, self).__init__() self._filenames = ops.convert_to_tensor( filenames, dtype=dtypes.string, name="filenames") self._compression_type = convert.optional_param_to_tensor( "compression_type", compression_type, argument_default="", argument_dtype=dtypes.string) self._buffer_size = convert.optional_param_to_tensor( "buffer_size", buffer_size, _DEFAULT_READER_BUFFER_SIZE_BYTES)
def __init__(self, filenames, buffer_size=None): """Creates a `RiegeliDataset`. Args: filenames: A `tf.string` tensor containing one or more filenames. buffer_size: A `tf.int64` scalar which tunes how much data is buffered after reading from the file. Default: 64K. """ self._filenames = tf.convert_to_tensor(filenames, name='filenames') self._buffer_size = convert.optional_param_to_tensor( 'buffer_size', buffer_size, argument_default=_DEFAULT_BUFFER_SIZE) variant_tensor = gen_riegeli_dataset_ops.riegeli_dataset( self._filenames, self._buffer_size) super(RiegeliDataset, self).__init__(variant_tensor)
def __init__(self, filenames, compression_type=None, buffer_size=None, name=None): """Creates a `TextLineDataset`. Args: filenames: A `tf.string` tensor containing one or more filenames. compression_type: (Optional.) A `tf.string` scalar evaluating to one of `""` (no compression), `"ZLIB"`, or `"GZIP"`. buffer_size: (Optional.) A `tf.int64` scalar denoting the number of bytes to buffer. A value of 0 results in the default buffering values chosen based on the compression type. name: (Optional.) A name for the tf.data operation. """ self._filenames = filenames self._compression_type = convert.optional_param_to_tensor( "compression_type", compression_type, argument_default="", argument_dtype=dtypes.string) self._buffer_size = convert.optional_param_to_tensor( "buffer_size", buffer_size, argument_default=_DEFAULT_READER_BUFFER_SIZE_BYTES) self._metadata = dataset_metadata_pb2.Metadata() if name: self._metadata.name = dataset_ops._validate_and_encode(name) kwargs = {} if name or compat.forward_compatible(2021, 9, 30): kwargs["metadata"] = self._metadata.SerializeToString() variant_tensor = gen_dataset_ops.text_line_dataset( self._filenames, self._compression_type, self._buffer_size, **kwargs) super(_TextLineDataset, self).__init__(variant_tensor)
def __init__(self, input_dataset, map_func, cycle_length, block_length, sloppy, buffer_output_elements, prefetch_input_elements): """See `tf.data.experimental.parallel_interleave()` for details.""" self._input_dataset = input_dataset self._map_func = dataset_ops.StructuredFunctionWrapper( map_func, self._transformation_name(), dataset=input_dataset) if not isinstance(self._map_func.output_structure, dataset_ops.DatasetStructure): raise TypeError("`map_func` must return a `Dataset` object.") self._structure = self._map_func.output_structure._element_structure # pylint: disable=protected-access self._cycle_length = ops.convert_to_tensor( cycle_length, dtype=dtypes.int64, name="cycle_length") self._block_length = ops.convert_to_tensor( block_length, dtype=dtypes.int64, name="block_length") self._sloppy = ops.convert_to_tensor( sloppy, dtype=dtypes.bool, name="sloppy") self._buffer_output_elements = convert.optional_param_to_tensor( "buffer_output_elements", buffer_output_elements, argument_default=2 * block_length) self._prefetch_input_elements = convert.optional_param_to_tensor( "prefetch_input_elements", prefetch_input_elements, argument_default=2 * cycle_length) variant_tensor = ged_ops.experimental_parallel_interleave_dataset( self._input_dataset._variant_tensor, # pylint: disable=protected-access self._map_func.function.captured_inputs, self._cycle_length, self._block_length, self._sloppy, self._buffer_output_elements, self._prefetch_input_elements, f=self._map_func.function, **dataset_ops.flat_structure(self)) super(ParallelInterleaveDataset, self).__init__(input_dataset, variant_tensor)
def __init__(self, filename, compression_type=None): """Initializes a `TFRecordWriter`. Args: filename: a string path indicating where to write the TFRecord data. compression_type: (Optional.) a string indicating what type of compression to use when writing the file. See `tf.io.TFRecordCompressionType` for what types of compression are available. Defaults to `None`. """ self._filename = ops.convert_to_tensor( filename, dtypes.string, name="filename") self._compression_type = convert.optional_param_to_tensor( "compression_type", compression_type, argument_default="", argument_dtype=dtypes.string)
def __init__(
    self,
    input_rspecifier,
    target_rspecifier=None,
    buffer_size=None,  # buffer_size currently not useful
    delta_order=None,
    norm_means=False,
    norm_vars=False,
    global_cmvn_file=None,
    left_context=None,
    right_context=None,
    num_downsample=None,
    offset=None,
    mode=None):
  self._only_input = target_rspecifier is None
  super(KaldiReaderDataset, self).__init__()
  self._input_rspecifier = ops.convert_to_tensor(
      input_rspecifier, dtype=dtypes.string, name="input_rspecifier")
  self._target_rspecifier = convert.optional_param_to_tensor(
      "target_rspecifier", target_rspecifier, "", dtypes.string)
  self._buffer_size = convert.optional_param_to_tensor(
      "buffer_size", buffer_size, _DEFAULT_READER_BUFFER_SIZE_BYTES)
  self._delta_order = convert.optional_param_to_tensor(
      "delta_order", delta_order, 0)
  self._norm_means = convert.optional_param_to_tensor(
      "norm_means", norm_means, False, dtypes.bool)
  self._norm_vars = convert.optional_param_to_tensor(
      "norm_vars", norm_vars, False, dtypes.bool)
  self._global_cmvn_file = convert.optional_param_to_tensor(
      "global_cmvn_file", global_cmvn_file, "", dtypes.string)
  self._left_context = convert.optional_param_to_tensor(
      "left_context", left_context, 0)
  self._right_context = convert.optional_param_to_tensor(
      "right_context", right_context, 0)
  self._num_downsample = convert.optional_param_to_tensor(
      "num_downsample", num_downsample, 1)
  self._offset = convert.optional_param_to_tensor("offset", offset, 0)
  self._mode = convert.optional_param_to_tensor(
      "mode", mode, "utt", dtypes.string)
def testIntegerDefault(self): resp = convert.optional_param_to_tensor("foo", None) with self.test_session() as sess: self.assertEqual(0, sess.run(resp))
def __init__(self,
             filenames,
             record_defaults,
             compression_type=None,
             buffer_size=None,
             header=False,
             field_delim=",",
             use_quote_delim=True,
             na_value="",
             select_cols=None):
  """Creates a `CsvDataset` by reading and decoding CSV files.

  The elements of this dataset correspond to records from the file(s).
  RFC 4180 format is expected for CSV files
  (https://tools.ietf.org/html/rfc4180).
  Note that we allow leading and trailing spaces for int or float fields.

  For example, suppose we have a file 'my_file0.csv' with four CSV columns of
  different data types:
  ```
  abcdefg,4.28E10,5.55E6,12
  hijklmn,-5.3E14,,2
  ```

  We can construct a CsvDataset from it as follows:

  ```python
  tf.compat.v1.enable_eager_execution()

  dataset = tf.data.experimental.CsvDataset(
      "my_file*.csv",
      [tf.float32,  # Required field, use dtype or empty tensor
       tf.constant([0.0], dtype=tf.float32),  # Optional field, default to 0.0
       tf.int32,  # Required field, use dtype or empty tensor
      ],
      select_cols=[1, 2, 3]  # Only parse last three columns
  )
  ```

  The expected output of its iterations is:

  ```python
  for element in dataset:
    print(element)

  >> (4.28e10, 5.55e6, 12)
  >> (-5.3e14, 0.0, 2)
  ```

  Args:
    filenames: A `tf.string` tensor containing one or more filenames.
    record_defaults: A list of default values for the CSV fields. Each item in
      the list is either a valid CSV `DType` (float32, float64, int32, int64,
      string), or a `Tensor` object with one of the above types. One per
      column of CSV data, with either a scalar `Tensor` default value for the
      column if it is optional, or `DType` or empty `Tensor` if required. If
      both this and `select_columns` are specified, these must have the same
      lengths, and `column_defaults` is assumed to be sorted in order of
      increasing column index.
    compression_type: (Optional.) A `tf.string` scalar evaluating to one of
      `""` (no compression), `"ZLIB"`, or `"GZIP"`. Defaults to no
      compression.
    buffer_size: (Optional.) A `tf.int64` scalar denoting the number of bytes
      to buffer while reading files. Defaults to 4MB.
    header: (Optional.) A `tf.bool` scalar indicating whether the CSV file(s)
      have header line(s) that should be skipped when parsing. Defaults to
      `False`.
    field_delim: (Optional.) A `tf.string` scalar containing the delimiter
      character that separates fields in a record. Defaults to `","`.
    use_quote_delim: (Optional.) A `tf.bool` scalar. If `False`, treats
      double quotation marks as regular characters inside of string fields
      (ignoring RFC 4180, Section 2, Bullet 5). Defaults to `True`.
    na_value: (Optional.) A `tf.string` scalar indicating a value that will
      be treated as NA/NaN.
    select_cols: (Optional.) A sorted list of column indices to select from
      the input data. If specified, only this subset of columns will be
      parsed. Defaults to parsing all columns.
  """
  self._filenames = ops.convert_to_tensor(
      filenames, dtype=dtypes.string, name="filenames")
  self._compression_type = convert.optional_param_to_tensor(
      "compression_type",
      compression_type,
      argument_default="",
      argument_dtype=dtypes.string)
  record_defaults = [
      constant_op.constant([], dtype=x)
      if x in _ACCEPTABLE_CSV_TYPES else x
      for x in record_defaults
  ]
  self._record_defaults = ops.convert_n_to_tensor(
      record_defaults, name="record_defaults")
  self._buffer_size = convert.optional_param_to_tensor(
      "buffer_size", buffer_size, _DEFAULT_READER_BUFFER_SIZE_BYTES)
  self._header = ops.convert_to_tensor(
      header, dtype=dtypes.bool, name="header")
  self._field_delim = ops.convert_to_tensor(
      field_delim, dtype=dtypes.string, name="field_delim")
  self._use_quote_delim = ops.convert_to_tensor(
      use_quote_delim, dtype=dtypes.bool, name="use_quote_delim")
  self._na_value = ops.convert_to_tensor(
      na_value, dtype=dtypes.string, name="na_value")
  self._select_cols = convert.optional_param_to_tensor(
      "select_cols",
      select_cols,
      argument_default=[],
      argument_dtype=dtypes.int64,
  )
  self._structure = structure.NestedStructure(
      tuple(structure.TensorStructure(d.dtype, [])
            for d in self._record_defaults))
  variant_tensor = gen_experimental_dataset_ops.experimental_csv_dataset(
      filenames=self._filenames,
      record_defaults=self._record_defaults,
      buffer_size=self._buffer_size,
      header=self._header,
      output_shapes=self._structure._flat_shapes,  # pylint: disable=protected-access
      field_delim=self._field_delim,
      use_quote_delim=self._use_quote_delim,
      na_value=self._na_value,
      select_cols=self._select_cols,
      compression_type=self._compression_type)
  super(CsvDatasetV2, self).__init__(variant_tensor)
def testStringDefault(self): resp = convert.optional_param_to_tensor("bar", None, "default", dtypes.string) with self.cached_session() as sess: self.assertEqual(compat.as_bytes("default"), sess.run(resp))
def testIntegerDefault(self): resp = convert.optional_param_to_tensor("foo", None) self.assertEqual(0, self.evaluate(resp))
def testString(self): resp = convert.optional_param_to_tensor("bar", "value", "default", dtypes.string) self.assertEqual(compat.as_bytes("value"), self.evaluate(resp))
def testInteger(self): resp = convert.optional_param_to_tensor("foo", 3) self.assertEqual(3, self.evaluate(resp))
def testIntegerDefault(self): resp = convert.optional_param_to_tensor("foo", None) self.assertEqual(0, self.evaluate(resp))
def testString(self): resp = convert.optional_param_to_tensor("bar", "value", "default", dtypes.string) with self.test_session() as sess: self.assertEqual(compat.as_bytes("value"), sess.run(resp))
def testIntegerDefault(self): resp = convert.optional_param_to_tensor("foo", None) with self.test_session() as sess: self.assertEqual(0, sess.run(resp))
def testStringDefault(self): resp = convert.optional_param_to_tensor("bar", None, "default", dtypes.string) with self.cached_session() as sess: self.assertEqual(compat.as_bytes("default"), sess.run(resp))
def __init__(
    self,
    matrix_rspecifier=None,
    vector_rspecifier=None,
    int_vector_rspecifier=None,
    buffer_size=None,  # buffer_size currently not useful
    delta_order=None,
    norm_means=False,
    norm_vars=False,
    global_cmvn_file=None,
    left_context=None,
    right_context=None,
    num_downsample=None,
    offset=None,
    mode=None):
  if not (matrix_rspecifier or vector_rspecifier or int_vector_rspecifier):
    raise ValueError("all supported readers are None")
  self._readers_idx = [
      1 if r else 0
      for r in [matrix_rspecifier, vector_rspecifier, int_vector_rspecifier]
  ]
  super(KaldiReaderDataset, self).__init__()
  self._matrix_rspecifier = convert.optional_param_to_tensor(
      "matrix_rspecifier", matrix_rspecifier, "", dtypes.string)
  self._vector_rspecifier = convert.optional_param_to_tensor(
      "vector_rspecifier", vector_rspecifier, "", dtypes.string)
  self._int_vector_rspecifier = convert.optional_param_to_tensor(
      "int_vector_rspecifier", int_vector_rspecifier, "", dtypes.string)
  self._buffer_size = convert.optional_param_to_tensor(
      "buffer_size", buffer_size, _DEFAULT_READER_BUFFER_SIZE_BYTES)
  self._delta_order = convert.optional_param_to_tensor(
      "delta_order", delta_order, 0)
  self._norm_means = convert.optional_param_to_tensor(
      "norm_means", norm_means, False, dtypes.bool)
  self._norm_vars = convert.optional_param_to_tensor(
      "norm_vars", norm_vars, False, dtypes.bool)
  self._global_cmvn_file = convert.optional_param_to_tensor(
      "global_cmvn_file", global_cmvn_file, "", dtypes.string)
  self._left_context = convert.optional_param_to_tensor(
      "left_context", left_context, 0)
  self._right_context = convert.optional_param_to_tensor(
      "right_context", right_context, 0)
  self._num_downsample = convert.optional_param_to_tensor(
      "num_downsample", num_downsample, 1)
  self._offset = convert.optional_param_to_tensor("offset", offset, 0)
  self._mode = convert.optional_param_to_tensor(
      "mode", mode, "utt", dtypes.string)
def testInteger(self): resp = convert.optional_param_to_tensor("foo", 3) with self.cached_session() as sess: self.assertEqual(3, sess.run(resp))
def testInteger(self): resp = convert.optional_param_to_tensor("foo", 3) self.assertEqual(3, self.evaluate(resp))
def testInteger(self): resp = convert.optional_param_to_tensor("foo", 3) with self.cached_session() as sess: self.assertEqual(3, sess.run(resp))
def testString(self): resp = convert.optional_param_to_tensor("bar", "value", "default", dtypes.string) with self.test_session() as sess: self.assertEqual(compat.as_bytes("value"), sess.run(resp))
def __init__(self,
             filenames,
             record_defaults,
             buffer_size=None,
             header=False,
             field_delim=",",
             use_quote_delim=True,
             na_value="",
             select_cols=None):
  """Creates a `CsvDataset` by reading and decoding CSV files.

  The elements of this dataset correspond to records from the file(s).
  RFC 4180 format is expected for CSV files
  (https://tools.ietf.org/html/rfc4180).
  Note that we allow leading and trailing spaces for int or float fields.

  For example, suppose we have a file 'my_file0.csv' with four CSV columns of
  different data types:
  ```
  abcdefg,4.28E10,5.55E6,12
  hijklmn,-5.3E14,,2
  ```

  We can construct a CsvDataset from it as follows:

  ```python
  dataset = tf.contrib.data.CsvDataset(
      "my_file*.csv",
      [tf.float32,  # Required field, use dtype or empty tensor
       tf.constant([0.0], dtype=tf.float32),  # Optional field, default to 0.0
       tf.int32,  # Required field, use dtype or empty tensor
      ],
      select_cols=[1, 2, 3]  # Only parse last three columns
  )
  ```

  The expected output of its iterations is:

  ```python
  nxt = dataset.make_one_shot_iterator().get_next()
  with tf.Session() as sess:
    while True:
      try:
        print(sess.run(nxt))
      except tf.errors.OutOfRangeError:
        break

  >> (4.28e10, 5.55e6, 12)
  >> (-5.3e14, 0.0, 2)
  ```

  Args:
    filenames: A `tf.string` tensor containing one or more filenames.
    record_defaults: A list of default values for the CSV fields. Each item in
      the list is either a valid CSV `DType` (float32, float64, int32, int64,
      string), or a `Tensor` object with one of the above types. One per
      column of CSV data, with either a scalar `Tensor` default value for the
      column if it is optional, or `DType` or empty `Tensor` if required. If
      both this and `select_columns` are specified, these must have the same
      lengths, and `column_defaults` is assumed to be sorted in order of
      increasing column index.
    buffer_size: (Optional.) A `tf.int64` scalar denoting the number of bytes
      to buffer while reading files. Defaults to 4MB.
    header: (Optional.) A `tf.bool` scalar indicating whether the CSV file(s)
      have header line(s) that should be skipped when parsing. Defaults to
      `False`.
    field_delim: (Optional.) A `tf.string` scalar containing the delimiter
      character that separates fields in a record. Defaults to `","`.
    use_quote_delim: (Optional.) A `tf.bool` scalar. If `False`, treats
      double quotation marks as regular characters inside of string fields
      (ignoring RFC 4180, Section 2, Bullet 5). Defaults to `True`.
    na_value: (Optional.) A `tf.string` scalar indicating a value that will
      be treated as NA/NaN.
    select_cols: (Optional.) A sorted list of column indices to select from
      the input data. If specified, only this subset of columns will be
      parsed. Defaults to parsing all columns.
  """
  super(CsvDataset, self).__init__()
  self._filenames = ops.convert_to_tensor(
      filenames, dtype=dtypes.string, name="filenames")
  record_defaults = [
      constant_op.constant([], dtype=x)
      if x in _ACCEPTABLE_CSV_TYPES else x
      for x in record_defaults
  ]
  self._record_defaults = ops.convert_n_to_tensor(
      record_defaults, name="record_defaults")
  self._buffer_size = convert.optional_param_to_tensor(
      "buffer_size", buffer_size, _DEFAULT_READER_BUFFER_SIZE_BYTES)
  self._header = ops.convert_to_tensor(
      header, dtype=dtypes.bool, name="header")
  self._field_delim = ops.convert_to_tensor(
      field_delim, dtype=dtypes.string, name="field_delim")
  self._use_quote_delim = ops.convert_to_tensor(
      use_quote_delim, dtype=dtypes.bool, name="use_quote_delim")
  self._na_value = ops.convert_to_tensor(
      na_value, dtype=dtypes.string, name="na_value")
  self._select_cols = convert.optional_param_to_tensor(
      "select_cols",
      select_cols,
      argument_default=[],
      argument_dtype=dtypes.int64,
  )
  self._output_shapes = tuple(
      tensor_shape.scalar() for _ in range(len(record_defaults)))
  self._output_types = tuple(d.dtype for d in self._record_defaults)
  self._output_classes = tuple(
      ops.Tensor for _ in range(len(record_defaults)))
def testString(self): resp = convert.optional_param_to_tensor("bar", "value", "default", dtypes.string) self.assertEqual(compat.as_bytes("value"), self.evaluate(resp))