def decode_raw(input_bytes,
               out_type,
               little_endian=True,
               fixed_length=None,
               name=None):
  """Decode raw byte strings into numeric tensors.

  Args:
    input_bytes: Tensor whose elements are each converted to an array of
      bytes.
    out_type: `DType` of the result. Acceptable types are `half`, `float`,
      `double`, `int32`, `uint16`, `uint8`, `int16`, `int8`, `int64`.
    little_endian: Whether the data in `input_bytes` is in little-endian
      format. Data is converted into host byte order if necessary.
    fixed_length: When set, only the first `fixed_length` bytes of each
      element are converted; data is zero-padded or truncated to that length.
      Must be a multiple of the size of `out_type`, and must be given when the
      elements of `input_bytes` are of variable length.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` object storing the decoded bytes.
  """
  if fixed_length is None:
    # No explicit length requested: use the plain decoding op.
    return gen_parsing_ops.decode_raw(
        input_bytes, out_type, little_endian=little_endian, name=name)

  # An explicit length was requested: use the padded variant of the op.
  return gen_parsing_ops.decode_padded_raw(
      input_bytes,
      fixed_length=fixed_length,
      out_type=out_type,
      little_endian=little_endian,
      name=name)
def decode_raw_v1(
    input_bytes=None,
    out_type=None,
    little_endian=True,
    name=None,
    bytes=None  # pylint: disable=redefined-builtin
):
  """Decode raw byte strings into numeric tensors (v1 signature).

  Args:
    input_bytes: Tensor whose elements are each converted to an array of
      bytes.
    out_type: `DType` of the result. Acceptable types are `half`, `float`,
      `double`, `int32`, `uint16`, `uint8`, `int16`, `int8`, `int64`.
    little_endian: Whether the data in `input_bytes` is in little-endian
      format. Data is converted into host byte order if necessary.
    name: A name for the operation (optional).
    bytes: Deprecated parameter. Use `input_bytes` instead.

  Returns:
    A `Tensor` object storing the decoded bytes.

  Raises:
    ValueError: If `out_type` is not provided.
  """
  # Reconcile the current argument with its deprecated `bytes` alias.
  resolved_bytes = deprecation.deprecated_argument_lookup(
      "input_bytes", input_bytes, "bytes", bytes)

  # In the original API `out_type` was a required positional argument. It had
  # to become a keyword argument to ease the transition from the reserved name
  # `bytes` to `input_bytes`, so its presence is enforced here instead.
  if out_type is not None:
    return gen_parsing_ops.decode_raw(
        resolved_bytes, out_type, little_endian=little_endian, name=name)

  raise ValueError(
      "decode_raw_v1() missing 1 positional argument: 'out_type'")
def decode_raw(input_bytes,
               out_type,
               little_endian=True,
               fixed_length=None,
               name=None):
  r"""Convert raw bytes from input tensor into numeric tensors.

  The input tensor is interpreted as a sequence of bytes. These bytes are then
  decoded as numbers in the format specified by `out_type`.

  >>> tf.io.decode_raw(tf.constant("1"), tf.uint8)
  <tf.Tensor: shape=(1,), dtype=uint8, numpy=array([49], dtype=uint8)>
  >>> tf.io.decode_raw(tf.constant("1,2"), tf.uint8)
  <tf.Tensor: shape=(3,), dtype=uint8, numpy=array([49, 44, 50], dtype=uint8)>

  Note that the rank of the output tensor is always one more than the rank of
  the input:

  >>> tf.io.decode_raw(tf.constant(["1","2"]), tf.uint8).shape
  TensorShape([2, 1])
  >>> tf.io.decode_raw(tf.constant([["1"],["2"]]), tf.uint8).shape
  TensorShape([2, 1, 1])

  This is because each byte in the input is converted to a new value in the
  output (if the output type is `uint8` or `int8`; otherwise chunks of the
  input get converted to a new value):

  >>> tf.io.decode_raw(tf.constant("123"), tf.uint8)
  <tf.Tensor: shape=(3,), dtype=uint8, numpy=array([49, 50, 51], dtype=uint8)>
  >>> tf.io.decode_raw(tf.constant("1234"), tf.uint8)
  <tf.Tensor: shape=(4,), dtype=uint8, numpy=array([49, 50, 51, 52], ...
  >>> # chunked output
  >>> tf.io.decode_raw(tf.constant("12"), tf.uint16)
  <tf.Tensor: shape=(1,), dtype=uint16, numpy=array([12849], dtype=uint16)>
  >>> tf.io.decode_raw(tf.constant("1234"), tf.uint16)
  <tf.Tensor: shape=(2,), dtype=uint16, numpy=array([12849, 13363], ...
  >>> # int64 output
  >>> tf.io.decode_raw(tf.constant("12345678"), tf.int64)
  <tf.Tensor: ... numpy=array([4050765991979987505])>
  >>> tf.io.decode_raw(tf.constant("1234567887654321"), tf.int64)
  <tf.Tensor: ... numpy=array([4050765991979987505, 3544952156018063160])>

  The operation allows specifying endianness via the `little_endian` parameter.

  >>> tf.io.decode_raw(tf.constant("\x0a\x0b"), tf.int16)
  <tf.Tensor: shape=(1,), dtype=int16, numpy=array([2826], dtype=int16)>
  >>> hex(2826)
  '0xb0a'
  >>> tf.io.decode_raw(tf.constant("\x0a\x0b"), tf.int16, little_endian=False)
  <tf.Tensor: shape=(1,), dtype=int16, numpy=array([2571], dtype=int16)>
  >>> hex(2571)
  '0xa0b'

  If the elements of `input_bytes` are of different length, you must specify
  `fixed_length`:

  >>> tf.io.decode_raw(tf.constant([["1"],["23"]]), tf.uint8, fixed_length=4)
  <tf.Tensor: shape=(2, 1, 4), dtype=uint8, numpy=
  array([[[49,  0,  0,  0]],
         [[50, 51,  0,  0]]], dtype=uint8)>

  If the `fixed_length` value is larger than the length of the `out_type`
  dtype, multiple values are generated:

  >>> tf.io.decode_raw(tf.constant(["1212"]), tf.uint16, fixed_length=4)
  <tf.Tensor: shape=(1, 2), dtype=uint16, numpy=array([[12849, 12849]], ...

  Note: There is currently a bug in `fixed_length` that can result in data
  loss:

  >>> # truncated to length of type as it matches fixed_length
  >>> tf.io.decode_raw(tf.constant(["1212"]), tf.uint16, fixed_length=2)
  <tf.Tensor: shape=(1, 1), dtype=uint16, numpy=array([[12849]], dtype=uint16)>
  >>> # ignores the second component
  >>> tf.io.decode_raw(tf.constant(["12","34"]), tf.uint16, fixed_length=2)
  <tf.Tensor: shape=(2, 1), dtype=uint16, numpy=
  array([[12849],
         [    0]], dtype=uint16)>
  >>> tf.io.decode_raw(tf.constant(["12","34"]), tf.uint16, fixed_length=4)
  <tf.Tensor: shape=(2, 2), dtype=uint16, numpy=
  array([[12849,     0],
         [    0,     0]], dtype=uint16)>

  This will be fixed in a future release of TensorFlow.

  Args:
    input_bytes: Each element of the input Tensor is converted to an array of
      bytes. Currently, this must be a tensor of strings (bytes), although
      semantically the operation should support any input.
    out_type: `DType` of the output. Acceptable types are `half`, `float`,
      `double`, `int32`, `uint16`, `uint8`, `int16`, `int8`, `int64`.
    little_endian: Whether the `input_bytes` data is in little-endian format.
      Data will be converted into host byte order if necessary.
    fixed_length: If set, the first `fixed_length` bytes of each element will
      be converted. Data will be zero-padded or truncated to the specified
      length. `fixed_length` must be a multiple of the size of `out_type`, and
      must be specified if the elements of `input_bytes` are of variable
      length.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` object storing the decoded bytes.
  """
  if fixed_length is None:
    # No explicit length requested: use the plain decoding op.
    return gen_parsing_ops.decode_raw(
        input_bytes, out_type, little_endian=little_endian, name=name)

  # An explicit length was requested: use the padded variant of the op.
  return gen_parsing_ops.decode_padded_raw(
      input_bytes,
      fixed_length=fixed_length,
      out_type=out_type,
      little_endian=little_endian,
      name=name)