Example #1
 def __init__(self,
              filename,
              internal=False):
   with tf.name_scope("HDF5IOTensor") as scope:
     # Open the file and enumerate its columns (HDF5 datasets).
     resource, columns = core_ops.io_hdf5_readable_init(
         filename,
         container=scope,
         shared_name="%s/%s" % (filename, uuid.uuid4().hex))
     columns = [column.decode() for column in columns.numpy().tolist()]
     elements = []
     for column in columns:
       # Query each column's shape and dtype to build its TensorSpec.
       shape, dtype = core_ops.io_hdf5_readable_spec(resource, column)
       shape = tf.TensorShape(shape.numpy())
       dtype = tf.as_dtype(dtype.numpy())
       spec = tf.TensorSpec(shape, dtype, column)
       # Wrap the read op so column data is only materialized on access.
       function = io_tensor_ops._IOTensorComponentFunction( # pylint: disable=protected-access
           core_ops.io_hdf5_readable_read,
           resource, column, shape, dtype)
       elements.append(
           io_tensor_ops.BaseIOTensor(
               spec, function, internal=internal))
     spec = tuple([e.spec for e in elements])
     super(HDF5IOTensor, self).__init__(
         spec, columns, elements,
         internal=internal)
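This constructor is internal (note the internal flag); user code would normally go through the public factory in tensorflow-io. A minimal usage sketch, assuming the tfio package is installed and using a placeholder file name:

    import tensorflow_io as tfio

    # Open every column (HDF5 dataset) of the file as a lazily-read IOTensor.
    hdf5 = tfio.IOTensor.from_hdf5("sample.h5")  # "sample.h5" is a placeholder
    # Individual columns can then be indexed without loading the whole file,
    # e.g. hdf5("/column_name")[0:10], assuming call-style column access.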
Example #2
 def __init__(self, filename, internal=False):
     with tf.name_scope("HDF5IOTensor") as scope:
        # TODO: unique shared_name might be removed if HDF5 is thread-safe?
         resource, columns = core_ops.io_hdf5_readable_init(
             filename,
             container=scope,
             shared_name="%s/%s" % (filename, uuid.uuid4().hex))
         columns = [column.decode() for column in columns.numpy().tolist()]
         elements = []
         for column in columns:
             shape, dtype = core_ops.io_hdf5_readable_spec(resource, column)
             shape = tf.TensorShape(shape.numpy())
             dtype = tf.as_dtype(dtype.numpy())
             spec = tf.TensorSpec(shape, dtype, column)
             if shape.rank == 0:
                 value = core_ops.io_hdf5_readable_read(
                     resource, 0, shape, column, dtype)
                 elements.append(
                     io_tensor_ops.ScalarIOTensor(spec,
                                                  value,
                                                  internal=internal))
             else:
                 function = _HDF5IOTensorFunction(
                     core_ops.io_hdf5_readable_read, resource, column,
                     shape, dtype)
                 elements.append(
                     io_tensor_ops.BaseIOTensor(spec,
                                                function,
                                                internal=internal))
         spec = tuple([e.spec for e in elements])
         super(HDF5IOTensor, self).__init__(spec,
                                            columns,
                                            elements,
                                            internal=internal)
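Relative to Example #1, this variant special-cases rank-0 columns: a scalar has no leading axis to slice on, so its value is read eagerly once and wrapped in a ScalarIOTensor, while higher-rank columns stay lazy behind a _HDF5IOTensorFunction.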
Example #3
    def __init__(self, filename, dataset, spec=None, internal=True):
        """HDF5IODataset."""
        with tf.name_scope("HDF5IODataset"):
            assert internal

            # TODO: unique shared_name might be removed if HDF5 is thread-safe?
            resource, _ = core_ops.io_hdf5_readable_init(
                filename,
                container="HDF5IODataset",
                shared_name="{}/{}".format(filename,
                                           uuid.uuid4().hex),
            )
            if tf.executing_eagerly():
                shape, dtype = core_ops.io_hdf5_readable_spec(
                    resource, dataset)
                dtype = tf.as_dtype(dtype.numpy())
            else:
                assert spec is not None
                shape, _ = core_ops.io_hdf5_readable_spec(resource, dataset)
                dtype = spec if isinstance(spec,
                                           tf.dtypes.DType) else spec.dtype
            self._resource = resource
            self._component = dataset
            self._shape = shape
            self._dtype = dtype

            step = 1024
            indices_start = tf.data.Dataset.range(0, shape[0], step)
            indices_stop = indices_start.skip(1).concatenate(
                tf.data.Dataset.from_tensor_slices([shape[0]]))
            dataset = tf.data.Dataset.zip((indices_start, indices_stop))

            def f(start, stop):
                return core_ops.io_hdf5_readable_read(
                    self._resource,
                    component=self._component,
                    shape=self._shape,
                    start=start,
                    stop=stop,
                    dtype=self._dtype,
                )

            dataset = dataset.map(f)
            dataset = dataset.unbatch()

            self._dataset = dataset
            super().__init__(self._dataset._variant_tensor)  # pylint: disable=protected-access
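The start/stop bookkeeping above is a generic tf.data idiom: each chunk's start offset is zipped with the next start, and the total length is appended as the final stop. A self-contained sketch of just that windowing, assuming an illustrative length of 10 and a step of 4:

    import tensorflow as tf

    num_rows = tf.constant(10, tf.int64)  # stands in for shape[0]
    step = 4

    starts = tf.data.Dataset.range(0, num_rows, step)  # 0, 4, 8
    stops = starts.skip(1).concatenate(
        tf.data.Dataset.from_tensor_slices([num_rows]))  # 4, 8, 10
    windows = tf.data.Dataset.zip((starts, stops))
    print(list(windows.as_numpy_iterator()))  # [(0, 4), (4, 8), (8, 10)]

Each (start, stop) pair is then mapped through the read op, and the resulting chunks are unbatched back into individual elements.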
Example #4
 def __init__(self, filename, dataset, internal=True):
     """HDF5IODataset."""
     with tf.name_scope("HDF5IODataset") as scope:
         resource, _ = core_ops.io_hdf5_readable_init(
             filename,
             container=scope,
             shared_name="%s/%s" % (filename, uuid.uuid4().hex))
         shape, dtype = core_ops.io_hdf5_readable_spec(resource, dataset)
         shape = tf.TensorShape(
             [None if e < 0 else e for e in shape.numpy()])
         dtype = tf.as_dtype(dtype.numpy())
         capacity = 4096
         super(HDF5IODataset, self).__init__(_HDF5IODatasetFunction(
             core_ops.io_hdf5_readable_read, resource, dataset, shape,
             dtype),
                                             capacity=capacity,
                                             internal=internal)
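Rather than materializing a dataset of (start, stop) windows, this version hands the read op to a _HDF5IODatasetFunction and lets the base class drive iteration; the capacity=4096 argument presumably bounds how many rows each underlying read pulls at once, playing the same role as step = 1024 in Examples #3 and #5.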
Example #5
  def __init__(self,
               filename,
               dataset,
               internal=True):
    """HDF5IODataset."""
    with tf.name_scope("HDF5IODataset") as scope:
      assert internal

      # TODO: unique shared_name might be removed if HDF5 is thread-safe?
      resource, _ = core_ops.io_hdf5_readable_init(
          filename,
          container=scope,
          shared_name="%s/%s" % (filename, uuid.uuid4().hex))
      shape, dtype = core_ops.io_hdf5_readable_spec(resource, dataset)
      dtype = tf.as_dtype(dtype.numpy())

      self._resource = resource
      self._component = dataset
      self._shape = shape
      self._dtype = dtype

      step = 1024
      indices_start = tf.data.Dataset.range(0, shape[0], step)
      indices_stop = indices_start.skip(1).concatenate(
          tf.data.Dataset.from_tensor_slices([shape[0]]))
      dataset = tf.data.Dataset.zip((indices_start, indices_stop))
      def f(start, stop):
        shape = tf.concat(
            [tf.convert_to_tensor([stop - start], tf.int64), self._shape[1:]],
            axis=0)
        return core_ops.io_hdf5_readable_read(
            self._resource, start=start, shape=shape,
            component=self._component, dtype=self._dtype)
      dataset = dataset.map(f)
      dataset = dataset.unbatch()

      self._dataset = dataset
      super(HDF5IODataset, self).__init__(self._dataset._variant_tensor) # pylint: disable=protected-access
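As with the IOTensor examples, this constructor is reached through a public factory in practice. A minimal sketch, assuming the tfio package and placeholder file/dataset names:

    import tensorflow_io as tfio

    # Stream one HDF5 dataset element by element; both names are placeholders.
    ds = tfio.IODataset.from_hdf5("sample.h5", dataset="/my_dataset")
    for element in ds.take(3):
        print(element)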
Example #6
 def f(column):
     # Look up the (shape, dtype) spec of a single column.
     shape, dtype = core_ops.io_hdf5_readable_spec(resource, column)
     return shape, dtype
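A helper like this is typically mapped over the column list returned by io_hdf5_readable_init to collect every column's spec in one pass, e.g. (a hedged sketch):

    specs = {column: f(column) for column in columns}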