Example #1
 def setup(self):
     """Sets up the beam bundle."""
     # pylint: disable=g-import-not-at-top, import-outside-toplevel
     import tensorstore as ts
     self._ds_in = ts.open(self._input_spec).result()
     self._shape = self._ds_in.domain.shape
     self._dtype = self._ds_in.dtype.numpy_dtype
     self._ds_out = ts.open(self._output_spec).result()
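The setup above only needs self._input_spec and self._output_spec to be valid TensorStore spec dictionaries. A minimal sketch of what such specs might look like, assuming a zarr driver and local file paths (both are illustrative, not taken from the original pipeline):

# Hypothetical specs; any driver/kvstore combination supported by tensorstore works.
input_spec = {
    'driver': 'zarr',
    'kvstore': {'driver': 'file', 'path': '/tmp/input.zarr'},
}
output_spec = {
    'driver': 'zarr',
    'kvstore': {'driver': 'file', 'path': '/tmp/output.zarr'},
    'create': True,
    'delete_existing': True,
}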
Example #2
    def check_vol(box_zyx, scale):
        # raw volume handle
        spec = dict(config["tensorstore"]["spec"])
        spec['scale_index'] = scale
        context = ts.Context(config["tensorstore"]["context"])
        store = ts.open(spec, read=True, write=False, context=context).result()
        store_box = np.array([
            store.spec().domain.inclusive_min[:3][::-1],
            store.spec().domain.exclusive_max[:3][::-1]
        ])

        # Just verify that the 'service' wrapper is consistent with the low-level handle
        assert service.dtype == store.dtype.numpy_dtype
        assert (service.bounding_box_zyx // (2**scale) == store_box).all(), \
            f"{service.bounding_box_zyx.tolist()} != {store_box.tolist()}"

        if scale == 0:
            # Service INSERTS geometry into config if necessary
            assert config["geometry"][
                "bounding-box"] == store_box[:, ::-1].tolist()

        store_subvol = store[box_to_slicing(*box_zyx[:, ::-1])].read(
            order='F').result().transpose()
        assert store_subvol.any(), \
            "Volume from raw API is all zeros; this is a bad test"

        subvol = service.get_subvolume(box_zyx, scale)
        assert subvol.any(), "Volume from service is all zeros"

        assert (subvol.shape == (box_zyx[1] - box_zyx[0])).all()
        assert (subvol == store_subvol).all()
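box_to_slicing is assumed here to turn a [start, stop] coordinate pair into a tuple of slices; a minimal sketch of such a helper (hypothetical, but consistent with how it is called above, with the box reversed into XYZ order to match the raw TensorStore handle):

def box_to_slicing(start, stop):
    # Convert two coordinate vectors into a tuple of slices,
    # e.g. ([0, 0, 0], [10, 20, 30]) -> (slice(0, 10), slice(0, 20), slice(0, 30)).
    return tuple(slice(int(a), int(b)) for a, b in zip(start, stop))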
Example #3
def slice(startstr, sizestr, format):
    """
    Retrieve slice with specified format
    """
    try:
        start_arr = startstr.split("_")
        start = [int(start_arr[0]), int(start_arr[1]), int(start_arr[2])]

        size_arr = sizestr.split("_")
        size = [int(size_arr[0]), int(size_arr[1]), int(size_arr[2])]

        location = request.args.get("location")

        if size[0] != 1 and size[1] != 1 and size[2] != 1:
            return Response("one dimension must be size 1", 400)

        location_arr = location.split('/')
        bucket = location_arr[0]
        path = '/'.join(location_arr[1:])
        stderr = sys.stderr

        cache_key = f"{location}_{startstr}_{sizestr}_{format}"
        try:
            resp = SLICE_CACHE.get(cache_key)
        except Exception:
            # cache miss: open the dataset and render the requested slice
            dataset = ts.open({
                'driver': 'neuroglancer_precomputed',
                'kvstore': {
                    'driver': 'gcs',
                    'bucket': bucket,
                },
                'path': path,
                'recheck_cached_data': 'open'
            }).result()
            dataset = dataset[ts.d['channel'][0]]

            x, y, z = start
            sx, sy, sz = size
            data = dataset[x:x + sx, y:y + sy,
                           z:z + sz].read(order='F').result()

            # write 2D image to jpeg or png
            data = np.squeeze(data)
            imgByteArr = io.BytesIO()
            im = Image.fromarray(data.transpose((1, 0)))
            im.save(imgByteArr, format=format)
            resp = imgByteArr.getvalue()
            SLICE_CACHE.put(cache_key, resp)
        r = make_response(resp)
        r.headers.set('Content-Type', f"image/{format}")
        return r

    except Exception as e:
        return Response(traceback.format_exc(), 400)
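The handler above relies on SLICE_CACHE.get() raising on a cache miss, which is what sends execution into the except branch that opens the dataset. A minimal sketch of a cache with that behaviour (hypothetical; the real cache may be size- or TTL-bounded differently):

class SliceCache:
    """Tiny in-memory cache whose get() raises KeyError on a miss."""

    def __init__(self, max_items=256):
        self._items = {}
        self._max_items = max_items

    def get(self, key):
        return self._items[key]  # raises KeyError when the key is absent

    def put(self, key, value):
        if len(self._items) >= self._max_items:
            # evict the oldest entry (dicts preserve insertion order)
            self._items.pop(next(iter(self._items)))
        self._items[key] = value

SLICE_CACHE = SliceCache()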
Example #4
async def test_memory_n5_cache_open():
    ts.open({
        "context": {
            "cache_pool": {
                "total_bytes_limit": 1000000
            }
        },
        "driver": "n5",
        "kvstore": {
            "driver": "memory",
        },
        "metadata": {
            "compression": {
                "type": "gzip"
            },
            "dataType": "uint32",
            "dimensions": [1000, 20000],
            "blockSize": [10, 10],
        },
        "create": True,
        "delete_existing": True,
    }).result()
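The spec above both creates the N5 dataset and attaches a cache_pool through the context member. A short sketch of writing to and reading back from a store opened with the same spec, using the synchronous API (a usage illustration, not part of the original test):

import numpy as np
import tensorstore as ts

store = ts.open({
    "context": {"cache_pool": {"total_bytes_limit": 1000000}},
    "driver": "n5",
    "kvstore": {"driver": "memory"},
    "metadata": {
        "compression": {"type": "gzip"},
        "dataType": "uint32",
        "dimensions": [1000, 20000],
        "blockSize": [10, 10],
    },
    "create": True,
    "delete_existing": True,
}).result()

# Write one block and read it back through the cache.
store[0:10, 0:10].write(np.arange(100, dtype=np.uint32).reshape(10, 10)).result()
print(store[0:10, 0:10].read().result())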
Example #5
 def _open_labels(self):
     labels_file = self.labels_file
     if self.tensorstore:
         # labels file should be the spec dict for tensorstore
         labels = ts.open(labels_file, create=False, open=True).result()
         if not self.gt_file:
             # we need to apply the slice and so need to construct
             # the correct tuple of int / slices
             labels = labels[self.time_index]
     else:
         labels = zarr.open(labels_file, mode='r+')
         if not self.gt_file:
             labels = labels[self.time_index]
     return labels
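The comment about building "the correct tuple of int / slices" means that indexing the labels volume with time_index selects a single time point. A small illustration, assuming a 4D (t, z, y, x) layout (the layout is an assumption, not stated in the source):

# Indexing with an int drops the time axis for both zarr and tensorstore handles:
time_index = 5
frame = labels[time_index]  # shape (z, y, x)
# An explicit tuple of int / slices is equivalent:
frame = labels[(time_index, slice(None), slice(None), slice(None))]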
Example #6
async def async_deserialize(mesh,
                            mesh_axes,
                            tensorstore_spec,
                            global_shape=None):
    t = ts.open(ts.Spec(tensorstore_spec), open=True).result()
    shape = t.shape if global_shape is None else global_shape
    new_shard_shape = gda.get_shard_shape(shape, mesh, mesh_axes)

    async def cb(index):
        out = np.zeros(new_shard_shape, dtype=t.dtype.numpy_dtype)
        requested_domain = ts.IndexTransform(input_shape=shape)[index].domain
        restricted_domain = t.domain.intersect(requested_domain)
        await ts.array(out)[ts.d[:].translate_to[requested_domain.origin]
                            ][restricted_domain].write(t[restricted_domain])
        return out

    return await create_async_gda_from_callback(shape, mesh, mesh_axes, cb)
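The per-shard callback combines two TensorStore idioms: intersecting the shard's requested domain with the stored domain, and translating the in-memory buffer so its origin lines up with that domain. A small self-contained sketch of the same pattern on a toy 4x4 array (synchronous, for illustration only):

import numpy as np
import tensorstore as ts

t = ts.array(np.arange(16, dtype=np.int32).reshape(4, 4))
out = np.zeros((2, 2), dtype=np.int32)

requested_domain = ts.IndexTransform(input_shape=[4, 4])[1:3, 1:3].domain
restricted_domain = t.domain.intersect(requested_domain)

# Shift out so it covers [1:3, 1:3], then copy that region from t into it.
ts.array(out)[ts.d[:].translate_to[requested_domain.origin]][restricted_domain].write(
    t[restricted_domain]).result()
print(out)  # the 2x2 block t[1:3, 1:3]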
Example #7
def prepare_tensorstore_from_pyramid(
    pyr: Sequence[DataArray],
    level_names: Sequence[str],
    jpeg_quality: int,
    output_chunks: Sequence[int],
    root_container_path: Path,
):
    store_arrays = []
    # sharding = {'@type': 'neuroglancer_uint64_sharded_v1',
    #       'preshift_bits': 9,
    #        'hash': 'identity',
    #        'minishard_index_encoding': 'gzip',
    #       'minishard_bits': 6,
    #       'shard_bits': 15}

    for p, ln in zip(pyr, level_names):
        res = [abs(float(p.coords[k][1] - p.coords[k][0])) for k in p.dims]
        spec: Dict[str, Any] = {
            "driver": "neuroglancer_precomputed",
            "kvstore": {
                "driver": "file",
                "path": str(Path(root_container_path).parent),
            },
            "path": root_container_path.parts[-1],
            "scale_metadata": {
                "size": p.shape,
                "resolution": res,
                "encoding": "jpeg",
                "jpeg_quality": jpeg_quality,
                #'sharding': sharding,
                "chunk_size": output_chunks,
                "key": ln,
                "voxel_offset": (0, 0, 0),
            },
            "multiscale_metadata": {
                "data_type": p.dtype.name,
                "num_channels": 1,
                "type": "image",
            },
        }
        try:
            ts.open(spec=spec, open=True).result()
        except ValueError:
            try:
                ts.open(spec=spec, create=True).result()
            except ValueError:
                ts.open(spec=spec, create=True, delete_existing=True).result()

        nicer_array = NicerTensorStore(spec=spec, open_kwargs={"write": True})
        store_arrays.append(nicer_array)
    return store_arrays
Example #8
async def async_serialize(gda_inp: gda.GlobalDeviceArray, tensorstore_spec,
                          commit_future=None):
  # 'metadata' may not be present at the top level (for example, if we are using
  # a 'cast' driver).
  if not _spec_has_metadata(tensorstore_spec):
    tensorstore_spec['metadata'] = _get_metadata(gda_inp)

  if jax.process_index() == 0:
    open_future = ts.open(
        ts.Spec(tensorstore_spec), create=True, open=True, context=TS_CONTEXT)
    # Asynchronous case.
    if commit_future is not None:
      assert isinstance(commit_future, list)
      commit_future.append(open_future)
    else:
      await open_future

  # `ts.open` runs twice for process 0 because for the first time, we just get
  # the future to be awaited upon in the background thread. The second one runs
  # with `assume_metadata=True` which does no I/O operation and returns the
  # tensorstore object.
  # For every process other than `0`, we open with `assume_metadata=True`.
  t = await ts.open(
      ts.Spec(tensorstore_spec), open=True, assume_metadata=True, context=TS_CONTEXT)

  async def _write_array(shard):
    if shard.replica_id == 0:
      write_future = t[shard.index].write(shard.data)
      if commit_future is not None:
        assert isinstance(commit_future, list)
        commit_future.append(write_future.commit)
        await write_future.copy
      else:
        await write_future.commit

  future_write_state = jax.tree_util.tree_map(_write_array,
                                              gda_inp.local_shards)
  return await asyncio.gather(*future_write_state)
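The helpers _spec_has_metadata and _get_metadata are not shown here. One plausible implementation of _spec_has_metadata (an assumption, not the verbatim helper) recursively walks the spec so that 'metadata' nested under adapter drivers such as 'cast' is still detected:

def _spec_has_metadata(tree):
    # Recursively search a (possibly nested) spec dict for a 'metadata' member.
    if not isinstance(tree, dict):
        return False
    return 'metadata' in tree or any(
        _spec_has_metadata(subtree) for subtree in tree.values())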
Example #9
def volume3d_ng(location,
                bbox,
                size=132,
                seed=None,
                array=None,
                cloudrun=None,
                sample_array=False,
                sample_class=False):
    """Returns a dataset based on a generator that will produce an infinite number of 3D volumes
    from ng precomputed randomly from given bounding box or from provided list of ROIs.

    Note: only support uint8blk.

    Note: this 

    Args:
        loccation (str): directory for precomputed volume (assume google bucket directories)
        bbox (tuple of tuples): ((x,y,z), (x2,y2,z2)) start and stop location for samples
        size (int): size of dimension
        array (list): list of rois
        cloudrun (str) location of cloud run service to retrieve volumes (if not
            provided use the locally installed tensorstore library)
    Returns:
        tf.dataset containing uint8 3D tensors
    """

    if cloudrun is None:
        try:
            import tensorstore as ts
        except ImportError:
            raise Exception("tensorstore not installed")

    def generator():
        if array is not None and sample_array == False:
            for start in array:
                yield start
        else:
            # make repeatable if a seed is set
            if seed is not None:
                tf.random.set_seed(seed)

            while True:
                curr_bbox = bbox
                if array is not None:
                    tarray = array
                    if sample_class:
                        cspot = tf.random.uniform(shape=[],
                                                  minval=0,
                                                  maxval=len(array),
                                                  dtype=tf.int64,
                                                  seed=seed)
                        tarray = array[cspot]
                    spot = tf.random.uniform(shape=[],
                                             minval=0,
                                             maxval=len(tarray),
                                             dtype=tf.int64,
                                             seed=seed)
                    curr_bbox = tarray[spot]

                #  get random starting point from bbox (x1,y1,z1) (x2,y2,z2)

                xstart = tf.random.uniform(shape=[],
                                           minval=curr_bbox[0][0],
                                           maxval=curr_bbox[1][0],
                                           dtype=tf.int64,
                                           seed=seed)
                ystart = tf.random.uniform(shape=[],
                                           minval=curr_bbox[0][1],
                                           maxval=curr_bbox[1][1],
                                           dtype=tf.int64,
                                           seed=seed)
                zstart = tf.random.uniform(shape=[],
                                           minval=curr_bbox[0][2],
                                           maxval=curr_bbox[1][2],
                                           dtype=tf.int64,
                                           seed=seed)
                yield (xstart, ystart, zstart)

    location_arr = location.split('/')
    bucket = location_arr[0]
    path = '/'.join(location_arr[1:])

    if cloudrun is None:
        # reuse tensorstore object
        dataset = ts.open({
            'driver': 'neuroglancer_precomputed',
            'kvstore': {
                'driver': 'gcs',
                'bucket': bucket,
            },
            'path': path,
            'recheck_cached_data': 'open',
            'scale_index': 0
        }).result()
        dataset = dataset[ts.d['channel'][0]]
    else:
        import requests
        token = subprocess.check_output(["gcloud auth print-identity-token"],
                                        shell=True).decode()
        headers = {}
        headers["Authorization"] = f"Bearer {token[:-1]}"
        headers["Content-type"] = "application/json"

    #@tf.function
    def mapper(xstart, ystart, zstart):
        #xstart = xstart.numpy()
        #ystart = ystart.numpy()
        #zstart = zstart.numpy()
        if cloudrun is None:
            # read from tensorstore
            data = dataset[xstart:(xstart + size), ystart:(ystart + size),
                           zstart:(zstart + size)].read().result()
            return tf.convert_to_tensor(data, dtype=tf.uint8)
        else:
            # read from cloud run function
            config = {
                "location": location,
                "size": [int(size), int(size), int(size)],
                "start": [int(xstart), int(ystart),
                          int(zstart)]
            }
            res = requests.post(cloudrun + "/volume",
                                data=json.dumps(config),
                                headers=headers)
            if res.status_code != 200:
                # refetch token if obsolete
                token = subprocess.check_output(
                    ["gcloud auth print-identity-token"], shell=True).decode()
                headers["Authorization"] = f"Bearer {token[:-1]}"
                res = requests.post(cloudrun + "/volume",
                                    data=json.dumps(config),
                                    headers=headers)
            if res.status_code != 200:
                raise RuntimeError("cloud run failed")
            data = np.frombuffer(res.content, dtype=np.uint8)
            data = data.reshape((size, size, size))
            #data = data.transpose((2,1,0))
            return tf.convert_to_tensor(data, dtype=tf.uint8)

    #@tf.function
    def wrapper_mapper(x, y, z):
        tensor = tf.py_function(func=mapper, inp=(x, y, z), Tout=tf.uint8)
        tensor.set_shape((size, size, size))
        return tensor

    return tf.data.Dataset.from_generator(
        generator, output_types=(tf.int64, tf.int64, tf.int64)
    ).map(
        wrapper_mapper, num_parallel_calls=AUTOTUNE
    )  # ideally set to some concurrency that matches number of parallel http calls possible
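A hedged usage sketch for the function above; the bucket path and bounding box are placeholders, and GCS access plus a local tensorstore install are assumed:

ds = volume3d_ng("my-bucket/my-precomputed-volume",
                 bbox=((0, 0, 0), (1024, 1024, 1024)),
                 size=64,
                 seed=42)
for volume in ds.take(2):
    print(volume.shape, volume.dtype)  # (64, 64, 64) tf.uint8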
Example #10
def volume():
    """
    Retrieve volume from tensorstore.
    See fetch_subvolume() function below for client-side example usage.
    """
    try:
        config_file = request.get_json()

        # Strip gs:// prefix
        location = config_file["location"]  # contains source and destination
        if location.startswith('gs://'):
            location = location[len('gs://'):]

        start = config_file["start"]  # in XYZ order
        size = config_file["size"]  # in XYZ order
        scale_index = config_file.get("scale_index", 0)
        use_jpeg = config_file.get("jpeg", False)

        location_arr = location.split('/')
        bucket = location_arr[0]
        path = '/'.join(location_arr[1:])
        stderr = sys.stderr

        # reuse tensorstore object
        dataset = ts.open({
            'driver': 'neuroglancer_precomputed',
            'kvstore': {
                'driver': 'gcs',
                'bucket': bucket,
            },
            'path': path,
            'recheck_cached_data': 'open',
            'scale_index': scale_index
        }).result()
        dataset = dataset[ts.d['channel'][0]]

        #      +--------------------------------------------------------------------+
        #      | A quick guide to 3D array index semantics and memory order choices |
        #      +--------------------------------------------------------------------+
        #
        # +-----------------+--------------+-----------------------------------------------+
        # | Index semantics | Memory order | Notes                                         |
        # +-----------------+--------------+-----------------------------------------------+
        # |        a[Z,Y,X] | C            | Standard for Python users. Prefer this.       |
        # |                 |              |                                               |
        # |        a[X,Y,Z] | F            | Identical memory layout to the above,         |
        # |                 |              | (the RAM contents are identical to the above) |
        # |                 |              | but due to the reverse index meaning, this    |
        # |                 |              | is likely to introduce confusion and/or       |
        # |                 |              | accidental inefficiencies when you pass this  |
        # |                 |              | array to library functions which expect a     |
        # |                 |              | standard C-order array.                       |
        # |                 |              |                                               |
        # |        a[Z,Y,X] | F            | Never do this.                                |
        # |                 |              |                                               |
        # |        a[X,Y,Z] | C            | Never do this.                                |
        # +-----------------+--------------+-----------------------------------------------+

        # Unfortunately, TensorStore.read() always returns an [X,Y,Z]-indexed array,
        # but it does permit you to specify the memory ordering.
        # Therefore, we request F-order, the only sane choice.
        # The buffer we'll return to the caller can be interpreted as either F/XYZ or C/ZYX.
        # (That's their business.)

        x, y, z = start
        sx, sy, sz = size
        data = dataset[x:x + sx, y:y + sy, z:z + sz].read(order='F').result()
        if not use_jpeg or sz > 1:
            r = make_response(data.tobytes(order='F'))
            r.headers.set('Content-Type', 'application/octet-stream')
            return r
        else:
            # write 2D image to JPEG
            data = np.squeeze(data)
            imgByteArr = io.BytesIO()
            im = Image.fromarray(data.transpose((1, 0)))
            im.save(imgByteArr, format="JPEG")
            r = make_response(imgByteArr.getvalue())
            r.headers.set('Content-Type', 'image/jpeg')
            return r

    except Exception as e:
        return Response(traceback.format_exc(), 400)
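The docstring above mentions a client-side fetch_subvolume() helper that is not reproduced here. A hedged sketch of what such a client could look like, posting the same JSON fields the handler reads (the server URL handling and the uint8 dtype are assumptions):

import json
import numpy as np
import requests

def fetch_subvolume(server, location, start, size, scale_index=0, headers=None):
    """POST to the /volume endpoint and reshape the raw bytes into a ZYX array."""
    config = {
        "location": location,   # e.g. "gs://bucket/path" or "bucket/path"
        "start": list(start),   # XYZ order, as expected by the handler
        "size": list(size),     # XYZ order
        "scale_index": scale_index,
    }
    res = requests.post(server + "/volume", data=json.dumps(config), headers=headers)
    res.raise_for_status()
    data = np.frombuffer(res.content, dtype=np.uint8)
    # The handler returns F-order XYZ bytes, which read back as a C-order ZYX array.
    return data.reshape(size[::-1])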
Example #11
 def __setitem__(self, slices, values):
     ts.open(
         spec=self.spec,
         **self.open_kwargs).result()[ts.d["channel"][0]][slices] = values
     return None
Example #12
 def __getitem__(self, slices):
     return ts.open(spec=self.spec, **self.open_kwargs).result()[slices]
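Examples #7, #11 and #12 suggest that NicerTensorStore is a thin wrapper that re-opens the store from its spec on every access. A minimal sketch of how the class could be assembled from those pieces (a hypothetical reconstruction, not the original definition):

from dataclasses import dataclass, field
from typing import Any, Dict

import tensorstore as ts


@dataclass
class NicerTensorStore:
    """Re-opens the underlying store from its spec on every read or write."""
    spec: Dict[str, Any]
    open_kwargs: Dict[str, Any] = field(default_factory=dict)

    def __getitem__(self, slices):
        return ts.open(spec=self.spec, **self.open_kwargs).result()[slices]

    def __setitem__(self, slices, values):
        ts.open(spec=self.spec,
                **self.open_kwargs).result()[ts.d["channel"][0]][slices] = values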