Example #1
def configure_multires_info(
  cloudpath:str,
  vertex_quantization_bits:int, 
  mesh_dir:str
):
  """
  Computes properties and uploads a multires 
  mesh info file
  """
  assert vertex_quantization_bits in (10, 16)

  vol = CloudVolume(cloudpath)

  mesh_dir = mesh_dir or vol.info.get("mesh", None)

  if not "mesh" in vol.info:
    vol.info['mesh'] = mesh_dir
    vol.commit_info()

  res = vol.meta.resolution(vol.mesh.meta.mip)

  cf = CloudFiles(cloudpath)
  info_filename = f'{mesh_dir}/info'
  mesh_info = cf.get_json(info_filename) or {}
  new_mesh_info = copy.deepcopy(mesh_info)
  new_mesh_info['@type'] = "neuroglancer_multilod_draco"
  new_mesh_info['vertex_quantization_bits'] = vertex_quantization_bits
  new_mesh_info['transform'] = [ 
    res[0], 0,      0,      0,
    0,      res[1], 0,      0,
    0,      0,      res[2], 0,
  ]
  new_mesh_info['lod_scale_multiplier'] = 1.0

  if new_mesh_info != mesh_info:
    cf.put_json(
      info_filename, new_mesh_info, 
      cache_control="no-cache"
    )
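A minimal usage sketch for the function above; the bucket path and mesh directory name are hypothetical placeholders.

# Hypothetical invocation; the cloudpath and mesh_dir values are placeholders.
configure_multires_info(
  "gs://example-bucket/example-dataset",
  vertex_quantization_bits=16,
  mesh_dir="mesh_mip_0",
)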
Example #2
    def execute(self):
        self.vol = CloudVolume(self.cloudpath, cdn_cache=False)
        self.vol.mip = self.vol.skeleton.meta.mip

        fragment_filenames = self.get_filenames()
        skels = self.get_skeletons_by_segid(fragment_filenames)

        skeletons = []
        for segid, frags in skels.items():
            skeleton = self.fuse_skeletons(frags)
            if (self.max_cable_length is None
                    or skeleton.cable_length() <= self.max_cable_length):
                skeleton = kimimaro.postprocess(skeleton, self.dust_threshold,
                                                self.tick_threshold)
            skeleton.id = segid
            skeletons.append(skeleton)

        self.vol.skeleton.upload(skeletons)

        if self.delete_fragments:
            cf = CloudFiles(self.cloudpath, progress=True)
            cf.delete(fragment_filenames)
Example #3
    def fetch_provenance(self):
        """
    Refresh the current provenance file from primary storage (e.g. the cloud)
    without reference to cache. The cache will not be updated.
  
    Raises cloudvolume.exceptions.ProvenanceUnavailableError when the provenance
    file cannot be retrieved.

    See also: refresh_provenance

    Returns: dict
    """
        cf = CloudFiles(self.cloudpath, secrets=self.config.secrets)
        provfile = cf.get('provenance')
        if provfile:
            provfile = provfile.decode('utf-8')

            # The json5 decoder is *very* slow,
            # so try the stricter but much faster json
            # decoder first and fall back to json5 only if it fails.
            try:
                provfile = json.loads(provfile)
            except json.decoder.JSONDecodeError:
                try:
                    provfile = json5.loads(provfile)
                except ValueError:
                    raise ValueError(
                        red("""The provenance file could not be JSON decoded. 
            Please reformat the provenance file before continuing. 
            Contents: {}""".format(provfile)))
        else:
            provfile = {
                "sources": [],
                "owners": [],
                "processing": [],
                "description": "",
            }

        return self._cast_provenance(provfile)
Example #4
def test_http_read():
    from cloudfiles import CloudFiles, exceptions
    cf = CloudFiles(
        "https://storage.googleapis.com/seunglab-test/test_v0/black/")
    info = cf.get_json('info')

    assert info == {
        "data_type": "uint8",
        "num_channels": 1,
        "scales": [{
            "chunk_sizes": [[64, 64, 50]],
            "encoding": "raw",
            "key": "6_6_30",
            "resolution": [6, 6, 30],
            "size": [1024, 1024, 100],
            "voxel_offset": [0, 0, 0]
        }],
        "type": "image"
    }
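For comparison, a minimal sketch of the put_json/get_json round trip the test above relies on, using a hypothetical local file path.

from cloudfiles import CloudFiles

cf = CloudFiles("file:///tmp/cf_roundtrip")  # hypothetical local directory
cf.put_json("info", {"data_type": "uint8"})
assert cf.get_json("info") == {"data_type": "uint8"}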
Example #5
def MultiResShardedFromUnshardedMeshMergeTask(
  src:str,
  dest:str,
  shard_no:str,
  cache_control:bool = False,
  draco_compression_level:int = 1,
  mesh_dir:Optional[str] = None,
  num_lod:int = 1,
  progress:bool = False,
):
  cv_src = CloudVolume(src)

  if mesh_dir is None and 'mesh' in cv_src.info:
    mesh_dir = cv_src.info['mesh']

  cv_dest = CloudVolume(dest, mesh_dir=mesh_dir, progress=True)

  labels = labels_for_shard(cv_dest, shard_no)
  meshes = cv_src.mesh.get(labels, fuse=False)
  del labels
    
  fname, shard = create_mesh_shard(
    cv_dest, meshes, 
    num_lod, draco_compression_level,
    progress, shard_no
  )
  del meshes

  if shard is None:
    return

  cf = CloudFiles(cv_dest.mesh.meta.layerpath)
  cf.put(
    fname, shard, # fname, data
    compress=False,
    content_type='application/octet-stream',
    cache_control='no-cache',
  )
Example #6
  def execute(self):
    srccv = CloudVolume(self.src_path, mip=self.mip, fill_missing=True)

    # Accumulate a histogram of the luminance levels
    nbits = np.dtype(srccv.dtype).itemsize * 8
    levels = np.zeros(shape=(2 ** nbits,), dtype=np.uint64)

    bounds = Bbox(self.offset, self.shape[:3] + self.offset)
    bounds = Bbox.clamp(bounds, srccv.bounds)

    bboxes = self.select_bounding_boxes(bounds)
    for bbox in bboxes:
      img2d = srccv[bbox.to_slices()].reshape((bbox.volume()))
      cts = np.bincount(img2d)
      levels[0:len(cts)] += cts.astype(np.uint64)

    covered_area = sum([bbx.volume() for bbx in bboxes])

    bboxes = [(bbox.volume(), bbox.size3()) for bbox in bboxes]
    bboxes.sort(key=lambda x: x[0])
    biggest = bboxes[-1][1]

    output = {
      "levels": levels.tolist(),
      "patch_size": biggest.tolist(),
      "num_patches": len(bboxes),
      "coverage_ratio": covered_area / self.shape.rectVolume(),
    }

    path = self.levels_path if self.levels_path else self.src_path
    path = os.path.join(path, 'levels')

    cf = CloudFiles(path)
    cf.put_json(
      path="{}/{}".format(self.mip, self.offset.z),
      content=output,
      cache_control='no-cache'
    )
Example #7
def create_xfer_skeleton_tasks(
    src: str,
    dest: str,
    skel_dir: Optional[str] = None,
    magnitude=2,
):
    cv_src = CloudVolume(src)
    cf_dest = CloudFiles(dest)

    if not skel_dir:
        info = cf_dest.get_json("info")
        if info.get("skeletons", None):
            skel_dir = info.get("skeletons")

    cf_dest.put_json(f"{skel_dir}/info", cv_src.skeleton.meta.info)

    alphabet = [str(i) for i in range(10)]
    if cv_src.skeleton.meta.is_sharded():
        alphabet += ['a', 'b', 'c', 'd', 'e', 'f']

    prefixes = itertools.product(*([alphabet] * magnitude))
    prefixes = ["".join(x) for x in prefixes]

    # Explicitly enumerate all prefixes shorter than the magnitude.
    for i in range(1, magnitude):
        explicit_prefix = itertools.product(*([alphabet] * i))
        prefixes += ["".join(x) for x in explicit_prefix]

    return [
        partial(
            TransferSkeletonFilesTask,
            src=src,
            dest=dest,
            prefix=prefix,
            skel_dir=skel_dir,
        ) for prefix in prefixes
    ]
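A hedged sketch of how the returned list of partials might be consumed, following the task pattern seen in Examples #2 and #12; the paths are placeholders.

# Hypothetical driver loop: each partial constructs a TransferSkeletonFilesTask.
tasks = create_xfer_skeleton_tasks("gs://bucket/src", "gs://bucket/dest", magnitude=1)
for make_task in tasks:
    make_task().execute()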
Example #8
    def exists(self, segids, progress=None):
        """
    Checks if a mesh manifest exists for each provided segid.

    Returns: { label: path or None, ... }
    """
        manifest_paths = [self.manifest_path(segid) for segid in segids]
        progress = progress if progress is not None else self.config.progress

        cf = CloudFiles(self.meta.cloudpath,
                        progress=progress,
                        green=self.config.green,
                        secrets=self.config.secrets)
        exists = cf.exists(manifest_paths)

        segid_regexp = re.compile(r'(\d+):0$')

        output = {}
        for path, there in exists.items():
            (segid, ) = re.search(segid_regexp, path).groups()
            output[segid] = path if there else None

        return output
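A brief usage sketch, assuming the method above is reached through a CloudVolume mesh attribute as in Example #5; the segment ids are placeholders.

# Hypothetical check; returns { label: manifest path or None } per the docstring.
statuses = vol.mesh.exists([1001, 1002, 1003])
missing = [segid for segid, path in statuses.items() if path is None]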
Example #9
def get_mesh_filenames_subset(cloudpath: str, mesh_dir: str, prefix: str):
    prefix = f'{mesh_dir}/{prefix}'
    segids = defaultdict(list)

    cf = CloudFiles(cloudpath)
    meshexpr = re.compile(r'(\d+):(\d+):')
    for filename in cf.list(prefix=prefix):
        filename = os.path.basename(filename)
        # `match` anchors at the beginning (^) while `search` scans the whole string.
        matches = re.search(meshexpr, filename)

        if not matches:
            continue

        segid, lod = matches.groups()
        segid, lod = int(segid), int(lod)

        if lod != 0:
            continue

        segids[segid].append(filename)

    return segids
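A short usage sketch with placeholder arguments.

# Hypothetical call; groups legacy mesh fragment filenames by segid under the given prefix.
segid_files = get_mesh_filenames_subset("gs://example-bucket/dataset", "mesh_mip_0", "10")
for segid, filenames in segid_files.items():
    print(segid, len(filenames))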
Example #10
    def do_GET(self):
        if self.path.find('..') != -1:
            self.send_error(403, "Relative paths are not allowed.")
            raise ValueError("Relative paths are not allowed.")

        path = self.path[1:]
        data = CloudFiles(self.cloudpath).get(path)

        if data is None:
            self.send_error(404, '/' + path + ": Not Found")
            return

        self.send_response(200)
        self.serve_data(data)
Example #11
    def delete(self, bbox, mip=None):
        if mip is None:
            mip = self.config.mip

        if mip in self.meta.locked_mips():
            raise exceptions.ReadOnlyException(
                "MIP {} is currently write locked. If this should not be the case, run vol.meta.unlock_mips({})."
                .format(mip, mip))

        bbox = Bbox.create(bbox, self.meta.bounds(mip), bounded=True)
        realized_bbox = bbox.expand_to_chunk_size(
            self.meta.chunk_size(mip), offset=self.meta.voxel_offset(mip))
        realized_bbox = Bbox.clamp(realized_bbox, self.meta.bounds(mip))

        if bbox != realized_bbox:
            raise exceptions.AlignmentError(
                "Unable to delete non-chunk aligned bounding boxes. Requested: {}, Realized: {}"
                .format(bbox, realized_bbox))

        cloudpaths = lambda: chunknames(
            realized_bbox,
            self.meta.bounds(mip),
            self.meta.key(mip),
            self.meta.chunk_size(mip),
            protocol=self.meta.path.protocol
        )  # need to regenerate so that generator isn't used up

        CloudFiles(self.meta.cloudpath, progress=self.config.progress, secrets=self.config.secrets) \
          .delete(cloudpaths())

        if len(self.lru) > 0:
            for path in cloudpaths():
                self.lru.pop(path, None)

        if self.cache.enabled:
            CloudFiles('file://' + self.cache.path, progress=self.config.progress, secrets=self.config.secrets) \
              .delete(cloudpaths())
Example #12
    def execute(self):
        # cache is necessary for local computation, but on GCE download is very fast
        # so cache isn't necessary.
        cv = CloudVolume(self.cloudpath, cache=False, progress=self.progress)

        # This looks messy because we are trying to avoid retaining
        # unnecessary memory. In the original iteration, this was
        # using 50 GB+ memory on minnie65. With changes to this
        # and the spatial_index, we are getting it down to something reasonable.
        locations = self.locations_for_labels(self.labels_for_shard(cv), cv)
        filenames = set(itertools.chain(*locations.values()))
        labels = set(locations.keys())
        del locations
        skeletons = self.get_unfused(labels, filenames, cv)
        del labels
        del filenames
        skeletons = self.process_skeletons(skeletons, in_place=True)

        if len(skeletons) == 0:
            return

        shard_files = synthesize_shard_files(cv.skeleton.reader.spec,
                                             skeletons)

        if len(shard_files) != 1:
            raise ValueError(
                "Only one shard file should be generated per task. Expected: {} Got: {} "
                .format(str(self.shard_no), ", ".join(shard_files.keys())))

        cf = CloudFiles(cv.skeleton.meta.layerpath, progress=self.progress)
        cf.puts(
            ((fname, data) for fname, data in shard_files.items()),
            compress=False,
            content_type='application/octet-stream',
            cache_control='no-cache',
        )
Example #13
    def dynamic_exists(self, labels, progress=None):
        """
    Checks for dynamic mesh existence.
  
    Returns: { label: path or None, ... }
    """
        labels = toiter(labels)

        checks = [self.compute_filename(label) for label in labels]

        cloudpath = self.meta.join(self.meta.meta.cloudpath,
                                   self.meta.mesh_path, 'dynamic')
        progress = progress if progress is not None else self.config.progress

        results = CloudFiles(cloudpath,
                             progress=progress,
                             green=self.config.green).exists(checks)

        output = {}
        for filepath, exists in results.items():
            label = int(os.path.basename(filepath)[:-2])  # strip :0
            output[label] = filepath if exists else None

        return output
Example #14
    def get_meshes_via_manifest_byte_offsets(self, seg_id, bounding_box):
        """    
    The manifest for sharded is a bit strange in that exists(..., return_byte_offset=True)
    is being called on the server side. To avoid duplicative delay by recomputing the offset
    locations, the manifest breaks encapsulation by returning the shard filename and byte
    offsets. This breaks enapsulation of the shard fetching logic rather severely but 
    it is probably worth it.
    """
        level = self.meta.meta.decode_layer_id(seg_id)
        dynamic_cloudpath = self.meta.join(self.meta.meta.cloudpath,
                                           self.dynamic_path())

        manifest = self.fetch_manifest(seg_id,
                                       level=level,
                                       bbox=bounding_box,
                                       return_segids=True)
        lists = self.parse_manifest_filenames(manifest)

        files = []
        if lists['dynamic']:
            files = CloudFiles(dynamic_cloudpath,
                               green=self.config.green).get(lists['dynamic'])

        dynamic_meshes = []
        while files:
            f = files.pop()
            mesh = Mesh.from_draco(f['content'])
            mesh.segid = int(os.path.basename(f['path']).split(':')[0])
            dynamic_meshes.append(mesh)

        fetches = []
        segid_map = {}
        for layer_id, filename, byte_start, size, segid in lists['initial']:
            path = self.meta.join(layer_id, filename)
            byte_end = byte_start + size
            fetches.append({
                'path': path,
                'start': byte_start,
                'end': byte_end,
            })
            segid_map[(path, byte_start, byte_end)] = segid

        cloudpath = self.meta.join(self.meta.meta.cloudpath,
                                   self.meta.mesh_path, 'initial')

        files = CloudFiles(cloudpath, green=self.config.green).get(fetches)
        initial_meshes = []
        while files:
            f = files.pop()
            mesh = Mesh.from_draco(f['content'])
            start, end = f['byte_range']
            key = (f['path'], start, end)
            mesh.segid = segid_map[key]
            initial_meshes.append(mesh)

        return dynamic_meshes + initial_meshes
Example #15
def download_chunk(meta, cache, cloudpath, mip, filename, fill_missing,
                   enable_cache, compress_cache, secrets):
    (file, ) = CloudFiles(cloudpath, secrets=secrets).get([filename], raw=True)
    content = file['content']

    if enable_cache:
        cache_content = next(compression.transcode(file,
                                                   compress_cache))['content']
        CloudFiles('file://' + cache.path).put(
            path=filename,
            content=(cache_content or b''),
            content_type=content_type(meta.encoding(mip)),
            compress=compress_cache,
            raw=bool(cache_content),
        )
        del cache_content

    if content is not None:
        content = compression.decompress(content, file['compress'])

    bbox = Bbox.from_filename(
        filename)  # possible off by one error w/ exclusive bounds
    img3d = decode(meta, filename, content, fill_missing, mip)
    return img3d, bbox
Example #16
  def download(self, paths, compress=None, progress=None):
    """
    Download the provided paths, but grab them from cache first
    if they are present and the cache is enabled. 

    Returns: { filename: content, ... }
    """
    if len(paths) == 0:
      return {}

    progress = nvl(progress, self.config.progress)
    compress = nvl(compress, self.compress, self.config.compress)

    locs = self.compute_data_locations(paths)
    locs['remote'] = [ str(x) for x in locs['remote'] ]

    fragments = {}
    if self.enabled:
      fragments = self.get(locs['local'], progress=progress)

    cf = CloudFiles(
      self.meta.cloudpath, 
      progress=progress, 
      secrets=self.config.secrets,
      parallel=self.config.parallel,
    )
    remote_fragments = cf.get(locs['remote'], raw=True)

    for frag in remote_fragments:
      if frag['error'] is not None:
        raise frag['error']

    if self.enabled:
      cf_cache = CloudFiles('file://' + self.path, progress=('to Cache' if progress else None))
      cf_cache.puts(
        compression.transcode(
          ( frag for frag in remote_fragments if frag['content'] is not None ),
          encoding=compress, progress=progress, in_place=False
        ),
        compress=compress,
        raw=True
      )

    remote_fragments_dict = {}
    while remote_fragments:
      res = remote_fragments.pop()
      remote_fragments_dict[res['path']] = compression.decompress(res['content'], res['compress'])

    fragments.update(remote_fragments_dict)
    return fragments
Example #17
    def labels_for_shard(self, cv):
        """
    Try to fetch precalculated labels from `$shardno.labels` (faster); otherwise,
    compute which labels are applicable to this shard from the shard index (much slower).
    """
        labels = CloudFiles(
            cv.skeleton.meta.layerpath).get_json(self.shard_no + '.labels')
        if labels is not None:
            return labels

        labels = cv.skeleton.spatial_index.query(cv.bounds * cv.resolution)
        spec = cv.skeleton.reader.spec

        return [
          lbl for lbl in tqdm(labels, desc="Computing Shard Numbers", disable=(not self.progress))  \
          if spec.compute_shard_location(lbl).shard_number == self.shard_no
        ]
Example #18
    def exists(self, bbox, mip=None):
        if mip is None:
            mip = self.config.mip

        bbox = Bbox.create(bbox, self.meta.bounds(mip), bounded=True)
        realized_bbox = bbox.expand_to_chunk_size(
            self.meta.chunk_size(mip), offset=self.meta.voxel_offset(mip))
        realized_bbox = Bbox.clamp(realized_bbox, self.meta.bounds(mip))

        cloudpaths = chunknames(realized_bbox,
                                self.meta.bounds(mip),
                                self.meta.key(mip),
                                self.meta.chunk_size(mip),
                                protocol=self.meta.path.protocol)

        return CloudFiles(self.meta.cloudpath,
                          progress=self.config.progress).exists(cloudpaths)
Example #19
def du(paths, grand_total, summarize, human_readable):
    """Display disk usage statistics."""
    results = []
    for path in paths:
        npath = normalize_path(path)
        if ispathdir(path):
            cf = CloudFiles(npath, green=True)
            results.append(cf.size(cf.list()))
        else:
            cf = CloudFiles(os.path.dirname(npath), green=True)
            sz = cf.size(os.path.basename(npath))
            if sz is None:
                print(f"cloudfiles: du: {path} does not exist")
                return
            results.append({path: sz})

    def SI(val):
        if not human_readable:
            return val

        if val < 1024:
            return f"{val} Bytes"
        elif val < 2**20:
            return f"{(val / 2**10):.2f} KiB"
        elif val < 2**30:
            return f"{(val / 2**20):.2f} MiB"
        elif val < 2**40:
            return f"{(val / 2**30):.2f} GiB"
        elif val < 2**50:
            return f"{(val / 2**40):.2f} TiB"
        elif val < 2**60:
            return f"{(val / 2**50):.2f} PiB"
        else:
            return f"{(val / 2**60):.2f} EiB"

    summary = {}
    for path, res in zip(paths, results):
        summary[path] = sum(res.values())
        if summarize:
            print(f"{SI(summary[path])}\t{path}")

    if not summarize:
        for res in results:
            for pth, size in res.items():
                print(f"{SI(size)}\t{pth}")

    if grand_total:
        print(f"{SI(sum(summary.values()))}\ttotal")
Example #20
def _rm(path, recursive, progress, parallel, block_size):
    npath = normalize_path(path)
    many, flat, prefix = get_mfp(path, recursive)

    cfpath = npath if ispathdir(path) else os.path.dirname(npath)
    xferpaths = os.path.basename(npath)
    if many:
        xferpaths = CloudFiles(cfpath, green=True).list(prefix=prefix,
                                                        flat=flat)

    if parallel == 1 or not many:
        __rm(cfpath, progress, xferpaths)
        return

    fn = partial(__rm, cfpath, False)
    with tqdm(desc="Deleting", disable=(not progress)) as pbar:
        with pathos.pools.ProcessPool(parallel) as executor:
            for _ in executor.imap(fn, sip(xferpaths, block_size)):
                pbar.update(block_size)
Example #21
  def download_single_as(
    self, path, local_alias, 
    compress=None, start=None, end=None
  ):
    """
    Download a file or a byte range from a file 
    and save it locally as `local_alias`.
    """
    if self.enabled:
      locs = self.compute_data_locations([local_alias])
      if locs['local']:
        return self.get_single(local_alias)

    filedata = CloudFiles(self.meta.cloudpath, secrets=self.config.secrets)[path, start:end]

    if self.enabled:
      self.put([ (local_alias, filedata) ], compress=compress)

    return filedata
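A hedged usage sketch of the cache-aware single download above; `cache` stands in for the enclosing cache object and both paths are placeholders.

# Hypothetical call: fetches the first kilobyte of a remote file and caches it under an alias.
data = cache.download_single_as(
  "segments/12345.frag", "12345.frag",
  start=0, end=1024,
)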