Example #1
    def _upload_individuals(self, mesh_binaries, generate_manifests):
        cf = CloudFiles(self.layer_path)

        content_type = "model/mesh"
        if self.options["encoding"] == "draco":
            content_type = "model/x.draco"

        cf.puts(
            ((f"{self._mesh_dir}/{segid}:{self.options['lod']}:{self._bounds.to_filename()}",
              mesh_binary) for segid, mesh_binary in mesh_binaries.items()),
            compress=self._encoding_to_compression_dict[
                self.options['encoding']],
            cache_control=self.options['cache_control'],
            content_type=content_type,
        )

        if generate_manifests:
            cf.put_jsons(
                ((f"{self._mesh_dir}/{segid}:{self.options['lod']}", {
                    "fragments": [
                        f"{segid}:{self.options['lod']}:{self._bounds.to_filename()}"
                    ]
                }) for segid, mesh_binary in mesh_binaries.items()),
                compress=None,
                cache_control=self.options['cache_control'],
            )
Example #2
    def upload(self,
               files,
               compress,
               cache_control,
               compress_level=None,
               content_type=None,
               progress=None):
        files = list(files)

        progress = progress if progress is not None else self.config.progress

        cf = CloudFiles(self.meta.cloudpath,
                        progress=progress,
                        secrets=self.config.secrets)
        files = list(
            compression.transcode(files,
                                  encoding=compress,
                                  level=compress_level))
        cf.puts(
            files,
            compress=compress,
            compression_level=compress_level,
            cache_control=cache_control,
            content_type=content_type,
            raw=True,
        )

        if self.enabled:
            self.put(files, compress=compress)
            cf_cache = CloudFiles('file://' + self.path,
                                  progress=('to Cache' if progress else None))
            cf_cache.puts(files, compress=compress, raw=True)
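The upload above compresses ahead of time with compression.transcode and then passes raw=True so puts stores the bytes verbatim while still recording the encoding, avoiding a second compression pass. A small sketch of that idiom, using a throwaway local path and dummy file dicts:

from cloudfiles import CloudFiles, compression

cf = CloudFiles("file:///tmp/cloudfiles_raw_demo")  # illustrative path

# file dicts as transcode expects: content plus its current encoding
files = [{"path": "a.txt", "content": b"hello world", "compress": None}]

# re-encode to gzip up front...
files = list(compression.transcode(files, encoding="gzip"))

# ...then upload without recompressing, but label the objects as gzip
cf.puts(files, compress="gzip", raw=True)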
Example #3
def test_slice_notation():
    from cloudfiles import CloudFiles, exceptions
    path = '/tmp/cloudfiles/slice_notation'
    rmtree(path)
    cf = CloudFiles('file://' + path)

    N = 128

    content = b'some_string'
    cf.puts(((str(i), content) for i in range(N)))
    assert sorted(list(cf)) == sorted([str(i) for i in range(N)])
    assert [f['content'] for f in cf[:]] == [content] * N

    assert sorted([f['path'] for f in cf[:100]
                   ]) == sorted([str(i) for i in range(N)])[:100]
    assert [f['content'] for f in cf[:100]] == [content] * 100

    assert sorted([f['path'] for f in cf[100:]
                   ]) == sorted([str(i) for i in range(N)])[100:]
    assert [f['content'] for f in cf[100:]] == [content] * (N - 100)

    assert sorted([f['path'] for f in cf[50:60]
                   ]) == sorted([str(i) for i in range(N)])[50:60]
    assert [f['content'] for f in cf[50:60]] == [content] * 10

    assert sorted([f['path']
                   for f in cf[:0]]) == sorted([str(i) for i in range(N)])[:0]
    assert [f['content'] for f in cf[:0]] == [content] * 0
Example #4
    def download(self, paths, compress=None, progress=None):
        """
    Download the provided paths, but grab them from cache first
    if they are present and the cache is enabled. 

    Returns: { filename: content, ... }
    """
        if len(paths) == 0:
            return {}

        progress = nvl(progress, self.config.progress)
        compress = nvl(compress, self.compress, self.config.compress)

        locs = self.compute_data_locations(paths)
        locs['remote'] = [str(x) for x in locs['remote']]

        fragments = {}
        if self.enabled:
            fragments = self.get(locs['local'], progress=progress)

        # fixes e.g. mesh\info -> mesh/info on Windows
        if self.meta.path.protocol != 'file' and os.path.sep == '\\':
            fragments = {
                "/".join(key.split('\\')): val
                for key, val in fragments.items()
            }

        cf = CloudFiles(
            self.meta.cloudpath,
            progress=progress,
            secrets=self.config.secrets,
            parallel=self.config.parallel,
        )
        remote_fragments = cf.get(locs['remote'], raw=True)

        for frag in remote_fragments:
            if frag['error'] is not None:
                raise frag['error']

        if self.enabled:
            cf_cache = CloudFiles('file://' + self.path,
                                  progress=('to Cache' if progress else None))
            cf_cache.puts(compression.transcode(
                (frag
                 for frag in remote_fragments if frag['content'] is not None),
                encoding=compress,
                progress=progress,
                in_place=False),
                          compress=compress,
                          raw=True)

        remote_fragments_dict = {}
        while remote_fragments:
            res = remote_fragments.pop()
            remote_fragments_dict[res['path']] = compression.decompress(
                res['content'], res['compress'])

        fragments.update(remote_fragments_dict)
        return fragments
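For reference, the error check and the final dict comprehension above assume the shape that cf.get(paths, raw=True) returns: one dict per requested path, with the content still in its stored encoding. Roughly (illustrative values):

# approximate shape of one element from cf.get([...], raw=True)
frag = {
    "path": "mesh/123:0",              # hypothetical key
    "content": b"...gzipped bytes...",
    "compress": "gzip",                # encoding the object was stored with
    "error": None,                     # an exception instance on failure
}
# hence: raise frag['error'] on failure, otherwise
# compression.decompress(frag['content'], frag['compress'])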
Example #5
def MultiResShardedMeshMergeTask(cloudpath: str,
                                 shard_no: str,
                                 draco_compression_level: int = 1,
                                 mesh_dir: Optional[str] = None,
                                 num_lod: int = 1,
                                 spatial_index_db: Optional[str] = None,
                                 progress: bool = False):
    cv = CloudVolume(cloudpath, spatial_index_db=spatial_index_db)
    cv.mip = cv.mesh.meta.mip
    if mesh_dir is None and 'mesh' in cv.info:
        mesh_dir = cv.info['mesh']

    # This looks messy because we are trying to avoid retaining
    # unnecessary memory. In the original skeleton iteration, this was
    # using 50 GB+ memory on minnie65. So it makes sense to be just
    # as careful with a heavier type of object.
    locations = locations_for_labels(cv, labels_for_shard(cv, shard_no))
    filenames = set(itertools.chain(*locations.values()))
    labels = set(locations.keys())
    del locations
    meshes = collect_mesh_fragments(cv, labels, filenames, mesh_dir, progress)
    del labels
    del filenames
    meshes = {
        label: process_mesh(cv, label, mesh_frags, num_lod,
                            draco_compression_level)
        for label, mesh_frags in tqdm(meshes.items(), disable=(not progress))
    }
    data_offset = {
        label: len(manifest)
        for label, (manifest, mesh) in meshes.items()
    }
    meshes = {
        label: mesh + manifest.to_binary()
        for label, (manifest, mesh) in meshes.items()
    }

    if len(meshes) == 0:
        return

    shard_files = synthesize_shard_files(cv.mesh.reader.spec, meshes,
                                         data_offset)
    del meshes
    del data_offset

    if len(shard_files) != 1:
        raise ValueError("Only one shard file should be generated per task. "
                         "Expected: {} Got: {} ".format(
                             str(shard_no), ", ".join(shard_files.keys())))

    cf = CloudFiles(cv.mesh.meta.layerpath, progress=progress)
    cf.puts(
        ((fname, data) for fname, data in shard_files.items()),
        compress=False,
        content_type='application/octet-stream',
        cache_control='no-cache',
    )
Example #6
    def put(self, files, progress=None, compress=None, compress_level=None):
        """files: [ (filename, content) ]"""
        if progress is None:
            progress = self.config.progress

        if compress is None:
            compress = self.compress

        if compress is None:
            compress = self.config.compress

        save_location = 'file://' + self.path
        progress = 'to Cache' if progress else None
        cf = CloudFiles(save_location, progress=progress)
        cf.puts(files, compress=compress, compression_level=compress_level)
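As the docstring notes, files is a sequence of (filename, content) pairs, and the cache is just another CloudFiles target addressed via the file:// scheme. A hypothetical call, assuming cache is an instance of this class:

# hypothetical: write two small entries into the local cache directory
cache.put(
    [("info", b"{}"), ("1:0", b"\x00\x01\x02")],
    compress="gzip",
)
# equivalent to CloudFiles('file://' + cache.path).puts(..., compress='gzip')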
Example #7
    def download(self, paths, compress=None, progress=None):
        """
    Download the provided paths, but grab them from cache first
    if they are present and the cache is enabled. 

    Returns: { filename: content, ... }
    """
        if len(paths) == 0:
            return {}

        progress = nvl(progress, self.config.progress)
        compress = nvl(compress, self.compress, self.config.compress)

        locs = self.compute_data_locations(paths)
        locs['remote'] = [str(x) for x in locs['remote']]

        fragments = {}
        if self.enabled:
            fragments = self.get(locs['local'], progress=progress)

        cf = CloudFiles(self.meta.cloudpath,
                        progress=progress,
                        secrets=self.config.secrets)
        remote_fragments = cf.get(locs['remote'], raw=True)

        for frag in remote_fragments:
            if frag['error'] is not None:
                raise frag['error']

        if self.enabled:
            cf_cache = CloudFiles('file://' + self.path,
                                  progress=('to Cache' if progress else None))
            cf_cache.puts(compression.transcode(
                (frag
                 for frag in remote_fragments if frag['content'] is not None),
                encoding=compress,
                progress=progress,
                in_place=False),
                          compress=compress,
                          raw=True)

        remote_fragments = {
          res['path']: compression.decompress(res['content'], res['compress']) \
          for res in remote_fragments
        }

        fragments.update(remote_fragments)
        return fragments
Example #8
    def upload_individuals(self, vol, path, bbox, skeletons):
        skeletons = skeletons.values()

        if not self.will_postprocess:
            vol.skeleton.upload(skeletons)
            return

        bbox = bbox * vol.resolution
        cf = CloudFiles(path, progress=vol.progress)
        cf.puts(
            ((f"{skel.id}:{bbox.to_filename()}", pickle.dumps(skel))
             for skel in skeletons),
            compress='gzip',
            content_type="application/python-pickle",
            cache_control=False,
        )
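Because the fragments above are stored as gzipped pickles, reading one back is the mirror image: cf.get with a single path returns the decompressed bytes, which pickle.loads restores. A sketch with a made-up skeleton id and bbox filename:

import pickle
from cloudfiles import CloudFiles

cf = CloudFiles(path)  # same path the fragments were uploaded to
# hypothetical key: "<skeleton id>:<bbox filename>"
binary = cf.get("101:0-512_0-512_0-64")
skel = pickle.loads(binary)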
Example #9
def test_transfer_semantics(compression):
    from cloudfiles import CloudFiles, exceptions
    path = '/tmp/cloudfiles/xfer'
    rmtree(path)
    cff = CloudFiles('file://' + path)
    cfm = CloudFiles('mem://cloudfiles/xfer')

    N = 128

    content = b'some_string'
    cff.puts(((str(i), content) for i in range(N)), compress=compression)
    assert sorted(list(cff)) == sorted([str(i) for i in range(N)])
    assert [f['content'] for f in cff[:]] == [content] * N

    assert sorted([f['path'] for f in cff[:100]
                   ]) == sorted([str(i) for i in range(N)])[:100]
    assert [f['content'] for f in cff[:100]] == [content] * 100

    cfm[:] = cff
    assert sorted(list(cfm)) == sorted([str(i) for i in range(N)])
    assert [f['content'] for f in cfm[:]] == [content] * N

    cfm.delete(list(cfm))
    assert list(cfm) == []

    cfm.transfer_from('file://' + path)
    assert sorted(list(cfm)) == sorted([str(i) for i in range(N)])
    assert [f['content'] for f in cfm[:]] == [content] * N

    cfm.delete(list(cfm))

    cff.transfer_to(cfm.cloudpath)
    assert sorted(list(cfm)) == sorted([str(i) for i in range(N)])
    assert [f['content'] for f in cfm[:]] == [content] * N
    cfm.delete(list(cfm))

    cff.transfer_to(cfm.cloudpath, reencode='br')
    assert sorted(list(cfm)) == sorted([str(i) for i in range(N)])
    assert [f['content'] for f in cfm[:]] == [content] * N

    data = cfm._get_connection()._data
    data = [os.path.splitext(d)[1] for d in data.keys()]
    assert all([ext == '.br' for ext in data])

    cfm.delete(list(cfm))
    cff.delete(list(cff))
Example #10
    def upload(self,
               files,
               compress,
               cache_control,
               compress_level=None,
               content_type=None,
               progress=None):
        files = list(files)

        progress = progress if progress is not None else self.config.progress

        cf = CloudFiles(self.meta.cloudpath, progress=progress)
        cf.puts(files,
                compress=compress,
                compression_level=compress_level,
                cache_control=cache_control,
                content_type=content_type)

        if self.enabled:
            self.put(files, compress=compress)
Example #11
def test_get_generator(num_threads, green):
  from cloudfiles import CloudFiles, exceptions
  path = '/tmp/cloudfiles/gen'
  rmtree(path)
  url = 'file://' + path

  cf = CloudFiles(url, num_threads=num_threads, green=green)

  gen = ( (str(i), b'hello world') for i in range(100) )
  cf.puts(gen)

  files = cf.get(( str(i) for i in range(100) ), total=100)

  assert all([ f['error'] is None for f in files ])
  assert len(files) == 100
  assert all([ f['content'] == b'hello world' for f in files ])

  fnames = [ str(i) for i in range(100) ]
  assert sorted(list(cf.list())) == sorted(fnames)

  cf.delete(( str(i) for i in range(100) ))
  assert list(cf.list()) == []
Example #12
def ShardedFromUnshardedSkeletonMergeTask(
    src: str,
    dest: str,
    shard_no: str,
    cache_control: bool = False,
    skel_dir: Optional[str] = None,
    progress: bool = False,
):
    cv_src = CloudVolume(src)

    if skel_dir is None and 'skeletons' in cv_src.info:
        skel_dir = cv_src.info['skeletons']

    cv_dest = CloudVolume(dest, skel_dir=skel_dir, progress=progress)

    labels = labels_for_shard(cv_dest, shard_no, progress)
    skeletons = cv_src.skeleton.get(labels)
    del labels

    if len(skeletons) == 0:
        return

    skeletons = strip_integer_attributes(skeletons)
    skeletons = {skel.id: skel.to_precomputed() for skel in skeletons}
    shard_files = synthesize_shard_files(cv_dest.skeleton.reader.spec,
                                         skeletons)

    if len(shard_files) != 1:
        raise ValueError(
            "Only one shard file should be generated per task. Expected: {} Got: {} "
            .format(str(shard_no), ", ".join(shard_files.keys())))

    cf = CloudFiles(cv_dest.skeleton.meta.layerpath, progress=progress)
    cf.puts(
        ((fname, data) for fname, data in shard_files.items()),
        compress=False,
        content_type='application/octet-stream',
        cache_control='no-cache',
    )
Example #13
    def execute(self):
        # cache is necessary for local computation, but on GCE download is very fast
        # so cache isn't necessary.
        cv = CloudVolume(self.cloudpath,
                         progress=self.progress,
                         spatial_index_db=self.spatial_index_db)

        # This looks messy because we are trying to avoid retaining
        # unnecessary memory. In the original iteration, this was
        # using 50 GB+ memory on minnie65. With changes to this
        # and the spatial_index, we are getting it down to something reasonable.
        locations = self.locations_for_labels(
            labels_for_shard(cv, self.shard_no, self.progress), cv)
        filenames = set(itertools.chain(*locations.values()))
        labels = set(locations.keys())
        del locations
        skeletons = self.get_unfused(labels, filenames, cv)
        del labels
        del filenames
        skeletons = self.process_skeletons(skeletons, in_place=True)

        if len(skeletons) == 0:
            return

        shard_files = synthesize_shard_files(cv.skeleton.reader.spec,
                                             skeletons)

        if len(shard_files) != 1:
            raise ValueError(
                "Only one shard file should be generated per task. Expected: {} Got: {} "
                .format(str(self.shard_no), ", ".join(shard_files.keys())))

        cf = CloudFiles(cv.skeleton.meta.layerpath, progress=self.progress)
        cf.puts(
            ((fname, data) for fname, data in shard_files.items()),
            compress=False,
            content_type='application/octet-stream',
            cache_control='no-cache',
        )
Example #14
    def transfer_to(self,
                    cloudpath,
                    bbox,
                    mip,
                    block_size=None,
                    compress=True,
                    compress_level=None):
        """
    Transfer files from one storage location to another, bypassing
    volume painting. This enables using a single CloudVolume instance
    to transfer big volumes. In some cases, gsutil or aws s3 cli tools
    may be more appropriate. This method is provided for convenience. It
    may be optimized for better performance over time as demand requires.

    cloudpath (str): path to storage layer
    bbox (Bbox object): ROI to transfer
    mip (int): resolution level
    block_size (int): number of file chunks to transfer per I/O batch.
    compress (bool): Set to False to upload as uncompressed
    """
        from cloudvolume import CloudVolume

        if mip is None:
            mip = self.config.mip

        if self.is_sharded(mip):
            raise exceptions.UnsupportedFormatError(
                f"Sharded sources are not supported. got: {self.meta.cloudpath}"
            )

        bbox = Bbox.create(bbox, self.meta.bounds(mip))
        realized_bbox = bbox.expand_to_chunk_size(
            self.meta.chunk_size(mip), offset=self.meta.voxel_offset(mip))
        realized_bbox = Bbox.clamp(realized_bbox, self.meta.bounds(mip))

        if bbox != realized_bbox:
            raise exceptions.AlignmentError(
                "Unable to transfer non-chunk aligned bounding boxes. Requested: {}, Realized: {}"
                .format(bbox, realized_bbox))

        default_block_size_MB = 50  # MB
        chunk_MB = self.meta.chunk_size(mip).rectVolume() * np.dtype(
            self.meta.dtype).itemsize * self.meta.num_channels
        if self.meta.layer_type == 'image':
            # kind of an average guess for some EM datasets, have seen up to 1.9x and as low as 1.1
            # affinities are also images, but have very different compression ratios. e.g. 3x for kempressed
            chunk_MB /= 1.3
        else:  # segmentation
            chunk_MB /= 100.0  # compression ratios between 80 and 800....
        chunk_MB /= 1024.0 * 1024.0

        if block_size:
            step = block_size
        else:
            step = int(default_block_size_MB // chunk_MB) + 1
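        # Worked example with hypothetical numbers: a 64x64x64 uint8 image
        # chunk is 262,144 bytes; divided by the assumed 1.3x compression
        # ratio and converted to MB that is ~0.19 MB per chunk, so
        # step = int(50 // chunk_MB) + 1 comes out to roughly 260 chunks
        # per upload batch.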

        try:
            destvol = CloudVolume(cloudpath, mip=mip)
        except exceptions.InfoUnavailableError:
            destvol = CloudVolume(cloudpath,
                                  mip=mip,
                                  info=self.meta.info,
                                  provenance=self.meta.provenance.serialize())
            destvol.commit_info()
            destvol.commit_provenance()
        except exceptions.ScaleUnavailableError:
            destvol = CloudVolume(cloudpath)
            for i in range(len(destvol.scales) + 1, len(self.meta.scales)):
                destvol.scales.append(self.meta.scales[i])
            destvol.commit_info()
            destvol.commit_provenance()

        if destvol.image.is_sharded(mip):
            raise exceptions.UnsupportedFormatError(
                f"Sharded destinations are not supported. got: {destvol.cloudpath}"
            )

        num_blocks = np.ceil(
            self.meta.bounds(mip).volume() /
            self.meta.chunk_size(mip).rectVolume()) / step
        num_blocks = int(np.ceil(num_blocks))

        cloudpaths = chunknames(bbox,
                                self.meta.bounds(mip),
                                self.meta.key(mip),
                                self.meta.chunk_size(mip),
                                protocol=self.meta.path.protocol)

        pbar = tqdm(
            desc='Transferring Blocks of {} Chunks'.format(step),
            unit='blocks',
            disable=(not self.config.progress),
            total=num_blocks,
        )

        cfsrc = CloudFiles(self.meta.cloudpath, secrets=self.config.secrets)
        cfdest = CloudFiles(cloudpath)

        def check(files):
            errors = [
              file for file in files if \
              (file['content'] is None or file['error'] is not None)
            ]
            if errors:
                error_paths = [f['path'] for f in errors]
                raise exceptions.EmptyFileException(
                    "{} were empty or had IO errors.".format(
                        ", ".join(error_paths)))
            return files

        with pbar:
            for srcpaths in sip(cloudpaths, step):
                files = check(cfsrc.get(srcpaths, raw=True))
                cfdest.puts(compression.transcode(files,
                                                  encoding=compress,
                                                  level=compress_level,
                                                  in_place=True),
                            compress=compress,
                            content_type=tx.content_type(destvol),
                            raw=True)
                pbar.update()
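A hypothetical invocation of the method above, assuming vol is a non-sharded precomputed CloudVolume, the destination bucket is writable, and the requested bbox is chunk aligned:

from cloudvolume import CloudVolume

vol = CloudVolume("gs://example/source-volume")    # illustrative source
vol.image.transfer_to(
    "gs://example/destination-volume",             # illustrative destination
    bbox=vol.bounds,                               # whole volume, chunk aligned
    mip=0,
)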