def skeldir(cloudpath):
    cf = CloudFiles(cloudpath)
    info = cf.get_json('info')

    skel_dir = 'skeletons/'
    if 'skeletons' in info:
        skel_dir = info['skeletons']
    return skel_dir
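A minimal usage sketch for the helper above; the bucket path is a made-up placeholder, not part of the original snippet:

# Resolve the skeleton directory declared in a Precomputed layer's info file.
skel_dir = skeldir('gs://my-bucket/segmentation')  # hypothetical path
print(skel_dir)  # the info file's 'skeletons' entry, or 'skeletons/' if absent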
Example #2
    def execute(self):
        cf = CloudFiles(self.layer_path)
        self._info = cf.get_json('info')

        if self.mesh_dir is None and 'mesh' in self._info:
            self.mesh_dir = self._info['mesh']

        self._generate_manifests(cf)
Example #3
    def fetch_info(self):
        cf = CloudFiles(self.cloudpath, secrets=self.config.secrets)
        self.attributes["root"] = cf.get_json("attributes.json")

        if 'pixelResolution' in self.attributes["root"]:
            resolution = self.attributes["root"]["pixelResolution"][
                "dimensions"]
        else:
            resolution = self.attributes["root"]["resolution"]

        scale_dirs = [
            cf.join(f"s{i}", "attributes.json")
            for i in range(len(self.attributes["root"]["scales"]))
        ]
        scale_attrs = cf.get_json(scale_dirs)
        self.attributes["scales"] = scale_attrs

        # Glossing over the fact that each scale can have a different
        # data type; in practice they are usually all the same.
        data_type = scale_attrs[0]["dataType"]

        info = PrecomputedMetadata.create_info(
            num_channels=1,
            layer_type="image",
            data_type=data_type,
            encoding=scale_attrs[0]["compression"]["type"],
            resolution=resolution,
            voxel_offset=[0, 0, 0],
            volume_size=scale_attrs[0]["dimensions"][:3],
            chunk_size=scale_attrs[0]["blockSize"],
        )

        for scale in scale_attrs[1:]:
            self.add_scale(scale["downsamplingFactors"],
                           chunk_size=scale["blockSize"],
                           encoding=scale["compression"]["type"],
                           info=info)

        return info
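For reference, a sketch of the per-scale attributes.json layout this parser expects; only the key names mirror the code above, the values are illustrative:

# Illustrative scale attributes; values are made up, keys are those read above.
example_scale_attrs = {
    "dataType": "uint8",
    "compression": {"type": "gzip"},
    "dimensions": [2048, 2048, 512],
    "blockSize": [64, 64, 64],
    "downsamplingFactors": [2, 2, 1],
}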
Example #4
def test_get_json_order(s3, protocol):
  from cloudfiles import CloudFiles
  url = compute_url(protocol, 'get_json_order')
  cf = CloudFiles(url)

  N = 5300
  cf.put_jsons(( (str(z), [ z ]) for z in range(N) ))

  contents = cf.get_json(( str(z) for z in range(N) ))

  for z, content in enumerate(contents):
    assert content[0] == z

  cf.delete(( str(z) for z in range(N) ))
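Outside the test harness, the same bulk round-trip looks like this; the local destination path is an assumption:

from cloudfiles import CloudFiles

# Hypothetical local destination; any supported protocol behaves the same way.
cf = CloudFiles('file:///tmp/get_json_demo')
cf.put_jsons([('a', {'x': 1}), ('b', {'x': 2})])
print(cf.get_json(['a', 'b']))  # [{'x': 1}, {'x': 2}] -- results follow request order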
Example #5
class JSONLayerBase(BaseLayerBackend):
    """A directory with one text file per section
    """
    def __init__(self,
                 path,
                 backend,
                 reference=None,
                 overwrite=True,
                 **kwargs):
        super().__init__(**kwargs)
        self.path = path
        self.dtype = 'O'
        self.backend = backend
        self.cf = CloudFiles(self.path, progress=False)

    def __str__(self):
        return "JSON {}".format(self.path)

    def get_sublayer(self, name, layer_type=None, path=None, **kwargs):
        # Resolve the layer type first, since it is needed to build the default path.
        if layer_type is None:
            layer_type = self.get_layer_type()

        if path is None:
            path = os.path.join(self.path, layer_type, name)

        return self.backend.create_layer(path=path,
                                         reference=self,
                                         layer_type=layer_type,
                                         **kwargs)

    def get_filename(self, z):
        return f'{z:06d}'

    def read_backend(self, bcube, **kwargs):
        z_range = bcube.z_range()
        corgie_logger.debug(f'Read from {str(self)}, z: {z_range}')
        data = []
        for z in z_range:
            f = self.cf.get_json(self.get_filename(z))
            data.append(f)
        return data

    def write_backend(self, data, bcube, **kwargs):
        z_range = range(*bcube.z_range())
        assert (len(data) == len(z_range))
        corgie_logger.debug(f'Write to {str(self)}, z: {z_range}')
        filepaths = [self.get_filename(z) for z in z_range]
        self.cf.put_jsons(zip(filepaths, data), cache_control='no-cache')
Example #6
def MeshManifestPrefixTask(layer_path: str,
                           prefix: str,
                           lod: int = 0,
                           mesh_dir: Optional[str] = None):
    """
  Finalize mesh generation by post-processing chunk fragment
  lists into mesh fragment manifests.
  These are necessary for neuroglancer to know which mesh
  fragments to download for a given segid.

  If we parallelize using single-digit prefixes ['0','1',..,'9'], all meshes will
  be processed correctly. But if we use two-digit prefixes ['10','11',..,'99'],
  meshes with segids in [0,9] won't get processed and need to be handled
  specifically by creating tasks that each process a single mesh: ['0:','1:',..,'9:'].
  """
    cf = CloudFiles(layer_path)
    info = cf.get_json('info')

    if mesh_dir is None and 'mesh' in info:
        mesh_dir = info['mesh']

    prefix = cf.join(mesh_dir, prefix)
    segids = defaultdict(list)

    regexp = re.compile(r'(\d+):(\d+):')
    for filename in cf.list(prefix=prefix):
        filename = os.path.basename(filename)
        # `match` anchors at the beginning (^); `search` scans anywhere in the string.
        matches = re.search(regexp, filename)

        if not matches:
            continue

        segid, mlod = matches.groups()
        segid, mlod = int(segid), int(mlod)

        if mlod != lod:
            continue

        segids[segid].append(filename)

    items = ((f"{mesh_dir}/{segid}:{lod}", {
        "fragments": frags
    }) for segid, frags in segids.items())

    cf.put_jsons(items)
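A sketch of the fan-out the docstring describes, using two-digit prefixes plus explicit '<digit>:' tasks; the layer path is hypothetical and the tasks are shown being called directly rather than enqueued:

# Two-digit prefixes miss segids 0-9, so add explicit '<digit>:' tasks for them.
digits = [str(i) for i in range(10)]
prefixes = [a + b for a in digits for b in digits] + [d + ':' for d in digits]
for p in prefixes:
    MeshManifestPrefixTask('gs://my-bucket/segmentation', prefix=p)  # hypothetical path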
Example #7
def test_read_write(s3, protocol, num_threads, green):
    from cloudfiles import CloudFiles, exceptions
    url = compute_url(protocol, "rw")

    cf = CloudFiles(url, num_threads=num_threads, green=green)

    content = b'some_string'
    cf.put('info', content, compress=None, cache_control='no-cache')
    cf['info2'] = content

    assert cf.get('info') == content
    assert cf['info2'] == content
    assert cf['info2', 0:3] == content[0:3]
    assert cf['info2', :] == content[:]
    assert cf.get('nonexistentfile') is None

    assert cf.get('info', return_dict=True) == {"info": content}
    assert cf.get(['info', 'info2'], return_dict=True) == {
        "info": content,
        "info2": content
    }

    del cf['info2']
    assert cf.exists('info2') == False

    num_infos = max(num_threads, 1)
    results = cf.get(['info' for i in range(num_infos)])

    assert len(results) == num_infos
    assert results[0]['path'] == 'info'
    assert results[0]['content'] == content
    assert all(map(lambda x: x['error'] is None, results))
    assert cf.get(['nonexistentfile'])[0]['content'] is None

    cf.delete('info')

    cf.put_json('info', {'omg': 'wow'}, cache_control='no-cache')
    results = cf.get_json('info')
    assert results == {'omg': 'wow'}

    cf.delete('info')

    if protocol == 'file':
        rmtree(url)
Example #8
    def __init__(self,
                 volume_path: str,
                 mip: int = 0,
                 expand_margin_size: Cartesian = Cartesian(0, 0, 0),
                 expand_direction: int = None,
                 fill_missing: bool = False,
                 validate_mip: int = None,
                 blackout_sections: bool = None,
                 dry_run: bool = False,
                 name: str = 'cutout'):
        super().__init__(name=name)
        self.volume_path = volume_path
        self.mip = mip
        self.fill_missing = fill_missing
        self.validate_mip = validate_mip
        self.blackout_sections = blackout_sections
        self.dry_run = dry_run

        if isinstance(expand_margin_size, tuple):
            expand_margin_size = Cartesian.from_collection(expand_margin_size)

        if expand_direction == 1:
            expand_margin_size = (0, 0, 0, *expand_margin_size)
        elif expand_direction == -1:
            expand_margin_size = (*expand_margin_size, 0, 0, 0)
        else:
            assert expand_direction is None
        self.expand_margin_size = expand_margin_size

        if blackout_sections:
            stor = CloudFiles(volume_path)
            self.blackout_section_ids = stor.get_json(
                'blackout_section_ids.json')['section_ids']

        verbose = (logging.getLogger().getEffectiveLevel() <= 30)
        self.vol = CloudVolume(self.volume_path,
                               bounded=False,
                               fill_missing=self.fill_missing,
                               progress=verbose,
                               mip=self.mip,
                               cache=False,
                               green_threads=True)
Example #9
def create_xfer_meshes_tasks(
  src:str,
  dest:str,
  mesh_dir:Optional[str] = None, 
  magnitude=2,
):
  cv_src = CloudVolume(src)
  cf_dest = CloudFiles(dest)

  if not mesh_dir:
    info = cf_dest.get_json("info")
    if info.get("mesh", None):
      mesh_dir = info.get("mesh")

  cf_dest.put_json(f"{mesh_dir}/info", cv_src.mesh.meta.info)

  alphabet = [ str(i) for i in range(10) ]
  if cv_src.mesh.meta.is_sharded():
    alphabet += [ 'a', 'b', 'c', 'd', 'e', 'f' ]

  prefixes = itertools.product(*([ alphabet ] * magnitude))
  prefixes = [ "".join(x) for x in prefixes ]

  # explicitly enumerate all prefixes smaller than the magnitude.
  for i in range(1, magnitude):
    explicit_prefix = itertools.product(*([ alphabet ] * i))
    explicit_prefix = [ "".join(x) for x in explicit_prefix ]
    if cv_src.mesh.meta.is_sharded():
      prefixes += [ f"{x}." for x in explicit_prefix ]
    else:
      prefixes += [ f"{x}:0" for x in explicit_prefix ]

  return [
    partial(TransferMeshFilesTask,
      src=src,
      dest=dest,
      prefix=prefix,
      mesh_dir=mesh_dir,
    )
    for prefix in prefixes
  ]
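For clarity, the prefix enumeration above reduces to the following for magnitude=2 on an unsharded mesh; the printout is just a sanity check:

import itertools

alphabet = [str(i) for i in range(10)]
prefixes = ["".join(x) for x in itertools.product(alphabet, repeat=2)]  # '00'..'99'
prefixes += [f"{x}:0" for x in alphabet]  # single-character prefixes handled explicitly
print(len(prefixes))  # 110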
Example #10
def MeshManifestFilesystemTask(
    layer_path: str,
    lod: int = 0,
    mesh_dir: Optional[str] = None,
):
    cf = CloudFiles(layer_path)
    info = cf.get_json('info')

    if mesh_dir is None and 'mesh' in info:
        mesh_dir = info['mesh']

    filepath = cloudfiles.paths.asfilepath(cf.join(layer_path, mesh_dir))
    segids = defaultdict(list)

    regexp = re.compile(r'(\d+):(\d+):')
    for entry in os.scandir(filepath):
        if not entry.is_file():
            continue

        filename = os.path.basename(entry.name)
        # `match` anchors at the beginning (^); `search` scans anywhere in the string.
        matches = re.search(regexp, filename)

        if not matches:
            continue

        segid, mlod = matches.groups()
        segid, mlod = int(segid), int(mlod)

        if mlod != lod:
            continue

        filename, ext = os.path.splitext(filename)
        segids[segid].append(filename)

    items = ((f"{mesh_dir}/{segid}:{lod}", {
        "fragments": frags
    }) for segid, frags in segids.items())

    cf.put_jsons(items)
Example #11
def configure_multires_info(
  cloudpath:str,
  vertex_quantization_bits:int, 
  mesh_dir:str
):
  """
  Computes properties and uploads a multires 
  mesh info file
  """
  assert vertex_quantization_bits in (10, 16)

  vol = CloudVolume(cloudpath)

  mesh_dir = mesh_dir or vol.info.get("mesh", None)

  if not "mesh" in vol.info:
    vol.info['mesh'] = mesh_dir
    vol.commit_info()

  res = vol.meta.resolution(vol.mesh.meta.mip)

  cf = CloudFiles(cloudpath)
  info_filename = f'{mesh_dir}/info'
  mesh_info = cf.get_json(info_filename) or {}
  new_mesh_info = copy.deepcopy(mesh_info)
  new_mesh_info['@type'] = "neuroglancer_multilod_draco"
  new_mesh_info['vertex_quantization_bits'] = vertex_quantization_bits
  new_mesh_info['transform'] = [ 
    res[0], 0,      0,      0,
    0,      res[1], 0,      0,
    0,      0,      res[2], 0,
  ]
  new_mesh_info['lod_scale_multiplier'] = 1.0

  if new_mesh_info != mesh_info:
    cf.put_json(
      info_filename, new_mesh_info, 
      cache_control="no-cache"
    )
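The 'transform' field written above is a 3x4 row-major matrix with the layer resolution on the diagonal, so stored mesh coordinates are scaled into physical units. A hypothetical invocation (the cloudpath and mesh_dir values are assumptions):

# Hypothetical call; the bucket path and mesh_dir are placeholders, not from the example.
configure_multires_info(
    'gs://my-bucket/segmentation',
    vertex_quantization_bits=16,  # must be 10 or 16 (see the assert above)
    mesh_dir='mesh',
)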
Example #12
def test_http_read():
    from cloudfiles import CloudFiles, exceptions
    cf = CloudFiles(
        "https://storage.googleapis.com/seunglab-test/test_v0/black/")
    info = cf.get_json('info')

    assert info == {
        "data_type": "uint8",
        "num_channels": 1,
        "scales": [{
            "chunk_sizes": [[64, 64, 50]],
            "encoding": "raw",
            "key": "6_6_30",
            "resolution": [6, 6, 30],
            "size": [1024, 1024, 100],
            "voxel_offset": [0, 0, 0]
        }],
        "type": "image"
    }
Example #13
def create_meshing_tasks(
    layer_path, mip, shape=(448, 448, 448), 
    simplification=True, max_simplification_error=40,
    mesh_dir=None, cdn_cache=False, dust_threshold=None,
    object_ids=None, progress=False, fill_missing=False,
    encoding='precomputed', spatial_index=True, sharded=False,
    compress='gzip'
  ):
  shape = Vec(*shape)

  vol = CloudVolume(layer_path, mip)

  if mesh_dir is None:
    mesh_dir = 'mesh_mip_{}_err_{}'.format(mip, max_simplification_error)

  if not 'mesh' in vol.info:
    vol.info['mesh'] = mesh_dir
    vol.commit_info()

  cf = CloudFiles(layer_path)
  info_filename = '{}/info'.format(mesh_dir)
  mesh_info = cf.get_json(info_filename) or {}
  mesh_info['@type'] = 'neuroglancer_legacy_mesh'
  mesh_info['mip'] = int(vol.mip)
  mesh_info['chunk_size'] = shape.tolist()
  if spatial_index:
    mesh_info['spatial_index'] = {
        'resolution': vol.resolution.tolist(),
        'chunk_size': (shape*vol.resolution).tolist(),
    }
  cf.put_json(info_filename, mesh_info)

  class MeshTaskIterator(FinelyDividedTaskIterator):
    def task(self, shape, offset):
      return MeshTask(
        shape=shape.clone(),
        offset=offset.clone(),
        layer_path=layer_path,
        mip=vol.mip,
        simplification_factor=(0 if not simplification else 100),
        max_simplification_error=max_simplification_error,
        mesh_dir=mesh_dir, 
        cache_control=('' if cdn_cache else 'no-cache'),
        dust_threshold=dust_threshold,
        progress=progress,
        object_ids=object_ids,
        fill_missing=fill_missing,
        encoding=encoding,
        spatial_index=spatial_index,
        sharded=sharded,
        compress=compress,
      )

    def on_finish(self):
      vol.provenance.processing.append({
        'method': {
          'task': 'MeshTask',
          'layer_path': layer_path,
          'mip': vol.mip,
          'shape': shape.tolist(),
          'simplification': simplification,
          'max_simplification_error': max_simplification_error,
          'mesh_dir': mesh_dir,
          'fill_missing': fill_missing,
          'cdn_cache': cdn_cache,
          'dust_threshold': dust_threshold,
          'encoding': encoding,
          'object_ids': object_ids,
          'spatial_index': spatial_index,
          'sharded': sharded,
          'compress': compress,
        },
        'by': operator_contact(),
        'date': strftime('%Y-%m-%d %H:%M %Z'),
      }) 
      vol.commit_provenance()

  return MeshTaskIterator(vol.mip_bounds(mip), shape)
Example #14
def create_spatial_index_mesh_tasks(
  cloudpath:str, 
  shape:Tuple[int,int,int] = (448,448,448), 
  mip:int = 0, 
  fill_missing:bool = False, 
  compress:Optional[Union[str,bool]] = 'gzip', 
  mesh_dir:Optional[str] = None
):
  """
  The main way to add a spatial index is to use the MeshTask,
  but old datasets or broken datasets may need it to be 
  reconstituted. An alternative use is to create the spatial index
  over a different area size than the mesh task.
  """
  shape = Vec(*shape)

  vol = CloudVolume(cloudpath, mip=mip)

  if mesh_dir is None:
    mesh_dir = f"mesh_mip_{mip}_err_40"

  if not "mesh" in vol.info:
    vol.info['mesh'] = mesh_dir
    vol.commit_info()

  cf = CloudFiles(cloudpath)
  info_filename = '{}/info'.format(mesh_dir)
  mesh_info = cf.get_json(info_filename) or {}
  new_mesh_info = copy.deepcopy(mesh_info)
  new_mesh_info['@type'] = new_mesh_info.get('@type', 'neuroglancer_legacy_mesh') 
  new_mesh_info['mip'] = new_mesh_info.get("mip", int(vol.mip))
  new_mesh_info['chunk_size'] = shape.tolist()
  new_mesh_info['spatial_index'] = {
    'resolution': vol.resolution.tolist(),
    'chunk_size': (shape * vol.resolution).tolist(),
  }
  if new_mesh_info != mesh_info:
    cf.put_json(info_filename, new_mesh_info)

  class SpatialIndexMeshTaskIterator(FinelyDividedTaskIterator):
    def task(self, shape, offset):
      return partial(MeshSpatialIndex, 
        cloudpath=cloudpath,
        shape=shape,
        offset=offset,
        mip=int(mip),
        fill_missing=bool(fill_missing),
        compress=compress,
        mesh_dir=mesh_dir,
      )

    def on_finish(self):
      vol.provenance.processing.append({
        'method': {
          'task': 'MeshSpatialIndex',
          'cloudpath': vol.cloudpath,
          'shape': shape.tolist(),
          'mip': int(mip),
          'mesh_dir': mesh_dir,
          'fill_missing': fill_missing,
          'compress': compress,
        },
        'by': operator_contact(),
        'date': strftime('%Y-%m-%d %H:%M %Z'),
      }) 
      vol.commit_provenance()

  return SpatialIndexMeshTaskIterator(vol.bounds, shape)
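A hypothetical way to drive the returned iterator; the cloudpath is a placeholder and the partials are executed inline instead of being dispatched to a task queue:

# Each yielded item is a partial(MeshSpatialIndex, ...); calling it runs the task.
tasks = create_spatial_index_mesh_tasks('gs://my-bucket/segmentation', mip=0)
for task in tasks:
    task()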
Example #15
def create_spatial_index_skeleton_tasks(
        cloudpath: str,
        shape: Tuple[int, int, int] = (448, 448, 448),
        mip: int = 0,
        fill_missing: bool = False,
        compress: Optional[Union[str, bool]] = 'gzip',
        skel_dir: Optional[str] = None):
    """
  The main way to add a spatial index is to use the SkeletonTask,
  but old datasets or broken datasets may need it to be 
  reconstituted. An alternative use is to create the spatial index
  over a different area size than the skeleton task.
  """
    shape = Vec(*shape)

    vol = CloudVolume(cloudpath, mip=mip)

    if skel_dir is None and not vol.info.get("skeletons", None):
        skel_dir = f"skeletons_mip_{mip}"
    elif skel_dir is None and vol.info.get("skeletons", None):
        skel_dir = vol.info["skeletons"]

    if not "skeletons" in vol.info:
        vol.info['skeletons'] = skel_dir
        vol.commit_info()

    cf = CloudFiles(cloudpath)
    info_filename = cf.join(skel_dir, 'info')
    skel_info = cf.get_json(info_filename) or {}
    new_skel_info = copy.deepcopy(skel_info)
    new_skel_info['@type'] = new_skel_info.get('@type',
                                               'neuroglancer_skeletons')
    new_skel_info['mip'] = new_skel_info.get("mip", int(vol.mip))
    new_skel_info['chunk_size'] = shape.tolist()
    new_skel_info['spatial_index'] = {
        'resolution': vol.resolution.tolist(),
        'chunk_size': (shape * vol.resolution).tolist(),
    }
    if new_skel_info != skel_info:
        cf.put_json(info_filename, new_skel_info)

    vol = CloudVolume(cloudpath, mip=mip)  # reload spatial_index

    class SpatialIndexSkeletonTaskIterator(FinelyDividedTaskIterator):
        def task(self, shape, offset):
            return partial(
                SpatialIndexTask,
                cloudpath=cloudpath,
                shape=shape,
                offset=offset,
                subdir=skel_dir,
                precision=vol.skeleton.spatial_index.precision,
                mip=int(mip),
                fill_missing=bool(fill_missing),
                compress=compress,
            )

        def on_finish(self):
            vol.provenance.processing.append({
                'method': {
                    'task': 'SpatialIndexTask',
                    'cloudpath': vol.cloudpath,
                    'shape': shape.tolist(),
                    'mip': int(mip),
                    'subdir': skel_dir,
                    'fill_missing': fill_missing,
                    'compress': compress,
                },
                'by': operator_contact(),
                'date': strftime('%Y-%m-%d %H:%M %Z'),
            })
            vol.commit_provenance()

    return SpatialIndexSkeletonTaskIterator(vol.bounds, shape)
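The skeleton variant is driven the same way; again the path is a placeholder and inline execution stands in for a real task queue:

tasks = create_spatial_index_skeleton_tasks('gs://my-bucket/segmentation', mip=0)
for task in tasks:
    task()  # each item is a partial(SpatialIndexTask, ...)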