Example #1
    def _upload_individuals(self, mesh_binaries, generate_manifests):
        cf = CloudFiles(self.layer_path)

        content_type = "model/mesh"
        if self.options["encoding"] == "draco":
            content_type = "model/x.draco"

        cf.puts(
            ((f"{self._mesh_dir}/{segid}:{self.options['lod']}:{self._bounds.to_filename()}",
              mesh_binary) for segid, mesh_binary in mesh_binaries.items()),
            compress=self._encoding_to_compression_dict[
                self.options['encoding']],
            cache_control=self.options['cache_control'],
            content_type=content_type,
        )

        if generate_manifests:
            cf.put_jsons(
                ((f"{self._mesh_dir}/{segid}:{self.options['lod']}", {
                    "fragments": [
                        f"{segid}:{self.options['lod']}:{self._bounds.to_filename()}"
                    ]
                }) for segid, mesh_binary in mesh_binaries.items()),
                compress=None,
                cache_control=self.options['cache_control'],
            )
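
As a point of reference, here is a minimal sketch (not from the source; the layer path, mesh_dir, bounds string, and segids are placeholders) of the fragment/manifest layout that _upload_individuals produces: each fragment binary lives at {mesh_dir}/{segid}:{lod}:{bounds} and the manifest at {mesh_dir}/{segid}:{lod} lists those fragment names.

# Minimal sketch with placeholder values; mirrors the naming scheme above.
from cloudfiles import CloudFiles

cf = CloudFiles("file:///tmp/example_layer")            # hypothetical layer path
mesh_dir, lod, bounds = "mesh", 0, "0-64_0-64_0-64"     # hypothetical bounds string
mesh_binaries = {17: b"\x00", 42: b"\x00"}              # placeholder fragment bytes

# fragment files: {mesh_dir}/{segid}:{lod}:{bounds}
cf.puts(
    (f"{mesh_dir}/{segid}:{lod}:{bounds}", binary)
    for segid, binary in mesh_binaries.items()
)

# manifest files: {mesh_dir}/{segid}:{lod} -> list of fragment names
cf.put_jsons(
    (f"{mesh_dir}/{segid}:{lod}", {"fragments": [f"{segid}:{lod}:{bounds}"]})
    for segid in mesh_binaries
)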
Example #2
def test_get_json_order(s3, protocol):
  from cloudfiles import CloudFiles
  url = compute_url(protocol, 'get_json_order')
  cf = CloudFiles(url)

  N = 5300
  cf.put_jsons(( (str(z), [ z ]) for z in range(N) ))

  contents = cf.get_json(( str(z) for z in range(N) ))

  for z, content in enumerate(contents):
    assert content[0] == z

  cf.delete(( str(z) for z in range(N) ))
Example #3
class JSONLayerBase(BaseLayerBackend):
    """A directory with one text file per section
    """
    def __init__(self,
                 path,
                 backend,
                 reference=None,
                 overwrite=True,
                 **kwargs):
        super().__init__(**kwargs)
        self.path = path
        self.dtype = 'O'
        self.backend = backend
        self.cf = CloudFiles(self.path, progress=False)

    def __str__(self):
        return "JSON {}".format(self.path)

    def get_sublayer(self, name, layer_type=None, path=None, **kwargs):
        if layer_type is None:
            layer_type = self.get_layer_type()

        if path is None:
            path = os.path.join(self.path, layer_type, name)

        return self.backend.create_layer(path=path,
                                         reference=self,
                                         layer_type=layer_type,
                                         **kwargs)

    def get_filename(self, z):
        return f'{z:06d}'

    def read_backend(self, bcube, **kwargs):
        z_range = bcube.z_range()
        corgie_logger.debug(f'Read from {str(self)}, z: {z_range}')
        data = []
        for z in z_range:
            f = self.cf.get_json(self.get_filename(z))
            data.append(f)
        return data

    def write_backend(self, data, bcube, **kwargs):
        z_range = range(*bcube.z_range())
        assert (len(data) == len(z_range))
        corgie_logger.debug(f'Write to {str(self)}, z: {z_range}')
        filepaths = [self.get_filename(z) for z in z_range]
        self.cf.put_jsons(zip(filepaths, data), cache_control='no-cache')
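
A minimal sketch (placeholder path; the bcube and backend wiring are omitted) of the per-section JSON round trip this layer performs, using the zero-padded filenames produced by get_filename:

# Minimal sketch, not from the source: one JSON file per z section.
from cloudfiles import CloudFiles

cf = CloudFiles("file:///tmp/json_layer")   # hypothetical layer path
z_range = range(10, 13)
data = [{"z": z, "field": None} for z in z_range]

# write_backend: one JSON file per section, named like '000010'
cf.put_jsons(zip([f"{z:06d}" for z in z_range], data), cache_control='no-cache')

# read_backend: fetch each section back
restored = [cf.get_json(f"{z:06d}") for z in z_range]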
Example #4
def MeshManifestPrefixTask(layer_path: str,
                           prefix: str,
                           lod: int = 0,
                           mesh_dir: Optional[str] = None):
    """
  Finalize mesh generation by post-processing chunk fragment
  lists into mesh fragment manifests.
  These are necessary for neuroglancer to know which mesh
  fragments to download for a given segid.

  If we parallelize using prefixes single digit prefixes ['0','1',..'9'] all meshes will
  be correctly processed. But if we do ['10','11',..'99'] meshes from [0,9] won't get
  processed and need to be handle specifically by creating tasks that will process
  a single mesh ['0:','1:',..'9:']
  """
    cf = CloudFiles(layer_path)
    info = cf.get_json('info')

    if mesh_dir is None and 'mesh' in info:
        mesh_dir = info['mesh']

    prefix = cf.join(mesh_dir, prefix)
    segids = defaultdict(list)

    regexp = re.compile(r'(\d+):(\d+):')
    for filename in cf.list(prefix=prefix):
        filename = os.path.basename(filename)
        # `match` anchors at the beginning (^); `search` scans the whole string
        matches = re.search(regexp, filename)

        if not matches:
            continue

        segid, mlod = matches.groups()
        segid, mlod = int(segid), int(mlod)

        if mlod != lod:
            continue

        segids[segid].append(filename)

    items = ((f"{mesh_dir}/{segid}:{lod}", {
        "fragments": frags
    }) for segid, frags in segids.items())

    cf.put_jsons(items)
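
Following the docstring above, a hedged sketch of how the prefixes might be enumerated so every segid is covered: single-digit segids get the terminated prefixes '0:'..'9:' while the rest fall under the two-digit prefixes '10'..'99'. The layer path is a placeholder and the tasks are wrapped in partial rather than executed directly.

# Hedged sketch; 'gs://bucket/dataset' is a placeholder layer path.
from functools import partial

prefixes = [f"{i}:" for i in range(10)] + [str(i) for i in range(10, 100)]
tasks = [
    partial(MeshManifestPrefixTask, layer_path="gs://bucket/dataset", prefix=p)
    for p in prefixes
]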
Example #5
def MeshManifestFilesystemTask(
    layer_path: str,
    lod: int = 0,
    mesh_dir: Optional[str] = None,
):
    cf = CloudFiles(layer_path)
    info = cf.get_json('info')

    if mesh_dir is None and 'mesh' in info:
        mesh_dir = info['mesh']

    filepath = cloudfiles.paths.asfilepath(cf.join(layer_path, mesh_dir))
    segids = defaultdict(list)

    regexp = re.compile(r'(\d+):(\d+):')
    for entry in os.scandir(filepath):
        if not entry.is_file():
            continue

        filename = os.path.basename(entry.name)
        # `match` anchors at the beginning (^); `search` scans the whole string
        matches = re.search(regexp, filename)

        if not matches:
            continue

        segid, mlod = matches.groups()
        segid, mlod = int(segid), int(mlod)

        if mlod != lod:
            continue

        filename, ext = os.path.splitext(filename)
        segids[segid].append(filename)

    items = ((f"{mesh_dir}/{segid}:{lod}", {
        "fragments": frags
    }) for segid, frags in segids.items())

    cf.put_jsons(items)
Example #6
def create_sharded_multires_mesh_tasks(
  cloudpath:str, 
  shard_index_bytes=2**13, 
  minishard_index_bytes=2**15,
  num_lod:int = 1, 
  draco_compression_level:int = 1,
  vertex_quantization_bits:int = 16,
  minishard_index_encoding="gzip", 
  mesh_dir:Optional[str] = None, 
  spatial_index_db:Optional[str] = None
) -> Iterator[MultiResShardedMeshMergeTask]: 

  configure_multires_info(
    cloudpath, 
    vertex_quantization_bits, 
    mesh_dir
  )

  # rebuild b/c sharding changes the mesh source class
  cv = CloudVolume(cloudpath, progress=True, spatial_index_db=spatial_index_db) 
  cv.mip = cv.mesh.meta.mip

  # 17 sec to download for pinky100
  all_labels = cv.mesh.spatial_index.query(cv.bounds * cv.resolution)
  
  (shard_bits, minishard_bits, preshift_bits) = \
    compute_shard_params_for_hashed(
      num_labels=len(all_labels),
      shard_index_bytes=int(shard_index_bytes),
      minishard_index_bytes=int(minishard_index_bytes),
    )

  spec = ShardingSpecification(
    type='neuroglancer_uint64_sharded_v1',
    preshift_bits=preshift_bits,
    hash='murmurhash3_x86_128',
    minishard_bits=minishard_bits,
    shard_bits=shard_bits,
    minishard_index_encoding=minishard_index_encoding,
    data_encoding="raw", # draco encoded meshes
  )

  cv.mesh.meta.info['sharding'] = spec.to_dict()
  cv.mesh.meta.commit_info()

  cv = CloudVolume(cloudpath)

  # perf: ~66.5k hashes/sec on M1 ARM64
  shardfn = lambda lbl: cv.mesh.reader.spec.compute_shard_location(lbl).shard_number

  shard_labels = defaultdict(list)
  for label in tqdm(all_labels, desc="Hashes"):
    shard_labels[shardfn(label)].append(label)
  del all_labels

  cf = CloudFiles(cv.mesh.meta.layerpath, progress=True)
  files = ( 
    (str(shardno) + '.labels', labels) 
    for shardno, labels in shard_labels.items() 
  )
  cf.put_jsons(
    files, compress="gzip", 
    cache_control="no-cache", total=len(shard_labels)
  )

  cv.provenance.processing.append({
    'method': {
      'task': 'MultiResShardedMeshMergeTask',
      'cloudpath': cloudpath,
      'mip': cv.mesh.meta.mip,
      'num_lod': num_lod,
      'vertex_quantization_bits': vertex_quantization_bits,
      'preshift_bits': preshift_bits, 
      'minishard_bits': minishard_bits, 
      'shard_bits': shard_bits,
      'mesh_dir': mesh_dir,
      'draco_compression_level': draco_compression_level,
    },
    'by': operator_contact(),
    'date': strftime('%Y-%m-%d %H:%M %Z'),
  }) 
  cv.commit_provenance()

  return [
    partial(MultiResShardedMeshMergeTask,
      cloudpath, shard_no, 
      num_lod=num_lod,
      mesh_dir=mesh_dir, 
      spatial_index_db=spatial_index_db,
      draco_compression_level=draco_compression_level,
    )
    for shard_no in shard_labels.keys()
  ]
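
The '{shardno}.labels' files written above are plain JSON lists of segids, so a downstream merge task can recover its assignment with get_json. A minimal sketch (the layer path and shard number are placeholders):

# Hedged sketch; path and shard number are placeholders.
from cloudfiles import CloudFiles

cf = CloudFiles("gs://bucket/dataset/mesh")
labels = cf.get_json("123.labels")   # e.g. [17, 42, ...] assigned to shard 123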
Example #7
def create_sharded_skeleton_merge_tasks(layer_path,
                                        dust_threshold,
                                        tick_threshold,
                                        shard_index_bytes=2**13,
                                        minishard_index_bytes=2**15,
                                        minishard_index_encoding='gzip',
                                        data_encoding='gzip',
                                        max_cable_length=None,
                                        spatial_index_db=None):
    cv = CloudVolume(layer_path,
                     progress=True,
                     spatial_index_db=spatial_index_db)
    cv.mip = cv.skeleton.meta.mip

    # 17 sec to download for pinky100
    all_labels = cv.skeleton.spatial_index.query(cv.bounds * cv.resolution)

    (shard_bits, minishard_bits, preshift_bits) = \
      compute_shard_params_for_hashed(
        num_labels=len(all_labels),
        shard_index_bytes=int(shard_index_bytes),
        minishard_index_bytes=int(minishard_index_bytes),
      )

    spec = ShardingSpecification(
        type='neuroglancer_uint64_sharded_v1',
        preshift_bits=preshift_bits,
        hash='murmurhash3_x86_128',
        minishard_bits=minishard_bits,
        shard_bits=shard_bits,
        minishard_index_encoding=minishard_index_encoding,
        data_encoding=data_encoding,
    )
    cv.skeleton.meta.info['sharding'] = spec.to_dict()
    cv.skeleton.meta.commit_info()

    # rebuild b/c sharding changes the skeleton source
    cv = CloudVolume(layer_path,
                     progress=True,
                     spatial_index_db=spatial_index_db)
    cv.mip = cv.skeleton.meta.mip

    # perf: ~36k hashes/sec
    shardfn = lambda lbl: cv.skeleton.reader.spec.compute_shard_location(
        lbl).shard_number

    shard_labels = defaultdict(list)
    for label in tqdm(all_labels, desc="Hashes"):
        shard_labels[shardfn(label)].append(label)

    cf = CloudFiles(cv.skeleton.meta.layerpath, progress=True)
    files = ((str(shardno) + '.labels', labels)
             for shardno, labels in shard_labels.items())
    cf.put_jsons(files,
                 compress="gzip",
                 cache_control="no-cache",
                 total=len(shard_labels))

    cv.provenance.processing.append({
        'method': {
            'task': 'ShardedSkeletonMergeTask',
            'cloudpath': layer_path,
            'mip': cv.skeleton.meta.mip,
            'dust_threshold': dust_threshold,
            'tick_threshold': tick_threshold,
            'max_cable_length': max_cable_length,
            'preshift_bits': preshift_bits,
            'minishard_bits': minishard_bits,
            'shard_bits': shard_bits,
        },
        'by': operator_contact(),
        'date': strftime('%Y-%m-%d %H:%M %Z'),
    })
    cv.commit_provenance()

    return [
        ShardedSkeletonMergeTask(layer_path,
                                 shard_no,
                                 dust_threshold,
                                 tick_threshold,
                                 max_cable_length=max_cable_length)
        for shard_no in shard_labels.keys()
    ]
Example #8
def create_sharded_multires_mesh_from_unsharded_tasks(
  src:str, 
  dest:str,
  shard_index_bytes=2**13, 
  minishard_index_bytes=2**15,
  min_shards:int = 1,
  num_lod:int = 1, 
  draco_compression_level:int = 1,
  vertex_quantization_bits:int = 16,
  minishard_index_encoding="gzip", 
  mesh_dir:Optional[str] = None, 
) -> Iterator[MultiResShardedMeshMergeTask]: 
  
  configure_multires_info(
    dest, 
    vertex_quantization_bits, 
    mesh_dir
  )

  cv_src = CloudVolume(src)
  cf = CloudFiles(cv_src.mesh.meta.layerpath)

  all_labels = []
  SEGID_RE = re.compile(r'(\d+):0(?:\.gz|\.br|\.zstd)?$')
  for path in cf.list():
    match = SEGID_RE.search(path)
    if match is None:
      continue
    (segid,) = match.groups()
    all_labels.append(int(segid))

  (shard_bits, minishard_bits, preshift_bits) = \
    compute_shard_params_for_hashed(
      num_labels=len(all_labels),
      shard_index_bytes=int(shard_index_bytes),
      minishard_index_bytes=int(minishard_index_bytes),
      min_shards=int(min_shards),
    )

  cv_dest = CloudVolume(dest, mesh_dir=mesh_dir)
  cv_dest.mesh.meta.info["mip"] = cv_src.mesh.meta.mip
  cv_dest.commit_info()

  spec = ShardingSpecification(
    type='neuroglancer_uint64_sharded_v1',
    preshift_bits=preshift_bits,
    hash='murmurhash3_x86_128',
    minishard_bits=minishard_bits,
    shard_bits=shard_bits,
    minishard_index_encoding=minishard_index_encoding,
    data_encoding="raw", # draco encoded meshes
  )

  cv_dest.mesh.meta.info['sharding'] = spec.to_dict()
  cv_dest.mesh.meta.commit_info()

  cv_dest = CloudVolume(dest, mesh_dir=mesh_dir)

  # perf: ~66.5k hashes/sec on M1 ARM64
  shardfn = lambda lbl: cv_dest.mesh.reader.spec.compute_shard_location(lbl).shard_number

  shard_labels = defaultdict(list)
  for label in tqdm(all_labels, desc="Hashes"):
    shard_labels[shardfn(label)].append(label)
  del all_labels

  cf = CloudFiles(cv_dest.mesh.meta.layerpath, progress=True)
  files = ( 
    (str(shardno) + '.labels', labels) 
    for shardno, labels in shard_labels.items() 
  )
  cf.put_jsons(
    files, compress="gzip", 
    cache_control="no-cache", total=len(shard_labels)
  )

  cv_dest.provenance.processing.append({
    'method': {
      'task': 'MultiResShardedFromUnshardedMeshMergeTask',
      'src': src,
      'dest': dest,
      'num_lod': num_lod,
      'vertex_quantization_bits': vertex_quantization_bits,
      'preshift_bits': preshift_bits, 
      'minishard_bits': minishard_bits, 
      'shard_bits': shard_bits,
      'mesh_dir': mesh_dir,
      'draco_compression_level': draco_compression_level,
    },
    'by': operator_contact(),
    'date': strftime('%Y-%m-%d %H:%M %Z'),
  }) 
  cv_dest.commit_provenance()

  return [
    partial(MultiResShardedFromUnshardedMeshMergeTask,
      src=src, 
      dest=dest, 
      shard_no=shard_no, 
      num_lod=num_lod,
      mesh_dir=mesh_dir, 
      draco_compression_level=draco_compression_level,
    )
    for shard_no in shard_labels.keys()
  ]
Example #9
def create_sharded_skeletons_from_unsharded_tasks(
    src: str,
    dest: str,
    shard_index_bytes=2**13,
    minishard_index_bytes=2**15,
    min_shards: int = 1,
    minishard_index_encoding='gzip',
    data_encoding='gzip',
    skel_dir: Optional[str] = None,
) -> Iterator[ShardedFromUnshardedSkeletonMergeTask]:
    cv_src = CloudVolume(src)
    cv_src.mip = cv_src.skeleton.meta.mip

    cf = CloudFiles(cv_src.skeleton.meta.layerpath)

    all_labels = []
    SEGID_RE = re.compile(r'(\d+)(?:\.gz|\.br|\.zstd)?$')
    for path in cf.list():
        match = SEGID_RE.search(path)
        if match is None:
            continue
        (segid, ) = match.groups()
        all_labels.append(int(segid))

    cv_dest = CloudVolume(dest, skel_dir=skel_dir)
    cv_dest.skeleton.meta.info = copy.deepcopy(cv_src.skeleton.meta.info)
    cv_dest.skeleton.meta.info["vertex_attributes"] = [
        attr for attr in cv_dest.skeleton.meta.info["vertex_attributes"]
        if attr["data_type"] in ("float32", "float64")
    ]

    (shard_bits, minishard_bits, preshift_bits) = \
      compute_shard_params_for_hashed(
        num_labels=len(all_labels),
        shard_index_bytes=int(shard_index_bytes),
        minishard_index_bytes=int(minishard_index_bytes),
        min_shards=int(min_shards),
      )

    spec = ShardingSpecification(
        type='neuroglancer_uint64_sharded_v1',
        preshift_bits=preshift_bits,
        hash='murmurhash3_x86_128',
        minishard_bits=minishard_bits,
        shard_bits=shard_bits,
        minishard_index_encoding=minishard_index_encoding,
        data_encoding=data_encoding,
    )

    cv_dest.skeleton.meta.info['sharding'] = spec.to_dict()
    cv_dest.skeleton.meta.commit_info()

    cv_dest = CloudVolume(dest, skel_dir=skel_dir)

    # perf: ~66.5k hashes/sec on M1 ARM64
    shardfn = lambda lbl: cv_dest.skeleton.reader.spec.compute_shard_location(
        lbl).shard_number

    shard_labels = defaultdict(list)
    for label in tqdm(all_labels, desc="Hashes"):
        shard_labels[shardfn(label)].append(label)
    del all_labels

    cf = CloudFiles(cv_dest.skeleton.meta.layerpath, progress=True)
    files = ((str(shardno) + '.labels', labels)
             for shardno, labels in shard_labels.items())
    cf.put_jsons(files,
                 compress="gzip",
                 cache_control="no-cache",
                 total=len(shard_labels))

    cv_dest.provenance.processing.append({
        'method': {
            'task': 'ShardedFromUnshardedSkeletonMergeTask',
            'src': src,
            'dest': dest,
            'preshift_bits': preshift_bits,
            'minishard_bits': minishard_bits,
            'shard_bits': shard_bits,
            'skel_dir': skel_dir,
        },
        'by': operator_contact(),
        'date': strftime('%Y-%m-%d %H:%M %Z'),
    })
    cv_dest.commit_provenance()

    return [
        partial(
            ShardedFromUnshardedSkeletonMergeTask,
            src=src,
            dest=dest,
            shard_no=shard_no,
            skel_dir=skel_dir,
        ) for shard_no in shard_labels.keys()
    ]
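
For clarity, a small worked example (the filenames are illustrative) of how the SEGID_RE patterns in Examples #8 and #9 extract segids from unsharded filenames, including compressed variants:

# Worked example of the regexes above; filenames are placeholders.
import re

MESH_RE = re.compile(r'(\d+):0(?:\.gz|\.br|\.zstd)?$')   # Example #8 (mesh fragments)
SKEL_RE = re.compile(r'(\d+)(?:\.gz|\.br|\.zstd)?$')      # Example #9 (skeletons)

assert MESH_RE.search("mesh/12345:0.gz").group(1) == "12345"
assert MESH_RE.search("mesh/12345:1") is None              # different lod, skipped
assert SKEL_RE.search("skeletons/987.br").group(1) == "987"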