Example #1
def compute_scales(vol, mip, shape, axis, factor, chunk_size=None):
  shape = min2(vol.meta.volume_size(mip), shape)
  # sometimes we downsample a base layer of 512x512 
  # into underlying chunks of 64x64 which permits more scales
  underlying_mip = (mip + 1) if (mip + 1) in vol.available_mips else mip

  if chunk_size:
    scale_chunk_size = Vec(*chunk_size).astype(np.float32)
  else:
    scale_chunk_size = vol.meta.chunk_size(underlying_mip).astype(np.float32)

  if factor is None:
    factor = axis_to_factor(axis)

  factors = compute_factors(shape, factor, scale_chunk_size, vol.meta.volume_size(mip))
  scales = [ vol.meta.resolution(mip) ]

  precision = max(map(getprecision, vol.meta.resolution(mip)))

  def prec(x):
    if precision == 0:
      return int(x)
    return round(x, precision)

  for factor3 in factors:
    scales.append(
      list(map(prec, Vec(*scales[-1], dtype=np.float32) * Vec(*factor3)))
    )
  return scales[1:]
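The returned scale list is just the base resolution multiplied cumulatively by each downsample factor and rounded to the precision of the base resolution. Below is a minimal sketch of that accumulation with hypothetical numbers (base resolution [16, 16, 40], three (2, 2, 1) factors), using plain Python lists instead of CloudVolume's Vec:

# Minimal sketch of the accumulation loop in compute_scales above.
# The base resolution and factor list are hypothetical.
base_resolution = [16, 16, 40]   # e.g. nm per voxel at the requested mip
factors = [(2, 2, 1)] * 3        # what compute_factors might return

scales = [base_resolution]
for fx, fy, fz in factors:
  px, py, pz = scales[-1]
  scales.append([px * fx, py * fy, pz * fz])

print(scales[1:])  # [[32, 32, 40], [64, 64, 40], [128, 128, 40]]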
Example #2
def test_sharded_image_bits(scale):
    dataset_size = Vec(*scale["size"])
    chunk_size = Vec(*scale["chunk_sizes"][0])

    spec = create_sharded_image_info(dataset_size=dataset_size,
                                     chunk_size=chunk_size,
                                     encoding=scale["encoding"],
                                     dtype=np.uint8)

    shape = image_shard_shape_from_spec(spec, dataset_size, chunk_size)

    shape = lib.min2(shape, dataset_size)
    dataset_bbox = Bbox.from_vec(dataset_size)
    gpts = list(gridpoints(dataset_bbox, dataset_bbox, chunk_size))
    grid_size = np.ceil(dataset_size / chunk_size).astype(np.int64)

    spec = ShardingSpecification.from_dict(spec)
    reader = ShardReader(None, None, spec)

    morton_codes = compressed_morton_code(gpts, grid_size)
    min_num_shards = prod(dataset_size / shape)
    max_num_shards = prod(np.ceil(dataset_size / shape))

    assert 0 < min_num_shards <= 2**spec.shard_bits
    assert 0 < max_num_shards <= 2**spec.shard_bits

    real_num_shards = len(set(map(reader.get_filename, morton_codes)))

    assert min_num_shards <= real_num_shards <= max_num_shards
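The two bounds asserted at the end follow from geometry alone: dividing the dataset size by the shard shape per axis gives a fractional lower bound on the shard count, and rounding each axis up gives the upper bound; both must fit in the 2**shard_bits address space. A small standalone check with hypothetical sizes:

# Hypothetical sizes: a 1024 x 1024 x 96 dataset covered by 512 x 512 x 64 shards.
import numpy as np

dataset_size = np.array([1024, 1024, 96])
shard_shape  = np.array([512, 512, 64])

min_num_shards = np.prod(dataset_size / shard_shape)           # 2 * 2 * 1.5 = 6.0
max_num_shards = np.prod(np.ceil(dataset_size / shard_shape))  # 2 * 2 * 2 = 8.0

# With e.g. shard_bits = 3 there are 2**3 = 8 addressable shards,
# so both bounds fit: 0 < 6.0 <= 8 and 0 < 8.0 <= 8.
assert 0 < min_num_shards <= 2 ** 3
assert 0 < max_num_shards <= 2 ** 3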
Example #3
def test_broken_dataset():
  """
  This dataset was previously returning 19 total bits
  when 20 were needed to cover all the morton codes.
  """
  scale = {
    'chunk_sizes': [[128, 128, 20]],
    'encoding': 'raw',
    'key': '16_16_40',
    'resolution': [16, 16, 40],
    'size': [10240,10240,990],
    'voxel_offset': [17408,9216,4855],
  }

  dataset_size = Vec(*scale["size"])
  chunk_size = Vec(*scale["chunk_sizes"][0])

  spec = create_sharded_image_info( 
    dataset_size=dataset_size,
    chunk_size=chunk_size,
    encoding="jpeg",
    dtype=np.uint8
  )
  total_bits = spec["shard_bits"] + spec["minishard_bits"] + spec["preshift_bits"]
  assert total_bits == 20
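The 20 bit total can be checked by hand: a compressed morton code needs roughly ceil(log2(grid_size)) bits per axis, and the chunk grid for this scale is ceil(10240/128) = 80 by 80 by ceil(990/20) = 50. A quick sketch of that arithmetic (the per-axis ceil(log2(...)) count is an approximation of what the spec must cover, not a call into the library):

# Rough bit budget for the morton codes of the dataset above.
# Assumes each axis contributes about ceil(log2(grid_size_axis)) bits.
import math

size       = [10240, 10240, 990]
chunk_size = [128, 128, 20]

grid_size = [math.ceil(s / c) for s, c in zip(size, chunk_size)]  # [80, 80, 50]
bits_per_axis = [math.ceil(math.log2(g)) for g in grid_size]      # [7, 7, 6]

print(sum(bits_per_axis))  # 20 -- why shard + minishard + preshift bits must total 20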
Example #4
def SpatialIndexTask(
    cloudpath: str,
    shape: Tuple[int, int, int],
    offset: Tuple[int, int, int],
    subdir: str,
    precision: int,
    mip: int = 0,
    fill_missing: bool = False,
    compress: Optional[Union[str, bool]] = 'gzip',
) -> None:
    """
  The main way to add a spatial index is to use the MeshTask or SkeletonTask,
  but old datasets or broken datasets may need it to be 
  reconstituted. An alternative use is to create the spatial index
  over a different area size than the mesh or skeleton task.
  """
    cv = CloudVolume(cloudpath,
                     mip=mip,
                     bounded=False,
                     fill_missing=fill_missing)
    cf = CloudFiles(cloudpath)

    bounds = Bbox(Vec(*offset), Vec(*shape) + Vec(*offset))
    bounds = Bbox.clamp(bounds, cv.bounds)

    data_bounds = bounds.clone()
    data_bounds.maxpt += 1  # match typical Marching Cubes overlap

    resolution = cv.resolution

    # remap: old img -> img
    img, remap = cv.download(data_bounds, renumber=True)
    img = img[..., 0]
    slcs = find_objects(img)
    del img
    reverse_map = {v: k for k, v in remap.items()}  # img -> old img

    bboxes = {}
    for label, slc in enumerate(slcs):
        if slc is None:
            continue
        obj_bounds = Bbox.from_slices(slc)
        obj_bounds += Vec(*offset)
        obj_bounds *= Vec(*resolution, dtype=np.float32)
        bboxes[str(reverse_map[label+1])] = \
          obj_bounds.astype(resolution.dtype).to_list()

    bounds = bounds.astype(resolution.dtype) * resolution
    cf.put_json(
        cf.join(subdir, f"{bounds.to_filename(precision)}.spatial"),
        bboxes,
        compress=compress,
        cache_control=False,
    )
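The per-label bounding boxes in the loop above come from scipy.ndimage.find_objects, which returns one tuple of slices per label value (index i holds label i + 1, and skipped label values yield None), which is why the code indexes the remap with label + 1. A toy, self-contained sketch of just that part, without CloudVolume:

# Toy illustration of the find_objects / bbox loop above (no CloudVolume needed).
import numpy as np
from scipy.ndimage import find_objects

img = np.zeros((10, 10, 10), dtype=np.int32)
img[1:4, 2:5, 0:3] = 1    # label 1
img[6:9, 6:9, 4:8] = 3    # label 3 (label 2 unused, so its slot is None)

bboxes = {}
for label, slc in enumerate(find_objects(img)):
  if slc is None:
    continue  # skipped label values return None
  # slc is a tuple of slices; label + 1 is the actual label value
  bboxes[label + 1] = [(s.start, s.stop) for s in slc]

print(bboxes)
# {1: [(1, 4), (2, 5), (0, 3)], 3: [(6, 9), (6, 9), (4, 8)]}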
Example #5
def add_scale(
  layer_path, mip,
  preserve_chunk_size=True, chunk_size=None,
  encoding=None, factor=None
):
  vol = CloudVolume(layer_path, mip=mip)

  if factor is None:
    factor = (2,2,1)

  new_resolution = vol.resolution * Vec(*factor)
  vol.meta.add_resolution(
    new_resolution, encoding=encoding, chunk_size=chunk_size
  )

  if chunk_size is None:
    if preserve_chunk_size:
      chunk_size = vol.scales[mip]['chunk_sizes']
    else:
      chunk_size = vol.scales[mip + 1]['chunk_sizes']
  else:
    chunk_size = [ chunk_size ]

  if encoding is None:
    encoding = vol.scales[mip]['encoding']

  vol.scales[mip + 1]['chunk_sizes'] = chunk_size

  return vol
Example #6
def image_shard_shape_from_spec(
  spec: ShardingSpecification, 
  dataset_size: ShapeType, 
  chunk_size: ShapeType
) -> ShapeType:

  chunk_size = Vec(*chunk_size, dtype=np.uint64)
  dataset_size = Vec(*dataset_size, dtype=np.uint64)
  preshift_bits = np.uint64(spec["preshift_bits"])
  minishard_bits = np.uint64(spec["minishard_bits"])
  shape_bits = preshift_bits + minishard_bits

  grid_size = np.ceil(dataset_size / chunk_size).astype(np.uint64)
  one = np.uint64(1)

  if shape_bits >= 64:
    raise ValueError(
      f"preshift_bits ({preshift_bits}) + minishard_bits ({minishard_bits}) must be < 64. Sum: {shape_bits}"
    )

  def compute_shape_bits():
    shape = Vec(0,0,0, dtype=np.uint64)

    i = 0
    over = [ False, False, False ]
    while i < shape_bits:
      changed = False
      for dim in range(3):
        if 2 ** (shape[dim] + 1) < grid_size[dim] * 2 and not over[dim]:
          if 2 ** (shape[dim] + 1) >= grid_size[dim]:
            over[dim] = True
          shape[dim] += one
          i += 1
          changed = True

        if i >= shape_bits:
          return shape

      if not changed:
        return shape

    return shape

  shape = compute_shape_bits()
  shape = Vec(2 ** shape.x, 2 ** shape.y, 2 ** shape.z, dtype=np.uint64)
  return chunk_size * shape
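compute_shape_bits hands out the preshift + minishard bits round-robin across x, y and z, and stops growing an axis once doubling it again would overshoot the chunk grid. Below is a standalone trace with plain ints; the inputs are 15 shape bits (preshift_bits=9 plus minishard_bits=6, as in Example #7's spec) over the 1944 x 1048 x 442 chunk grid implied by Example #7's volume and chunk size:

# Standalone trace of the bit allocation loop above, using plain ints.
grid_size = [1944, 1048, 442]
shape_bits = 15

shape = [0, 0, 0]
i = 0
over = [False, False, False]
done = False
while i < shape_bits and not done:
  changed = False
  for dim in range(3):
    if 2 ** (shape[dim] + 1) < grid_size[dim] * 2 and not over[dim]:
      if 2 ** (shape[dim] + 1) >= grid_size[dim]:
        over[dim] = True
      shape[dim] += 1
      i += 1
      changed = True
    if i >= shape_bits:
      done = True
      break
  if not changed:
    done = True

print(shape)                    # [5, 5, 5] -> 2^5 = 32 grid chunks per axis
print([2 ** s for s in shape])  # [32, 32, 32]; shard shape = chunk_size * this,
                                # i.e. 4096 x 4096 x 512 for 128 x 128 x 16 chunks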
Example #7
def test_image_sharding_hash():
    spec = ShardingSpecification(
        type="neuroglancer_uint64_sharded_v1",
        data_encoding="gzip",
        hash="identity",
        minishard_bits=6,
        minishard_index_encoding="gzip",
        preshift_bits=9,
        shard_bits=16,
    )

    point = Vec(144689, 52487, 2829)
    volume_size = Vec(*[248832, 134144, 7063])
    chunk_size = Vec(*[128, 128, 16])

    grid_size = np.ceil(volume_size / chunk_size).astype(np.uint32)
    gridpt = np.ceil(point / chunk_size).astype(np.int32)
    code = compressed_morton_code(gridpt, grid_size)
    loc = spec.compute_shard_location(code)

    assert loc.shard_number == '458d'
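For context, here is a sketch of how the shard number is derived from a morton code under the neuroglancer precomputed sharded format with an identity hash. The helper name shard_location is made up for illustration; only the bit layout mirrors the spec above:

# Sketch of the bit splitting behind compute_shard_location (identity hash assumed).
# For the spec above: preshift_bits=9, minishard_bits=6, shard_bits=16.
def shard_location(code, preshift_bits=9, minishard_bits=6, shard_bits=16):
  hashed = code >> preshift_bits                               # identity hash
  minishard_number = hashed & ((1 << minishard_bits) - 1)       # low bits pick the minishard
  shard_number = (hashed >> minishard_bits) & ((1 << shard_bits) - 1)
  # shard files are named by the shard number in zero-padded lowercase hex,
  # e.g. shard_bits=16 -> 4 hex digits such as '458d'
  return format(shard_number, 'x').zfill(-(-shard_bits // 4)), minishard_number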
Example #8
def compute_factors(ds_shape, factor, chunk_size):
    grid_size = Vec(*ds_shape, dtype=np.float32) / Vec(*chunk_size,
                                                       dtype=np.float32)
    # find the dimension which will tolerate the smallest number of downsamples and
    # return the number it will accept. Add 0.0001 and then truncate to compensate for
    # FP errors in the log, e.g. getting 1.9999999382 when the result should be exactly 2.

    # This filtering is to avoid problems with dividing by log(1)
    grid_size = [g for f, g in zip(factor, grid_size) if f != 1]
    grid_size = Vec(*grid_size, dtype=np.float32)

    factor_div = [f for f in factor if f != 1]
    factor_div = Vec(*factor_div, dtype=np.float32)

    if len(factor_div) == 0:
        return []

    N = np.log(grid_size) / np.log(factor_div)
    N += 0.0001
    return [factor] * int(min(N))
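Concretely, the repeat count is how many times the chunk grid along the downsampled axes can be halved before it collapses to a single chunk. A worked example with hypothetical sizes:

# Worked example of the log arithmetic above, with hypothetical sizes.
import numpy as np

ds_shape   = (512, 512, 64)   # task shape
chunk_size = (64, 64, 64)
factor     = (2, 2, 1)

grid_size = np.array(ds_shape) / np.array(chunk_size)               # [8, 8, 1]
# drop axes where the factor is 1 (z here) to avoid dividing by log(1)
grid = np.array([g for f, g in zip(factor, grid_size) if f != 1])   # [8, 8]
fac  = np.array([f for f in factor if f != 1], dtype=np.float32)    # [2, 2]

N = np.log(grid) / np.log(fac) + 0.0001   # [3.0001, 3.0001]
print([factor] * int(min(N)))             # [(2, 2, 1), (2, 2, 1), (2, 2, 1)]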
Example #9
def compute_factors(ds_shape, factor, chunk_size, volume_size):
  chunk_size = np.array(chunk_size)
  grid_size = Vec(*ds_shape, dtype=np.float32) / Vec(*chunk_size, dtype=np.float32)
  # find the dimension which will tolerate the smallest number of downsamples and
  # return the number it will accept. Add 0.0001 and then truncate to compensate for
  # FP errors in the log, e.g. getting 1.9999999382 when the result should be exactly 2.

  # This filtering is to avoid problems with dividing by log(1)
  grid_size = [ g for f, g in zip(factor, grid_size) if f != 1 ]
  grid_size = Vec(*grid_size, dtype=np.float32)

  factor_div = [ f for f in factor if f != 1 ]
  factor_div = Vec(*factor_div, dtype=np.float32)

  if len(factor_div) == 0:
    return []

  epsilon = 0.0001
  N = np.log(grid_size) / np.log(factor_div)
  N += epsilon
  N = min(N)

  if N < epsilon:
    return []

  dsvol = np.array(volume_size) / (np.array(factor) ** int(np.ceil(N)))
  dsvol = np.array([ dsvol[i] for i,f in enumerate(factor) if f != 1 ])
  chunk_size = np.array([ chunk_size[i] for i,f in enumerate(factor) if f != 1 ])

  N, fract = int(N), (N - float(int(N)))

  # incomplete downsamples are only legal when the
  # volume size is smaller than the chunk size.
  if all(dsvol < chunk_size) and fract > 0.05:
    N += 1

  return [ factor ] * N
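Compared to the three-argument version above, the extra branch grants one additional, incomplete downsample when the fully downsampled volume would fit inside a single chunk. A numeric trace of that branch with hypothetical sizes (plain numpy arithmetic, not a call into the library):

# Trace of the incomplete-downsample branch with hypothetical sizes.
import numpy as np

ds_shape    = (640, 640, 64)
chunk_size  = np.array((64, 64, 64))
factor      = (2, 2, 1)
volume_size = (640, 640, 64)

grid = np.array([640 / 64, 640 / 64])                  # x and y only (factor z == 1): [10, 10]
N = float(min(np.log(grid) / np.log(2.0) + 0.0001))    # ~3.322

dsvol = np.array(volume_size) / (np.array(factor) ** int(np.ceil(N)))  # [40, 40, 64]
dsvol, cs = dsvol[:2], chunk_size[:2]                  # compare only the downsampled axes

N, fract = int(N), N - int(N)                          # 3, ~0.322
if all(dsvol < cs) and fract > 0.05:                   # 40 < 64 on both axes: grant the extra mip
  N += 1

print([factor] * N)                                    # four copies of (2, 2, 1)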
Example #10
  # Note: this is the inner helper from image_shard_shape_from_spec (Example #6);
  # it reads shape_bits, grid_size, and one from the enclosing scope.
  def compute_shape_bits():
    shape = Vec(0,0,0, dtype=np.uint64)

    i = 0
    over = [ False, False, False ]
    while i < shape_bits:
      changed = False
      for dim in range(3):
        if 2 ** (shape[dim] + 1) < grid_size[dim] * 2 and not over[dim]:
          if 2 ** (shape[dim] + 1) >= grid_size[dim]:
            over[dim] = True
          shape[dim] += one
          i += 1
          changed = True

        if i >= shape_bits:
          return shape

      if not changed:
        return shape

    return shape
Example #11
def downsample_shape_from_memory_target(
  data_width, cx, cy, cz, 
  factor, byte_target,
  max_mips=float('inf')
):
  """
  Compute the shape that will give the most downsamples for a given 
  memory target (e.g. 3e9 bytes aka 3 GB).

  data_width: byte size of dtype
  cx: chunk size x
  cy: chunk size y 
  cz: chunk size z
  factor: (2,2,1) or (2,2,2) are supported
  byte_target: memory used should be less than this

  Returns: Vec3 shape
  """
  # formulas come from solving the following optimization equations:
  #
  # factor (1,1,1)
  # find integers n and m such that
  # |n * cx - m * cy| is (approximately) minimized
  # treat cz as fixed to make things easier.
  # We start with a guess that n = sqrt(byte_target / data_width / cx / cy / cz)
  #
  # factor (2,2,1)
  # 4/3 * data_width * cx^(2^n) * cy^(2^n) * cz < byte_target
  #
  # factor (2,2,2)
  # 8/7 * data_width * cx^(2^n) * cy^(2^n) * cz^(2^n) < byte_target
  #
  # it's possible to solve for an arbitrary factor, but more complicated
  # and we really only need those two as the blowup gets intense.
  if byte_target <= 0:
    raise ValueError(f"Unable to pick a shape for a byte budget <= 0. Got: {byte_target}")

  if cx * cy * cz <= 0:
    raise ValueError(f"Chunk size must have a positive integer volume. Got: <{cx},{cy},{cz}>")

  def n_shape(n, c_):
    num_downsamples = int(math.log2((c_ ** (2*n)) / c_))
    num_downsamples = int(min(num_downsamples, max_mips))
    return c_ * (2 ** num_downsamples)

  if factor == (1,1,1):
    n = int(math.sqrt(byte_target / data_width / cx / cy / cz))
    m = int(n * cx / cy)
    out = Vec(n * cx, m * cy, cz)
  elif factor == (2,2,1):
    if cx * cy == 1:
      size = 2 ** int(math.log2(math.sqrt(byte_target / cz)))
      out = Vec(size, size, cz)
    else:
      n = math.log(3/4 * byte_target / data_width / cz)
      n = n / 2 / math.log(cx * cy)
      shape = lambda c_: n_shape(n, c_) 
      out = Vec(shape(cx), shape(cy), cz)
  elif factor == (2,2,2):
    if cx * cy * cz == 1:
      size = 2 ** int(math.log2(round(byte_target ** (1/3), 5)))
      out = Vec(size, size, size)
    else:
      n = math.log(7/8 * byte_target / data_width)
      n = n / 2 / math.log(cx * cy * cz) 
      shape = lambda c_: n_shape(n, c_) 
      out = Vec(shape(cx), shape(cy), shape(cz))
  else:
    raise ValueError(f"This is now a harder optimization problem. Got: {factor}")

  out = out.astype(int)
  min_shape = Vec(cx,cy,cz)
  if any(out < min_shape):
    raise ValueError(
      f"Too little memory allocated to create a valid task."
      f" Got: {byte_target} Predicted Shape: {out} Minimum Shape: {min_shape}"
    )

  return out
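For the (2,2,1) branch, the result can be sanity-checked by plugging the chosen shape back into the 1 + 1/4 + 1/16 + ... = 4/3 series that bounds the memory of the image plus all of its downsamples. A self-contained check with hypothetical parameters (uint8 data, 128 x 128 x 64 chunks, a 3 GB budget), repeating the same arithmetic as the function above but ignoring max_mips:

# Sanity check of the (2,2,1) branch with hypothetical parameters.
import math

data_width, cx, cy, cz = 1, 128, 128, 64   # uint8 data, 128 x 128 x 64 chunks
byte_target = 3e9                          # 3 GB budget

n = math.log(3/4 * byte_target / data_width / cz) / 2 / math.log(cx * cy)

def n_shape(n, c_):
  num_downsamples = int(math.log2((c_ ** (2 * n)) / c_))
  return c_ * (2 ** num_downsamples)

shape = (n_shape(n, cx), n_shape(n, cy), cz)

# 4/3 upper-bounds the memory of the image plus all of its (2,2,1) downsamples
est_bytes = 4/3 * data_width * shape[0] * shape[1] * shape[2]
print(shape, est_bytes)          # (4096, 4096, 64), ~1.43e9 bytes < byte_target
assert est_bytes < byte_target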