Example 1
def test_exists(s3, protocol):
  from cloudfiles import CloudFiles, exceptions
  url = compute_url(protocol, "exists")

  cf = CloudFiles(url, num_threads=5)
  content = b'some_string'
  cf.put('info', content, compress=None)
  
  assert cf.exists('info')
  assert not cf.exists('doesntexist')

  assert cf.exists(['info'])['info']
  assert not cf.exists(['doesntexist'])['doesntexist']

  cf.delete('info')
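
A note on the contract the test above exercises: exists returns a plain bool for a single string argument and a dict keyed by path for an iterable argument. A minimal sketch of both call shapes, assuming a writable local directory (the file:// path is illustrative):

from cloudfiles import CloudFiles

cf = CloudFiles("file:///tmp/cf-demo")  # illustrative local bucket
cf.put("info", b"payload")

single = cf.exists("info")             # -> True (plain bool)
batch = cf.exists(["info", "nope"])    # -> {"info": True, "nope": False}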
Example 2
    def exists(self, segids, progress=None):
        """
        Checks if the mesh manifest exists for each segment id.

        Returns: { f"{segid}.index": bool, ... }
        """
        cf = CloudFiles(self.path)
        return cf.exists((f"{segid}.index" for segid in segids))
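
Since exists accepts any iterable, the generator expression above fans out one check per segment id, and the returned dict is keyed by the generated filename. A hypothetical call (the instance name and segids are stand-ins):

# `meshes` is an instance of the class defining exists() above,
# with self.path pointing at a precomputed mesh directory.
results = meshes.exists([123, 456])
# -> {"123.index": True, "456.index": False}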
Example 3
def test_read_write(s3, protocol, num_threads, green):
    from cloudfiles import CloudFiles, exceptions
    url = compute_url(protocol, "rw")

    cf = CloudFiles(url, num_threads=num_threads, green=green)

    content = b'some_string'
    cf.put('info', content, compress=None, cache_control='no-cache')
    cf['info2'] = content

    assert cf.get('info') == content
    assert cf['info2'] == content
    assert cf['info2', 0:3] == content[0:3]
    assert cf['info2', :] == content[:]
    assert cf.get('nonexistentfile') is None

    assert cf.get('info', return_dict=True) == {"info": content}
    assert cf.get(['info', 'info2'], return_dict=True) == {
        "info": content,
        "info2": content
    }

    del cf['info2']
    assert not cf.exists('info2')

    num_infos = max(num_threads, 1)
    results = cf.get(['info' for i in range(num_infos)])

    assert len(results) == num_infos
    assert results[0]['path'] == 'info'
    assert results[0]['content'] == content
    assert all(map(lambda x: x['error'] is None, results))
    assert cf.get(['nonexistentfile'])[0]['content'] is None

    cf.delete('info')

    cf.put_json('info', {'omg': 'wow'}, cache_control='no-cache')
    results = cf.get_json('info')
    assert results == {'omg': 'wow'}

    cf.delete('info')

    if protocol == 'file':
        rmtree(url)
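
The test above also leans on the mapping-style interface: cf[key] reads, cf[key] = value writes, del cf[key] deletes, and a slice in the subscript requests a byte range. A condensed sketch of those idioms under the same assumptions as before (illustrative file:// path):

from cloudfiles import CloudFiles

cf = CloudFiles("file:///tmp/cf-demo")  # illustrative local bucket
cf["blob"] = b"0123456789"         # equivalent to cf.put("blob", ...)
assert cf["blob", 0:3] == b"012"   # byte-range read via slice
assert cf["blob", :] == b"0123456789"
del cf["blob"]                     # equivalent to cf.delete("blob")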
Example 4
  def exists(self, segids, progress=None):
    """
    Checks if the mesh exists.

    Returns: { label: path or None, ... }
    """
    manifest_paths = [ self.manifest_path(segid) for segid in segids ]
    progress = progress if progress is not None else self.config.progress

    cf = CloudFiles(self.meta.cloudpath, progress=progress, green=self.config.green)
    exists = cf.exists(manifest_paths)

    segid_regexp = re.compile(r'(\d+):0$')

    output = {}
    for path, there in exists.items():
      (segid,) = re.search(segid_regexp, path).groups()
      output[segid] = path if there else None
  
    return output
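
The regular expression (\d+):0$ recovers the numeric label from a manifest path that ends in "<segid>:0". A standalone check of that extraction (the sample path is illustrative):

import re

segid_regexp = re.compile(r'(\d+):0$')
(segid,) = re.search(segid_regexp, "mesh/472:0").groups()
assert segid == "472"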
Example 5
def _cp_single(ctx, source, destination, recursive, compression, progress,
               block_size):
    use_stdin = (source == '-')
    use_stdout = (destination == '-')

    if use_stdout:
        progress = False  # can't have the progress bar interfering

    nsrc = normalize_path(source)
    ndest = normalize_path(destination)

    # For more information see:
    # https://cloud.google.com/storage/docs/gsutil/commands/cp#how-names-are-constructed
    # Try to follow cp rules. If the directory exists,
    # copy the base source directory into the dest directory
    # If the directory does not exist, then we copy into
    # the dest directory.
    # Neither x* nor x** should copy the base directory
    if recursive and nsrc[-1] != "*":
        if CloudFiles(ndest).isdir():
            if nsrc[-1] == '/':
                nsrc = nsrc[:-1]
            ndest = cloudpathjoin(ndest, os.path.basename(nsrc))

    ctx.ensure_object(dict)
    parallel = int(ctx.obj.get("parallel", 1))

    issrcdir = ispathdir(source) and not use_stdin
    isdestdir = ispathdir(destination)

    srcpath = nsrc if issrcdir else os.path.dirname(nsrc)
    many, flat, prefix = get_mfp(nsrc, recursive)

    if issrcdir and not many:
        print(f"cloudfiles: {source} is a directory (not copied).")
        return

    xferpaths = os.path.basename(nsrc)
    if use_stdin:
        xferpaths = sys.stdin.readlines()
        xferpaths = [x.replace("\n", "") for x in xferpaths]
        prefix = os.path.commonprefix(xferpaths)
        xferpaths = [x[len(prefix):] for x in xferpaths]  # strip only the leading prefix
        srcpath = cloudpathjoin(srcpath, prefix)
    elif many:
        xferpaths = CloudFiles(srcpath, green=True).list(prefix=prefix,
                                                         flat=flat)

    destpath = ndest
    if isinstance(xferpaths, str):
        destpath = ndest if isdestdir else os.path.dirname(ndest)
    elif not isdestdir:
        if os.path.exists(ndest.replace("file://", "")):
            print(f"cloudfiles: {ndest} is not a directory (not copied).")
            return

    if compression == "same":
        compression = None
    elif compression == "none":
        compression = False

    if not isinstance(xferpaths, str):
        if parallel == 1:
            _cp(srcpath, destpath, compression, progress, block_size,
                xferpaths)
            return

        total = None
        try:
            total = len(xferpaths)
        except TypeError:
            pass

        if use_stdout:
            fn = partial(_cp_stdout, srcpath)
        else:
            fn = partial(_cp, srcpath, destpath, compression, False,
                         block_size)

        with tqdm(desc="Transferring", total=total,
                  disable=(not progress)) as pbar:
            with pathos.pools.ProcessPool(parallel) as executor:
                for _ in executor.imap(fn, sip(xferpaths, block_size)):
                    pbar.update(block_size)
    else:
        cfsrc = CloudFiles(srcpath, green=True, progress=progress)
        if not cfsrc.exists(xferpaths):
            print(
                f"cloudfiles: source path not found: {cfsrc.abspath(xferpaths).replace('file://','')}"
            )
            return

        if use_stdout:
            _cp_stdout(srcpath, xferpaths)
            return

        downloaded = cfsrc.get(xferpaths, raw=True)
        if compression is not None:
            downloaded = transcode(downloaded, compression, in_place=True)

        cfdest = CloudFiles(destpath, green=True, progress=progress)
        if isdestdir:
            cfdest.put(os.path.basename(nsrc), downloaded, raw=True)
        else:
            cfdest.put(os.path.basename(ndest), downloaded, raw=True)
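
In the parallel branch above, the listed keys are grouped into blocks with sip and each block is handed to a process pool, with the progress bar advancing one block at a time. A simplified, self-contained sketch of that fan-out pattern using only the standard library (batches and copy_batch are stand-ins, not the cloudfiles API):

from concurrent.futures import ProcessPoolExecutor
from itertools import islice

def batches(iterable, block_size):
    # yield successive lists of up to block_size items,
    # mirroring what cloudfiles' sip() helper does above
    it = iter(iterable)
    while block := list(islice(it, block_size)):
        yield block

def copy_batch(paths):
    # stand-in for partial(_cp, srcpath, destpath, ...) above
    return len(paths)

if __name__ == "__main__":
    xferpaths = [f"file_{i}" for i in range(10)]
    with ProcessPoolExecutor(max_workers=2) as pool:
        for n in pool.map(copy_batch, batches(xferpaths, 4)):
            print(f"transferred {n} paths")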
Example 6
def _cp_single(ctx, source, destination, recursive, compression, progress,
               block_size):
    use_stdin = (source == '-')

    nsrc = normalize_path(source)
    ndest = normalize_path(destination)

    ctx.ensure_object(dict)
    parallel = int(ctx.obj.get("parallel", 1))

    issrcdir = ispathdir(source) and not use_stdin
    isdestdir = ispathdir(destination)

    srcpath = nsrc if issrcdir else os.path.dirname(nsrc)
    many, flat, prefix = get_mfp(nsrc, recursive)

    if issrcdir and not many:
        print(f"cloudfiles: {source} is a directory (not copied).")
        return

    xferpaths = os.path.basename(nsrc)
    if use_stdin:
        xferpaths = sys.stdin.readlines()
        xferpaths = [x.replace("\n", "") for x in xferpaths]
        prefix = os.path.commonprefix(xferpaths)
        xferpaths = [x[len(prefix):] for x in xferpaths]  # strip only the leading prefix
        srcpath = cloudpathjoin(srcpath, prefix)
    elif many:
        xferpaths = CloudFiles(srcpath, green=True).list(prefix=prefix,
                                                         flat=flat)

    destpath = ndest
    if isinstance(xferpaths, str):
        destpath = ndest if isdestdir else os.path.dirname(ndest)
    elif not isdestdir:
        if os.path.exists(ndest.replace("file://", "")):
            print(f"cloudfiles: {ndest} is not a directory (not copied).")
            return

    if compression == "same":
        compression = None
    elif compression == "none":
        compression = False

    if not isinstance(xferpaths, str):
        if parallel == 1:
            _cp(srcpath, destpath, compression, progress, block_size,
                xferpaths)
            return

        total = None
        try:
            total = len(xferpaths)
        except TypeError:
            pass

        fn = partial(_cp, srcpath, destpath, compression, False, block_size)
        with tqdm(desc="Transferring", total=total,
                  disable=(not progress)) as pbar:
            with pathos.pools.ProcessPool(parallel) as executor:
                for _ in executor.imap(fn, sip(xferpaths, block_size)):
                    pbar.update(block_size)
    else:
        cfsrc = CloudFiles(srcpath, green=True, progress=progress)
        if not cfsrc.exists(xferpaths):
            print(
                f"cloudfiles: source path not found: {cfsrc.abspath(xferpaths).replace('file://','')}"
            )
            return

        downloaded = cfsrc.get(xferpaths, raw=True)
        if compression is not None:
            downloaded = transcode(downloaded, compression, in_place=True)

        cfdest = CloudFiles(destpath, green=True, progress=progress)
        if isdestdir:
            cfdest.put(os.path.basename(nsrc), downloaded, raw=True)
        else:
            cfdest.put(os.path.basename(ndest), downloaded, raw=True)
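
One detail worth restating from both versions of _cp_single: the compression option is translated before use, with "same" mapping to None (keep each file's current encoding) and "none" mapping to False (store uncompressed); any other value passes through as the target codec. A minimal restatement of that mapping (option names taken from the code above; the "gzip" example is illustrative):

def resolve_compression(flag):
    # "same" -> None: keep each file's current encoding
    # "none" -> False: store the files uncompressed
    # anything else (e.g. "gzip") names the target codec
    if flag == "same":
        return None
    if flag == "none":
        return False
    return flag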
Example 7
def setup_environment(dry_run, volume_start, volume_stop, volume_size,
                      layer_path, max_ram_size, output_patch_size,
                      input_patch_size, channel_num, dtype,
                      output_patch_overlap, crop_chunk_margin, mip,
                      thumbnail_mip, max_mip, thumbnail, encoding, voxel_size,
                      overwrite_info):
    """Prepare storage info files and produce tasks."""
    assert not (volume_stop is None and volume_size is None)
    if isinstance(volume_start, tuple):
        volume_start = Vec(*volume_start)
    if isinstance(volume_stop, tuple):
        volume_stop = Vec(*volume_stop)
    if isinstance(volume_size, tuple):
        volume_size = Vec(*volume_size)

    if input_patch_size is None:
        input_patch_size = output_patch_size

    if volume_size is not None:
        assert len(volume_size) == 3
        assert volume_stop is None
        volume_stop = volume_start + volume_size
    else:
        volume_size = volume_stop - volume_start
    logging.info('\noutput volume start: ' + tuple2string(volume_start))
    logging.info('output volume stop: ' + tuple2string(volume_stop))
    logging.info('output volume size: ' + tuple2string(volume_size))

    if output_patch_overlap is None:
        # use 50% patch overlap by default
        output_patch_overlap = tuple(s // 2 for s in output_patch_size)
    assert output_patch_overlap[1] == output_patch_overlap[2]

    if crop_chunk_margin is None:
        crop_chunk_margin = output_patch_overlap
    assert crop_chunk_margin[1] == crop_chunk_margin[2]
    logging.info('margin size: ' + tuple2string(crop_chunk_margin))

    if thumbnail:
        # the thumbnail requires a mip level of at least 5
        thumbnail_mip = max(thumbnail_mip, 5)

    block_size, output_chunk_size, factor = get_optimized_block_size(
        output_patch_size, output_patch_overlap, max_ram_size, channel_num,
        max_mip, crop_chunk_margin, input_patch_size, mip, thumbnail_mip,
        volume_start)

    if not dry_run:
        storage = CloudFiles(layer_path)
        thumbnail_layer_path = os.path.join(layer_path, 'thumbnail')
        thumbnail_storage = CloudFiles(thumbnail_layer_path)

        if not overwrite_info:
            logging.info(
                '\nensure the info files already exist since we are not overwriting them.')
            assert storage.exists('info')
            assert thumbnail_storage.exists('info')

        if overwrite_info:
            logging.info(f'create and upload info file to {layer_path}')
            # Note that CloudVolume uses Fortran order rather than C order
            info = CloudVolume.create_new_info(channel_num,
                                               layer_type='image',
                                               data_type=dtype,
                                               encoding=encoding,
                                               resolution=voxel_size[::-1],
                                               voxel_offset=volume_start[::-1],
                                               volume_size=volume_size[::-1],
                                               chunk_size=block_size[::-1],
                                               max_mip=mip)
            vol = CloudVolume(layer_path, info=info)
            vol.commit_info()

        if overwrite_info:
            thumbnail_factor = 2**thumbnail_mip
            thumbnail_block_size = (output_chunk_size[0] // factor,
                                    output_chunk_size[1] // thumbnail_factor,
                                    output_chunk_size[2] // thumbnail_factor)
            logging.info('thumbnail block size: ' +
                         tuple2string(thumbnail_block_size))
            thumbnail_info = CloudVolume.create_new_info(
                1,
                layer_type='image',
                data_type='uint8',
                encoding='raw',
                resolution=voxel_size[::-1],
                voxel_offset=volume_start[::-1],
                volume_size=volume_size[::-1],
                chunk_size=thumbnail_block_size[::-1],
                max_mip=thumbnail_mip)
            thumbnail_vol = CloudVolume(thumbnail_layer_path,
                                        info=thumbnail_info)
            thumbnail_vol.commit_info()

    logging.info('create a list of bounding boxes...')
    roi_start = (volume_start[0], volume_start[1] // factor,
                 volume_start[2] // factor)
    roi_size = (volume_size[0], volume_size[1] // factor,
                volume_size[2] // factor)
    roi_stop = tuple(s + z for s, z in zip(roi_start, roi_size))

    # create bounding boxes and ingest to queue
    bboxes = BoundingBoxes.from_manual_setup(output_chunk_size,
                                             roi_start=roi_start,
                                             roi_stop=roi_stop)
    logging.info(f'total number of tasks: {len(bboxes)}')

    logging.debug(f'bounding boxes: {bboxes}')

    print(
        yellow(
            'Note that you should reuse the printed parameters in the production run.'
            + ' These parameters are not ingested into the AWS SQS queue.'))
    return bboxes
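
The ROI arithmetic above divides only the y and x axes by factor (z is left alone) and derives the stop corner as the element-wise sum of start and size. A tiny numeric check of that arithmetic (all values are illustrative):

factor = 2                       # illustrative downsampling factor
volume_start = (0, 1024, 1024)   # z, y, x
volume_size = (100, 4096, 4096)

roi_start = (volume_start[0], volume_start[1] // factor, volume_start[2] // factor)
roi_size = (volume_size[0], volume_size[1] // factor, volume_size[2] // factor)
roi_stop = tuple(s + z for s, z in zip(roi_start, roi_size))
assert roi_stop == (100, 2560, 2560)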