def test_exists(s3, protocol):
    from cloudfiles import CloudFiles, exceptions
    url = compute_url(protocol, "exists")

    cf = CloudFiles(url, num_threads=5)
    content = b'some_string'
    cf.put('info', content, compress=None)

    assert cf.exists('info')
    assert not cf.exists('doesntexist')

    assert cf.exists(['info'])['info']
    assert not cf.exists(['doesntexist'])['doesntexist']

    cf.delete('info')
def exists(self, segids, progress=None):
    """
    Checks if the mesh exists.

    Returns: { MultiLevelPrecomputedMeshManifest or None, ... }
    """
    cf = CloudFiles(self.path)
    return cf.exists((f"{segid}.index" for segid in segids))
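A minimal usage sketch (not from the source) of the CloudFiles.exists() call the method above relies on: given an iterable of paths it returns a dict mapping each path to a boolean. The layer path and segment ids below are hypothetical.

from cloudfiles import CloudFiles

cf = CloudFiles("file:///tmp/mesh_layer")  # hypothetical local layer
present = cf.exists(f"{segid}.index" for segid in (17, 42))
# e.g. {'17.index': True, '42.index': False}
missing = [path for path, there in present.items() if not there]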
def test_read_write(s3, protocol, num_threads, green):
    from cloudfiles import CloudFiles, exceptions
    url = compute_url(protocol, "rw")

    cf = CloudFiles(url, num_threads=num_threads, green=green)
    content = b'some_string'
    cf.put('info', content, compress=None, cache_control='no-cache')
    cf['info2'] = content

    assert cf.get('info') == content
    assert cf['info2'] == content
    assert cf['info2', 0:3] == content[0:3]
    assert cf['info2', :] == content[:]
    assert cf.get('nonexistentfile') is None

    assert cf.get('info', return_dict=True) == { "info": content }
    assert cf.get(['info', 'info2'], return_dict=True) == {
        "info": content,
        "info2": content,
    }

    del cf['info2']
    assert cf.exists('info2') == False

    num_infos = max(num_threads, 1)
    results = cf.get([ 'info' for i in range(num_infos) ])

    assert len(results) == num_infos
    assert results[0]['path'] == 'info'
    assert results[0]['content'] == content
    assert all(map(lambda x: x['error'] is None, results))
    assert cf.get(['nonexistentfile'])[0]['content'] is None

    cf.delete('info')

    cf.put_json('info', { 'omg': 'wow' }, cache_control='no-cache')
    results = cf.get_json('info')
    assert results == { 'omg': 'wow' }

    cf.delete('info')

    if protocol == 'file':
        rmtree(url)
def exists(self, segids, progress=None):
    """
    Checks if the mesh exists.

    Returns: { label: path or None, ... }
    """
    manifest_paths = [ self.manifest_path(segid) for segid in segids ]
    progress = progress if progress is not None else self.config.progress

    cf = CloudFiles(
        self.meta.cloudpath,
        progress=progress,
        green=self.config.green,
    )
    exists = cf.exists(manifest_paths)

    segid_regexp = re.compile(r'(\d+):0$')

    output = {}
    for path, there in exists.items():
        (segid,) = re.search(segid_regexp, path).groups()
        output[segid] = path if there else None

    return output
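A short sketch (the path is hypothetical) of the regex used above: manifest paths are assumed to end in "<segid>:0", so r'(\d+):0$' recovers the label as the string key for the output dict.

import re

segid_regexp = re.compile(r'(\d+):0$')
path = 'mesh/728437:0'  # hypothetical manifest path
(segid,) = re.search(segid_regexp, path).groups()
assert segid == '728437'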
def _cp_single(ctx, source, destination, recursive, compression, progress, block_size):
    use_stdin = (source == '-')
    use_stdout = (destination == '-')

    if use_stdout:
        progress = False # can't have the progress bar interfering

    nsrc = normalize_path(source)
    ndest = normalize_path(destination)

    # For more information see:
    # https://cloud.google.com/storage/docs/gsutil/commands/cp#how-names-are-constructed
    # Try to follow cp rules. If the directory exists,
    # copy the base source directory into the dest directory.
    # If the directory does not exist, then we copy into
    # the dest directory.
    # Both x* and x** should not copy the base directory.
    if recursive and nsrc[-1] != "*":
        if CloudFiles(ndest).isdir():
            if nsrc[-1] == '/':
                nsrc = nsrc[:-1]
            ndest = cloudpathjoin(ndest, os.path.basename(nsrc))

    ctx.ensure_object(dict)
    parallel = int(ctx.obj.get("parallel", 1))

    issrcdir = ispathdir(source) and use_stdin == False
    isdestdir = ispathdir(destination)

    srcpath = nsrc if issrcdir else os.path.dirname(nsrc)
    many, flat, prefix = get_mfp(nsrc, recursive)

    if issrcdir and not many:
        print(f"cloudfiles: {source} is a directory (not copied).")
        return

    xferpaths = os.path.basename(nsrc)
    if use_stdin:
        xferpaths = sys.stdin.readlines()
        xferpaths = [ x.replace("\n", "") for x in xferpaths ]
        prefix = os.path.commonprefix(xferpaths)
        xferpaths = [ x.replace(prefix, "") for x in xferpaths ]
        srcpath = cloudpathjoin(srcpath, prefix)
    elif many:
        xferpaths = CloudFiles(srcpath, green=True).list(prefix=prefix, flat=flat)

    destpath = ndest
    if isinstance(xferpaths, str):
        destpath = ndest if isdestdir else os.path.dirname(ndest)
    elif not isdestdir:
        if os.path.exists(ndest.replace("file://", "")):
            print(f"cloudfiles: {ndest} is not a directory (not copied).")
            return

    if compression == "same":
        compression = None
    elif compression == "none":
        compression = False

    if not isinstance(xferpaths, str):
        if parallel == 1:
            _cp(srcpath, destpath, compression, progress, block_size, xferpaths)
            return

        total = None
        try:
            total = len(xferpaths)
        except TypeError:
            pass

        if use_stdout:
            fn = partial(_cp_stdout, srcpath)
        else:
            fn = partial(_cp, srcpath, destpath, compression, False, block_size)

        with tqdm(desc="Transferring", total=total, disable=(not progress)) as pbar:
            with pathos.pools.ProcessPool(parallel) as executor:
                for _ in executor.imap(fn, sip(xferpaths, block_size)):
                    pbar.update(block_size)
    else:
        cfsrc = CloudFiles(srcpath, green=True, progress=progress)
        if not cfsrc.exists(xferpaths):
            print(
                f"cloudfiles: source path not found: {cfsrc.abspath(xferpaths).replace('file://','')}"
            )
            return

        if use_stdout:
            _cp_stdout(srcpath, xferpaths)
            return

        downloaded = cfsrc.get(xferpaths, raw=True)
        if compression is not None:
            downloaded = transcode(downloaded, compression, in_place=True)

        cfdest = CloudFiles(destpath, green=True, progress=progress)
        if isdestdir:
            cfdest.put(os.path.basename(nsrc), downloaded, raw=True)
        else:
            cfdest.put(os.path.basename(ndest), downloaded, raw=True)
def _cp_single(ctx, source, destination, recursive, compression, progress, block_size):
    use_stdin = (source == '-')

    nsrc = normalize_path(source)
    ndest = normalize_path(destination)

    ctx.ensure_object(dict)
    parallel = int(ctx.obj.get("parallel", 1))

    issrcdir = ispathdir(source) and use_stdin == False
    isdestdir = ispathdir(destination)

    srcpath = nsrc if issrcdir else os.path.dirname(nsrc)
    many, flat, prefix = get_mfp(nsrc, recursive)

    if issrcdir and not many:
        print(f"cloudfiles: {source} is a directory (not copied).")
        return

    xferpaths = os.path.basename(nsrc)
    if use_stdin:
        xferpaths = sys.stdin.readlines()
        xferpaths = [ x.replace("\n", "") for x in xferpaths ]
        prefix = os.path.commonprefix(xferpaths)
        xferpaths = [ x.replace(prefix, "") for x in xferpaths ]
        srcpath = cloudpathjoin(srcpath, prefix)
    elif many:
        xferpaths = CloudFiles(srcpath, green=True).list(prefix=prefix, flat=flat)

    destpath = ndest
    if isinstance(xferpaths, str):
        destpath = ndest if isdestdir else os.path.dirname(ndest)
    elif not isdestdir:
        if os.path.exists(ndest.replace("file://", "")):
            print(f"cloudfiles: {ndest} is not a directory (not copied).")
            return

    if compression == "same":
        compression = None
    elif compression == "none":
        compression = False

    if not isinstance(xferpaths, str):
        if parallel == 1:
            _cp(srcpath, destpath, compression, progress, block_size, xferpaths)
            return

        total = None
        try:
            total = len(xferpaths)
        except TypeError:
            pass

        fn = partial(_cp, srcpath, destpath, compression, False, block_size)
        with tqdm(desc="Transferring", total=total, disable=(not progress)) as pbar:
            with pathos.pools.ProcessPool(parallel) as executor:
                for _ in executor.imap(fn, sip(xferpaths, block_size)):
                    pbar.update(block_size)
    else:
        cfsrc = CloudFiles(srcpath, green=True, progress=progress)
        if not cfsrc.exists(xferpaths):
            print(
                f"cloudfiles: source path not found: {cfsrc.abspath(xferpaths).replace('file://','')}"
            )
            return

        downloaded = cfsrc.get(xferpaths, raw=True)
        if compression is not None:
            downloaded = transcode(downloaded, compression, in_place=True)

        cfdest = CloudFiles(destpath, green=True, progress=progress)
        if isdestdir:
            cfdest.put(os.path.basename(nsrc), downloaded, raw=True)
        else:
            cfdest.put(os.path.basename(ndest), downloaded, raw=True)
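A minimal sketch (not from the source) of the fan-out pattern both _cp_single versions share: bind the fixed arguments with functools.partial, then stream fixed-size batches of paths through a process pool while tqdm tracks progress. The worker, paths, and batch size are hypothetical, and sip() from the snippets above is replaced by a trivial chunker.

from functools import partial
import pathos.pools
from tqdm import tqdm

def batches(seq, n):
    # stand-in for sip(): yield successive lists of at most n items
    for i in range(0, len(seq), n):
        yield seq[i:i + n]

def copy_batch(destination, paths):
    # hypothetical worker; a real one would download and re-upload each path
    return len(paths)

def parallel_copy(paths, destination, parallel=4, block_size=128):
    fn = partial(copy_batch, destination)
    with tqdm(desc="Transferring", total=len(paths)) as pbar:
        with pathos.pools.ProcessPool(parallel) as executor:
            for ndone in executor.imap(fn, batches(paths, block_size)):
                pbar.update(ndone)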
def setup_environment(dry_run, volume_start, volume_stop, volume_size, layer_path,
                      max_ram_size, output_patch_size, input_patch_size, channel_num,
                      dtype, output_patch_overlap, crop_chunk_margin, mip, thumbnail_mip,
                      max_mip, thumbnail, encoding, voxel_size, overwrite_info):
    """Prepare storage info files and produce tasks."""
    assert not (volume_stop is None and volume_size is None)
    if isinstance(volume_start, tuple):
        volume_start = Vec(*volume_start)
    if isinstance(volume_stop, tuple):
        volume_stop = Vec(*volume_stop)
    if isinstance(volume_size, tuple):
        volume_size = Vec(*volume_size)

    if input_patch_size is None:
        input_patch_size = output_patch_size

    if volume_size is not None:
        assert len(volume_size) == 3
        assert volume_stop is None
        volume_stop = volume_start + volume_size
    else:
        volume_size = volume_stop - volume_start
    logging.info('\noutput volume start: ' + tuple2string(volume_start))
    logging.info('output volume stop: ' + tuple2string(volume_stop))
    logging.info('output volume size: ' + tuple2string(volume_size))

    if output_patch_overlap is None:
        # use 50% patch overlap by default
        output_patch_overlap = tuple(s // 2 for s in output_patch_size)
    assert output_patch_overlap[1] == output_patch_overlap[2]

    if crop_chunk_margin is None:
        crop_chunk_margin = output_patch_overlap
    assert crop_chunk_margin[1] == crop_chunk_margin[2]
    logging.info('margin size: ' + tuple2string(crop_chunk_margin))

    if thumbnail:
        # thumbnail requires maximum mip level of 5
        thumbnail_mip = max(thumbnail_mip, 5)

    block_size, output_chunk_size, factor = get_optimized_block_size(
        output_patch_size, output_patch_overlap, max_ram_size,
        channel_num, max_mip, crop_chunk_margin,
        input_patch_size, mip, thumbnail_mip, volume_start
    )

    if not dry_run:
        storage = CloudFiles(layer_path)
        thumbnail_layer_path = os.path.join(layer_path, 'thumbnail')
        thumbnail_storage = CloudFiles(thumbnail_layer_path)

        if not overwrite_info:
            logging.info('\ncheck that we are not overwriting existing info file.')
            assert storage.exists('info')
            assert thumbnail_storage.exists('info')

        if overwrite_info:
            logging.info(f'create and upload info file to {layer_path}')
            # Note that cloudvolume uses fortran order rather than C order
            info = CloudVolume.create_new_info(
                channel_num,
                layer_type='image',
                data_type=dtype,
                encoding=encoding,
                resolution=voxel_size[::-1],
                voxel_offset=volume_start[::-1],
                volume_size=volume_size[::-1],
                chunk_size=block_size[::-1],
                max_mip=mip
            )
            vol = CloudVolume(layer_path, info=info)
            vol.commit_info()

        if overwrite_info:
            thumbnail_factor = 2**thumbnail_mip
            thumbnail_block_size = (
                output_chunk_size[0] // factor,
                output_chunk_size[1] // thumbnail_factor,
                output_chunk_size[2] // thumbnail_factor
            )
            logging.info('thumbnail block size: ' + tuple2string(thumbnail_block_size))
            thumbnail_info = CloudVolume.create_new_info(
                1,
                layer_type='image',
                data_type='uint8',
                encoding='raw',
                resolution=voxel_size[::-1],
                voxel_offset=volume_start[::-1],
                volume_size=volume_size[::-1],
                chunk_size=thumbnail_block_size[::-1],
                max_mip=thumbnail_mip
            )
            thumbnail_vol = CloudVolume(thumbnail_layer_path, info=thumbnail_info)
            thumbnail_vol.commit_info()

    logging.info('create a list of bounding boxes...')
    roi_start = (
        volume_start[0],
        volume_start[1] // factor,
        volume_start[2] // factor
    )
    roi_size = (
        volume_size[0],
        volume_size[1] // factor,
        volume_size[2] // factor
    )
    roi_stop = tuple(s + z for s, z in zip(roi_start, roi_size))

    # create bounding boxes and ingest to queue
    bboxes = BoundingBoxes.from_manual_setup(
        output_chunk_size,
        roi_start=roi_start, roi_stop=roi_stop
    )
    logging.info(f'total number of tasks: {len(bboxes)}')
    logging.debug(f'bounding boxes: {bboxes}')

    print(
        yellow(
            'Note that you should reuse the printed out parameters in the production run.'
            + ' These parameters are not ingested to AWS SQS queue.'
        )
    )

    return bboxes
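A small sketch (values hypothetical) of why the [::-1] reversals appear above: the tuples passed into setup_environment are ordered z, y, x, while CloudVolume.create_new_info expects x, y, z (Fortran order), so each tuple is reversed before it is handed over.

voxel_size = (40, 4, 4)         # z, y, x in nanometers (hypothetical)
volume_start = (100, 512, 512)  # z, y, x voxel offset (hypothetical)

print(voxel_size[::-1])    # (4, 4, 40)      -> x, y, z resolution
print(volume_start[::-1])  # (512, 512, 100) -> x, y, z voxel_offset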