def post_swcs_to_dvid(config, items):
    """
    Post the given SWC files to DVID as key/value pairs.

    Args:
        config:
            The CreateSkeletons workflow config data
        items:
            list-of-tuples (body_id, swc_text, error_text)
            Items with no SWC text, or with a non-null error, are skipped
            entirely.  (We could have filtered out such items upstream,
            but it's convenient to just handle it here.)
    """
    # One session for all posts, for connection pooling.
    session = default_dvid_session()

    # Likewise, one resource manager client for all posts.
    # (If resource-server is empty, this will return a "dummy client")
    resource_client = ResourceManagerClient( config["options"]["resource-server"],
                                             config["options"]["resource-port"] )

    dvid_config = config["dvid-info"]["dvid"]
    dvid_server = dvid_config["server"]
    uuid = dvid_config["uuid"]
    instance = dvid_config["skeletons-destination"]

    for body_id, swc_contents, err in items:
        if err is not None or swc_contents is None:
            continue

        payload = swc_contents.encode('utf-8')

        @auto_retry(3, pause_between_tries=60.0, logging_name=__name__)
        def write_swc():
            with resource_client.access_context(dvid_server, False, 1, len(payload)):
                session.post(f'{dvid_server}/api/node/{uuid}/{instance}/key/{body_id}_swc', payload)
        write_swc()
def get_legacy_sparsevol(cls, server, uuid, instance_name, body_id, scale=0):
    """
    Returns the coordinates (Z,Y,X) of all voxels in the given body_id at the given scale.

    Note: For large bodies, this will be a LOT of coordinates at scale 0.

    Note: The returned coordinates are native to the requested scale.
          For instance, if the first Z-coordinate at scale 0 is 128,
          then at scale 1 it is 64, etc.

    Note: This function requests the data from DVID in the legacy 'rles' format,
          which is much less efficient than the newer 'blocks' format
          (but it's easy enough to parse that we can do it in Python).

    Return an array of coordinates of the form:

        [[Z,Y,X],
         [Z,Y,X],
         [Z,Y,X],
         ...
        ]
    """
    # Only prepend a scheme if the server doesn't already have one.
    # (The old startswith('http://') check mangled 'https://...' servers
    # into 'http://https://...'.)
    if '://' not in server:
        server = 'http://' + server

    session = default_dvid_session()
    r = session.get(f'{server}/api/node/{uuid}/{instance_name}/sparsevol/{body_id}?format=rles&scale={scale}')
    r.raise_for_status()
    return parse_rle_response( r.content )
def extend_list_value(dvid_server, uuid, kv_instance, key, new_list):
    """
    Extend the list stored at the given keyvalue instance and key
    with the items in new_list.
    If the keyvalue instance and/or key are missing from the server,
    create them.
    """
    assert isinstance(new_list, list)
    session = default_dvid_session()

    old_list = []
    keys_response = session.get(f'{dvid_server}/api/node/{uuid}/{kv_instance}/keys')

    # 400 means "instance doesn't exist"; anything else unexpected is an error.
    if keys_response.status_code not in (200, 400):
        keys_response.raise_for_status()

    if keys_response.status_code == 400:
        # Create the keyvalue instance first
        creation_response = session.post( f'{dvid_server}/api/repo/{uuid}/instance',
                                          json={"typename": "keyvalue", "dataname": kv_instance} )
        creation_response.raise_for_status()
    elif key in keys_response.json():
        # Fetch original value
        value_response = session.get(f'{dvid_server}/api/node/{uuid}/{kv_instance}/key/{key}')
        value_response.raise_for_status()
        old_list = value_response.json()
        assert isinstance(old_list, list)

    # Merge, de-duplicate, and only write back if something actually changed.
    new_list = list(set(old_list + new_list))
    if set(new_list) != set(old_list):
        logger.debug("Updating '{}/{}' list from: {} to: {}".format( kv_instance, key, old_list, new_list ))
        post_response = session.post(f'{dvid_server}/api/node/{uuid}/{kv_instance}/key/{key}', json=new_list)
        post_response.raise_for_status()
def post_meshes_to_dvid(config, instance_name, partition_items):
    """
    Send the given meshes (either .obj or .drc) as key/value pairs to DVID.

    Args:
        config: The CreateMeshes workflow config data
        instance_name: key-value instance to post to
        partition_items: tuple (group_id, [(segment_id, mesh_data), (segment_id, mesh_data)])
    """
    # Re-use session for connection pooling.
    session = default_dvid_session()

    # Re-use resource manager client connections, too.
    # (If resource-server is empty, this will return a "dummy client")
    resource_client = ResourceManagerClient( config["options"]["resource-server"],
                                             config["options"]["resource-port"] )

    dvid_server = config["dvid-info"]["dvid"]["server"]
    uuid = config["dvid-info"]["dvid"]["uuid"]

    grouping_scheme = config["mesh-config"]["storage"]["grouping-scheme"]
    mesh_format = config["mesh-config"]["storage"]["format"]

    if grouping_scheme == "no-groups":
        # Each mesh is posted individually, along with a small
        # companion '<segment_id>_info' key recording its format.
        for group_id, segment_ids_and_meshes in partition_items:
            for (segment_id, mesh_data) in segment_ids_and_meshes:

                @auto_retry(3, pause_between_tries=60.0, logging_name=__name__)
                def write_mesh():
                    # count=2: this context covers two POSTs (mesh + info).
                    with resource_client.access_context(dvid_server, False, 2, len(mesh_data)):
                        session.post(f'{dvid_server}/api/node/{uuid}/{instance_name}/key/{segment_id}', mesh_data)
                        session.post(f'{dvid_server}/api/node/{uuid}/{instance_name}/key/{segment_id}_info', json={ 'format': mesh_format })

                write_mesh()
    else:
        # All other grouping schemes, including 'singletons' write tarballs.
        # (In the 'singletons' case, there is just one tarball per body.)
        for group_id, segment_ids_and_meshes in partition_items:
            tar_name = _get_group_name(config, group_id)
            tar_stream = BytesIO()
            # Build the tarball entirely in memory before posting.
            with closing(tarfile.open(tar_name, 'w', tar_stream)) as tf:
                for (segment_id, mesh_data) in segment_ids_and_meshes:
                    mesh_name = _get_mesh_name(config, segment_id)
                    f_info = tarfile.TarInfo(mesh_name)
                    f_info.size = len(mesh_data)
                    tf.addfile(f_info, BytesIO(mesh_data))

            tar_bytes = tar_stream.getbuffer()

            @auto_retry(3, pause_between_tries=60.0, logging_name=__name__)
            def write_tar():
                with resource_client.access_context(dvid_server, False, 1, len(tar_bytes)):
                    session.post(f'{dvid_server}/api/node/{uuid}/{instance_name}/key/{tar_name}', tar_bytes)

            write_tar()
def update_extents(dvid_server, uuid, name, minimal_extents_zyx):
    """
    Ensure that the given data instance has at least the given extents.

    Args:
        dvid_server (str): location of dvid server
        uuid (str): version id
        name (str): data instance name
        minimal_extents:
            3D bounding box [min_zyx, max_zyx] = [(z0,y0,x0), (z1,y1,x1)].
            If provided, data extents will be at least this large (possibly larger).
            (The max extent should use python conventions, i.e. the MaxPoint + 1)
    """
    session = default_dvid_session()
    minimal_extents_zyx = np.array(minimal_extents_zyx, dtype=int)
    assert minimal_extents_zyx.shape == (2,3), \
        "Minimal extents must be provided as a 3D bounding box: [(z0,y0,x0), (z1,y1,x1)]"

    logger.info("Updating extents for {uuid}/{name}".format(**locals()))

    # DVID stores extents in XYZ order.
    minimal_extents_xyz = minimal_extents_zyx[:, ::-1].copy()

    # Fetch original extents.
    r = session.get( '{dvid_server}/api/node/{uuid}/{name}/info'.format(**locals()))
    r.raise_for_status()
    info = r.json()
    logger.debug("Read extents: " + json.dumps(info))

    # Start with a degenerate (inverted) box so that any real extent wins the min/max below.
    orig_extents_xyz = np.array([(1e9, 1e9, 1e9), (-1e9, -1e9, -1e9)], dtype=int)
    if info["Extended"]["MinPoint"] is not None:
        orig_extents_xyz[0] = info["Extended"]["MinPoint"]
    if info["Extended"]["MaxPoint"] is not None:
        orig_extents_xyz[1] = info["Extended"]["MaxPoint"]
        # DVID's MaxPoint is inclusive; convert to python convention.
        orig_extents_xyz[1] += 1

    # Expand the requested box to cover the existing extents.
    minimal_extents_xyz[0] = np.minimum(minimal_extents_xyz[0], orig_extents_xyz[0])
    minimal_extents_xyz[1] = np.maximum(minimal_extents_xyz[1], orig_extents_xyz[1])

    # Only post if something actually changed.
    if (minimal_extents_xyz != orig_extents_xyz).any():
        min_point_xyz = minimal_extents_xyz[0]
        # Back to DVID's inclusive MaxPoint convention.
        max_point_xyz = minimal_extents_xyz[1] - 1
        extents_json = { "MinPoint": min_point_xyz.tolist(),
                         "MaxPoint": max_point_xyz.tolist() }

        url = '{dvid_server}/api/node/{uuid}/{name}/extents'.format(**locals())
        logger.debug("Posting new extents: {}".format( json.dumps(extents_json)))
        r = session.post(url, json=extents_json)
        r.raise_for_status()
def update_extents(dvid_server, uuid, name, minimal_extents_zyx):
    """
    Ensure that the given data instance's extents are at least as large
    as the given bounding box (expanding them on the server if necessary).

    Args:
        dvid_server (str): location of dvid server
        uuid (str): version id
        name (str): data instance name
        minimal_extents_zyx:
            3D bounding box [min_zyx, max_zyx] = [(z0,y0,x0), (z1,y1,x1)].
            Data extents will end up at least this large (possibly larger).
            (The max extent should use python conventions, i.e. the MaxPoint + 1)
    """
    session = default_dvid_session()
    minimal_extents_zyx = np.array(minimal_extents_zyx, dtype=int)
    assert minimal_extents_zyx.shape == (2,3), \
        "Minimal extents must be provided as a 3D bounding box: [(z0,y0,x0), (z1,y1,x1)]"

    logger.info(f"Updating extents for {uuid}/{name}")

    # DVID stores extents in XYZ order.
    minimal_extents_xyz = minimal_extents_zyx[:, ::-1].copy()

    # Fetch the instance's current extents.
    info_response = session.get(f'{dvid_server}/api/node/{uuid}/{name}/info')
    info_response.raise_for_status()
    info = info_response.json()
    logger.debug("Read extents: " + json.dumps(info))

    # Degenerate (inverted) box, so any real extent replaces it in the min/max below.
    orig_extents_xyz = np.array([(1e9, 1e9, 1e9), (-1e9, -1e9, -1e9)], dtype=int)
    stored_min = info["Extended"]["MinPoint"]
    stored_max = info["Extended"]["MaxPoint"]
    if stored_min is not None:
        orig_extents_xyz[0] = stored_min
    if stored_max is not None:
        # Convert DVID's inclusive MaxPoint to python convention.
        orig_extents_xyz[1] = stored_max
        orig_extents_xyz[1] += 1

    minimal_extents_xyz[0] = np.minimum(minimal_extents_xyz[0], orig_extents_xyz[0])
    minimal_extents_xyz[1] = np.maximum(minimal_extents_xyz[1], orig_extents_xyz[1])

    if not (minimal_extents_xyz != orig_extents_xyz).any():
        return

    # Convert back to DVID's inclusive MaxPoint convention before posting.
    extents_json = { "MinPoint": minimal_extents_xyz[0].tolist(),
                     "MaxPoint": (minimal_extents_xyz[1] - 1).tolist() }

    url = f'{dvid_server}/api/node/{uuid}/{name}/extents'
    logger.debug("Posting new extents: {}".format(json.dumps(extents_json)))
    post_response = session.post(url, json=extents_json)
    post_response.raise_for_status()
def _execute_mappings(self, mapping_df):
    """
    Ingest the given agglomeration mapping into DVID.

    Raises:
        RuntimeError: if no mapping was provided.
    """
    config = self.config_data
    if mapping_df is None:
        raise RuntimeError("Can't load mappings: No agglomeration mapping provided.")

    # Just do this from a single machine (the driver), with a big batch size
    # The writes are serialized on the DVID side, anyway.
    with Timer("Sending mapping", logger):
        ingest_mapping( config["dvid"]["server"],
                        config["dvid"]["uuid"],
                        config["dvid"]["segmentation-name"],
                        config["options"]["mutation-id"],
                        mapping_df,
                        batch_size=100_000,
                        show_progress_bar=False,
                        session=default_dvid_session() )
def extend_list_value(dvid_server, uuid, kv_instance, key, new_list):
    """
    For the list stored at the given keyvalue instance and key,
    extend it with the given new_list.
    If the keyvalue instance and/or key are missing from the server, create them.
    """
    assert isinstance(new_list, list)
    old_list = []
    session = default_dvid_session()

    r = session.get(
        '{dvid_server}/api/node/{uuid}/{kv_instance}/keys'.format(**locals()))
    # 400 means the keyvalue instance doesn't exist yet; anything else
    # unexpected is an error.
    if r.status_code not in (200, 400):
        r.raise_for_status()

    if r.status_code == 400:
        # Create the keyvalue instance first
        r_post = session.post(
            '{dvid_server}/api/repo/{uuid}/instance'.format(**locals()),
            json={
                "typename": "keyvalue",
                "dataname": kv_instance
            })
        r_post.raise_for_status()
    elif key in r.json():
        # Fetch original value
        r = session.get(
            '{dvid_server}/api/node/{uuid}/{kv_instance}/key/{key}'.format(
                **locals()))
        r.raise_for_status()
        old_list = r.json()
        assert isinstance(old_list, list)

    # Merge and de-duplicate; only write back if something actually changed.
    new_list = list(set(old_list + new_list))
    if set(new_list) != set(old_list):
        logger.debug("Updating '{}/{}' list from: {} to: {}".format(
            kv_instance, key, old_list, new_list))
        r = session.post(
            '{dvid_server}/api/node/{uuid}/{kv_instance}/key/{key}'.format(
                **locals()),
            json=new_list)
        r.raise_for_status()
def post_swcs_to_dvid(config, items): """ Send the given SWC files as key/value pairs to DVID. Args: config: The CreateSkeletons workflow config data items: list-of-tuples (body_id, swc_text, error_text) If swc_text is None or error_text is NOT None, then nothing is posted. (We could have filtered out such items upstream, but it's convenient to just handle it here.) """ # Re-use session for connection pooling. session = default_dvid_session() # Re-use resource manager client connections, too. # (If resource-server is empty, this will return a "dummy client") resource_client = ResourceManagerClient( config["options"]["resource-server"], config["options"]["resource-port"]) dvid_server = config["dvid-info"]["dvid"]["server"] uuid = config["dvid-info"]["dvid"]["uuid"] instance = config["dvid-info"]["dvid"]["skeletons-destination"] for (body_id, swc_contents, err) in items: if swc_contents is None or err is not None: continue swc_contents = swc_contents.encode('utf-8') @auto_retry(3, pause_between_tries=60.0, logging_name=__name__) def write_swc(): with resource_client.access_context(dvid_server, False, 1, len(swc_contents)): session.post( f'{dvid_server}/api/node/{uuid}/{instance}/key/{body_id}_swc', swc_contents) write_swc()
def timed_fetch_blocks_from_box(box):
    """
    Fetch the blocks for a given box and return the time it took to fetch them.
    The blocks are NOT decompressed or combined into a single volume.

    Returns:
        (timestamp, voxel_count, response_byte_count, seconds)
    """
    assert not (box % block_shape).any(), "For this test, all requests must be block-aligned"

    # Compute the block coordinates (DVID wants them in XYZ order).
    block_boxes = list(boxes_from_grid(box, Grid(block_shape)))
    block_coords_xyz = np.array(block_boxes)[:, 0, ::-1] // block_shape
    block_coords_str = ','.join(map(str, block_coords_xyz.flat))

    voxel_count = np.prod(box[1] - box[0])
    url = f'{server}/api/node/{uuid}/{instance}/specificblocks?blocks={block_coords_str}'

    session = default_dvid_session()
    with resource_mgr_client.access_context(server, True, 1, voxel_count):
        timestamp = datetime.now()
        with Timer() as timer:
            r = session.get(url)

    r.raise_for_status()
    return timestamp, voxel_count, len(r.content), timer.seconds
def get_roi(self, roi):
    """
    An alternate implementation of libdvid.DVIDNodeService.get_roi(),
    since DVID sometimes returns strange 503 errors and
    DVIDNodeService.get_roi() doesn't know how to handle them.

    Args:
        roi: Name of the ROI instance to fetch.

    Returns:
        list of (z,y,x) block coordinates covered by the ROI
    """
    session = default_dvid_session()

    # grab roi blocks (should use libdvid but there are problems handling 206 status)
    # Only prepend a scheme if the server doesn't already have one.
    # (The old startswith('http://') check mangled 'https://...' servers.)
    server = self.dvid_server
    if '://' not in server:
        server = "http://" + server
    addr = server + "/api/node/" + str(self.uuid) + "/" + str(roi) + "/roi"

    data = session.get(addr)
    # Fail with a clear HTTP error instead of a confusing JSON decode error.
    data.raise_for_status()
    roi_blockruns = data.json()

    # Each run is (z, y, x_first, x_last); expand into individual block coords.
    roi_blocks = []
    for (z, y, x_first, x_last) in roi_blockruns:
        for x in range(x_first, x_last+1):
            roi_blocks.append((z, y, x))
    return roi_blocks
def timed_fetch_blocks_from_box(box):
    """
    Fetch the blocks for a given box and return the time it took to fetch them.
    Do not bother decompressing the blocks or combining them into a single volume.

    Returns:
        (timestamp, voxel_count, response_byte_count, seconds)
    """
    assert not (box % block_shape).any(
    ), "For this test, all requests must be block-aligned"
    # Block coordinates, converted to DVID's XYZ order.
    block_boxes = list(boxes_from_grid(box, Grid(block_shape)))
    block_coords_xyz = np.array(block_boxes)[:, 0, ::-1] // block_shape
    block_coords_str = ','.join(map(str, block_coords_xyz.flat))

    voxel_count = np.prod(box[1] - box[0])

    session = default_dvid_session()
    url = f'{server}/api/node/{uuid}/{instance}/specificblocks?blocks={block_coords_str}'

    # Only the GET itself is timed; status checking happens outside the timer.
    with resource_mgr_client.access_context(server, True, 1, voxel_count):
        timestamp = datetime.now()
        with Timer() as timer:
            r = session.get(url)

    r.raise_for_status()
    return timestamp, voxel_count, len(r.content), timer.seconds
def get_coarse_sparsevol(cls, server, uuid, instance_name, body_id, supervoxels=False):
    """
    Return the 'coarse sparsevol' representation of a given body.
    This is similar to the sparsevol representation at scale=6,
    EXCEPT that it is generated from the label index, so no
    blocks are lost from downsampling.

    Return an array of coordinates of the form:

        [[Z,Y,X],
         [Z,Y,X],
         [Z,Y,X],
         ...
        ]
    """
    # DVID expects a lowercase 'true'/'false' query parameter.
    supervoxels = str(bool(supervoxels)).lower()

    # Only prepend a scheme if the server doesn't already have one.
    # (The old startswith('http://') check mangled 'https://...' servers
    # into 'http://https://...'.)
    if '://' not in server:
        server = 'http://' + server

    session = default_dvid_session()
    r = session.get(f'{server}/api/node/{uuid}/{instance_name}/sparsevol-coarse/{body_id}?supervoxels={supervoxels}')
    r.raise_for_status()
    return parse_rle_response( r.content )
def is_node_locked(dvid_server, uuid):
    """
    Query the DVID server and return whether the given node is locked
    (i.e. committed and therefore closed for writing).
    """
    session = default_dvid_session()
    commit_info = session.get(f'{dvid_server}/api/node/{uuid}/commit')
    commit_info.raise_for_status()
    return commit_info.json()["Locked"]
def writeimagepyramid(vol3d):
    """
    Build a full tile pyramid for one block-thick slab of the volume and
    POST every tile to the DVID tile instance.

    Args:
        vol3d: tuple (blknum, vol) -- the slab's block index along Z and its
               3D numpy volume (BLKSIZE slices thick).

    NOTE(review): relies on names from the enclosing scope (BLKSIZE, TILESIZE,
    maxlevel, imformat, axis, server, uuid, tilename, BytesIO,
    default_dvid_session) -- confirm against the enclosing function.
    """
    blknum, vol = vol3d
    from PIL import Image
    from scipy import ndimage
    import io
    import numpy
    s = default_dvid_session()

    # create thread pool for parallel
    from multiprocessing.dummy import Pool as ThreadPool
    NUM_THREADS = 4
    ACTIVE_REQS = 16
    #pool = ThreadPool(NUM_THREADS)

    # actually perform tile load
    def loadTile(reqpair):
        urlreq, reqbuff = reqpair
        s.post(urlreq, data=reqbuff)

    work_queue = []
    # iterate slice by slice
    for slicenum in range(0, BLKSIZE):
        imslice = vol[slicenum, :, :]
        imlevels = []
        imlevels.append(imslice)
        # use generic downsample algorithm
        for level in range(1, maxlevel + 1):
            dim1, dim2 = imlevels[level - 1].shape
            # go to max level regardless of actual image size
            #if dim1 < TILESIZE and dim2 < TILESIZE:
                # image size is already smaller even though not at max level
                #print "Not at max level"
                # break
            imlevels.append( ndimage.interpolation.zoom(imlevels[level - 1], 0.5))

        # write pyramid for each slice using custom request
        for levelnum in range(0, len(imlevels)):
            levelslice = imlevels[levelnum]
            dim1, dim2 = levelslice.shape

            # Number of tiles needed to cover each dimension (round up).
            num1tiles = (dim1 - 1) // TILESIZE + 1
            num2tiles = (dim2 - 1) // TILESIZE + 1

            for iter1 in range(0, num1tiles):
                for iter2 in range(0, num2tiles):
                    # extract tile (zero-padded at the right/bottom edges)
                    tileholder = numpy.zeros((TILESIZE, TILESIZE), numpy.uint8)
                    min1 = iter1 * TILESIZE
                    min2 = iter2 * TILESIZE
                    tileslice = levelslice[min1:min1 + TILESIZE, min2:min2 + TILESIZE]
                    t1, t2 = tileslice.shape
                    tileholder[0:t1, 0:t2] = tileslice

                    # write tileholder to dvid
                    # NOTE(review): Image.tostring() was removed in modern Pillow
                    # (replaced by tobytes()) -- confirm the pinned PIL version.
                    buf = BytesIO()
                    img = Image.frombuffer('L', (TILESIZE, TILESIZE), tileholder.tostring(), 'raw', 'L', 0, 1)
                    imformatpil = imformat
                    if imformat == "jpg":
                        # PIL spells it "jpeg"
                        imformatpil = "jpeg"
                    img.save(buf, format=imformatpil)

                    # The tile's URL coordinate order depends on the slicing axis.
                    if axis == "xy":
                        work_queue.append(
                            (server + "/api/node/" + uuid + "/"
                             + tilename + "/tile/" + axis + "/" + str(levelnum)
                             + "/" + str(iter2) + "_" + str(iter1)
                             + "_" + str(slicenum + blknum * BLKSIZE), buf.getvalue()))
                    elif axis == "xz":
                        work_queue.append(
                            (server + "/api/node/" + uuid + "/"
                             + tilename + "/tile/" + axis + "/" + str(levelnum)
                             + "/" + str(iter2) + "_" + str(slicenum + blknum * BLKSIZE)
                             + "_" + str(iter1), buf.getvalue()))
                    else:
                        work_queue.append(
                            (server + "/api/node/" + uuid + "/"
                             + tilename + "/tile/" + axis + "/" + str(levelnum)
                             + "/" + str(slicenum + blknum * BLKSIZE)
                             + "_" + str(iter2) + "_" + str(iter1), buf.getvalue()))
                    buf.close()

                    # Flush the queue with a short-lived thread pool whenever
                    # enough requests have accumulated.
                    if len(work_queue) == ACTIVE_REQS:
                        pool = ThreadPool(NUM_THREADS)
                        pool.map(loadTile, work_queue)
                        # close the pool to further requests
                        pool.close()
                        # wait for any remaining threads
                        pool.join()
                        work_queue = []

    # submit last jobs if any remain
    if len(work_queue) > 0:
        pool = ThreadPool(NUM_THREADS)
        pool.map(loadTile, work_queue)
        # close the pool to further requests
        pool.close()
        # wait for any remaining threads
        pool.join()
def session(self):
    """
    Lazily-created DVID session, cached on the instance so that
    connections are pooled across calls.
    """
    if self._session is None:
        self._session = default_dvid_session('ingest_label_indexes')
    return self._session
def execute(self):
    """
    Ingest a stack of grayscale image slices into DVID (or write raw
    block files to disk), using Spark to parallelize per-slice work.

    Slices are read either from local files or from a Google Storage
    bucket (basename starting with 'gs://'), regrouped into DVID-sized
    blocks, and either written to "output-dir" as raw block files or
    POSTed to DVID (skipping blank blocks).  Finally, the grayscale
    instance's extents are updated from the first slice's dimensions.
    """
    from PIL import Image
    import numpy
    import os

    # Number of Z slices processed per Spark batch.
    iterslices = self.BLKSIZE * self.config_data["options"]["numblocklayers"]

    minslice = self.config_data["minslice"]
    # map file to numpy array
    basename = self.config_data["basename"]

    # format should be gs://<bucket>/path
    gbucketname = ""
    gpath = ""
    if basename.startswith('gs://'):
        # parse google bucket names
        tempgs = basename.split('//')
        bucketpath = tempgs[1].split('/')
        gbucketname = bucketpath[0]
        gpath = '/'.join(bucketpath[1:])

    server = None

    xoffset = yoffset = zoffset = 0
    if "offset" in self.config_data["options"]:
        xoffset = self.config_data["options"]["offset"][0]
        yoffset = self.config_data["options"]["offset"][1]
        zoffset = self.config_data["options"]["offset"][2]

        if xoffset % self.BLKSIZE != 0:
            raise Exception("offset not block aligned")
        if yoffset % self.BLKSIZE != 0:
            raise Exception("offset not block aligned")
        if zoffset % self.BLKSIZE != 0:
            raise Exception("offset not block aligned")

        # Convert voxel offsets to block offsets.
        # NOTE(review): '/=' is true division under Python 3, so these become
        # floats here; they are later used in '%d' formats -- confirm intended.
        xoffset /= self.BLKSIZE
        yoffset /= self.BLKSIZE
        zoffset /= self.BLKSIZE

    # this will start the Z block writing at the specified offse
    # (changes default behavior when loading nonzero starting image slice)
    zoffset -= (minslice // self.BLKSIZE)

    # create metadata before workers start if using DVID
    if "output-dir" not in self.config_data or self.config_data["output-dir"] == "":
        # write to dvid
        server = self.config_data["dvid-info"]["dvid-server"]
        uuid = self.config_data["dvid-info"]["uuid"]
        grayname = self.config_data["dvid-info"]["grayname"]
        resource_server = str(self.resource_server)
        resource_port = self.resource_port

        # create grayscale type
        node_service = retrieve_node_service(server, uuid, resource_server, resource_port, self.APPNAME)
        node_service.create_grayscale8(str(grayname), self.BLKSIZE)

    for slice in range(self.config_data["minslice"], self.config_data["maxslice"]+1, iterslices):
        # parallelize images across many machines
        imgs = self.sc.parallelize(list(range(slice, slice+iterslices)), iterslices)

        def img2npy(slicenum):
            # Load one image slice (local file or GCS blob) as a numpy array.
            try:
                img = None
                if gbucketname == "":
                    img = Image.open(basename % slicenum)
                else:
                    from gcloud import storage
                    from io import BytesIO
                    client = storage.Client()
                    gbucket = client.get_bucket(gbucketname)
                    gblob = gbucket.get_blob(gpath % slicenum)

                    # write to bytes which implements file interface
                    gblobfile = BytesIO()
                    gblob.download_to_file(gblobfile)
                    gblobfile.seek(0)
                    img = Image.open(gblobfile)
                return slicenum, numpy.array(img)
            except Exception as e:
                # just return a blank slice -- will be handled downstream
                return slicenum, numpy.zeros((0,0), numpy.uint8)

        npy_images = imgs.map(img2npy)

        # map numpy array into y lines of block height
        blocksize = self.BLKSIZE
        blocklimit = self.BLOCKLIMIT

        def npy2lines(arrpair):
            # Split one slice into horizontal strips of block height,
            # padded on the right to a multiple of blocksize.
            z, arr = arrpair
            ysize, xsize = arr.shape
            npylines = []
            for itery in range(0, ysize, blocksize):
                line = numpy.zeros((blocksize, ((xsize-1) // blocksize + 1)*blocksize), numpy.uint8)
                uppery = blocksize
                if (itery + blocksize) > ysize:
                    uppery = ysize - itery
                line[0:uppery, 0:xsize] = arr[itery:itery+blocksize, 0:xsize]
                npylines.append((itery // blocksize, (z, line)))
            return npylines

        npy_lines = npy_images.flatMap(npy2lines)

        # reduce y lines into DVID blocks
        groupedlines = npy_lines.groupByKey()

        # map y lines => (y, blocks)
        def lines2blocks(linespair):
            # Stack all Z strips for a given Y index into one 3D slab.
            y, linesp = linespair
            xsize = None
            blockdata = None
            for z, line in linesp:
                if xsize is None:
                    _, xsize = line.shape
                    blockdata = numpy.zeros((iterslices, blocksize, xsize), numpy.uint8)
                blockdata[(z - minslice)%iterslices, :, :] = line
            return y, blockdata

        yblocks = groupedlines.map(lines2blocks)

        # map multilayer of blocks to an array of single layer blocks
        def multi2single(yblocks):
            ybindex, blocks = yblocks
            blockarr = []
            num_layers = iterslices // blocksize
            for layer in range(0,num_layers):
                blockarr.append(((ybindex, layer), blocks[layer*blocksize:(layer*blocksize+blocksize),:,:]))
            return blockarr

        yblockssplit = yblocks.flatMap(multi2single)

        if "output-dir" in self.config_data and self.config_data["output-dir"] != "":
            # write blocks to disk for separte post-process -- write directly to DVID eventually?
            output_dir = self.config_data["output-dir"]

            def write2disk(yblocks):
                # Dump one row of blocks as a raw '.blocks' file on disk.
                zbindex = slice // blocksize
                (ybindex, layer), blocks = yblocks
                zbindex += layer

                zsize,ysize,xsize = blocks.shape

                outdir = output_dir
                outdir += "/" + ("%05d" % zbindex) + ".z/"
                filename = outdir + ("%05d" % ybindex) + "-" + str(xsize // blocksize) + ".blocks"

                try:
                    os.makedirs(outdir)
                except Exception as e:
                    # directory probably already exists
                    pass

                # extract blocks from buffer and write to disk
                fout = open(filename, 'wb')
                for iterx in range(0, xsize, blocksize):
                    block = blocks[:,:,iterx:iterx+blocksize].copy()
                    fout.write(block)
                fout.close()

            yblockssplit.foreach(write2disk)
        else:
            # write to dvid
            server = self.config_data["dvid-info"]["dvid-server"]
            uuid = self.config_data["dvid-info"]["uuid"]
            grayname = self.config_data["dvid-info"]["grayname"]
            appname = self.APPNAME
            delimiter = self.config_data["options"]["blankdelimiter"]

            def write2dvid(yblocks):
                # POST one row of blocks to DVID, coalescing consecutive
                # non-blank blocks into runs and skipping blank blocks.
                from libdvid import ConnectionMethod
                import numpy
                node_service = retrieve_node_service(server, uuid, resource_server, resource_port, appname)

                # get block coordinates
                zbindex = slice // blocksize
                (ybindex, layer), blocks = yblocks
                zbindex += layer
                zsize,ysize,xsize = blocks.shape
                xrun = xsize // blocksize
                xbindex = 0 # assume x starts at 0!!

                # retrieve blocks
                # NOTE(review): blockbuffer starts as str but block.tobytes()
                # returns bytes -- under Python 3 the '+=' below would raise
                # TypeError; presumably this should be b"".  Confirm.
                blockbuffer = ""

                # skip blank blocks
                startblock = False
                xrun = 0
                xbindex = 0

                for iterx in range(0, xsize, blocksize):
                    block = blocks[:,:,iterx:iterx+blocksize].copy()
                    vals = numpy.unique(block)
                    if len(vals) == 1 and vals[0] == delimiter:
                        # check if the block is blank
                        if startblock:
                            # if the previous block has data, push blocks in current queue
                            node_service.custom_request(str((grayname + "/blocks/%d_%d_%d/%d") % (xbindex+xoffset, ybindex+yoffset, zbindex+zoffset, xrun)), blockbuffer, ConnectionMethod.POST)
                            startblock = False
                            xrun = 0
                            blockbuffer = ""
                    else:
                        if startblock == False:
                            xbindex = iterx // blocksize
                            startblock = True

                        blockbuffer += block.tobytes()
                        xrun += 1

                        if blocklimit > 0 and xrun >= blocklimit:
                            # if the previous block has data, push blocks in current queue
                            node_service.custom_request(str((grayname + "/blocks/%d_%d_%d/%d") % (xbindex+xoffset, ybindex+yoffset, zbindex+zoffset, xrun)), blockbuffer, ConnectionMethod.POST)
                            startblock = False
                            xrun = 0
                            blockbuffer = ""

                # write-out leftover blocks
                if xrun > 0:
                    node_service.custom_request(str((grayname + "/blocks/%d_%d_%d/%d") % (xbindex+xoffset, ybindex+yoffset, zbindex+zoffset, xrun)), blockbuffer, ConnectionMethod.POST)

            yblockssplit.foreach(write2dvid)

        self.workflow_entry_exit_printer.write_data("Ingested %d slices" % iterslices)

    # just fetch one image at driver to get dims
    width = height = 1
    try:
        img = None
        if gbucketname == "":
            img = Image.open(basename % minslice)
            width, height = img.width, img.height
        else:
            from gcloud import storage
            from io import BytesIO
            client = storage.Client()
            gbucket = client.get_bucket(gbucketname)
            gblob = gbucket.get_blob(gpath % minslice)

            # write to bytes which implements file interface
            gblobfile = BytesIO()
            gblob.download_to_file(gblobfile)
            gblobfile.seek(0)
            img = Image.open(gblobfile)
            width, height = img.width, img.height
    except Exception as e:
        # just set size to 1
        pass

    if "output-dir" not in self.config_data or self.config_data["output-dir"] == "":
        # update metadata
        grayext = {}
        grayext["MinPoint"] = [xoffset*self.BLKSIZE,yoffset*self.BLKSIZE,zoffset*self.BLKSIZE+minslice]
        grayext["MaxPoint"] = [xoffset*self.BLKSIZE + width-1, yoffset*self.BLKSIZE + height-1, zoffset*self.BLKSIZE+minslice + self.config_data["maxslice"]]

        if not server.startswith("http://"):
            server = "http://" + server
        session = default_dvid_session()
        session.post(server + "/api/node/" + uuid + "/" + grayname + "/extents", json=grayext)
def writeimagepyramid(part_data):
    """
    Downsample one Z slice into a tile pyramid and POST the tiles to DVID
    (png and/or jpeg tile instances, per the createtiles/createtilesjpeg flags).

    Args:
        part_data: tuple (part, vol) -- a partition descriptor (provides
                   get_offset()) and its volume; only vol[0,:,:] is used.

    NOTE(review): relies on names from the enclosing scope (tilesize, maxlevel,
    delimiter, server, uuid, tilename, tilenamejpeg, createtiles,
    createtilesjpeg, BytesIO, np, Timer) -- confirm against the caller.
    """
    logger = logging.getLogger(__name__)
    part, vol = part_data
    offset = part.get_offset()
    zslice = offset.z
    from PIL import Image
    from scipy import ndimage
    import io
    s = default_dvid_session()

    # pad data with delimiter if needed
    timslice = vol[0, :, :]
    shiftx = offset.x % tilesize
    shifty = offset.y % tilesize
    tysize, txsize = timslice.shape
    ysize = tysize + shifty
    xsize = txsize + shiftx
    imslice = np.zeros((ysize, xsize))
    imslice[:, :] = delimiter
    imslice[shifty:ysize, shiftx:xsize] = timslice
    curry = (offset.y - shifty) // 2
    currx = (offset.x - shiftx) // 2

    imlevels = []
    tileoffsetyx = []
    imlevels.append(imslice)
    tileoffsetyx.append((offset.y // tilesize, offset.x // tilesize))

    with Timer() as downsample_timer:
        # use generic downsample algorithm
        for level in range(1, maxlevel + 1):
            tysize, txsize = imlevels[level - 1].shape

            shiftx = currx % tilesize
            shifty = curry % tilesize

            ysize = tysize + shifty
            xsize = txsize + shiftx
            imslice = np.zeros((ysize, xsize))
            imslice[:, :] = delimiter
            timslice = ndimage.interpolation.zoom(imlevels[level - 1], 0.5)
            imslice[shifty:ysize, shiftx:xsize] = timslice
            imlevels.append(imslice)
            # NOTE(review): level 0 appends (y,x) above, but this appends
            # (currx, curry) i.e. (x,y) -- the unpack below expects (y,x),
            # so these look swapped for levels >= 1.  Confirm.
            tileoffsetyx.append((currx // tilesize, curry // tilesize))

            curry = (curry - shifty) // 2
            currx = (currx - shiftx) // 2

    logger.info("Downsampled {} levels in {:.3f} seconds".format(
        maxlevel, downsample_timer.seconds))

    # write tile pyramid using custom requests
    for levelnum in range(0, len(imlevels)):
        levelslice = imlevels[levelnum]
        dim1, dim2 = levelslice.shape

        # Number of tiles needed to cover each dimension (round up).
        num1tiles = (dim1 - 1) // tilesize + 1
        num2tiles = (dim2 - 1) // tilesize + 1

        with Timer() as post_timer:
            for iter1 in range(0, num1tiles):
                for iter2 in range(0, num2tiles):
                    # extract tile (padded with the delimiter at the edges)
                    tileholder = np.zeros((tilesize, tilesize), np.uint8)
                    tileholder[:, :] = delimiter
                    min1 = iter1 * tilesize
                    min2 = iter2 * tilesize
                    tileslice = levelslice[min1:min1 + tilesize, min2:min2 + tilesize]
                    t1, t2 = tileslice.shape
                    tileholder[0:t1, 0:t2] = tileslice

                    starty, startx = tileoffsetyx[levelnum]
                    starty += iter1
                    startx += iter2
                    # NOTE(review): Image.tostring() was removed in modern
                    # Pillow (replaced by tobytes()) -- confirm PIL version.
                    if createtiles:
                        buf = BytesIO()
                        img = Image.frombuffer('L', (tilesize, tilesize),
                                               tileholder.tostring(), 'raw', 'L', 0, 1)
                        img.save(buf, format="png")
                        urlreq = server + "/api/node/" + uuid + "/" + tilename + "/tile/xy/" + str(
                            levelnum) + "/" + str(startx) + "_" + str(
                                starty) + "_" + str(zslice)
                        s.post(urlreq, data=buf.getvalue())
                        buf.close()

                    if createtilesjpeg:
                        buf = BytesIO()
                        img = Image.frombuffer('L', (tilesize, tilesize),
                                               tileholder.tostring(), 'raw', 'L', 0, 1)
                        img.save(buf, format="jpeg")
                        urlreq = server + "/api/node/" + uuid + "/" + tilenamejpeg + "/tile/xy/" + str(
                            levelnum) + "/" + str(startx) + "_" + str(
                                starty) + "_" + str(zslice)
                        s.post(urlreq, data=buf.getvalue())
                        buf.close()
        logger.info("Posted {} tiles (level={}) in {} seconds".format(
            num1tiles * num2tiles, levelnum, post_timer.seconds))
def reload_server_metadata(dvid_server):
    """
    Ask the given DVID server to reload its metadata.
    """
    session = default_dvid_session()
    response = session.post(f"{dvid_server}/api/server/reload-metadata")
    response.raise_for_status()
def execute(self):
    """ Execute spark workflow.

    Pipeline: read image slices, (optionally) create the base DVID
    instance plus grayscale/JPEG pyramid and tile instances, then for
    each slab of slices write level 0, tiles, and progressively
    downsampled pyramid levels.
    """
    self._sanitize_config()
    session = default_dvid_session()
    dvid_info = self.config_data["dvid-info"]
    options = self.config_data["options"]
    block_shape = 3 * (options["blocksize"], )

    self.partition_size = options["blockwritelimit"] * options["blocksize"]

    # ?? num parallel requests might be really small at high levels of pyramids
    # xdim is unbounded or very large
    partition_dims = PartitionDims(options["blocksize"], options["blocksize"],
                                   self.partition_size)
    partition_schema = partitionSchema(
        partition_dims,
        blank_delimiter=options["blankdelimiter"],
        padding=options["blocksize"],
        enablemask=options["has-dvidmask"])

    # config offset is XYZ; reversed to ZYX, then shifted by the first slice
    offset_zyx = np.array(options["offset"][::-1])
    offset_zyx[0] += options["minslice"]
    imgreader = imagefileSrc(partition_schema, options["basename"],
                             (options["minslice"], options["maxslice"]),
                             VolumeOffset(*offset_zyx), self.sc)

    # !! hack: override iteration size that is set to partition size, TODO: add option
    # this just makes the downstream processing a little more convenient, and reduces
    # unnecessary DVID patching if that is enabled.
    # (must be a multiple of block size)
    imgreader.iteration_size = options["num-tasks"]

    # get dims from image (hackage): open the first slice to learn width/height
    from PIL import Image
    import requests
    if '%' in options["basename"]:
        minslice_name = options["basename"] % options["minslice"]
    elif '{' in options["basename"]:
        minslice_name = options["basename"].format(options["minslice"])
    else:
        raise RuntimeError(
            f"Unrecognized format string for image basename: {options['basename']}"
        )

    img = Image.open(minslice_name)
    volume_shape = (1 + options["maxslice"] - options["minslice"], img.height,
                    img.width)
    del img

    # global bounding box (ZYX) of the whole volume being ingested
    global_box_zyx = np.zeros((2, 3), dtype=int)
    global_box_zyx[0] = options["offset"]
    global_box_zyx[0] += (options["minslice"], 0, 0)
    global_box_zyx[1] = global_box_zyx[0] + volume_shape

    if options["create-pyramid"]:
        if is_datainstance(dvid_info["dvid-server"], dvid_info["uuid"],
                           dvid_info["dataname"]):
            logger.info(
                "'{dataname}' already exists, skipping creation".format(
                    **dvid_info))
        else:
            # create data instance and disable dvidmask
            # !! assume if data instance exists and mask is set that all pyramid
            # !! also exits, meaning the mask should be used.
            options["has-dvidmask"] = False
            if options["disable-original"]:
                logger.info(
                    "Not creating '{dataname}' due to 'disable-original' config setting"
                    .format(**dvid_info))
            elif 0 in options["skipped-pyramid-levels"]:
                logger.info(
                    "Not creating '{dataname}' due to 'skipped-pyramid-levels' config setting"
                    .format(**dvid_info))
            else:
                if options["is-rawarray"]:
                    create_rawarray8(dvid_info["dvid-server"],
                                     dvid_info["uuid"], dvid_info["dataname"],
                                     block_shape)
                else:
                    create_label_instance(dvid_info["dvid-server"],
                                          dvid_info["uuid"],
                                          dvid_info["dataname"], 0,
                                          block_shape)

        if not options["disable-original"] and 0 not in options[
                "skipped-pyramid-levels"]:
            update_extents(dvid_info["dvid-server"], dvid_info["uuid"],
                           dvid_info["dataname"], global_box_zyx)

            # Bottom level of pyramid is listed as neuroglancer-compatible
            extend_list_value(dvid_info["dvid-server"], dvid_info["uuid"],
                              '.meta', 'neuroglancer',
                              [dvid_info["dataname"]])

    # determine number of pyramid levels if not specified
    if options["create-pyramid"] or options["create-pyramid-jpeg"]:
        if options["pyramid-depth"] == -1:
            options["pyramid-depth"] = 0
            zsize = options["maxslice"] - options["minslice"] + 1
            while zsize > 512:
                options["pyramid-depth"] += 1
                zsize /= 2

            # NeuTu doesn't work well if there aren't at least a few pyramid levels.
            # Even for small volumes, use at least a few pyramid levels,
            # unless the depth was explicit in the config.
            options["pyramid-depth"] = max(options["pyramid-depth"], 4)

    # create pyramid data instances
    if options["create-pyramid-jpeg"]:
        dataname_jpeg = dvid_info["dataname"] + self.JPEGPYRAMID_NAME
        if 0 in options["skipped-pyramid-levels"]:
            logger.info(
                "Not creating '{}' due to 'skipped-pyramid-levels' config setting"
                .format(dataname_jpeg))
        else:
            if is_datainstance(dvid_info["dvid-server"], dvid_info["uuid"],
                               dataname_jpeg):
                logger.info("'{}' already exists, skipping creation".format(
                    dataname_jpeg))
            else:
                create_rawarray8(dvid_info["dvid-server"], dvid_info["uuid"],
                                 dataname_jpeg, block_shape,
                                 Compression.JPEG)

            update_extents(dvid_info["dvid-server"], dvid_info["uuid"],
                           dataname_jpeg, global_box_zyx)

            # Bottom level of pyramid is listed as neuroglancer-compatible
            extend_list_value(dvid_info["dvid-server"], dvid_info["uuid"],
                              '.meta', 'neuroglancer', [dataname_jpeg])

    if options["create-pyramid"]:
        for level in range(1, 1 + options["pyramid-depth"]):
            downsampled_box_zyx = global_box_zyx // (2**level)
            downname = dvid_info["dataname"] + "_%d" % level

            if level in options["skipped-pyramid-levels"]:
                logger.info(
                    "Not creating '{}' due to 'skipped-pyramid-levels' config setting"
                    .format(downname))
                continue

            if is_datainstance(dvid_info["dvid-server"], dvid_info["uuid"],
                               downname):
                logger.info("'{}' already exists, skipping creation".format(
                    downname))
            else:
                if options["is-rawarray"]:
                    create_rawarray8(dvid_info["dvid-server"],
                                     dvid_info["uuid"], downname, block_shape)
                else:
                    create_label_instance(dvid_info["dvid-server"],
                                          dvid_info["uuid"], downname, 0,
                                          block_shape)

            update_extents(dvid_info["dvid-server"], dvid_info["uuid"],
                           downname, downsampled_box_zyx)

            # Higher-levels of the pyramid should not appear in the DVID-lite console.
            extend_list_value(dvid_info["dvid-server"], dvid_info["uuid"],
                              '.meta', 'restrictions', [downname])

    if options["create-pyramid-jpeg"]:
        for level in range(1, 1 + options["pyramid-depth"]):
            downsampled_box_zyx = global_box_zyx // (2**level)
            downname = dvid_info[
                "dataname"] + self.JPEGPYRAMID_NAME + "_%d" % level

            if level in options["skipped-pyramid-levels"]:
                logger.info(
                    "Not creating '{}' due to 'skipped-pyramid-levels' config setting"
                    .format(downname))
                continue

            if is_datainstance(dvid_info["dvid-server"], dvid_info["uuid"],
                               downname):
                logger.info("'{}' already exists, skipping creation".format(
                    downname))
            else:
                create_rawarray8(dvid_info["dvid-server"], dvid_info["uuid"],
                                 downname, block_shape, Compression.JPEG)

            update_extents(dvid_info["dvid-server"], dvid_info["uuid"],
                           downname, downsampled_box_zyx)

            # Higher-levels of the pyramid should not appear in the DVID-lite console.
            extend_list_value(dvid_info["dvid-server"], dvid_info["uuid"],
                              '.meta', 'restrictions', [downname])

    # create tiles
    if options["create-tiles"] or options["create-tiles-jpeg"]:
        MinTileCoord = global_box_zyx[0][::-1] // options["tilesize"]
        MaxTileCoord = global_box_zyx[1][::-1] // options["tilesize"]

        # get max level by just finding max tile coord
        maxval = max(MaxTileCoord) - min(MinTileCoord) + 1
        import math
        self.maxlevel = int(math.log(maxval) / math.log(2))

        tilemeta = {}
        tilemeta["MinTileCoord"] = MinTileCoord.tolist()
        tilemeta["MaxTileCoord"] = MaxTileCoord.tolist()
        tilemeta["Levels"] = {}
        currres = 8.0  # just use as placeholder for now
        for level in range(0, self.maxlevel + 1):
            tilemeta["Levels"][str(level)] = {
                "Resolution": 3 * [currres],
                "TileSize": 3 * [options["tilesize"]]
            }
            currres *= 2

        if options["create-tiles"]:
            session.post("{dvid-server}/api/repo/{uuid}/instance".format(
                **dvid_info),
                         json={
                             "typename": "imagetile",
                             "dataname": dvid_info["dataname"] + self.TILENAME,
                             "source": dvid_info["dataname"],
                             "format": "png"
                         })
            session.post(
                "{dvid-server}/api/repo/{uuid}/{dataname}{tilename}/metadata"
                .format(tilename=self.TILENAME, **dvid_info), json=tilemeta)

        if options["create-tiles-jpeg"]:
            session.post("{dvid-server}/api/repo/{uuid}/instance".format(
                **dvid_info),
                         json={
                             "typename": "imagetile",
                             "dataname":
                             dvid_info["dataname"] + self.JPEGTILENAME,
                             "source": dvid_info["dataname"],
                             "format": "jpg"
                         })
            session.post(
                "{dvid-server}/api/repo/{uuid}/{dataname_jpeg_tile}/metadata"
                .format(dataname_jpeg_tile=dvid_info["dataname"] +
                        self.JPEGTILENAME, **dvid_info), json=tilemeta)

    # When DVID runs locally, each worker must reload metadata to see the
    # instances just created above.
    if dvid_info["dvid-server"].startswith("http://127.0.0.1"):

        def reload_meta():
            reload_server_metadata(dvid_info["dvid-server"])

        self.run_on_each_worker(reload_meta)

    # TODO Validation: should verify syncs exist, should verify pyramid depth

    # TODO: set syncs for pyramids, tiles if base datatype exists
    # syncs should be removed before ingestion and added afterward

    # level -> list of persisted RDDs awaiting enough data to downsample
    levels_cache = {}

    # iterate through each partition
    for arraypartition in imgreader:
        # DVID pad if necessary
        if options["has-dvidmask"]:
            dvidsrc = dvidSrc(dvid_info["dvid-server"],
                              dvid_info["uuid"],
                              dvid_info["dataname"],
                              arraypartition,
                              resource_server=self.resource_server,
                              resource_port=self.resource_port)
            arraypartition = dvidsrc.extract_volume()

        # potentially need for future iterations
        arraypartition.persist()

        # check for final layer
        finallayer = imgreader.curr_slice > imgreader.end_slice

        if not options["disable-original"]:
            # Write level-0 of the raw data, even if we aren't writing the rest of the pyramid.
            dataname = datanamelossy = None
            if options["create-pyramid"]:
                dataname = dvid_info["dataname"]
            if options["create-pyramid-jpeg"]:
                datanamelossy = dvid_info[
                    "dataname"] + self.JPEGPYRAMID_NAME

            if (dataname or datanamelossy
                ) and 0 not in options["skipped-pyramid-levels"]:
                self._write_blocks(arraypartition, dataname, datanamelossy)

        if options["create-tiles"] or options["create-tiles-jpeg"]:
            # repartition into tiles
            schema = partitionSchema(PartitionDims(1, 0, 0))
            tilepartition = schema.partition_data(arraypartition)

            # write unpadded tilesize (will pad with delimiter if needed)
            self._writeimagepyramid(tilepartition)

        if options["create-pyramid"] or options["create-pyramid-jpeg"]:
            if 0 not in levels_cache:
                levels_cache[0] = []
            levels_cache[0].append(arraypartition)
            curr_level = 1
            downsample_factor = 2

            # should be a multiple of Z blocks or the final fetch
            assert imgreader.curr_slice % options["blocksize"] == 0

            # downsample one level at a time whenever enough slabs have
            # accumulated (or this is the last slab)
            while ((((imgreader.curr_slice // options["blocksize"]) %
                     downsample_factor) == 0)
                   or finallayer) and curr_level <= options["pyramid-depth"]:
                partlist = levels_cache[curr_level - 1]
                part = partlist[0]
                # union all RDDs from the same level
                for iter1 in range(1, len(partlist)):
                    part = part.union(partlist[iter1])

                # downsample map
                israw = options["is-rawarray"]

                def downsample(part_vol):
                    part, vol = part_vol
                    if not israw:
                        vol = downsample_3Dlabels(vol)[0]
                    else:
                        vol = downsample_raw(vol)[0]
                    return (part, vol)

                downsampled_array = part.map(downsample)

                # repart (vol and offset will always be power of two because of padding)
                def repartition_down(part_volume):
                    part, volume = part_volume
                    downsampled_offset = np.array(part.get_offset()) // 2
                    downsampled_reloffset = np.array(
                        part.get_reloffset()) // 2
                    offsetnew = VolumeOffset(*downsampled_offset)
                    reloffsetnew = VolumeOffset(*downsampled_reloffset)
                    partnew = volumePartition(
                        (offsetnew.z, offsetnew.y, offsetnew.x),
                        offsetnew,
                        reloffset=reloffsetnew)
                    return partnew, volume

                downsampled_array = downsampled_array.map(repartition_down)

                # repartition downsample data
                partition_dims = PartitionDims(options["blocksize"],
                                               options["blocksize"],
                                               self.partition_size)
                schema = partitionSchema(
                    partition_dims,
                    blank_delimiter=options["blankdelimiter"],
                    padding=options["blocksize"],
                    enablemask=options["has-dvidmask"])
                downsampled_array = schema.partition_data(downsampled_array)

                # persist before padding if there are more levels
                if curr_level < options["pyramid-depth"]:
                    downsampled_array.persist()
                    if curr_level not in levels_cache:
                        levels_cache[curr_level] = []
                    levels_cache[curr_level].append(downsampled_array)

                # pad from DVID (move before persist will allow multi-ingest
                # but will lead to slightly non-optimal downsampling boundary
                # effects if using a lossy compression only.
                if options["has-dvidmask"]:
                    padname = dvid_info["dataname"]
                    if options[
                            "create-pyramid-jpeg"]:  # !! should pad with orig if computing
                        # pad with jpeg
                        padname += self.JPEGPYRAMID_NAME
                    padname += "_%d" % curr_level
                    dvidsrc = dvidSrc(dvid_info["dvid-server"],
                                      dvid_info["uuid"],
                                      padname,
                                      downsampled_array,
                                      resource_server=self.resource_server,
                                      resource_port=self.resource_port)
                    downsampled_array = dvidsrc.extract_volume()

                # write result
                downname = None
                downnamelossy = None
                if options["create-pyramid"]:
                    downname = dvid_info["dataname"] + "_%d" % curr_level
                if options["create-pyramid-jpeg"]:
                    downnamelossy = dvid_info[
                        "dataname"] + self.JPEGPYRAMID_NAME + "_%d" % curr_level

                if curr_level not in options["skipped-pyramid-levels"]:
                    self._write_blocks(downsampled_array, downname,
                                       downnamelossy)

                # remove previous level
                del levels_cache[curr_level - 1]
                curr_level += 1
                downsample_factor *= 2
def writeimagepyramid(vol3d):
    """
    Generate a tile pyramid for every slice of one Z-block of grayscale data
    and POST the tiles to DVID, batching requests through a small thread pool.

    Args:
        vol3d: (blknum, vol) pair; `blknum` is the Z-block index and `vol`
            is a 3D array with BLKSIZE slices along axis 0.

    Relies on enclosing-scope variables: BLKSIZE, TILESIZE, maxlevel,
    imformat, axis, server, uuid, tilename.
    """
    blknum, vol = vol3d

    from PIL import Image
    from scipy import ndimage
    import io
    import numpy
    s = default_dvid_session()

    # create thread pool for parallel
    from multiprocessing.dummy import Pool as ThreadPool
    NUM_THREADS = 4
    ACTIVE_REQS = 16
    #pool = ThreadPool(NUM_THREADS)

    # actually perform tile load
    def loadTile(reqpair):
        urlreq, reqbuff = reqpair
        s.post(urlreq , data=reqbuff)

    work_queue = []
    # iterate slice by slice
    for slicenum in range(0, BLKSIZE):
        imslice = vol[slicenum, :, :]
        imlevels = []
        imlevels.append(imslice)
        # use generic downsample algorithm
        for level in range(1, maxlevel+1):
            dim1, dim2 = imlevels[level-1].shape
            # go to max level regardless of actual image size
            #if dim1 < TILESIZE and dim2 < TILESIZE:
                # image size is already smaller even though not at max level
                #print "Not at max level"
                # break
            imlevels.append(ndimage.interpolation.zoom(imlevels[level-1], 0.5))

        # write pyramid for each slice using custom request
        for levelnum in range(0, len(imlevels)):
            levelslice = imlevels[levelnum]
            dim1, dim2 = levelslice.shape
            num1tiles = (dim1-1) // TILESIZE + 1
            num2tiles = (dim2-1) // TILESIZE + 1
            for iter1 in range(0, num1tiles):
                for iter2 in range(0, num2tiles):
                    # extract tile (partial edge tiles are zero-padded)
                    tileholder = numpy.zeros((TILESIZE, TILESIZE), numpy.uint8)
                    min1 = iter1*TILESIZE
                    min2 = iter2*TILESIZE
                    tileslice = levelslice[min1:min1+TILESIZE, min2:min2+TILESIZE]
                    t1, t2 = tileslice.shape
                    tileholder[0:t1, 0:t2] = tileslice

                    # write tileholder to dvid
                    buf = BytesIO()
                    # NOTE(review): ndarray.tostring() is removed in numpy>=1.24;
                    # tobytes() is the modern equivalent — confirm pinned version.
                    img = Image.frombuffer('L', (TILESIZE, TILESIZE), tileholder.tostring(), 'raw', 'L', 0, 1)
                    imformatpil = imformat
                    if imformat == "jpg":
                        # PIL spells the jpeg format name differently from DVID
                        imformatpil = "jpeg"
                    img.save(buf, format=imformatpil)

                    # the tile's third coordinate is the global slice index;
                    # its position in the URL triple depends on the slicing axis
                    if axis == "xy":
                        work_queue.append((server + "/api/node/" + uuid + "/" + tilename + "/tile/" + axis + "/" + str(levelnum) + "/" + str(iter2) + "_" + str(iter1) + "_" + str(slicenum+blknum*BLKSIZE), buf.getvalue()))
                    elif axis == "xz":
                        work_queue.append((server + "/api/node/" + uuid + "/" + tilename + "/tile/" + axis + "/" + str(levelnum) + "/" + str(iter2) + "_" + str(slicenum+blknum*BLKSIZE) + "_" + str(iter1), buf.getvalue()))
                    else:
                        work_queue.append((server + "/api/node/" + uuid + "/" + tilename + "/tile/" + axis + "/" + str(levelnum) + "/" + str(slicenum+blknum*BLKSIZE) + "_" + str(iter2) + "_" + str(iter1), buf.getvalue()))
                    buf.close()

                    # flush a full batch of queued requests through the pool
                    if len(work_queue) == ACTIVE_REQS:
                        pool = ThreadPool(NUM_THREADS)
                        pool.map(loadTile, work_queue)
                        # close the pool to further requests
                        pool.close()
                        # wait for any remaining threads
                        pool.join()
                        work_queue = []

    # submit last jobs if any remain
    if len(work_queue) > 0:
        pool = ThreadPool(NUM_THREADS)
        pool.map(loadTile, work_queue)
        # close the pool to further requests
        pool.close()
        # wait for any remaining threads
        pool.join()
def execute(self):
    """
    Create a DVID imagetile instance from a stack of image files and
    populate its full tile pyramid, one Spark task per slice.
    """
    # tile size default
    TILESIZE = 512

    server = str(self.config_data["dvid-info"]["dvid-server"])
    uuid = str(self.config_data["dvid-info"]["uuid"])
    grayname = str(self.config_data["dvid-info"]["grayname"])
    tilename = str(self.config_data["dvid-info"]["tilename"])

    # determine grayscale blk extants
    if not server.startswith("http://"):
        server = "http://" + server

    xmin, ymin, zmin = 0, 0, 0
    minslice = self.config_data["minslice"]
    maxslice = self.config_data["maxslice"]

    # map file to numpy array
    basename = self.config_data["basename"]

    # open image (first slice determines the X/Y dimensions)
    from PIL import Image
    import numpy
    session = default_dvid_session()
    img = Image.open(basename % minslice)
    xmax, ymax, zmax = img.width, img.height, maxslice

    # create tiles type and meta
    imformat = str(self.config_data["options"]["format"])
    session.post(server + "/api/repo/" + uuid + "/instance",
                 json={"typename": "imagetile",
                       "dataname": tilename,
                       "source": grayname,
                       "format": imformat})

    MinTileCoord = [xmin // TILESIZE, ymin // TILESIZE, zmin // TILESIZE]
    MaxTileCoord = [xmax // TILESIZE, ymax // TILESIZE, zmax // TILESIZE]

    # get max level by just finding max tile coord
    maxval = max(MaxTileCoord) + 1
    import math
    maxlevel = int(math.log(maxval) / math.log(2))

    tilemeta = {}
    tilemeta["MinTileCoord"] = MinTileCoord
    tilemeta["MaxTileCoord"] = MaxTileCoord
    tilemeta["Levels"] = {}
    currres = 10.0  # just use as placeholder for now
    for level in range(0, maxlevel+1):
        tilemeta["Levels"][str(level)] = { "Resolution" : [currres, currres, currres],
                                           "TileSize": [TILESIZE, TILESIZE, TILESIZE]}
        # resolution doubles with each pyramid level
        currres *= 2

    session.post(server + "/api/node/" + uuid + "/" + tilename + "/metadata", json=tilemeta)

    # make each image a separate task
    imgs = self.sparkdvid_context.sc.parallelize(list(range(minslice, maxslice+1)), maxslice-minslice+1)

    def img2npy(slicenum):
        # load one slice file as (slicenum, 2D array)
        try:
            img = Image.open(basename % slicenum)
            return slicenum, numpy.array(img)
        except Exception as e:
            # could give empty image, but for now just fail
            raise

    npy_images = imgs.map(img2npy)

    # capture for serialization into the Spark closure below
    appname = self.APPNAME
    resource_server = self.resource_server
    resource_port = self.resource_port

    def writeimagepyramid(image):
        # runs on a worker: downsample one slice and POST all its tiles
        slicenum, imnpy = image

        from PIL import Image
        from scipy import ndimage
        import io

        from libdvid import ConnectionMethod
        node_service = retrieve_node_service(server, uuid, resource_server, resource_port, appname)

        # actually perform tile load
        def loadTile(reqpair):
            urlreq, reqbuff = reqpair
            node_service.custom_request(urlreq, reqbuff, ConnectionMethod.POST)
            #session.post(urlreq , data=reqbuff)

        work_queue = []
        # iterate slice by slice
        imlevels = []
        imlevels.append(imnpy)
        # use generic downsample algorithm
        for level in range(1, maxlevel+1):
            dim1, dim2 = imlevels[level-1].shape
            imlevels.append(ndimage.interpolation.zoom(imlevels[level-1], 0.5))

        # write pyramid for each slice using custom request
        for levelnum in range(0, len(imlevels)):
            levelslice = imlevels[levelnum]
            dim1, dim2 = levelslice.shape
            num1tiles = (dim1-1) // TILESIZE + 1
            num2tiles = (dim2-1) // TILESIZE + 1
            for iter1 in range(0, num1tiles):
                for iter2 in range(0, num2tiles):
                    # extract tile (partial edge tiles are zero-padded)
                    tileholder = numpy.zeros((TILESIZE, TILESIZE), numpy.uint8)
                    min1 = iter1*TILESIZE
                    min2 = iter2*TILESIZE
                    tileslice = levelslice[min1:min1+TILESIZE, min2:min2+TILESIZE]
                    t1, t2 = tileslice.shape
                    tileholder[0:t1, 0:t2] = tileslice

                    # write tileholder to dvid
                    buf = BytesIO()
                    # NOTE(review): ndarray.tostring() is removed in numpy>=1.24;
                    # tobytes() is the modern equivalent — confirm pinned version.
                    img = Image.frombuffer('L', (TILESIZE, TILESIZE), tileholder.tostring(), 'raw', 'L', 0, 1)
                    imformatpil = imformat
                    if imformat == "jpg":
                        # PIL spells the jpeg format name differently from DVID
                        imformatpil = "jpeg"
                    img.save(buf, format=imformatpil)

                    loadTile((tilename + "/tile/xy/" + str(levelnum) + "/" + str(iter2) + "_" + str(iter1) + "_" + str(slicenum), buf.getvalue()))
                    buf.close()

    npy_images.foreach(writeimagepyramid)
def writeimagepyramid(part_data):
    """
    Downsample one Z-slice partition into a full tile pyramid and POST each
    tile (PNG and/or JPEG) to DVID's imagetile instances.

    Args:
        part_data: (part, vol) pair; `part` supplies the global offset and
            `vol` is a 3D array whose first Z-slice is tiled.

    Relies on enclosing-scope variables: tilesize, delimiter, maxlevel,
    server, uuid, tilename, tilenamejpeg, createtiles, createtilesjpeg.

    NOTE(review): this appears to duplicate an identical helper elsewhere
    in this file — consider consolidating.
    """
    logger = logging.getLogger(__name__)
    part, vol = part_data
    offset = part.get_offset()
    zslice = offset.z
    from PIL import Image
    from scipy import ndimage
    import io
    s = default_dvid_session()

    # pad data with delimiter if needed so the slice is aligned to the tile grid
    timslice = vol[0, :, :]
    shiftx = offset.x % tilesize
    shifty = offset.y % tilesize
    tysize, txsize = timslice.shape
    ysize = tysize + shifty
    xsize = txsize + shiftx
    imslice = np.zeros((ysize, xsize))
    imslice[:,:] = delimiter
    imslice[shifty:ysize, shiftx:xsize] = timslice
    # offsets for the next (half-resolution) level
    curry = (offset.y - shifty) // 2
    currx = (offset.x - shiftx) // 2

    imlevels = []
    tileoffsetyx = []
    imlevels.append(imslice)
    # level-0 tile-grid coordinate of this partition
    tileoffsetyx.append((offset.y // tilesize, offset.x // tilesize))

    with Timer() as downsample_timer:
        # use generic downsample algorithm
        for level in range(1, maxlevel+1):
            tysize, txsize = imlevels[level-1].shape

            shiftx = currx % tilesize
            shifty = curry % tilesize

            ysize = tysize + shifty
            xsize = txsize + shiftx
            imslice = np.zeros((ysize, xsize))
            imslice[:,:] = delimiter
            # NOTE(review): ndimage.interpolation.zoom is deprecated in modern
            # scipy in favor of scipy.ndimage.zoom — confirm pinned version.
            timslice = ndimage.interpolation.zoom(imlevels[level-1], 0.5)
            imslice[shifty:ysize, shiftx:xsize] = timslice
            imlevels.append(imslice)
            # NOTE(review): level 0 appended (y, x) above, but this appends
            # (x, y) — looks swapped; harmless only when x == y offsets.
            # TODO confirm against the DVID tile layout.
            tileoffsetyx.append((currx // tilesize, curry // tilesize))

            curry = (curry - shifty) // 2
            currx = (currx - shiftx) // 2

    logger.info("Downsampled {} levels in {:.3f} seconds".format(maxlevel, downsample_timer.seconds))

    # write tile pyramid using custom requests
    for levelnum in range(0, len(imlevels)):
        levelslice = imlevels[levelnum]
        dim1, dim2 = levelslice.shape
        num1tiles = (dim1-1) // tilesize + 1
        num2tiles = (dim2-1) // tilesize + 1

        with Timer() as post_timer:
            for iter1 in range(0, num1tiles):
                for iter2 in range(0, num2tiles):
                    # extract tile, padding partial edge tiles with the delimiter
                    tileholder = np.zeros((tilesize, tilesize), np.uint8)
                    tileholder[:,:] = delimiter
                    min1 = iter1*tilesize
                    min2 = iter2*tilesize
                    tileslice = levelslice[min1:min1+tilesize, min2:min2+tilesize]
                    t1, t2 = tileslice.shape
                    tileholder[0:t1, 0:t2] = tileslice

                    starty, startx = tileoffsetyx[levelnum]
                    starty += iter1
                    startx += iter2
                    if createtiles:
                        buf = BytesIO()
                        # NOTE(review): ndarray.tostring() is removed in numpy>=1.24;
                        # tobytes() is the modern equivalent — confirm pinned version.
                        img = Image.frombuffer('L', (tilesize, tilesize), tileholder.tostring(), 'raw', 'L', 0, 1)
                        img.save(buf, format="png")
                        urlreq = server + "/api/node/" + uuid + "/" + tilename + "/tile/xy/" + str(levelnum) + "/" + str(startx) + "_" + str(starty) + "_" + str(zslice)
                        s.post(urlreq , data=buf.getvalue())
                        buf.close()

                    if createtilesjpeg:
                        buf = BytesIO()
                        img = Image.frombuffer('L', (tilesize, tilesize), tileholder.tostring(), 'raw', 'L', 0, 1)
                        img.save(buf, format="jpeg")
                        urlreq = server + "/api/node/" + uuid + "/" + tilenamejpeg + "/tile/xy/" + str(levelnum) + "/" + str(startx) + "_" + str(starty) + "_" + str(zslice)
                        s.post(urlreq , data=buf.getvalue())
                        buf.close()
        logger.info("Posted {} tiles (level={}) in {} seconds".format( num1tiles*num2tiles, levelnum, post_timer.seconds ) )
def execute(self):
    """
    Build a downsampled pyramid for an existing DVID volume (grayscale or
    labelblk): for each level, create the `<source>_<level>` instance, fetch
    2x-resolution data from the previous level in Spark tasks, downsample it,
    and write the result back to DVID.

    Fix: `blockbuffer` in `write2dvid` is accumulated from
    `ndarray.tobytes()`, so it must be initialized/reset as `b""`;
    the previous `""` (str) raised TypeError on the first non-blank
    grayscale block under Python 3.
    """
    server = str(self.config_data["dvid-info"]["dvid-server"])
    uuid = str(self.config_data["dvid-info"]["uuid"])
    source = str(self.config_data["dvid-info"]["source"])

    session = default_dvid_session()
    # determine grayscale blk extants
    if not server.startswith("http://"):
        server = "http://" + server

    req = session.get(server + "/api/node/" + uuid + "/" + source + "/info")
    sourcemeta = req.json()

    # xmin, ymin, zmin not being used explicitly yet
    #xmin, ymin, zmin = sourcemeta["Extended"]["MinIndex"]
    xmin, ymin, zmin = 0, 0, 0
    xmax, ymax, zmax = sourcemeta["Extended"]["MaxIndex"]

    islabelblk = False
    datatype = sourcemeta["Extended"]["Values"][0]["Label"]
    if str(datatype) == "labelblk":
        islabelblk = True

    # !! always assume isotropic block
    BLKSIZE = int(sourcemeta["Extended"]["BlockSize"][0])

    maxdim = max(xmax, ymax, zmax)
    # build pyramid until BLKSIZE * 4
    import math
    maxlevel = int(math.log(maxdim + 1) / math.log(2)) - 2

    # assume 0,0,0 start for now
    xspan, yspan, zspan = xmax + 1, ymax + 1, zmax + 1

    xrunlimit = self.config_data["options"]["xrunlimit"]
    xrunlimit = xrunlimit + (xrunlimit % 2)  # should be even

    currsource = source

    # create source pyramid and append _level to name
    for level in range(1, maxlevel + 1):
        node_service = retrieve_node_service(server, uuid,
                                             self.resource_server,
                                             self.resource_port,
                                             self.APPNAME)
        # !! limit to grayscale now
        prevsource = currsource
        currsource = source + ("_%d" % level)

        # TODO: set voxel resolution to base dataset (not too important in current workflows)
        if islabelblk:
            node_service.create_labelblk(currsource, None, BLKSIZE)
        else:
            node_service.create_grayscale8(currsource, BLKSIZE)

            # set extents for new volume (only need to do for grayscale)
            newsourceext = {}
            newsourceext["MinPoint"] = [0, 0, 0]  # for now no offset
            newsourceext["MaxPoint"] = [
                ((xspan - 1) // 2 + 1) * BLKSIZE - 1,
                ((yspan - 1) // 2 + 1) * BLKSIZE - 1,
                ((zspan - 1) // 2 + 1) * BLKSIZE - 1
            ]
            session.post(server + "/api/node/" + uuid + "/" + currsource +
                         "/extents", json=newsourceext)

        # determine number of requests
        maxxrun = xspan
        if xrunlimit > 0 and xrunlimit < xspan:
            maxxrun = xrunlimit
        if maxxrun % 2:
            maxxrun += 1

        xsize = xspan // maxxrun
        if xspan % maxxrun:
            xsize += 1
        ysize = (yspan + 1) // 2
        zsize = (zspan + 1) // 2
        # capture for serialization into the Spark closures below
        resource_server = self.resource_server
        resource_port = self.resource_port

        for ziter2 in range(0, zsize, 2):
            workqueue = []
            for yiter in range(0, ysize):
                for xiter in range(0, xsize):
                    for miniz in range(ziter2, ziter2 + 2):
                        workqueue.append((xiter, yiter, miniz))

            # parallelize jobs
            pieces = self.sc.parallelize(workqueue, len(workqueue))

            # grab data corresponding to xrun
            def retrievedata(coord):
                xiter, yiter, ziter = coord
                node_service = retrieve_node_service(
                    server, uuid, resource_server, resource_port)

                # fetch a 2x2-block-tall slab from the previous level (ZYX order)
                shape_zyx = (BLKSIZE * 2, BLKSIZE * 2, maxxrun * BLKSIZE)
                offset_zyx = (ziter * BLKSIZE * 2, yiter * BLKSIZE * 2,
                              xiter * BLKSIZE * maxxrun)
                vol_zyx = None
                if islabelblk:
                    vol_zyx = node_service.get_labels3D(str(prevsource),
                                                        shape_zyx,
                                                        offset_zyx,
                                                        throttle=False)
                else:
                    vol_zyx = node_service.get_gray3D(str(prevsource),
                                                      shape_zyx,
                                                      offset_zyx,
                                                      throttle=False)

                return (coord, vol_zyx)

            volumedata = pieces.map(retrievedata)

            # downsample gray data
            def downsamplegray(vdata):
                coords, data = vdata
                from scipy import ndimage
                data = ndimage.interpolation.zoom(data, 0.5)
                return (coords, data)

            # downsample label data (TODO: make faster)
            def downsamplelabels(vdata):
                # mode (most frequent value) over each 2x2x2 neighborhood
                coords, data = vdata
                import numpy
                zmax, ymax, xmax = data.shape
                data2 = numpy.zeros(
                    (zmax // 2, ymax // 2, xmax // 2)).astype(numpy.uint64)

                for ziter in range(0, zmax, 2):
                    for yiter in range(0, ymax, 2):
                        for xiter in range(0, xmax, 2):
                            v1 = data[ziter, yiter, xiter]
                            v2 = data[ziter, yiter, xiter + 1]
                            v3 = data[ziter, yiter + 1, xiter]
                            v4 = data[ziter, yiter + 1, xiter + 1]
                            v5 = data[ziter + 1, yiter, xiter]
                            v6 = data[ziter + 1, yiter, xiter + 1]
                            v7 = data[ziter + 1, yiter + 1, xiter]
                            v8 = data[ziter + 1, yiter + 1, xiter + 1]

                            freqs = {}
                            freqs[v2] = 0
                            freqs[v3] = 0
                            freqs[v4] = 0
                            freqs[v5] = 0
                            freqs[v6] = 0
                            freqs[v7] = 0
                            freqs[v8] = 0

                            freqs[v1] = 1
                            freqs[v2] += 1
                            freqs[v3] += 1
                            freqs[v4] += 1
                            freqs[v5] += 1
                            freqs[v6] += 1
                            freqs[v7] += 1
                            freqs[v8] += 1

                            maxval = 0
                            freqkey = 0
                            for key, val in freqs.items():
                                if val > maxval:
                                    maxval = val
                                    freqkey = key

                            data2[ziter // 2, yiter // 2, xiter // 2] = freqkey

                return (coords, data2)

            downsampleddata = None
            if islabelblk:
                downsampleddata = volumedata.map(downsamplelabels)
            else:
                downsampleddata = volumedata.map(downsamplegray)

            appname = self.APPNAME
            delimiter = self.config_data["options"]["blankdelimiter"]

            # write results ?!
            def write2dvid(vdata):
                from libdvid import ConnectionMethod
                import numpy
                node_service = retrieve_node_service(
                    server, uuid, resource_server, resource_port, appname)

                coords, data = vdata
                xiter, yiter, ziter = coords

                # set block indices
                zbindex = ziter
                ybindex = yiter
                zsize, ysize, xsize = data.shape
                #xrun = xsize/BLKSIZE
                xbindex = xiter * maxxrun // 2

                # retrieve blocks
                # FIX: must be bytes — blocks are appended via ndarray.tobytes()
                blockbuffer = b""

                # skip blank blocks
                startblock = False
                xrun = 0

                if islabelblk:
                    vals = numpy.unique(data)
                    # TODO: ignore blank blocks within an x line
                    if not (len(vals) == 1 and vals[0] == 0):
                        if resource_server != "":
                            node_service.put_labels3D(
                                currsource, data,
                                (zbindex * BLKSIZE, ybindex * BLKSIZE,
                                 xbindex * BLKSIZE),
                                compress=True,
                                throttle=False)
                        else:
                            node_service.put_labels3D(
                                currsource, data,
                                (zbindex * BLKSIZE, ybindex * BLKSIZE,
                                 xbindex * BLKSIZE),
                                compress=True)
                else:
                    for iterx in range(0, xsize, BLKSIZE):
                        block = data[:, :, iterx:iterx + BLKSIZE]
                        vals = numpy.unique(block)
                        if len(vals) == 1 and vals[0] == delimiter:
                            # check if the block is blank
                            if startblock:
                                # if the previous block has data, push blocks in current queue
                                node_service.custom_request(
                                    str((currsource + "/blocks/%d_%d_%d/%d") %
                                        (xbindex, ybindex, zbindex, xrun)),
                                    blockbuffer, ConnectionMethod.POST)
                                startblock = False
                                xrun = 0
                                # FIX: reset as bytes, matching the initializer above
                                blockbuffer = b""
                        else:
                            if startblock == False:
                                xbindex = xiter * maxxrun // 2 + iterx // BLKSIZE
                                startblock = True

                            blockbuffer += block.tobytes()
                            xrun += 1

                    # write-out leftover blocks
                    if xrun > 0:
                        node_service.custom_request(
                            str((currsource + "/blocks/%d_%d_%d/%d") %
                                (xbindex, ybindex, zbindex, xrun)),
                            blockbuffer, ConnectionMethod.POST)

            downsampleddata.foreach(write2dvid)

        # adjust max coordinate for new level
        xspan = (xspan - 1) // 2
        yspan = (yspan - 1) // 2
        zspan = (zspan - 1) // 2
def execute(self):
    """
    Build a 2D image-tile pyramid from an existing DVID grayscale volume.

    Reads the grayscale instance's extents, creates an 'imagetile' data
    instance plus its level metadata, then uses Spark to fetch one block-thick
    slab of the volume per task and post all tiles for every pyramid level.
    """
    # tile size default
    TILESIZE = 512

    server = str(self.config_data["dvid-info"]["dvid-server"])
    uuid = str(self.config_data["dvid-info"]["uuid"])
    grayname = str(self.config_data["dvid-info"]["grayname"])
    tilename = str(self.config_data["dvid-info"]["tilename"])
    resource_server = self.resource_server
    resource_port = self.resource_port

    # determine grayscale blk extants
    if not server.startswith("http://"):
        server = "http://" + server
    session = default_dvid_session()
    req = session.get(server + "/api/node/" + uuid + "/" + grayname + "/info")
    graymeta = req.json()
    xmin, ymin, zmin = graymeta["Extended"]["MinIndex"]
    xmax, ymax, zmax = graymeta["Extended"]["MaxIndex"]

    # !! always assume isotropic block
    BLKSIZE = int(graymeta["Extended"]["BlockSize"][0])
    imformat = str(self.config_data["options"]["format"])

    # create tiles type and meta
    session.post(server + "/api/repo/" + uuid + "/instance",
                 json={"typename": "imagetile", "dataname": tilename, "source": grayname, "format": imformat})

    # NOTE(review): '/' is true division in Python 3, so these coordinates are
    # floats; downstream they are only used via max()/min(), but verify DVID
    # accepts float tile coords in the metadata posted below.
    MinTileCoord = [xmin*BLKSIZE/TILESIZE, ymin*BLKSIZE/TILESIZE, zmin*BLKSIZE/TILESIZE]
    MaxTileCoord = [xmax*BLKSIZE/TILESIZE, ymax*BLKSIZE/TILESIZE, zmax*BLKSIZE/TILESIZE]

    # get max level by just finding max tile coord
    maxval = max(MaxTileCoord)
    minval = abs(min(MinTileCoord))
    maxval = max(minval, maxval) + 1
    import math
    maxlevel = int(math.log(maxval) / math.log(2))

    tilemeta = {}
    tilemeta["MinTileCoord"] = MinTileCoord
    tilemeta["MaxTileCoord"] = MaxTileCoord
    tilemeta["Levels"] = {}
    currres = 10.0  # just use as placeholder for now
    for level in range(0, maxlevel+1):
        tilemeta["Levels"][str(level)] = {"Resolution" : [currres, currres, currres],
                                          "TileSize": [TILESIZE, TILESIZE, TILESIZE]}
        currres *= 2
    session.post(server + "/api/node/" + uuid + "/" + tilename + "/metadata", json=tilemeta)

    # one Spark task per block-thick slab along the chosen viewing axis
    numiters = zmax+1
    axis = str(self.config_data["options"]["axis"])
    if axis == "xz":
        numiters = ymax+1
    elif axis == "yz":
        numiters = xmax+1

    # retrieve 32 slices at a time and generate all tiles
    # TODO: only fetch 1 slice at a time if 32 slices cannot fit in memory
    blkiters = self.sparkdvid_context.sc.parallelize(list(range(0,numiters)), numiters)

    def retrieveslices(blknum):
        # Fetch one slab (BLKSIZE thick along 'axis') as a 3D volume and
        # transpose so that axis 0 iterates the slices to be tiled.
        # grab slice with 3d volume call
        node_service = retrieve_node_service(server, uuid, resource_server, resource_port)
        vol = None
        if resource_server != "":
            # Note: libdvid uses zyx order for python functions
            if axis == "xy":
                shape_zyx = ( BLKSIZE, (ymax+1)*BLKSIZE-ymin*BLKSIZE, (xmax+1)*BLKSIZE-xmin*BLKSIZE )
                offset_zyx = (blknum*BLKSIZE, ymin*BLKSIZE, xmin*BLKSIZE)
                vol_zyx = node_service.get_gray3D( str(grayname), shape_zyx, offset_zyx, throttle=False)
                vol = vol_zyx
            elif axis == "xz":
                shape_zyx = ( (zmax+1)*BLKSIZE-zmin*BLKSIZE, BLKSIZE, (xmax+1)*BLKSIZE-xmin*BLKSIZE )
                offset_zyx = (zmin*BLKSIZE, blknum*BLKSIZE, xmin*BLKSIZE)
                vol_zyx = node_service.get_gray3D( str(grayname), shape_zyx, offset_zyx, throttle=False )
                vol_yzx = vol_zyx.transpose((1,0,2))
                vol = vol_yzx
            else:
                shape_zyx = ( (zmax+1)*BLKSIZE-zmin*BLKSIZE, (ymax+1)*BLKSIZE-ymin*BLKSIZE, BLKSIZE )
                offset_zyx = ( zmin*BLKSIZE, ymin*BLKSIZE, blknum*BLKSIZE )
                vol_zyx = node_service.get_gray3D( str(grayname), shape_zyx, offset_zyx, throttle=False )
                vol = vol_zyx.transpose((2,0,1))
        else:
            # same fetches without the resource-manager throttle override
            if axis == "xy":
                shape_zyx = ( BLKSIZE, (ymax+1)*BLKSIZE-ymin*BLKSIZE, (xmax+1)*BLKSIZE-xmin*BLKSIZE )
                offset_zyx = (blknum*BLKSIZE, ymin*BLKSIZE, xmin*BLKSIZE)
                vol_zyx = node_service.get_gray3D( str(grayname), shape_zyx, offset_zyx)
                vol = vol_zyx
            elif axis == "xz":
                shape_zyx = ( (zmax+1)*BLKSIZE-zmin*BLKSIZE, BLKSIZE, (xmax+1)*BLKSIZE-xmin*BLKSIZE )
                offset_zyx = (zmin*BLKSIZE, blknum*BLKSIZE, xmin*BLKSIZE)
                vol_zyx = node_service.get_gray3D( str(grayname), shape_zyx, offset_zyx )
                vol_yzx = vol_zyx.transpose((1,0,2))
                vol = vol_yzx
            else:
                shape_zyx = ( (zmax+1)*BLKSIZE-zmin*BLKSIZE, (ymax+1)*BLKSIZE-ymin*BLKSIZE, BLKSIZE )
                offset_zyx = ( zmin*BLKSIZE, ymin*BLKSIZE, blknum*BLKSIZE )
                vol_zyx = node_service.get_gray3D( str(grayname), shape_zyx, offset_zyx )
                vol = vol_zyx.transpose((2,0,1))
        return (blknum, vol)

    imagedata = blkiters.map(retrieveslices)

    # ?! assume 0,0 starting coordinate for now for debuggin simplicity
    def writeimagepyramid(vol3d):
        # For one slab: downsample each slice through all pyramid levels,
        # cut TILESIZE x TILESIZE tiles, and POST them in thread-pooled batches.
        blknum, vol = vol3d
        from PIL import Image
        from scipy import ndimage
        import io
        import numpy
        s = default_dvid_session()

        # create thread pool for parallel
        from multiprocessing.dummy import Pool as ThreadPool
        NUM_THREADS = 4
        ACTIVE_REQS = 16  # flush the queue every 16 queued tile posts
        #pool = ThreadPool(NUM_THREADS)

        # actually perform tile load
        def loadTile(reqpair):
            urlreq, reqbuff = reqpair
            s.post(urlreq , data=reqbuff)

        work_queue = []
        # iterate slice by slice
        for slicenum in range(0, BLKSIZE):
            imslice = vol[slicenum, :, :]
            imlevels = []
            imlevels.append(imslice)
            # use generic downsample algorithm
            for level in range(1, maxlevel+1):
                dim1, dim2 = imlevels[level-1].shape
                # go to max level regardless of actual image size
                #if dim1 < TILESIZE and dim2 < TILESIZE:
                    # image size is already smaller even though not at max level
                    #print "Not at max level"
                    # break
                imlevels.append(ndimage.interpolation.zoom(imlevels[level-1], 0.5))

            # write pyramid for each slice using custom request
            for levelnum in range(0, len(imlevels)):
                levelslice = imlevels[levelnum]
                dim1, dim2 = levelslice.shape
                num1tiles = (dim1-1) // TILESIZE + 1
                num2tiles = (dim2-1) // TILESIZE + 1
                for iter1 in range(0, num1tiles):
                    for iter2 in range(0, num2tiles):
                        # extract tile (zero-padded on the far edges)
                        tileholder = numpy.zeros((TILESIZE, TILESIZE), numpy.uint8)
                        min1 = iter1*TILESIZE
                        min2 = iter2*TILESIZE
                        tileslice = levelslice[min1:min1+TILESIZE, min2:min2+TILESIZE]
                        t1, t2 = tileslice.shape
                        tileholder[0:t1, 0:t2] = tileslice

                        # write tileholder to dvid
                        buf = BytesIO()
                        # NOTE(review): ndarray.tostring() is deprecated/removed
                        # in modern numpy -- tobytes() is the replacement; confirm
                        # the pinned numpy version before upgrading.
                        img = Image.frombuffer('L', (TILESIZE, TILESIZE), tileholder.tostring(), 'raw', 'L', 0, 1)
                        imformatpil = imformat
                        if imformat == "jpg":
                            imformatpil = "jpeg"
                        img.save(buf, format=imformatpil)

                        # queue the tile post; URL coordinate order depends on axis
                        if axis == "xy":
                            work_queue.append((server + "/api/node/" + uuid + "/" + tilename + "/tile/" + axis + "/" + str(levelnum) + "/" + str(iter2) + "_" + str(iter1) + "_" + str(slicenum+blknum*BLKSIZE), buf.getvalue()))
                        elif axis == "xz":
                            work_queue.append((server + "/api/node/" + uuid + "/" + tilename + "/tile/" + axis + "/" + str(levelnum) + "/" + str(iter2) + "_" + str(slicenum+blknum*BLKSIZE) + "_" + str(iter1), buf.getvalue()))
                        else:
                            work_queue.append((server + "/api/node/" + uuid + "/" + tilename + "/tile/" + axis + "/" + str(levelnum) + "/" + str(slicenum+blknum*BLKSIZE) + "_" + str(iter2) + "_" + str(iter1), buf.getvalue()))
                        buf.close()

                        # flush a full batch of queued tile posts
                        if len(work_queue) == ACTIVE_REQS:
                            pool = ThreadPool(NUM_THREADS)
                            pool.map(loadTile, work_queue)
                            # close the pool to further requests
                            pool.close()
                            # wait for any remaining threads
                            pool.join()
                            work_queue = []

        # submit last jobs if any remain
        if len(work_queue) > 0:
            pool = ThreadPool(NUM_THREADS)
            pool.map(loadTile, work_queue)
            # close the pool to further requests
            pool.close()
            # wait for any remaining threads
            pool.join()

    imagedata.foreach(writeimagepyramid)
def execute(self):
    """
    Ingest a stack of 2D grayscale image slices into DVID (or to disk).

    Slices are read either from local files (printf-style basename) or from a
    Google Cloud Storage bucket (gs:// basename), regrouped by Spark into
    BLKSIZE-aligned 3D blocks, and then either written as .blocks files to
    'output-dir' or POSTed to a DVID grayscale8 instance.
    """
    from PIL import Image
    import numpy
    import os

    # number of image slices processed per outer iteration
    iterslices = self.BLKSIZE * self.config_data["options"][ "numblocklayers"]
    minslice = self.config_data["minslice"]

    # map file to numpy array
    basename = self.config_data["basename"]

    # format should be gs://<bucket>/path
    gbucketname = ""
    gpath = ""
    if basename.startswith('gs://'):
        # parse google bucket names
        tempgs = basename.split('//')
        bucketpath = tempgs[1].split('/')
        gbucketname = bucketpath[0]
        gpath = '/'.join(bucketpath[1:])

    server = None

    # optional block-aligned voxel offset, converted to block coordinates
    xoffset = yoffset = zoffset = 0
    if "offset" in self.config_data["options"]:
        xoffset = self.config_data["options"]["offset"][0]
        yoffset = self.config_data["options"]["offset"][1]
        zoffset = self.config_data["options"]["offset"][2]

        if xoffset % self.BLKSIZE != 0:
            raise Exception("offset not block aligned")
        if yoffset % self.BLKSIZE != 0:
            raise Exception("offset not block aligned")
        if zoffset % self.BLKSIZE != 0:
            raise Exception("offset not block aligned")

        # NOTE(review): '/=' is true division in Python 3, so these become
        # floats even though alignment was just verified; '//=' looks intended.
        # The floats survive into the "%d" URL formatting below -- confirm.
        xoffset /= self.BLKSIZE
        yoffset /= self.BLKSIZE
        zoffset /= self.BLKSIZE

    # this will start the Z block writing at the specified offse
    # (changes default behavior when loading nonzero starting image slice)
    zoffset -= (minslice // self.BLKSIZE)

    # create metadata before workers start if using DVID
    if "output-dir" not in self.config_data or self.config_data["output-dir"] == "":
        # write to dvid
        server = self.config_data["dvid-info"]["dvid-server"]
        uuid = self.config_data["dvid-info"]["uuid"]
        grayname = self.config_data["dvid-info"]["grayname"]
        resource_server = str(self.resource_server)
        resource_port = self.resource_port

        # create grayscale type
        node_service = retrieve_node_service(server, uuid, resource_server, resource_port, self.APPNAME)
        node_service.create_grayscale8(str(grayname), self.BLKSIZE)

    # NOTE(review): loop variable 'slice' shadows the builtin of the same name.
    for slice in range(self.config_data["minslice"], self.config_data["maxslice"] + 1, iterslices):
        # parallelize images across many machines
        imgs = self.sc.parallelize(list(range(slice, slice + iterslices)), iterslices)

        def img2npy(slicenum):
            # Load one slice (local file or GCS blob) as a numpy array.
            try:
                img = None
                if gbucketname == "":
                    img = Image.open(basename % slicenum)
                else:
                    from gcloud import storage
                    from io import BytesIO
                    client = storage.Client()
                    gbucket = client.get_bucket(gbucketname)
                    gblob = gbucket.get_blob(gpath % slicenum)
                    # write to bytes which implements file interface
                    gblobfile = BytesIO()
                    gblob.download_to_file(gblobfile)
                    gblobfile.seek(0)
                    img = Image.open(gblobfile)
                return slicenum, numpy.array(img)
            except Exception as e:
                # just return a blank slice -- will be handled downstream
                return slicenum, numpy.zeros((0, 0), numpy.uint8)

        npy_images = imgs.map(img2npy)

        # map numpy array into y lines of block height
        blocksize = self.BLKSIZE
        blocklimit = self.BLOCKLIMIT

        def npy2lines(arrpair):
            # Split one slice into horizontal strips of height 'blocksize',
            # x-padded to a multiple of blocksize; keyed by strip index.
            z, arr = arrpair
            ysize, xsize = arr.shape
            npylines = []
            for itery in range(0, ysize, blocksize):
                line = numpy.zeros((blocksize, ((xsize - 1) // blocksize + 1) * blocksize), numpy.uint8)
                uppery = blocksize
                if (itery + blocksize) > ysize:
                    uppery = ysize - itery
                line[0:uppery, 0:xsize] = arr[itery:itery + blocksize, 0:xsize]
                npylines.append((itery // blocksize, (z, line)))
            return npylines

        npy_lines = npy_images.flatMap(npy2lines)

        # reduce y lines into DVID blocks
        groupedlines = npy_lines.groupByKey()

        # map y lines => (y, blocks)
        def lines2blocks(linespair):
            # Stack all z-strips for one y index into a (iterslices, blocksize, xsize) volume.
            y, linesp = linespair
            xsize = None
            blockdata = None
            for z, line in linesp:
                if xsize is None:
                    _, xsize = line.shape
                    blockdata = numpy.zeros((iterslices, blocksize, xsize), numpy.uint8)
                blockdata[(z - minslice) % iterslices, :, :] = line
            return y, blockdata

        yblocks = groupedlines.map(lines2blocks)

        # map multilayer of blocks to an array of single layer blocks
        def multi2single(yblocks):
            ybindex, blocks = yblocks
            blockarr = []
            num_layers = iterslices // blocksize
            for layer in range(0, num_layers):
                blockarr.append(((ybindex, layer), blocks[layer * blocksize:(layer * blocksize + blocksize), :, :]))
            return blockarr

        yblockssplit = yblocks.flatMap(multi2single)

        if "output-dir" in self.config_data and self.config_data["output-dir"] != "":
            # write blocks to disk for separte post-process -- write directly to DVID eventually?
            output_dir = self.config_data["output-dir"]

            def write2disk(yblocks):
                # Write one layer of blocks to <outdir>/ZZZZZ.z/YYYYY-<nblocks>.blocks
                zbindex = slice // blocksize
                (ybindex, layer), blocks = yblocks
                zbindex += layer
                zsize, ysize, xsize = blocks.shape
                outdir = output_dir
                outdir += "/" + ("%05d" % zbindex) + ".z/"
                filename = outdir + ("%05d" % ybindex) + "-" + str(xsize // blocksize) + ".blocks"
                try:
                    os.makedirs(outdir)
                except Exception as e:
                    pass
                # extract blocks from buffer and write to disk
                fout = open(filename, 'wb')
                for iterx in range(0, xsize, blocksize):
                    block = blocks[:, :, iterx:iterx + blocksize].copy()
                    fout.write(block)
                fout.close()

            yblockssplit.foreach(write2disk)
        else:
            # write to dvid
            server = self.config_data["dvid-info"]["dvid-server"]
            uuid = self.config_data["dvid-info"]["uuid"]
            grayname = self.config_data["dvid-info"]["grayname"]
            appname = self.APPNAME
            delimiter = self.config_data["options"]["blankdelimiter"]

            def write2dvid(yblocks):
                # POST runs of non-blank blocks to DVID via the /blocks endpoint,
                # coalescing consecutive blocks into a single request.
                from libdvid import ConnectionMethod
                import numpy
                node_service = retrieve_node_service(server, uuid, resource_server, resource_port, appname)

                # get block coordinates
                zbindex = slice // blocksize
                (ybindex, layer), blocks = yblocks
                zbindex += layer
                zsize, ysize, xsize = blocks.shape
                xrun = xsize // blocksize
                xbindex = 0  # assume x starts at 0!!

                # retrieve blocks
                # NOTE(review): initialized as str but appended with bytes
                # (block.tobytes()) below -- TypeError under Python 3; b"" looks intended.
                blockbuffer = ""

                # skip blank blocks
                startblock = False
                xrun = 0
                xbindex = 0
                for iterx in range(0, xsize, blocksize):
                    block = blocks[:, :, iterx:iterx + blocksize].copy()
                    vals = numpy.unique(block)
                    if len(vals) == 1 and vals[0] == delimiter:
                        # check if the block is blank
                        if startblock:
                            # if the previous block has data, push blocks in current queue
                            node_service.custom_request(str((grayname + "/blocks/%d_%d_%d/%d") % (xbindex + xoffset, ybindex + yoffset, zbindex + zoffset, xrun)), blockbuffer, ConnectionMethod.POST)
                            startblock = False
                            xrun = 0
                            blockbuffer = ""
                    else:
                        if startblock == False:
                            xbindex = iterx // blocksize
                            startblock = True
                        blockbuffer += block.tobytes()
                        xrun += 1
                        if blocklimit > 0 and xrun >= blocklimit:
                            # if the previous block has data, push blocks in current queue
                            node_service.custom_request(str((grayname + "/blocks/%d_%d_%d/%d") % (xbindex + xoffset, ybindex + yoffset, zbindex + zoffset, xrun)), blockbuffer, ConnectionMethod.POST)
                            startblock = False
                            xrun = 0
                            blockbuffer = ""

                # write-out leftover blocks
                if xrun > 0:
                    node_service.custom_request(str((grayname + "/blocks/%d_%d_%d/%d") % (xbindex + xoffset, ybindex + yoffset, zbindex + zoffset, xrun)), blockbuffer, ConnectionMethod.POST)

            yblockssplit.foreach(write2dvid)

        self.workflow_entry_exit_printer.write_data("Ingested %d slices" % iterslices)

    # just fetch one image at driver to get dims
    width = height = 1
    try:
        img = None
        if gbucketname == "":
            img = Image.open(basename % minslice)
            width, height = img.width, img.height
        else:
            from gcloud import storage
            from io import BytesIO
            client = storage.Client()
            gbucket = client.get_bucket(gbucketname)
            gblob = gbucket.get_blob(gpath % minslice)
            # write to bytes which implements file interface
            gblobfile = BytesIO()
            gblob.download_to_file(gblobfile)
            gblobfile.seek(0)
            img = Image.open(gblobfile)
            width, height = img.width, img.height
    except Exception as e:
        # just set size to 1
        pass

    if "output-dir" not in self.config_data or self.config_data["output-dir"] == "":
        # update metadata
        grayext = {}
        grayext["MinPoint"] = [xoffset * self.BLKSIZE, yoffset * self.BLKSIZE, zoffset * self.BLKSIZE + minslice]
        # NOTE(review): MaxPoint z adds both minslice and maxslice on top of the
        # block offset -- verify this matches the intended absolute extent.
        grayext["MaxPoint"] = [xoffset * self.BLKSIZE + width - 1, yoffset * self.BLKSIZE + height - 1, zoffset * self.BLKSIZE + minslice + self.config_data["maxslice"]]
        if not server.startswith("http://"):
            server = "http://" + server
        session = default_dvid_session()
        session.post(server + "/api/node/" + uuid + "/" + grayname + "/extents", json=grayext)
def execute(self):
    """
    Build a 2D image-tile pyramid from an existing DVID grayscale volume.

    (Formatted duplicate of the tile-creation workflow above.) Reads the
    grayscale instance's extents, creates an 'imagetile' data instance plus
    its level metadata, then uses Spark to fetch one block-thick slab of the
    volume per task and post all tiles for every pyramid level.
    """
    # tile size default
    TILESIZE = 512

    server = str(self.config_data["dvid-info"]["dvid-server"])
    uuid = str(self.config_data["dvid-info"]["uuid"])
    grayname = str(self.config_data["dvid-info"]["grayname"])
    tilename = str(self.config_data["dvid-info"]["tilename"])
    resource_server = self.resource_server
    resource_port = self.resource_port

    # determine grayscale blk extants
    if not server.startswith("http://"):
        server = "http://" + server
    session = default_dvid_session()
    req = session.get(server + "/api/node/" + uuid + "/" + grayname + "/info")
    graymeta = req.json()
    xmin, ymin, zmin = graymeta["Extended"]["MinIndex"]
    xmax, ymax, zmax = graymeta["Extended"]["MaxIndex"]

    # !! always assume isotropic block
    BLKSIZE = int(graymeta["Extended"]["BlockSize"][0])
    imformat = str(self.config_data["options"]["format"])

    # create tiles type and meta
    session.post(server + "/api/repo/" + uuid + "/instance",
                 json={
                     "typename": "imagetile",
                     "dataname": tilename,
                     "source": grayname,
                     "format": imformat
                 })

    # NOTE(review): '/' is true division in Python 3, so these coordinates are
    # floats; they feed max()/min() and the metadata POST below -- confirm DVID
    # tolerates float tile coords.
    MinTileCoord = [
        xmin * BLKSIZE / TILESIZE, ymin * BLKSIZE / TILESIZE,
        zmin * BLKSIZE / TILESIZE
    ]
    MaxTileCoord = [
        xmax * BLKSIZE / TILESIZE, ymax * BLKSIZE / TILESIZE,
        zmax * BLKSIZE / TILESIZE
    ]

    # get max level by just finding max tile coord
    maxval = max(MaxTileCoord)
    minval = abs(min(MinTileCoord))
    maxval = max(minval, maxval) + 1
    import math
    maxlevel = int(math.log(maxval) / math.log(2))

    tilemeta = {}
    tilemeta["MinTileCoord"] = MinTileCoord
    tilemeta["MaxTileCoord"] = MaxTileCoord
    tilemeta["Levels"] = {}
    currres = 10.0  # just use as placeholder for now
    for level in range(0, maxlevel + 1):
        tilemeta["Levels"][str(level)] = {
            "Resolution": [currres, currres, currres],
            "TileSize": [TILESIZE, TILESIZE, TILESIZE]
        }
        currres *= 2
    session.post(server + "/api/node/" + uuid + "/" + tilename + "/metadata", json=tilemeta)

    # one Spark task per block-thick slab along the chosen viewing axis
    numiters = zmax + 1
    axis = str(self.config_data["options"]["axis"])
    if axis == "xz":
        numiters = ymax + 1
    elif axis == "yz":
        numiters = xmax + 1

    # retrieve 32 slices at a time and generate all tiles
    # TODO: only fetch 1 slice at a time if 32 slices cannot fit in memory
    blkiters = self.sparkdvid_context.sc.parallelize(list(range(0, numiters)), numiters)

    def retrieveslices(blknum):
        # Fetch one slab (BLKSIZE thick along 'axis') as a 3D volume and
        # transpose so that axis 0 iterates the slices to be tiled.
        # grab slice with 3d volume call
        node_service = retrieve_node_service(server, uuid, resource_server, resource_port)
        vol = None
        if resource_server != "":
            # Note: libdvid uses zyx order for python functions
            if axis == "xy":
                shape_zyx = (BLKSIZE, (ymax + 1) * BLKSIZE - ymin * BLKSIZE,
                             (xmax + 1) * BLKSIZE - xmin * BLKSIZE)
                offset_zyx = (blknum * BLKSIZE, ymin * BLKSIZE, xmin * BLKSIZE)
                vol_zyx = node_service.get_gray3D(str(grayname), shape_zyx, offset_zyx, throttle=False)
                vol = vol_zyx
            elif axis == "xz":
                shape_zyx = ((zmax + 1) * BLKSIZE - zmin * BLKSIZE, BLKSIZE,
                             (xmax + 1) * BLKSIZE - xmin * BLKSIZE)
                offset_zyx = (zmin * BLKSIZE, blknum * BLKSIZE, xmin * BLKSIZE)
                vol_zyx = node_service.get_gray3D(str(grayname), shape_zyx, offset_zyx, throttle=False)
                vol_yzx = vol_zyx.transpose((1, 0, 2))
                vol = vol_yzx
            else:
                shape_zyx = ((zmax + 1) * BLKSIZE - zmin * BLKSIZE,
                             (ymax + 1) * BLKSIZE - ymin * BLKSIZE, BLKSIZE)
                offset_zyx = (zmin * BLKSIZE, ymin * BLKSIZE, blknum * BLKSIZE)
                vol_zyx = node_service.get_gray3D(str(grayname), shape_zyx, offset_zyx, throttle=False)
                vol = vol_zyx.transpose((2, 0, 1))
        else:
            # same fetches without the resource-manager throttle override
            if axis == "xy":
                shape_zyx = (BLKSIZE, (ymax + 1) * BLKSIZE - ymin * BLKSIZE,
                             (xmax + 1) * BLKSIZE - xmin * BLKSIZE)
                offset_zyx = (blknum * BLKSIZE, ymin * BLKSIZE, xmin * BLKSIZE)
                vol_zyx = node_service.get_gray3D(str(grayname), shape_zyx, offset_zyx)
                vol = vol_zyx
            elif axis == "xz":
                shape_zyx = ((zmax + 1) * BLKSIZE - zmin * BLKSIZE, BLKSIZE,
                             (xmax + 1) * BLKSIZE - xmin * BLKSIZE)
                offset_zyx = (zmin * BLKSIZE, blknum * BLKSIZE, xmin * BLKSIZE)
                vol_zyx = node_service.get_gray3D(str(grayname), shape_zyx, offset_zyx)
                vol_yzx = vol_zyx.transpose((1, 0, 2))
                vol = vol_yzx
            else:
                shape_zyx = ((zmax + 1) * BLKSIZE - zmin * BLKSIZE,
                             (ymax + 1) * BLKSIZE - ymin * BLKSIZE, BLKSIZE)
                offset_zyx = (zmin * BLKSIZE, ymin * BLKSIZE, blknum * BLKSIZE)
                vol_zyx = node_service.get_gray3D(str(grayname), shape_zyx, offset_zyx)
                vol = vol_zyx.transpose((2, 0, 1))
        return (blknum, vol)

    imagedata = blkiters.map(retrieveslices)

    # ?! assume 0,0 starting coordinate for now for debuggin simplicity
    def writeimagepyramid(vol3d):
        # For one slab: downsample each slice through all pyramid levels,
        # cut TILESIZE x TILESIZE tiles, and POST them in thread-pooled batches.
        blknum, vol = vol3d
        from PIL import Image
        from scipy import ndimage
        import io
        import numpy
        s = default_dvid_session()

        # create thread pool for parallel
        from multiprocessing.dummy import Pool as ThreadPool
        NUM_THREADS = 4
        ACTIVE_REQS = 16  # flush the queue every 16 queued tile posts
        #pool = ThreadPool(NUM_THREADS)

        # actually perform tile load
        def loadTile(reqpair):
            urlreq, reqbuff = reqpair
            s.post(urlreq, data=reqbuff)

        work_queue = []
        # iterate slice by slice
        for slicenum in range(0, BLKSIZE):
            imslice = vol[slicenum, :, :]
            imlevels = []
            imlevels.append(imslice)
            # use generic downsample algorithm
            for level in range(1, maxlevel + 1):
                dim1, dim2 = imlevels[level - 1].shape
                # go to max level regardless of actual image size
                #if dim1 < TILESIZE and dim2 < TILESIZE:
                    # image size is already smaller even though not at max level
                    #print "Not at max level"
                    # break
                imlevels.append(ndimage.interpolation.zoom(imlevels[level - 1], 0.5))

            # write pyramid for each slice using custom request
            for levelnum in range(0, len(imlevels)):
                levelslice = imlevels[levelnum]
                dim1, dim2 = levelslice.shape
                num1tiles = (dim1 - 1) // TILESIZE + 1
                num2tiles = (dim2 - 1) // TILESIZE + 1
                for iter1 in range(0, num1tiles):
                    for iter2 in range(0, num2tiles):
                        # extract tile (zero-padded on the far edges)
                        tileholder = numpy.zeros((TILESIZE, TILESIZE), numpy.uint8)
                        min1 = iter1 * TILESIZE
                        min2 = iter2 * TILESIZE
                        tileslice = levelslice[min1:min1 + TILESIZE, min2:min2 + TILESIZE]
                        t1, t2 = tileslice.shape
                        tileholder[0:t1, 0:t2] = tileslice

                        # write tileholder to dvid
                        buf = BytesIO()
                        # NOTE(review): ndarray.tostring() is deprecated/removed
                        # in modern numpy -- tobytes() is the replacement.
                        img = Image.frombuffer('L', (TILESIZE, TILESIZE), tileholder.tostring(), 'raw', 'L', 0, 1)
                        imformatpil = imformat
                        if imformat == "jpg":
                            imformatpil = "jpeg"
                        img.save(buf, format=imformatpil)

                        # queue the tile post; URL coordinate order depends on axis
                        if axis == "xy":
                            work_queue.append(
                                (server + "/api/node/" + uuid + "/" + tilename + "/tile/" + axis + "/" + str(levelnum) + "/" + str(iter2) + "_" + str(iter1) + "_" + str(slicenum + blknum * BLKSIZE),
                                 buf.getvalue()))
                        elif axis == "xz":
                            work_queue.append(
                                (server + "/api/node/" + uuid + "/" + tilename + "/tile/" + axis + "/" + str(levelnum) + "/" + str(iter2) + "_" + str(slicenum + blknum * BLKSIZE) + "_" + str(iter1),
                                 buf.getvalue()))
                        else:
                            work_queue.append(
                                (server + "/api/node/" + uuid + "/" + tilename + "/tile/" + axis + "/" + str(levelnum) + "/" + str(slicenum + blknum * BLKSIZE) + "_" + str(iter2) + "_" + str(iter1),
                                 buf.getvalue()))
                        buf.close()

                        # flush a full batch of queued tile posts
                        if len(work_queue) == ACTIVE_REQS:
                            pool = ThreadPool(NUM_THREADS)
                            pool.map(loadTile, work_queue)
                            # close the pool to further requests
                            pool.close()
                            # wait for any remaining threads
                            pool.join()
                            work_queue = []

        # submit last jobs if any remain
        if len(work_queue) > 0:
            pool = ThreadPool(NUM_THREADS)
            pool.map(loadTile, work_queue)
            # close the pool to further requests
            pool.close()
            # wait for any remaining threads
            pool.join()

    imagedata.foreach(writeimagepyramid)
def execute(self):
    """
    Build a multiscale (downsampled) pyramid for an existing DVID volume.

    For each pyramid level, creates a new data instance named
    '<source>_<level>', fetches 2x-scale chunks of the previous level via
    Spark, downsamples them (zoom for grayscale, 8-voxel mode for labels),
    and writes the result back to DVID.
    """
    server = str(self.config_data["dvid-info"]["dvid-server"])
    uuid = str(self.config_data["dvid-info"]["uuid"])
    source = str(self.config_data["dvid-info"]["source"])

    session = default_dvid_session()
    # determine grayscale blk extants
    if not server.startswith("http://"):
        server = "http://" + server
    req = session.get(server + "/api/node/" + uuid + "/" + source + "/info")
    sourcemeta = req.json()

    # xmin, ymin, zmin not being used explicitly yet
    #xmin, ymin, zmin = sourcemeta["Extended"]["MinIndex"]
    xmin, ymin, zmin = 0, 0, 0
    xmax, ymax, zmax = sourcemeta["Extended"]["MaxIndex"]

    # label vs. grayscale determines downsampling and write strategy
    islabelblk = False
    datatype = sourcemeta["Extended"]["Values"][0]["Label"]
    if str(datatype) == "labelblk":
        islabelblk = True

    # !! always assume isotropic block
    BLKSIZE = int(sourcemeta["Extended"]["BlockSize"][0])

    maxdim = max(xmax,ymax,zmax)
    # build pyramid until BLKSIZE * 4
    import math
    maxlevel = int(math.log(maxdim+1) / math.log(2)) - 2

    # assume 0,0,0 start for now
    xspan, yspan, zspan = xmax+1, ymax+1, zmax+1

    xrunlimit = self.config_data["options"]["xrunlimit"]
    xrunlimit = xrunlimit + (xrunlimit % 2)  # should be even

    currsource = source

    # create source pyramid and append _level to name
    for level in range(1, maxlevel+1):
        node_service = retrieve_node_service(server, uuid, self.resource_server, self.resource_port, self.APPNAME)
        # !! limit to grayscale now
        prevsource = currsource
        currsource = source + ("_%d" % level)

        # TODO: set voxel resolution to base dataset (not too important in current workflows)
        if islabelblk:
            node_service.create_labelblk(currsource, None, BLKSIZE)
        else:
            node_service.create_grayscale8(currsource, BLKSIZE)

        # set extents for new volume (only need to do for grayscale)
        newsourceext = {}
        newsourceext["MinPoint"] = [0,0,0]  # for now no offset
        newsourceext["MaxPoint"] = [((xspan-1) // 2+1)*BLKSIZE-1,((yspan-1) // 2+1)*BLKSIZE-1,((zspan-1) // 2+1)*BLKSIZE-1]
        session.post(server + "/api/node/" + uuid + "/" + currsource + "/extents", json=newsourceext)

        # determine number of requests (x dimension chunked by maxxrun blocks)
        maxxrun = xspan
        if xrunlimit > 0 and xrunlimit < xspan:
            maxxrun = xrunlimit
        if maxxrun % 2:
            maxxrun += 1

        xsize = xspan // maxxrun
        if xspan % maxxrun:
            xsize += 1
        ysize = (yspan+1) // 2
        zsize = (zspan+1) // 2
        resource_server = self.resource_server
        resource_port = self.resource_port

        # process the volume two z-rows of chunks at a time
        for ziter2 in range(0, zsize, 2):
            workqueue = []
            for yiter in range(0, ysize):
                for xiter in range(0, xsize):
                    for miniz in range(ziter2, ziter2+2):
                        workqueue.append((xiter,yiter,miniz))

            # parallelize jobs
            pieces = self.sc.parallelize(workqueue, len(workqueue))

            # grab data corresponding to xrun
            def retrievedata(coord):
                # Fetch a (2*BLKSIZE, 2*BLKSIZE, maxxrun*BLKSIZE) chunk of the
                # previous level -- twice the destination size in each axis.
                xiter, yiter, ziter = coord
                node_service = retrieve_node_service(server, uuid, resource_server, resource_port)

                shape_zyx = ( BLKSIZE*2, BLKSIZE*2, maxxrun*BLKSIZE )
                offset_zyx = (ziter*BLKSIZE*2, yiter*BLKSIZE*2, xiter*BLKSIZE*maxxrun)
                vol_zyx = None
                if islabelblk:
                    vol_zyx = node_service.get_labels3D( str(prevsource), shape_zyx, offset_zyx, throttle=False)
                else:
                    vol_zyx = node_service.get_gray3D( str(prevsource), shape_zyx, offset_zyx, throttle=False)

                return (coord, vol_zyx)

            volumedata = pieces.map(retrievedata)

            # downsample gray data
            def downsamplegray(vdata):
                coords, data = vdata
                from scipy import ndimage
                data = ndimage.interpolation.zoom(data, 0.5)
                return (coords, data)

            # downsample label data (TODO: make faster)
            def downsamplelabels(vdata):
                # Mode filter: each output voxel is the most frequent label
                # among its 2x2x2 source voxels (ties broken by dict order).
                coords, data = vdata
                import numpy
                zmax, ymax, xmax = data.shape
                data2 = numpy.zeros((zmax // 2, ymax // 2, xmax // 2)).astype(numpy.uint64)

                for ziter in range(0,zmax,2):
                    for yiter in range(0, ymax,2):
                        for xiter in range(0,xmax,2):
                            v1 = data[ziter, yiter, xiter]
                            v2 = data[ziter, yiter, xiter+1]
                            v3 = data[ziter, yiter+1, xiter]
                            v4 = data[ziter, yiter+1, xiter+1]
                            v5 = data[ziter+1, yiter, xiter]
                            v6 = data[ziter+1, yiter, xiter+1]
                            v7 = data[ziter+1, yiter+1, xiter]
                            v8 = data[ziter+1, yiter+1, xiter+1]

                            freqs = {}
                            freqs[v2] = 0
                            freqs[v3] = 0
                            freqs[v4] = 0
                            freqs[v5] = 0
                            freqs[v6] = 0
                            freqs[v7] = 0
                            freqs[v8] = 0

                            freqs[v1] = 1
                            freqs[v2] += 1
                            freqs[v3] += 1
                            freqs[v4] += 1
                            freqs[v5] += 1
                            freqs[v6] += 1
                            freqs[v7] += 1
                            freqs[v8] += 1

                            maxval = 0
                            freqkey = 0
                            for key, val in freqs.items():
                                if val > maxval:
                                    maxval = val
                                    freqkey = key

                            data2[ziter // 2, yiter // 2, xiter // 2] = freqkey

                return (coords, data2)

            downsampleddata = None
            if islabelblk:
                downsampleddata = volumedata.map(downsamplelabels)
            else:
                downsampleddata = volumedata.map(downsamplegray)

            appname = self.APPNAME
            delimiter = self.config_data["options"]["blankdelimiter"]

            # write results ?!
            def write2dvid(vdata):
                # Write one downsampled chunk: labels go through put_labels3D
                # whole; grayscale is coalesced into runs of non-blank blocks
                # posted via the /blocks endpoint.
                from libdvid import ConnectionMethod
                import numpy
                node_service = retrieve_node_service(server, uuid, resource_server, resource_port, appname)

                coords, data = vdata
                xiter, yiter, ziter = coords

                # set block indices
                zbindex = ziter
                ybindex = yiter

                zsize,ysize,xsize = data.shape
                #xrun = xsize/BLKSIZE
                xbindex = xiter*maxxrun // 2

                # retrieve blocks
                # NOTE(review): initialized as str but appended with bytes
                # (block.tobytes()) below -- TypeError under Python 3 in the
                # grayscale branch; b"" looks intended.
                blockbuffer = ""

                # skip blank blocks
                startblock = False
                xrun = 0

                if islabelblk:
                    vals = numpy.unique(data)
                    # TODO: ignore blank blocks within an x line
                    if not (len(vals) == 1 and vals[0] == 0):
                        if resource_server != "":
                            node_service.put_labels3D(currsource, data, (zbindex*BLKSIZE, ybindex*BLKSIZE, xbindex*BLKSIZE), compress=True, throttle=False)
                        else:
                            node_service.put_labels3D(currsource, data, (zbindex*BLKSIZE, ybindex*BLKSIZE, xbindex*BLKSIZE), compress=True)
                else:
                    for iterx in range(0, xsize, BLKSIZE):
                        block = data[:,:,iterx:iterx+BLKSIZE]
                        vals = numpy.unique(block)
                        if len(vals) == 1 and vals[0] == delimiter:
                            # check if the block is blank
                            if startblock:
                                # if the previous block has data, push blocks in current queue
                                node_service.custom_request(str((currsource + "/blocks/%d_%d_%d/%d") % (xbindex, ybindex, zbindex, xrun)), blockbuffer, ConnectionMethod.POST)
                                startblock = False
                                xrun = 0
                                blockbuffer = ""
                        else:
                            if startblock == False:
                                xbindex = xiter*maxxrun // 2 + iterx // BLKSIZE
                                startblock = True
                            blockbuffer += block.tobytes()
                            xrun += 1

                    # write-out leftover blocks
                    if xrun > 0:
                        node_service.custom_request(str((currsource + "/blocks/%d_%d_%d/%d") % (xbindex, ybindex, zbindex, xrun)), blockbuffer, ConnectionMethod.POST)

            downsampleddata.foreach(write2dvid)

        # adjust max coordinate for new level
        xspan = (xspan-1) // 2
        yspan = (yspan-1) // 2
        zspan = (zspan-1) // 2
def post_meshes_to_dvid(config, instance_name, partition_items):
    """
    Send the given meshes (either .obj or .drc) as key/value pairs to DVID.

    Args:
        config: The CreateMeshes workflow config data
        instance_name: key-value instance to post to
        partition_items: tuple (group_id, [(segment_id, mesh_data), (segment_id, mesh_data)])
    """
    # One requests session, re-used for every post (connection pooling).
    session = default_dvid_session()

    # The resource-manager client is likewise shared across posts.
    # (With an empty resource-server setting it acts as a "dummy client".)
    resource_client = ResourceManagerClient( config["options"]["resource-server"],
                                             config["options"]["resource-port"] )

    dvid_server = config["dvid-info"]["dvid"]["server"]
    uuid = config["dvid-info"]["dvid"]["uuid"]
    grouping_scheme = config["mesh-config"]["storage"]["grouping-scheme"]
    mesh_format = config["mesh-config"]["storage"]["format"]

    # Common prefix for every key written below.
    keyvalue_url = f'{dvid_server}/api/node/{uuid}/{instance_name}/key'

    if grouping_scheme == "no-groups":
        # Post each mesh on its own, plus a companion '<id>_info' record
        # that records the mesh format.
        for _group_id, segments_and_meshes in partition_items:
            for segment_id, mesh_data in segments_and_meshes:
                @auto_retry(3, pause_between_tries=60.0, logging_name=__name__)
                def write_mesh():
                    # Two posts per mesh, hence the '2' request count.
                    with resource_client.access_context(dvid_server, False, 2, len(mesh_data)):
                        session.post(f'{keyvalue_url}/{segment_id}', mesh_data)
                        session.post(f'{keyvalue_url}/{segment_id}_info', json={'format': mesh_format})
                write_mesh()
    else:
        # All other grouping schemes, including 'singletons' write tarballs.
        # (In the 'singletons' case, there is just one tarball per body.)
        for group_id, segments_and_meshes in partition_items:
            tar_name = _get_group_name(config, group_id)

            # Pack every mesh of this group into an in-memory tar archive.
            tar_stream = BytesIO()
            with closing(tarfile.open(tar_name, 'w', tar_stream)) as tf:
                for segment_id, mesh_data in segments_and_meshes:
                    member = tarfile.TarInfo(_get_mesh_name(config, segment_id))
                    member.size = len(mesh_data)
                    tf.addfile(member, BytesIO(mesh_data))
            tar_bytes = tar_stream.getbuffer()

            @auto_retry(3, pause_between_tries=60.0, logging_name=__name__)
            def write_tar():
                with resource_client.access_context(dvid_server, False, 1, len(tar_bytes)):
                    session.post(f'{keyvalue_url}/{tar_name}', tar_bytes)
            write_tar()
def execute(self):
    """
    Execute spark workflow.

    Top-level driver for grayscale volume ingestion into DVID:

      1. Set up a partitioned reader over the source image stack and compute
         the global bounding box of the volume.
      2. Create the destination DVID data instances requested by the config
         (base volume, pyramid levels, jpeg pyramid levels, tile instances).
      3. Stream partitions through Spark: write level-0 blocks and tiles,
         then iteratively downsample and write each pyramid level, caching
         intermediate RDDs per level until enough slices have accumulated.

    Reads all settings from self.config_data ("dvid-info" and "options").
    """
    self._sanitize_config()

    # Shared HTTP session for connection pooling.
    session = default_dvid_session()

    dvid_info = self.config_data["dvid-info"]
    options = self.config_data["options"]
    # Cubic DVID block shape, e.g. (64, 64, 64).
    block_shape = 3*(options["blocksize"],)

    # Each Spark partition spans 'blockwritelimit' blocks along the unbounded axis.
    self.partition_size = options["blockwritelimit"] * options["blocksize"]

    # ?? num parallel requests might be really small at high levels of pyramids
    # xdim is unbounded or very large
    partition_dims = PartitionDims(options["blocksize"], options["blocksize"], self.partition_size)
    partition_schema = partitionSchema(partition_dims,
                                       blank_delimiter=options["blankdelimiter"],
                                       padding=options["blocksize"],
                                       enablemask=options["has-dvidmask"])

    # Config 'offset' is reversed here (XYZ -> ZYX) and shifted by the first
    # slice index so the reader places slice 'minslice' at the right Z.
    offset_zyx = np.array( options["offset"][::-1] )
    offset_zyx[0] += options["minslice"]
    imgreader = imagefileSrc(partition_schema,
                             options["basename"],
                             (options["minslice"], options["maxslice"]),
                             VolumeOffset(*offset_zyx),
                             self.sc)

    # !! hack: override iteration size that is set to partition size, TODO: add option
    # this just makes the downstream processing a little more convenient, and reduces
    # unnecessary DVID patching if that is enabled.
    # (must be a multiple of block size)
    imgreader.iteration_size = options["num-tasks"]

    # get dims from image (hackage):
    # open the first slice locally just to learn its width/height.
    from PIL import Image
    import requests  # NOTE(review): appears unused in this method -- confirm
    if '%' in options["basename"]:
        # printf-style filename template, e.g. "slice_%05d.png"
        minslice_name = options["basename"] % options["minslice"]
    elif '{' in options["basename"]:
        # str.format-style template, e.g. "slice_{:05d}.png"
        minslice_name = options["basename"].format(options["minslice"])
    else:
        raise RuntimeError(f"Unrecognized format string for image basename: {options['basename']}")

    img = Image.open(minslice_name)
    # Volume shape in ZYX: slice count, image height, image width.
    volume_shape = (1 + options["maxslice"] - options["minslice"], img.height, img.width)
    del img

    # Global bounding box [start, stop) of the whole volume.
    # NOTE(review): 'offset' is reversed ([::-1]) before use above, but here it
    # is assigned directly.  If 'offset' is XYZ in the config, this assignment
    # probably needs the same reversal -- confirm the intended axis order.
    global_box_zyx = np.zeros((2,3), dtype=int)
    global_box_zyx[0] = options["offset"]
    global_box_zyx[0] += (options["minslice"], 0, 0)
    global_box_zyx[1] = global_box_zyx[0] + volume_shape

    # Create the base (level-0) data instance, unless it already exists.
    if options["create-pyramid"]:
        if is_datainstance( dvid_info["dvid-server"], dvid_info["uuid"], dvid_info["dataname"] ):
            logger.info("'{dataname}' already exists, skipping creation".format(**dvid_info) )
        else:
            # create data instance and disable dvidmask
            # !! assume if data instance exists and mask is set that all pyramid
            # !! also exits, meaning the mask should be used.
            options["has-dvidmask"] = False
            if options["disable-original"]:
                logger.info("Not creating '{dataname}' due to 'disable-original' config setting".format(**dvid_info) )
            elif 0 in options["skipped-pyramid-levels"]:
                logger.info("Not creating '{dataname}' due to 'skipped-pyramid-levels' config setting".format(**dvid_info) )
            else:
                if options["is-rawarray"]:
                    create_rawarray8( dvid_info["dvid-server"], dvid_info["uuid"], dvid_info["dataname"], block_shape )
                else:
                    create_label_instance( dvid_info["dvid-server"], dvid_info["uuid"], dvid_info["dataname"], 0, block_shape )

        # Keep extents current even when the instance pre-existed.
        if not options["disable-original"] and 0 not in options["skipped-pyramid-levels"]:
            update_extents( dvid_info["dvid-server"], dvid_info["uuid"], dvid_info["dataname"], global_box_zyx )
            # Bottom level of pyramid is listed as neuroglancer-compatible
            extend_list_value(dvid_info["dvid-server"], dvid_info["uuid"], '.meta', 'neuroglancer', [dvid_info["dataname"]])

    # determine number of pyramid levels if not specified
    # (-1 means "auto": halve Z until it fits in 512, with a minimum of 4 levels)
    if options["create-pyramid"] or options["create-pyramid-jpeg"]:
        if options["pyramid-depth"] == -1:
            options["pyramid-depth"] = 0
            zsize = options["maxslice"] - options["minslice"] + 1
            while zsize > 512:
                options["pyramid-depth"] += 1
                zsize /= 2

            # NeuTu doesn't work well if there aren't at least a few pyramid levels.
            # Even for small volumes, use at least a few pyramid levels,
            # unless the depth was explicit in the config.
            options["pyramid-depth"] = max(options["pyramid-depth"], 4)

    # create pyramid data instances
    if options["create-pyramid-jpeg"]:
        dataname_jpeg = dvid_info["dataname"] + self.JPEGPYRAMID_NAME
        if 0 in options["skipped-pyramid-levels"]:
            logger.info("Not creating '{}' due to 'skipped-pyramid-levels' config setting".format(dataname_jpeg) )
        else:
            if is_datainstance(dvid_info["dvid-server"], dvid_info["uuid"], dataname_jpeg):
                logger.info("'{}' already exists, skipping creation".format(dataname_jpeg) )
            else:
                create_rawarray8( dvid_info["dvid-server"], dvid_info["uuid"], dataname_jpeg, block_shape, Compression.JPEG )
            update_extents( dvid_info["dvid-server"], dvid_info["uuid"], dataname_jpeg, global_box_zyx )
            # Bottom level of pyramid is listed as neuroglancer-compatible
            extend_list_value(dvid_info["dvid-server"], dvid_info["uuid"], '.meta', 'neuroglancer', [dataname_jpeg])

    # Create one data instance per pyramid level (levels 1..depth).
    if options["create-pyramid"]:
        for level in range(1, 1 + options["pyramid-depth"]):
            downsampled_box_zyx = global_box_zyx // (2**level)
            downname = dvid_info["dataname"] + "_%d" % level

            if level in options["skipped-pyramid-levels"]:
                logger.info("Not creating '{}' due to 'skipped-pyramid-levels' config setting".format(downname) )
                continue

            if is_datainstance(dvid_info["dvid-server"], dvid_info["uuid"], downname):
                logger.info("'{}' already exists, skipping creation".format(downname) )
            else:
                if options["is-rawarray"]:
                    create_rawarray8( dvid_info["dvid-server"], dvid_info["uuid"], downname, block_shape )
                else:
                    create_label_instance( dvid_info["dvid-server"], dvid_info["uuid"], downname, 0, block_shape )

            update_extents( dvid_info["dvid-server"], dvid_info["uuid"], downname, downsampled_box_zyx )
            # Higher-levels of the pyramid should not appear in the DVID-lite console.
            extend_list_value(dvid_info["dvid-server"], dvid_info["uuid"], '.meta', 'restrictions', [downname])

    # Same again for the lossy (jpeg) pyramid instances.
    if options["create-pyramid-jpeg"]:
        for level in range(1, 1 + options["pyramid-depth"]):
            downsampled_box_zyx = global_box_zyx // (2**level)
            downname = dvid_info["dataname"] + self.JPEGPYRAMID_NAME + "_%d" % level

            if level in options["skipped-pyramid-levels"]:
                logger.info("Not creating '{}' due to 'skipped-pyramid-levels' config setting".format(downname) )
                continue

            if is_datainstance(dvid_info["dvid-server"], dvid_info["uuid"], downname):
                logger.info("'{}' already exists, skipping creation".format(downname) )
            else:
                create_rawarray8( dvid_info["dvid-server"], dvid_info["uuid"], downname, block_shape, Compression.JPEG )

            update_extents( dvid_info["dvid-server"], dvid_info["uuid"], downname, downsampled_box_zyx )
            # Higher-levels of the pyramid should not appear in the DVID-lite console.
            extend_list_value(dvid_info["dvid-server"], dvid_info["uuid"], '.meta', 'restrictions', [downname])

    # create tiles
    if options["create-tiles"] or options["create-tiles-jpeg"]:
        # Tile coordinate range in XYZ (note the [::-1] reversal from ZYX).
        MinTileCoord = global_box_zyx[0][::-1] // options["tilesize"]
        MaxTileCoord = global_box_zyx[1][::-1] // options["tilesize"]

        # get max level by just finding max tile coord
        maxval = max(MaxTileCoord) - min(MinTileCoord) + 1
        import math
        self.maxlevel = int(math.log(maxval) / math.log(2))

        tilemeta = {}
        tilemeta["MinTileCoord"] = MinTileCoord.tolist()
        tilemeta["MaxTileCoord"] = MaxTileCoord.tolist()
        tilemeta["Levels"] = {}
        currres = 8.0 # just use as placeholder for now
        for level in range(0, self.maxlevel+1):
            tilemeta["Levels"][str(level)] = { "Resolution" : 3*[currres], "TileSize": 3*[options["tilesize"]] }
            currres *= 2

        if options["create-tiles"]:
            session.post("{dvid-server}/api/repo/{uuid}/instance".format(**dvid_info),
                         json={"typename": "imagetile",
                               "dataname": dvid_info["dataname"]+self.TILENAME,
                               "source": dvid_info["dataname"],
                               "format": "png"})
            session.post("{dvid-server}/api/repo/{uuid}/{dataname}{tilename}/metadata".format(tilename=self.TILENAME, **dvid_info), json=tilemeta)

        if options["create-tiles-jpeg"]:
            session.post("{dvid-server}/api/repo/{uuid}/instance".format(**dvid_info),
                         json={ "typename": "imagetile",
                                "dataname": dvid_info["dataname"]+self.JPEGTILENAME,
                                "source": dvid_info["dataname"],
                                "format": "jpg"} )
            session.post("{dvid-server}/api/repo/{uuid}/{dataname_jpeg_tile}/metadata"
                         .format( dataname_jpeg_tile=dvid_info["dataname"]+self.JPEGTILENAME, **dvid_info ),
                         json=tilemeta)

    # For a local DVID server, make each worker reload server metadata so the
    # instances created above are visible to them.
    if dvid_info["dvid-server"].startswith("http://127.0.0.1"):
        def reload_meta():
            reload_server_metadata(dvid_info["dvid-server"])
        self.run_on_each_worker( reload_meta )

    # TODO Validation: should verify syncs exist, should verify pyramid depth

    # TODO: set syncs for pyramids, tiles if base datatype exists
    # syncs should be removed before ingestion and added afterward

    # RDDs per pyramid level, accumulated until enough Z-slices exist to
    # produce the next (coarser) level.
    levels_cache = {}

    # iterate through each partition
    for arraypartition in imgreader:
        # DVID pad if necessary
        if options["has-dvidmask"]:
            dvidsrc = dvidSrc(dvid_info["dvid-server"], dvid_info["uuid"], dvid_info["dataname"],
                              arraypartition,
                              resource_server=self.resource_server,
                              resource_port=self.resource_port)
            arraypartition = dvidsrc.extract_volume()

        # potentially need for future iterations
        arraypartition.persist()

        # check for final layer
        finallayer = imgreader.curr_slice > imgreader.end_slice

        if not options["disable-original"]:
            # Write level-0 of the raw data, even if we aren't writing the rest of the pyramid.
            dataname = datanamelossy = None
            if options["create-pyramid"]:
                dataname = dvid_info["dataname"]
            if options["create-pyramid-jpeg"]:
                datanamelossy = dvid_info["dataname"] + self.JPEGPYRAMID_NAME

            if (dataname or datanamelossy) and 0 not in options["skipped-pyramid-levels"]:
                self._write_blocks(arraypartition, dataname, datanamelossy)

        if options["create-tiles"] or options["create-tiles-jpeg"]:
            # repartition into tiles
            schema = partitionSchema(PartitionDims(1,0,0))
            tilepartition = schema.partition_data(arraypartition)

            # write unpadded tilesize (will pad with delimiter if needed)
            self._writeimagepyramid(tilepartition)

        if options["create-pyramid"] or options["create-pyramid-jpeg"]:
            if 0 not in levels_cache:
                levels_cache[0] = []
            levels_cache[0].append(arraypartition)
            curr_level = 1
            downsample_factor = 2

            # should be a multiple of Z blocks or the final fetch
            assert imgreader.curr_slice % options["blocksize"] == 0

            # Downsample a level as soon as enough Z-blocks have accumulated
            # (or unconditionally on the final layer), cascading upward
            # through the pyramid.
            while ((((imgreader.curr_slice // options["blocksize"]) % downsample_factor) == 0) or finallayer) and curr_level <= options["pyramid-depth"]:
                partlist = levels_cache[curr_level-1]
                part = partlist[0]

                # union all RDDs from the same level
                for iter1 in range(1, len(partlist)):
                    part = part.union(partlist[iter1])

                # downsample map
                israw = options["is-rawarray"]
                def downsample(part_vol):
                    part, vol = part_vol
                    if not israw:
                        # Label data: mode/majority-style downsampling to avoid
                        # inventing new label values.
                        vol = downsample_3Dlabels(vol)[0]
                    else:
                        vol = downsample_raw(vol)[0]
                    return (part, vol)
                downsampled_array = part.map(downsample)

                # repart (vol and offset will always be power of two because of padding)
                def repartition_down(part_volume):
                    part, volume = part_volume
                    downsampled_offset = np.array(part.get_offset()) // 2
                    downsampled_reloffset = np.array(part.get_reloffset()) // 2
                    offsetnew = VolumeOffset(*downsampled_offset)
                    reloffsetnew = VolumeOffset(*downsampled_reloffset)
                    partnew = volumePartition((offsetnew.z, offsetnew.y, offsetnew.x), offsetnew, reloffset=reloffsetnew)
                    return partnew, volume
                downsampled_array = downsampled_array.map(repartition_down)

                # repartition downsample data
                partition_dims = PartitionDims(options["blocksize"], options["blocksize"], self.partition_size)
                schema = partitionSchema(partition_dims,
                                         blank_delimiter=options["blankdelimiter"],
                                         padding=options["blocksize"],
                                         enablemask=options["has-dvidmask"])
                downsampled_array = schema.partition_data(downsampled_array)

                # persist before padding if there are more levels
                if curr_level < options["pyramid-depth"]:
                    downsampled_array.persist()
                    if curr_level not in levels_cache:
                        levels_cache[curr_level] = []
                    levels_cache[curr_level].append(downsampled_array)

                # pad from DVID (move before persist will allow multi-ingest
                # but will lead to slightly non-optimal downsampling boundary
                # effects if using a lossy compression only.
                if options["has-dvidmask"]:
                    padname = dvid_info["dataname"]
                    if options["create-pyramid-jpeg"]: # !! should pad with orig if computing
                        # pad with jpeg
                        padname += self.JPEGPYRAMID_NAME
                    padname += "_%d" % curr_level
                    dvidsrc = dvidSrc(dvid_info["dvid-server"], dvid_info["uuid"], padname,
                                      downsampled_array,
                                      resource_server=self.resource_server,
                                      resource_port=self.resource_port)
                    downsampled_array = dvidsrc.extract_volume()

                # write result
                downname = None
                downnamelossy = None
                if options["create-pyramid"]:
                    downname = dvid_info["dataname"] + "_%d" % curr_level
                if options["create-pyramid-jpeg"]:
                    downnamelossy = dvid_info["dataname"] + self.JPEGPYRAMID_NAME + "_%d" % curr_level

                if curr_level not in options["skipped-pyramid-levels"]:
                    self._write_blocks(downsampled_array, downname, downnamelossy)

                # remove previous level
                del levels_cache[curr_level-1]
                curr_level += 1
                downsample_factor *= 2