def write2dvid(vdata):
    from libdvid import ConnectionMethod
    import numpy
    node_service = retrieve_node_service(server, uuid, resource_server, resource_port, appname)

    coords, data = vdata
    xiter, yiter, ziter = coords

    # set block indices
    zbindex = ziter
    ybindex = yiter
    zsize, ysize, xsize = data.shape
    xbindex = xiter * maxxrun // 2

    # retrieve blocks
    blockbuffer = b""  # must be bytes: block.tobytes() is appended below

    # skip blank blocks
    startblock = False
    xrun = 0

    if islabelblk:
        vals = numpy.unique(data)
        # TODO: ignore blank blocks within an x line
        if not (len(vals) == 1 and vals[0] == 0):
            if resource_server != "":
                node_service.put_labels3D(currsource, data,
                                          (zbindex*BLKSIZE, ybindex*BLKSIZE, xbindex*BLKSIZE),
                                          compress=True, throttle=False)
            else:
                node_service.put_labels3D(currsource, data,
                                          (zbindex*BLKSIZE, ybindex*BLKSIZE, xbindex*BLKSIZE),
                                          compress=True)
    else:
        for iterx in range(0, xsize, BLKSIZE):
            block = data[:, :, iterx:iterx+BLKSIZE]
            vals = numpy.unique(block)
            if len(vals) == 1 and vals[0] == delimiter:
                # the block is blank: if the previous block has data,
                # push the blocks in the current queue
                if startblock:
                    node_service.custom_request(str((currsource + "/blocks/%d_%d_%d/%d")
                                                    % (xbindex, ybindex, zbindex, xrun)),
                                                blockbuffer, ConnectionMethod.POST)
                    startblock = False
                    xrun = 0
                    blockbuffer = b""
            else:
                if not startblock:
                    xbindex = xiter * maxxrun // 2 + iterx // BLKSIZE
                    startblock = True
                blockbuffer += block.tobytes()
                xrun += 1

        # write-out leftover blocks
        if xrun > 0:
            node_service.custom_request(str((currsource + "/blocks/%d_%d_%d/%d")
                                            % (xbindex, ybindex, zbindex, xrun)),
                                        blockbuffer, ConnectionMethod.POST)
def writeimagepyramid(image):
    slicenum, imnpy = image

    from PIL import Image
    from scipy import ndimage
    from io import BytesIO
    import numpy
    from libdvid import ConnectionMethod
    node_service = retrieve_node_service(server, uuid, resource_server, resource_port, appname)

    # actually perform tile load
    def loadTile(reqpair):
        urlreq, reqbuff = reqpair
        node_service.custom_request(urlreq, reqbuff, ConnectionMethod.POST)
        #session.post(urlreq, data=reqbuff)

    # build the pyramid for this slice using a generic downsample algorithm
    imlevels = [imnpy]
    for level in range(1, maxlevel+1):
        imlevels.append(ndimage.interpolation.zoom(imlevels[level-1], 0.5))

    # write pyramid for each slice using custom request
    for levelnum in range(0, len(imlevels)):
        levelslice = imlevels[levelnum]
        dim1, dim2 = levelslice.shape

        num1tiles = (dim1-1) // TILESIZE + 1
        num2tiles = (dim2-1) // TILESIZE + 1

        for iter1 in range(0, num1tiles):
            for iter2 in range(0, num2tiles):
                # extract tile (pad partial tiles with zeros)
                tileholder = numpy.zeros((TILESIZE, TILESIZE), numpy.uint8)
                min1 = iter1*TILESIZE
                min2 = iter2*TILESIZE
                tileslice = levelslice[min1:min1+TILESIZE, min2:min2+TILESIZE]
                t1, t2 = tileslice.shape
                tileholder[0:t1, 0:t2] = tileslice

                # write tileholder to dvid
                buf = BytesIO()
                img = Image.frombuffer('L', (TILESIZE, TILESIZE), tileholder.tobytes(), 'raw', 'L', 0, 1)
                imformatpil = imformat
                if imformat == "jpg":
                    imformatpil = "jpeg"
                img.save(buf, format=imformatpil)
                loadTile((tilename + "/tile/xy/" + str(levelnum) + "/"
                          + str(iter2) + "_" + str(iter1) + "_" + str(slicenum),
                          buf.getvalue()))
                buf.close()
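# A quick standalone worked example of the tiling arithmetic above. The
# TILESIZE value of 512 is an assumption for illustration only (it is not
# taken from this code): a 2048x1536 slice at level 0 needs
# ceil(2048/512) x ceil(1536/512) = 4 x 3 tiles, and each zoom(0.5) level
# halves both dimensions.
TILESIZE = 512
dim1, dim2 = 2048, 1536
for levelnum in range(3):
    num1tiles = (dim1 - 1) // TILESIZE + 1
    num2tiles = (dim2 - 1) // TILESIZE + 1
    print("level %d: %dx%d pixels -> %dx%d tiles" % (levelnum, dim1, dim2, num1tiles, num2tiles))
    dim1, dim2 = dim1 // 2, dim2 // 2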
def node_service(self):
    if self._node_service is None:
        try:
            # We don't pass the resource manager details here
            # because we use the resource manager from python.
            self._node_service = retrieve_node_service(self._server, self._uuid, "", "")
        except Exception as ex:
            host = socket.gethostname()
            msg = f"Host {host}: Failed to connect to {self._server} / {self._uuid}"
            raise RuntimeError(msg) from ex
    return self._node_service
def retrievedata(coord):
    xiter, yiter, ziter = coord
    node_service = retrieve_node_service(server, uuid, resource_server, resource_port)

    shape_zyx = (BLKSIZE*2, BLKSIZE*2, maxxrun*BLKSIZE)
    offset_zyx = (ziter*BLKSIZE*2, yiter*BLKSIZE*2, xiter*BLKSIZE*maxxrun)
    if islabelblk:
        vol_zyx = node_service.get_labels3D(str(prevsource), shape_zyx, offset_zyx, throttle=False)
    else:
        vol_zyx = node_service.get_gray3D(str(prevsource), shape_zyx, offset_zyx, throttle=False)

    return (coord, vol_zyx)
def write2dvid(yblocks):
    from libdvid import ConnectionMethod
    import numpy
    node_service = retrieve_node_service(server, uuid, resource_server, resource_port, appname)

    # get block coordinates
    zbindex = slice // blocksize
    (ybindex, layer), blocks = yblocks
    zbindex += layer
    zsize, ysize, xsize = blocks.shape

    # retrieve blocks
    blockbuffer = b""  # must be bytes: block.tobytes() is appended below

    # skip blank blocks
    startblock = False
    xrun = 0
    xbindex = 0  # assume x starts at 0!!

    for iterx in range(0, xsize, blocksize):
        block = blocks[:, :, iterx:iterx+blocksize].copy()
        vals = numpy.unique(block)
        if len(vals) == 1 and vals[0] == delimiter:
            # the block is blank: if the previous block has data,
            # push the blocks in the current queue
            if startblock:
                node_service.custom_request(str((grayname + "/blocks/%d_%d_%d/%d")
                                                % (xbindex+xoffset, ybindex+yoffset, zbindex+zoffset, xrun)),
                                            blockbuffer, ConnectionMethod.POST)
                startblock = False
                xrun = 0
                blockbuffer = b""
        else:
            if not startblock:
                xbindex = iterx // blocksize
                startblock = True
            blockbuffer += block.tobytes()
            xrun += 1

            if blocklimit > 0 and xrun >= blocklimit:
                # hit the block limit: push the blocks in the current queue
                node_service.custom_request(str((grayname + "/blocks/%d_%d_%d/%d")
                                                % (xbindex+xoffset, ybindex+yoffset, zbindex+zoffset, xrun)),
                                            blockbuffer, ConnectionMethod.POST)
                startblock = False
                xrun = 0
                blockbuffer = b""

    # write-out leftover blocks
    if xrun > 0:
        node_service.custom_request(str((grayname + "/blocks/%d_%d_%d/%d")
                                        % (xbindex+xoffset, ybindex+yoffset, zbindex+zoffset, xrun)),
                                    blockbuffer, ConnectionMethod.POST)
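# Standalone sketch of the run-grouping idea used by write2dvid above: walk
# the x axis block by block, skip blocks that are entirely the blank
# delimiter value, and batch contiguous non-blank blocks into one POST.
# Everything here is synthetic (blocksize=2, delimiter=0) for illustration.
import numpy

blocksize, delimiter = 2, 0
vol = numpy.zeros((2, 2, 12), numpy.uint8)
vol[:, :, 4:8] = 7    # one run of two non-blank blocks
vol[:, :, 10:12] = 9  # a second run of one block

runs = []             # collected as (starting block index, #blocks)
startblock, xbindex, xrun = False, 0, 0
for iterx in range(0, vol.shape[2], blocksize):
    block = vol[:, :, iterx:iterx+blocksize]
    if (block == delimiter).all():
        if startblock:
            runs.append((xbindex, xrun))
            startblock, xrun = False, 0
    else:
        if not startblock:
            xbindex, startblock = iterx // blocksize, True
        xrun += 1
if xrun > 0:
    runs.append((xbindex, xrun))

print(runs)  # [(2, 2), (5, 1)]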
def get_seg():
    node_service = retrieve_node_service(pdconf["dvid-server"], pdconf["uuid"],
                                         resource_server, resource_port)

    # retrieve data from box start position
    # Note: libdvid uses zyx order for python functions
    if resource_server != "":
        return node_service.get_labels3D(str(pdconf["segmentation-name"]),
                                         (size_z, size_y, size_x),
                                         (subvolume.box.z1-border, subvolume.box.y1-border, subvolume.box.x1-border),
                                         throttle=False)
    else:
        return node_service.get_labels3D(str(pdconf["segmentation-name"]),
                                         (size_z, size_y, size_x),
                                         (subvolume.box.z1-border, subvolume.box.y1-border, subvolume.box.x1-border))
def _init_skeletons_instance(self):
    dvid_info = self.config_data["dvid-info"]
    options = self.config_data["options"]
    if is_node_locked(dvid_info["dvid"]["server"], dvid_info["dvid"]["uuid"]):
        raise RuntimeError(f"Can't write skeletons/meshes: The node you specified "
                           f"({dvid_info['dvid']['server']} / {dvid_info['dvid']['uuid']}) is locked.")

    node_service = retrieve_node_service(dvid_info["dvid"]["server"],
                                         dvid_info["dvid"]["uuid"],
                                         options["resource-server"],
                                         options["resource-port"])

    if "neutube-skeleton" in options["output-types"]:
        node_service.create_keyvalue(dvid_info["dvid"]["skeletons-destination"])

    if "mesh" in options["output-types"]:
        node_service.create_keyvalue(dvid_info["dvid"]["meshes-destination"])
def _init_meshes_instances(self):
    dvid_info = self.config_data["dvid-info"]
    options = self.config_data["options"]
    if is_node_locked(dvid_info["dvid"]["server"], dvid_info["dvid"]["uuid"]):
        raise RuntimeError(f"Can't write meshes: The node you specified "
                           f"({dvid_info['dvid']['server']} / {dvid_info['dvid']['uuid']}) is locked.")

    node_service = retrieve_node_service(dvid_info["dvid"]["server"],
                                         dvid_info["dvid"]["uuid"],
                                         options["resource-server"],
                                         options["resource-port"])

    self.mesh_instances = []
    for simplification_ratio in self.config_data["mesh-config"]["simplify-ratios"]:
        instance_name = dvid_info["dvid"]["meshes-destination"]
        if len(self.config_data["mesh-config"]["simplify-ratios"]) > 1:
            instance_name += f"_dec{simplification_ratio:.2f}"

        node_service.create_keyvalue(instance_name)
        self.mesh_instances.append(instance_name)
def retrieveslices(blknum):
    # grab slice with 3d volume call
    node_service = retrieve_node_service(server, uuid, resource_server, resource_port)

    if resource_server != "":
        # Note: libdvid uses zyx order for python functions
        if axis == "xy":
            shape_zyx = (BLKSIZE, (ymax+1)*BLKSIZE - ymin*BLKSIZE, (xmax+1)*BLKSIZE - xmin*BLKSIZE)
            offset_zyx = (blknum*BLKSIZE, ymin*BLKSIZE, xmin*BLKSIZE)
            vol_zyx = node_service.get_gray3D(str(grayname), shape_zyx, offset_zyx, throttle=False)
            vol = vol_zyx
        elif axis == "xz":
            shape_zyx = ((zmax+1)*BLKSIZE - zmin*BLKSIZE, BLKSIZE, (xmax+1)*BLKSIZE - xmin*BLKSIZE)
            offset_zyx = (zmin*BLKSIZE, blknum*BLKSIZE, xmin*BLKSIZE)
            vol_zyx = node_service.get_gray3D(str(grayname), shape_zyx, offset_zyx, throttle=False)
            vol = vol_zyx.transpose((1, 0, 2))
        else:  # yz
            shape_zyx = ((zmax+1)*BLKSIZE - zmin*BLKSIZE, (ymax+1)*BLKSIZE - ymin*BLKSIZE, BLKSIZE)
            offset_zyx = (zmin*BLKSIZE, ymin*BLKSIZE, blknum*BLKSIZE)
            vol_zyx = node_service.get_gray3D(str(grayname), shape_zyx, offset_zyx, throttle=False)
            vol = vol_zyx.transpose((2, 0, 1))
    else:
        if axis == "xy":
            shape_zyx = (BLKSIZE, (ymax+1)*BLKSIZE - ymin*BLKSIZE, (xmax+1)*BLKSIZE - xmin*BLKSIZE)
            offset_zyx = (blknum*BLKSIZE, ymin*BLKSIZE, xmin*BLKSIZE)
            vol_zyx = node_service.get_gray3D(str(grayname), shape_zyx, offset_zyx)
            vol = vol_zyx
        elif axis == "xz":
            shape_zyx = ((zmax+1)*BLKSIZE - zmin*BLKSIZE, BLKSIZE, (xmax+1)*BLKSIZE - xmin*BLKSIZE)
            offset_zyx = (zmin*BLKSIZE, blknum*BLKSIZE, xmin*BLKSIZE)
            vol_zyx = node_service.get_gray3D(str(grayname), shape_zyx, offset_zyx)
            vol = vol_zyx.transpose((1, 0, 2))
        else:  # yz
            shape_zyx = ((zmax+1)*BLKSIZE - zmin*BLKSIZE, (ymax+1)*BLKSIZE - ymin*BLKSIZE, BLKSIZE)
            offset_zyx = (zmin*BLKSIZE, ymin*BLKSIZE, blknum*BLKSIZE)
            vol_zyx = node_service.get_gray3D(str(grayname), shape_zyx, offset_zyx)
            vol = vol_zyx.transpose((2, 0, 1))

    return (blknum, vol)
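# Standalone illustration of the axis reordering above: DVID returns volumes
# in zyx order, so an "xz" slab is transposed so y leads and a "yz" slab so
# x leads, putting the slice axis first for the downstream per-slice loops.
# Shapes are synthetic.
import numpy

vol_zyx = numpy.zeros((64, 32, 128))       # (z, y, x) slab from DVID
print(vol_zyx.transpose((1, 0, 2)).shape)  # (32, 64, 128): y becomes the leading (slice) axis
print(vol_zyx.transpose((2, 0, 1)).shape)  # (128, 64, 32): x becomes the leading (slice) axis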
def execute(self):
    server = str(self.config_data["dvid-info"]["dvid-server"])
    uuid = str(self.config_data["dvid-info"]["uuid"])
    source = str(self.config_data["dvid-info"]["source"])

    session = default_dvid_session()

    # determine grayscale blk extents
    if not server.startswith("http://"):
        server = "http://" + server
    req = session.get(server + "/api/node/" + uuid + "/" + source + "/info")
    sourcemeta = req.json()

    # xmin, ymin, zmin not being used explicitly yet
    #xmin, ymin, zmin = sourcemeta["Extended"]["MinIndex"]
    xmin, ymin, zmin = 0, 0, 0
    xmax, ymax, zmax = sourcemeta["Extended"]["MaxIndex"]

    islabelblk = False
    datatype = sourcemeta["Extended"]["Values"][0]["Label"]
    if str(datatype) == "labelblk":
        islabelblk = True

    # !! always assume isotropic block
    BLKSIZE = int(sourcemeta["Extended"]["BlockSize"][0])

    maxdim = max(xmax, ymax, zmax)
    # build pyramid until BLKSIZE * 4
    import math
    maxlevel = int(math.log(maxdim + 1) / math.log(2)) - 2

    # assume 0,0,0 start for now
    xspan, yspan, zspan = xmax + 1, ymax + 1, zmax + 1

    xrunlimit = self.config_data["options"]["xrunlimit"]
    xrunlimit = xrunlimit + (xrunlimit % 2)  # should be even

    currsource = source

    # create source pyramid and append _level to name
    for level in range(1, maxlevel + 1):
        node_service = retrieve_node_service(server, uuid, self.resource_server,
                                             self.resource_port, self.APPNAME)
        # !! limit to grayscale now
        prevsource = currsource
        currsource = source + ("_%d" % level)

        # TODO: set voxel resolution to base dataset (not too important in current workflows)
        if islabelblk:
            node_service.create_labelblk(currsource, None, BLKSIZE)
        else:
            node_service.create_grayscale8(currsource, BLKSIZE)

        # set extents for new volume (only need to do for grayscale)
        newsourceext = {}
        newsourceext["MinPoint"] = [0, 0, 0]  # for now no offset
        newsourceext["MaxPoint"] = [((xspan-1) // 2 + 1)*BLKSIZE - 1,
                                    ((yspan-1) // 2 + 1)*BLKSIZE - 1,
                                    ((zspan-1) // 2 + 1)*BLKSIZE - 1]
        session.post(server + "/api/node/" + uuid + "/" + currsource + "/extents",
                     json=newsourceext)

        # determine number of requests
        maxxrun = xspan
        if xrunlimit > 0 and xrunlimit < xspan:
            maxxrun = xrunlimit
        if maxxrun % 2:
            maxxrun += 1

        xsize = xspan // maxxrun
        if xspan % maxxrun:
            xsize += 1
        ysize = (yspan + 1) // 2
        zsize = (zspan + 1) // 2

        resource_server = self.resource_server
        resource_port = self.resource_port

        for ziter2 in range(0, zsize, 2):
            workqueue = []
            for yiter in range(0, ysize):
                for xiter in range(0, xsize):
                    for miniz in range(ziter2, ziter2 + 2):
                        workqueue.append((xiter, yiter, miniz))

            # parallelize jobs
            pieces = self.sc.parallelize(workqueue, len(workqueue))

            # grab data corresponding to xrun
            def retrievedata(coord):
                xiter, yiter, ziter = coord
                node_service = retrieve_node_service(server, uuid, resource_server, resource_port)

                shape_zyx = (BLKSIZE*2, BLKSIZE*2, maxxrun*BLKSIZE)
                offset_zyx = (ziter*BLKSIZE*2, yiter*BLKSIZE*2, xiter*BLKSIZE*maxxrun)
                if islabelblk:
                    vol_zyx = node_service.get_labels3D(str(prevsource), shape_zyx, offset_zyx, throttle=False)
                else:
                    vol_zyx = node_service.get_gray3D(str(prevsource), shape_zyx, offset_zyx, throttle=False)

                return (coord, vol_zyx)

            volumedata = pieces.map(retrievedata)

            # downsample gray data
            def downsamplegray(vdata):
                coords, data = vdata
                from scipy import ndimage
                data = ndimage.interpolation.zoom(data, 0.5)
                return (coords, data)

            # downsample label data (TODO: make faster)
            def downsamplelabels(vdata):
                coords, data = vdata
                import numpy
                zmax, ymax, xmax = data.shape
                data2 = numpy.zeros((zmax // 2, ymax // 2, xmax // 2)).astype(numpy.uint64)

                # take the most frequent label of each 2x2x2 neighborhood
                for ziter in range(0, zmax, 2):
                    for yiter in range(0, ymax, 2):
                        for xiter in range(0, xmax, 2):
                            v1 = data[ziter, yiter, xiter]
                            v2 = data[ziter, yiter, xiter+1]
                            v3 = data[ziter, yiter+1, xiter]
                            v4 = data[ziter, yiter+1, xiter+1]
                            v5 = data[ziter+1, yiter, xiter]
                            v6 = data[ziter+1, yiter, xiter+1]
                            v7 = data[ziter+1, yiter+1, xiter]
                            v8 = data[ziter+1, yiter+1, xiter+1]

                            freqs = {}
                            for v in (v1, v2, v3, v4, v5, v6, v7, v8):
                                freqs[v] = freqs.get(v, 0) + 1

                            maxval = 0
                            freqkey = 0
                            for key, val in freqs.items():
                                if val > maxval:
                                    maxval = val
                                    freqkey = key

                            data2[ziter // 2, yiter // 2, xiter // 2] = freqkey

                return (coords, data2)

            if islabelblk:
                downsampleddata = volumedata.map(downsamplelabels)
            else:
                downsampleddata = volumedata.map(downsamplegray)

            appname = self.APPNAME
            delimiter = self.config_data["options"]["blankdelimiter"]

            # write results ?!
            def write2dvid(vdata):
                from libdvid import ConnectionMethod
                import numpy
                node_service = retrieve_node_service(server, uuid, resource_server, resource_port, appname)

                coords, data = vdata
                xiter, yiter, ziter = coords

                # set block indices
                zbindex = ziter
                ybindex = yiter
                zsize, ysize, xsize = data.shape
                xbindex = xiter * maxxrun // 2

                # retrieve blocks
                blockbuffer = b""  # must be bytes: block.tobytes() is appended below

                # skip blank blocks
                startblock = False
                xrun = 0

                if islabelblk:
                    vals = numpy.unique(data)
                    # TODO: ignore blank blocks within an x line
                    if not (len(vals) == 1 and vals[0] == 0):
                        if resource_server != "":
                            node_service.put_labels3D(currsource, data,
                                                      (zbindex*BLKSIZE, ybindex*BLKSIZE, xbindex*BLKSIZE),
                                                      compress=True, throttle=False)
                        else:
                            node_service.put_labels3D(currsource, data,
                                                      (zbindex*BLKSIZE, ybindex*BLKSIZE, xbindex*BLKSIZE),
                                                      compress=True)
                else:
                    for iterx in range(0, xsize, BLKSIZE):
                        block = data[:, :, iterx:iterx+BLKSIZE]
                        vals = numpy.unique(block)
                        if len(vals) == 1 and vals[0] == delimiter:
                            # the block is blank: if the previous block has data,
                            # push the blocks in the current queue
                            if startblock:
                                node_service.custom_request(str((currsource + "/blocks/%d_%d_%d/%d")
                                                                % (xbindex, ybindex, zbindex, xrun)),
                                                            blockbuffer, ConnectionMethod.POST)
                                startblock = False
                                xrun = 0
                                blockbuffer = b""
                        else:
                            if not startblock:
                                xbindex = xiter * maxxrun // 2 + iterx // BLKSIZE
                                startblock = True
                            blockbuffer += block.tobytes()
                            xrun += 1

                    # write-out leftover blocks
                    if xrun > 0:
                        node_service.custom_request(str((currsource + "/blocks/%d_%d_%d/%d")
                                                        % (xbindex, ybindex, zbindex, xrun)),
                                                    blockbuffer, ConnectionMethod.POST)

            downsampleddata.foreach(write2dvid)

        # adjust max coordinate for new level
        xspan = (xspan - 1) // 2
        yspan = (yspan - 1) // 2
        zspan = (zspan - 1) // 2
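# The per-voxel Python loop in downsamplelabels above is the hot spot its
# TODO refers to. A possible vectorized alternative (a sketch, not taken
# from this codebase; assumes even dimensions): gather the eight corners of
# every 2x2x2 cell with strided slicing, then take the per-cell mode in
# pure numpy by sorting the candidates and counting multiplicities.
import numpy

def downsample_labels_mode(data):
    """Majority-vote 2x downsampling of a label volume (dims must be even)."""
    corners = numpy.stack([data[z::2, y::2, x::2]
                           for z in (0, 1) for y in (0, 1) for x in (0, 1)])
    s = numpy.sort(corners, axis=0)                  # (8, z/2, y/2, x/2)
    counts = (s[:, None] == s[None, :]).sum(axis=0)  # multiplicity of each sorted candidate
    best = numpy.take_along_axis(s, counts.argmax(axis=0)[None], axis=0)
    return best[0].astype(numpy.uint64)

labels = numpy.random.randint(0, 3, (4, 4, 4))
print(downsample_labels_mode(labels).shape)  # (2, 2, 2)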
def execute(self):
    # TODO: handle 64 bit segmentation
    from pyspark import SparkContext
    from pyspark import StorageLevel
    from DVIDSparkServices.reconutils.Segmentor import Segmentor

    self.chunksize = self.config_data["options"]["chunk-size"]

    # create datatype in the beginning
    node_service = retrieve_node_service(self.config_data["dvid-info"]["dvid-server"],
                                         self.config_data["dvid-info"]["uuid"],
                                         self.resource_server, self.resource_port)

    # grab ROI subvolumes and find neighbors
    distsubvolumes = self.sparkdvid_context.parallelize_roi(self.config_data["dvid-info"]["roi"],
                                                            self.chunksize, self.contextbuffer, True)

    contextbuffer = self.contextbuffer

    # do not recompute ROI for each iteration
    distsubvolumes.persist()

    # instantiate the voxel prediction plugin
    import importlib
    full_function_name = self.config_data["options"]["predict-voxels"]["function"]
    module_name = '.'.join(full_function_name.split('.')[:-1])
    function_name = full_function_name.split('.')[-1]
    module = importlib.import_module(module_name)

    parameters = self.config_data["options"]["predict-voxels"]["parameters"]
    vprediction_function = partial(getattr(module, function_name), **parameters)

    # determine number of iterations
    num_parts = len(distsubvolumes.collect())
    iteration_size = self.config_data["options"]["iteration-size"]
    if iteration_size == 0:
        iteration_size = num_parts

    num_iters = num_parts // iteration_size
    if num_parts % iteration_size > 0:
        num_iters += 1

    feature_chunk_list = []

    # enable checkpointing if not empty
    checkpoint_dir = self.config_data["options"]["checkpoint-dir"]

    # enable rollback of iterations if necessary
    rollback = False
    if self.config_data["options"]["checkpoint"]:
        rollback = True

    for iternum in range(0, num_iters):
        # it might make sense to randomly map partitions for selection
        # in case something pathological is happening -- if the original
        # partitioner is randomish then this should be fine
        def subset_part(sid_data):
            (s_id, _data) = sid_data
            return (s_id % num_iters) == iternum

        # should preserve partitioner
        distsubvolumes_part = distsubvolumes.filter(subset_part)

        # get grayscale chunks with specified overlap
        gray_chunks = self.sparkdvid_context.map_grayscale8(distsubvolumes_part,
                                                            self.config_data["dvid-info"]["grayscale"])

        pred_checkpoint_dir = ""
        if checkpoint_dir:
            pred_checkpoint_dir = checkpoint_dir + "/prediter-" + str(iternum)

        # For now, we always read predictions if available, and always write them if not.
        # TODO: Add config settings to control read/write behavior.
        @Segmentor.use_block_cache(pred_checkpoint_dir, allow_read=True, allow_write=True)
        def predict_voxels(sv_gray):
            (_subvolume, gray) = sv_gray
            return vprediction_function(gray, None)

        vox_preds = gray_chunks.values().map(predict_voxels)     # predictions only
        vox_preds = distsubvolumes_part.values().zip(vox_preds)  # (subvolume, predictions)

        pdconf = self.config_data["dvid-info"]
        resource_server = self.resource_server
        resource_port = self.resource_port

        # retrieve segmentation and generate features
        def generate_features(vox_pred):
            import numpy
            (subvolume, pred) = vox_pred
            pred = numpy.ascontiguousarray(pred)

            # extract labelblks
            border = 1  # only one pixel needed to find edges

            # get sizes of box
            size_z = subvolume.box.z2 + 2*border - subvolume.box.z1
            size_y = subvolume.box.y2 + 2*border - subvolume.box.y1
            size_x = subvolume.box.x2 + 2*border - subvolume.box.x1

            # retrieve data from box start position considering border
            # !! technically ROI is not respected but unwritten segmentation
            # will be ignored since it will have 0-valued pixels.
            @auto_retry(3, pause_between_tries=60.0, logging_name=__name__)
            def get_seg():
                node_service = retrieve_node_service(pdconf["dvid-server"], pdconf["uuid"],
                                                     resource_server, resource_port)

                # retrieve data from box start position
                # Note: libdvid uses zyx order for python functions
                if resource_server != "":
                    return node_service.get_labels3D(str(pdconf["segmentation-name"]),
                                                     (size_z, size_y, size_x),
                                                     (subvolume.box.z1-border, subvolume.box.y1-border, subvolume.box.x1-border),
                                                     throttle=False)
                else:
                    return node_service.get_labels3D(str(pdconf["segmentation-name"]),
                                                     (size_z, size_y, size_x),
                                                     (subvolume.box.z1-border, subvolume.box.y1-border, subvolume.box.x1-border))

            initial_seg = get_seg()

            # !!! potentially dangerous but needed for now
            initial_seg = initial_seg.astype(numpy.uint32)

            pred2 = pred[(contextbuffer-border):-(contextbuffer-border),
                         (contextbuffer-border):-(contextbuffer-border),
                         (contextbuffer-border):-(contextbuffer-border), :].copy()
            z, y, x, num_chans = pred2.shape

            # call neuroproof and generate features
            from neuroproof import FocusedProofreading
            # "edges": [ edge ] where edge = [node1, node2, edgesize, all features...]
            # "vertices": [ vertex ] where vertex = [id, size, all features...]
            features = FocusedProofreading.extract_features(initial_seg, pred2)

            element_list = []
            # iterate edges and create ((node1, node2), features)
            if "Edges" in features:
                # could have only one vertex in a partition and no edges
                for edge in features["Edges"]:
                    n1 = edge["Id1"]
                    n2 = edge["Id2"]
                    edge["Loc1"][0] += subvolume.box.x1
                    edge["Loc1"][1] += subvolume.box.y1
                    edge["Loc1"][2] += subvolume.box.z1
                    edge["Loc2"][0] += subvolume.box.x1
                    edge["Loc2"][1] += subvolume.box.y1
                    edge["Loc2"][2] += subvolume.box.z1
                    if n1 > n2:
                        n1, n2 = n2, n1
                    element_list.append(((n1, n2), (num_chans, edge)))

            for node in features["Vertices"]:
                n1 = node["Id"]
                element_list.append(((n1, -1), (num_chans, node)))

            return element_list

        features = vox_preds.flatMap(generate_features)

        # retrieve previously computed RDD or save current RDD
        if checkpoint_dir != "":
            features = self.sparkdvid_context.checkpointRDD(features,
                                                            checkpoint_dir + "/featureiter-" + str(iternum),
                                                            rollback)

        # any forced persistence will result in costly
        # pickling, lz4 compressed numpy array should help
        features.persist(StorageLevel.MEMORY_AND_DISK_SER)

        feature_chunk_list.append(features)

    features = feature_chunk_list[0]
    for iter1 in range(1, len(feature_chunk_list)):
        # this could cause serialization problems if there are a large number of iterations (>100)
        features = features.union(feature_chunk_list[iter1])

    # grab num channels from boundary prediction
    features.persist(StorageLevel.MEMORY_AND_DISK_SER)
    first_feature = features.first()
    (key1, key2), (num_channels, foo) = first_feature

    # remove num channels from features
    def remove_num_channels(featurepair):
        foo, feature = featurepair
        return feature
    features = features.mapValues(remove_num_channels)

    import json

    # merge edge and node features -- does not require reading classifier
    # node features are encoded as (vertex id, -1)
    def combine_edge_features(element1, element2):
        from neuroproof import FocusedProofreading

        if "Id2" in element1:  # are edges
            return FocusedProofreading.combine_edge_features(
                    json.dumps(element1, cls=NumpyConvertingEncoder),
                    json.dumps(element2, cls=NumpyConvertingEncoder),
                    num_channels)
        else:  # are vertices
            return FocusedProofreading.combine_vertex_features(
                    json.dumps(element1, cls=NumpyConvertingEncoder),
                    json.dumps(element2, cls=NumpyConvertingEncoder),
                    num_channels)

    features_combined = features.reduceByKey(combine_edge_features)

    #features_combined.persist()
    # TODO: option to serialize features to enable other analyses

    # join node and edge probs
    def retrieve_nodes(val):
        (n1, n2), features = val
        return n2 == -1

    def retrieve_edges(val):
        (n1, n2), features = val
        return n2 != -1

    node_features = features_combined.filter(retrieve_nodes)
    edge_features = features_combined.filter(retrieve_edges)

    node_features = node_features.map(lambda x: (x[0][0], x[1]))
    edge1_features = edge_features.map(lambda x: (x[0][0], x[1]))
    edge2_features = edge_features.map(lambda x: (x[0][1], x[1]))

    # multiple edges with the same key
    edge1_node_features = edge1_features.leftOuterJoin(node_features)
    edge2_node_features = edge2_features.leftOuterJoin(node_features)

    def reset_edgekey(val):
        key, (edge, node) = val
        n1 = edge["Id1"]
        n2 = edge["Id2"]
        if n1 > n2:
            n1, n2 = n2, n1
        return ((n1, n2), (edge, node))

    edge1_node_features = edge1_node_features.map(reset_edgekey)
    edge2_node_features = edge2_node_features.map(reset_edgekey)

    edge_node_features = edge1_node_features.join(edge2_node_features)

    # generate prob for each edge (JSON: body sizes, edge list with prob)
    classifierlocation = self.config_data["options"]["segment-classifier"]

    def compute_prob(edge_node_features):
        from neuroproof import FocusedProofreading
        classifier = FocusedProofreading.ComputeProb(str(classifierlocation), num_channels)

        res_list = []
        for edge_node_edge_node in edge_node_features:
            edge_key, ((edge, node1), (edge_dummy, node2)) = edge_node_edge_node
            weight = classifier.compute_prob(json.dumps(edge, cls=NumpyConvertingEncoder),
                                             json.dumps(node1, cls=NumpyConvertingEncoder),
                                             json.dumps(node2, cls=NumpyConvertingEncoder))
            res_list.append((int(node1["Id"]), int(node2["Id"]),
                             int(node1["Weight"]), int(node2["Weight"]),
                             int(edge["Weight"]), weight,
                             edge["Loc1"], edge["Loc2"]))
        return res_list

    # avoid loading large classifier for each small edge
    allprobs = edge_node_features.mapPartitions(compute_prob)

    # collect all edges and send to DVID (TODO: add option to dump to disk)
    allprobs_combined = allprobs.collect()

    bodyinfo = {}
    edges = []

    for edge_info in allprobs_combined:
        node1, node2, node1_size, node2_size, edge_size, weight, loc1, loc2 = edge_info
        bodyinfo[node1] = node1_size
        bodyinfo[node2] = node2_size
        edges.append({"Id1": node1, "Id2": node2, "Weight": weight,
                      "Loc1": loc1, "Loc2": loc2})

    bodies = []
    for (key, val) in bodyinfo.items():
        bodies.append({"Id": key, "Weight": val})

    graph = {}
    graph["Vertices"] = bodies
    graph["Edges"] = edges

    SAVE_TO_FILE = False
    if SAVE_TO_FILE:
        graph_filepath = '/tmp/graph-output.json'
        with open(graph_filepath, 'w') as f:
            self.workflow_entry_exit_printer.warn("Writing graph json to file:\n{}".format(graph_filepath))
            import json
            json.dump(graph, f, indent=4, separators=(',', ': '), cls=NumpyConvertingEncoder)
        self.workflow_entry_exit_printer.write_data("Wrote graph to disk")  # write to logger after spark job

    UPLOAD_TO_DVID = True
    if UPLOAD_TO_DVID:
        # load entire graph into DVID
        node_service.create_graph(str(self.config_data["dvid-info"]["graph-name"]))
        server = str(self.config_data["dvid-info"]["dvid-server"])
        #if not server.startswith("http://"):
        #    server = "http://" + server
        #session = default_dvid_session()
        #session.post(server + "/api/node/" + str(self.config_data["dvid-info"]["uuid"]) + "/"
        #             + str(self.config_data["dvid-info"]["graph-name"]) + "/subgraph", json=graph)
        #self.workflow_entry_exit_printer.write_data("Wrote DVID graph")  # write to logger after spark job

    if self.config_data["options"]["debug"]:
        import json
        print("DEBUG:", json.dumps(graph, cls=NumpyConvertingEncoder))

    # write dvid to specified file (if provided)
    if "output-file" in self.config_data["options"] and self.config_data["options"]["output-file"] != "":
        filename = self.config_data["options"]["output-file"]

        edgelist = []
        for edge in graph["Edges"]:
            edgelist.append({"node1": edge["Id1"], "node2": edge["Id2"],
                             "weight": edge["Weight"],
                             "loc1": edge["Loc1"], "loc2": edge["Loc2"]})

        npgraph = {}
        npgraph["edge_list"] = edgelist
        with open(filename, 'w') as fout:
            fout.write(json.dumps(npgraph, cls=NumpyConvertingEncoder))
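# Standalone sketch (synthetic data) of the keying convention used above:
# vertices ride along under key (id, -1) so that a single reduceByKey can
# merge edge features keyed by (min(id1,id2), max(id1,id2)) and vertex
# features in one pass, and a filter on key[1] == -1 separates them again.
elements = [((4, 7), "edge 4-7 features"),
            ((4, -1), "vertex 4 features"),
            ((7, -1), "vertex 7 features")]

vertices = [e for e in elements if e[0][1] == -1]
edges = [e for e in elements if e[0][1] != -1]
print(vertices)  # [((4, -1), ...), ((7, -1), ...)]
print(edges)     # [((4, 7), ...)]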
def write_blocks(part_vol):
    logger = logging.getLogger(__name__)
    part, data = part_vol
    offset = part.get_offset()
    reloffset = part.get_reloffset()
    _, _, x_size = data.shape
    if x_size % blksize != 0:
        # check if padded
        raise ValueError("Data is not block aligned")

    shiftedoffset = (offset.z + reloffset.z, offset.y + reloffset.y, offset.x + reloffset.x)
    logger.info("Starting WRITE of partition at: {} size: {}".format(shiftedoffset, data.shape))
    node_service = retrieve_node_service(server, uuid, resource_server, resource_port, appname)

    # Find all non-zero blocks (and record by block index)
    block_coords = []
    for block_index, block_x in enumerate(range(0, x_size, blksize)):
        if not (data[:, :, block_x:block_x+blksize] == delimiter).all():
            # (Don't care about Z,Y indexes, just X-index)
            block_coords.append((0, 0, block_index))

    # Find *runs* of non-zero blocks
    block_runs = runlength_encode(block_coords, True)  # returns [[Z,Y,X1,X2], [Z,Y,X1,X2], ...]

    # Convert stop indexes from inclusive to exclusive
    block_runs[:, -1] += 1

    # Discard Z,Y indexes and convert from indexes to pixels
    ranges = blksize * block_runs[:, 2:4]

    # iterate through contiguous blocks and write to DVID
    # TODO: write compressed data directly into DVID
    for (data_x_start, data_x_end) in ranges:
        with Timer() as copy_timer:
            datacrop = data[:, :, data_x_start:data_x_end].copy()
        logger.info("Copied {}:{} in {:.3f} seconds".format(data_x_start, data_x_end, copy_timer.seconds))

        data_offset_zyx = (shiftedoffset[0], shiftedoffset[1], shiftedoffset[2] + data_x_start)

        if dataname is not None:
            with Timer() as put_timer:
                if not israw:
                    logger.info("STARTING Put: labels block {}".format(data_offset_zyx))
                    if resource_server != "" or dvid_info["dvid-server"].startswith("http://127.0.0.1"):
                        node_service.put_labels3D(dataname, datacrop, data_offset_zyx,
                                                  compress=True, throttle=False)
                    else:
                        node_service.put_labels3D(dataname, datacrop, data_offset_zyx, compress=True)
                else:
                    logger.info("STARTING Put: raw block {}".format(data_offset_zyx))
                    if resource_server != "" or dvid_info["dvid-server"].startswith("http://127.0.0.1"):
                        node_service.put_gray3D(dataname, datacrop, data_offset_zyx,
                                                compress=False, throttle=False)
                    else:
                        node_service.put_gray3D(dataname, datacrop, data_offset_zyx, compress=False)
            logger.info("Put block {} in {:.3f} seconds".format(data_offset_zyx, put_timer.seconds))

        if dataname_lossy is not None:
            logger.info("STARTING Put: lossy block {}".format(data_offset_zyx))
            with Timer() as put_lossy_timer:
                if resource_server != "" or dvid_info["dvid-server"].startswith("http://127.0.0.1"):
                    node_service.put_gray3D(dataname_lossy, datacrop, data_offset_zyx,
                                            compress=False, throttle=False)
                else:
                    node_service.put_gray3D(dataname_lossy, datacrop, data_offset_zyx, compress=False)
            logger.info("Put lossy block {} in {:.3f} seconds".format(data_offset_zyx, put_lossy_timer.seconds))
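# The runlength_encode() helper isn't defined in this file. Here is a minimal
# standalone sketch (an assumption about its behavior, not the library's
# implementation) of turning sorted (Z, Y, X) block coordinates into
# [Z, Y, X1, X2] runs with inclusive stops, matching the return format the
# comment in write_blocks documents.
import numpy

def runlength_encode_sketch(coords):
    runs = []
    for (z, y, x) in coords:
        if runs and runs[-1][0] == z and runs[-1][1] == y and runs[-1][3] == x - 1:
            runs[-1][3] = x  # extend the current run
        else:
            runs.append([z, y, x, x])  # start a new run
    return numpy.array(runs)

print(runlength_encode_sketch([(0, 0, 2), (0, 0, 3), (0, 0, 7)]))
# [[0 0 2 3]
#  [0 0 7 7]]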
def neuroproof_agglomerate(grayscale, predictions, supervoxels, classifier, threshold=0.20, mitochannel=2):
    """Main agglomeration function

    Args:
        grayscale = 3D uint8 (z,y,x) -- Not used.
        predictions = 4D float32 numpy label array (z, y, x, ch)
        supervoxels = 3D uint32 numpy label array (z,y,x)
        classifier = file location or DVID (assumed to be xml unless .h5 is explicit in name)
        threshold = threshold (default = 0.20)
        mitochannel = prediction channel for mito (default 2) (empty means no mito mode)

    Returns:
        segmentation = 3D numpy label array (z,y,x)
    """
    print("neuroproof_agglomerate(): Starting with label data: dtype={}, shape={}"
          .format(str(supervoxels.dtype), supervoxels.shape))

    import numpy

    # return immediately if no segmentation
    if len(numpy.unique(supervoxels)) <= 1:
        return supervoxels

    #from neuroproof import Classifier, Agglomeration
    from neuroproof import Agglomeration
    import os

    # verify channels
    assert predictions.ndim == 4
    z, y, x, nch = predictions.shape

    if nch > 2:
        # make sure mito is in the second channel by swapping it with channel 2
        predictions[..., [2, mitochannel]] = predictions[..., [mitochannel, 2]]

    pathname = str(classifier["path"])
    tempfilehold = None
    tclassfile = ""

    # write classifier to temporary file if stored on DVID
    if "dvid-server" in classifier:
        # allow user to specify any server and version for the data
        dvidserver = classifier["dvid-server"]
        uuid = classifier["uuid"]

        # extract file and store into temporary location
        node_service = retrieve_node_service(str(dvidserver), str(uuid))
        name_key = pathname.split('/')
        classfile = node_service.get(name_key[0], name_key[1])

        # create temp file
        import tempfile
        tempfilehold = tempfile.NamedTemporaryFile(delete=False)

        # open file and write data (classifier payload is binary)
        with open(tempfilehold.name, 'wb') as fout:
            fout.write(classfile)

        # move temporary file to have the same extension as provided file
        if pathname.endswith('.h5'):
            tclassfile = tempfilehold.name + ".h5"
        else:
            tclassfile = tempfilehold.name + ".xml"
        os.rename(tempfilehold.name, tclassfile)
    else:
        # just read from directory
        tclassfile = pathname

    # load classifier from file
    #classifier = loadClassifier(tclassfile)

    # run agglomeration (supervoxels must be uint32 and predictions must be float32)
    segmentation = Agglomeration.agglomerate(supervoxels.astype(numpy.uint32),
                                             predictions.astype(numpy.float32),
                                             tclassfile, threshold)

    if tempfilehold is not None:
        os.remove(tclassfile)

    return segmentation
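# Tiny usage sketch: with a single-label supervoxel volume the function
# returns immediately (no classifier file or neuroproof call is needed),
# which makes for a cheap smoke test. Shapes, values, and the classifier
# path below are all synthetic/hypothetical.
import numpy

supervoxels = numpy.zeros((8, 8, 8), numpy.uint32)      # one label only
predictions = numpy.zeros((8, 8, 8, 2), numpy.float32)  # boundary + mito channels
grayscale = numpy.zeros((8, 8, 8), numpy.uint8)         # unused by the function

seg = neuroproof_agglomerate(grayscale, predictions, supervoxels,
                             classifier={"path": "classifier.xml"})  # hypothetical path
assert (seg == supervoxels).all()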
def execute(self):
    from DVIDSparkServices.reconutils import SimpleGraph
    from pyspark import SparkContext
    from pyspark import StorageLevel

    if "chunk-size" in self.config_data["options"]:
        self.chunksize = self.config_data["options"]["chunk-size"]

    # grab ROI
    distrois = self.sparkdvid_context.parallelize_roi(self.config_data["dvid-info"]["roi"],
                                                      chunk_size=self.chunksize, border=1)

    num_partitions = distrois.getNumPartitions()

    # map ROI to label volume (1 pixel overlap)
    label_chunks = self.sparkdvid_context.map_labels64(distrois,
                                                       self.config_data["dvid-info"]["label-name"],
                                                       border=1,
                                                       roiname=self.config_data["dvid-info"]["roi"])

    # map labels to graph data -- external program (eventually convert
    # neuroproof metrics and graph to a python library) ?!
    sg = SimpleGraph.SimpleGraph(self.config_data["options"])

    # extract graph
    graph_elements = label_chunks.flatMap(sg.build_graph)

    # group data for vertices and edges
    graph_elements_red = graph_elements.reduceByKey(lambda a, b: a + b)

    # repartition by first vertex to better group edges together
    graph_elements_red = graph_elements_red.partitionBy(num_partitions, lambda a: hash(a[0]))

    graph_elements_red.persist(StorageLevel.MEMORY_ONLY)  # ??
    graph_vertices = graph_elements_red.filter(sg.is_vertex)
    graph_edges = graph_elements_red.filter(sg.is_edge)

    # create graph
    node_service = retrieve_node_service(self.config_data["dvid-info"]["dvid-server"],
                                         self.config_data["dvid-info"]["uuid"],
                                         self.resource_server, self.resource_port)
    node_service.create_graph(str(self.config_data["dvid-info"]["graph-name"]))

    # dump graph -- should this be wrapped through utils or through sparkdvid ??
    # will this result in too many requests (should they be accumulated) ??
    # currently looking at one partition at a time to try to group requests
    self.sparkdvid_context.foreachPartition_graph_elements(graph_vertices,
                                                           self.config_data["dvid-info"]["graph-name"])
    self.sparkdvid_context.foreachPartition_graph_elements(graph_edges,
                                                           self.config_data["dvid-info"]["graph-name"])

    if "debug" in self.config_data["options"] and self.config_data["options"]["debug"]:
        num_elements = graph_elements.count()
        print("DEBUG: ", num_elements)

    graph_elements_red.unpersist()
def execute(self):
    from PIL import Image
    import numpy
    import os

    iterslices = self.BLKSIZE * self.config_data["options"]["numblocklayers"]

    minslice = self.config_data["minslice"]
    # map file to numpy array
    basename = self.config_data["basename"]

    # format should be gs://<bucket>/path
    gbucketname = ""
    gpath = ""
    if basename.startswith('gs://'):
        # parse google bucket names
        tempgs = basename.split('//')
        bucketpath = tempgs[1].split('/')
        gbucketname = bucketpath[0]
        gpath = '/'.join(bucketpath[1:])

    server = None

    xoffset = yoffset = zoffset = 0

    if "offset" in self.config_data["options"]:
        xoffset = self.config_data["options"]["offset"][0]
        yoffset = self.config_data["options"]["offset"][1]
        zoffset = self.config_data["options"]["offset"][2]

        if xoffset % self.BLKSIZE != 0:
            raise Exception("offset not block aligned")
        if yoffset % self.BLKSIZE != 0:
            raise Exception("offset not block aligned")
        if zoffset % self.BLKSIZE != 0:
            raise Exception("offset not block aligned")

        # use integer division so the block indices stay ints
        xoffset //= self.BLKSIZE
        yoffset //= self.BLKSIZE
        zoffset //= self.BLKSIZE

    # this will start the Z block writing at the specified offset
    # (changes default behavior when loading nonzero starting image slice)
    zoffset -= (minslice // self.BLKSIZE)

    # create metadata before workers start if using DVID
    if "output-dir" not in self.config_data or self.config_data["output-dir"] == "":
        # write to dvid
        server = self.config_data["dvid-info"]["dvid-server"]
        uuid = self.config_data["dvid-info"]["uuid"]
        grayname = self.config_data["dvid-info"]["grayname"]
        resource_server = str(self.resource_server)
        resource_port = self.resource_port

        # create grayscale type
        node_service = retrieve_node_service(server, uuid, resource_server,
                                             resource_port, self.APPNAME)
        node_service.create_grayscale8(str(grayname), self.BLKSIZE)

    for slice in range(self.config_data["minslice"], self.config_data["maxslice"] + 1, iterslices):
        # parallelize images across many machines
        imgs = self.sc.parallelize(list(range(slice, slice + iterslices)), iterslices)

        def img2npy(slicenum):
            try:
                img = None
                if gbucketname == "":
                    img = Image.open(basename % slicenum)
                else:
                    from gcloud import storage
                    from io import BytesIO
                    client = storage.Client()
                    gbucket = client.get_bucket(gbucketname)
                    gblob = gbucket.get_blob(gpath % slicenum)

                    # write to bytes which implements file interface
                    gblobfile = BytesIO()
                    gblob.download_to_file(gblobfile)
                    gblobfile.seek(0)
                    img = Image.open(gblobfile)
                return slicenum, numpy.array(img)
            except Exception as e:
                # just return a blank slice -- will be handled downstream
                return slicenum, numpy.zeros((0, 0), numpy.uint8)

        npy_images = imgs.map(img2npy)

        # map numpy array into y lines of block height
        blocksize = self.BLKSIZE
        blocklimit = self.BLOCKLIMIT

        def npy2lines(arrpair):
            z, arr = arrpair
            ysize, xsize = arr.shape
            npylines = []

            for itery in range(0, ysize, blocksize):
                line = numpy.zeros((blocksize, ((xsize-1) // blocksize + 1) * blocksize), numpy.uint8)
                uppery = blocksize
                if (itery + blocksize) > ysize:
                    uppery = ysize - itery

                line[0:uppery, 0:xsize] = arr[itery:itery+blocksize, 0:xsize]

                npylines.append((itery // blocksize, (z, line)))

            return npylines

        npy_lines = npy_images.flatMap(npy2lines)

        # reduce y lines into DVID blocks
        groupedlines = npy_lines.groupByKey()

        # map y lines => (y, blocks)
        def lines2blocks(linespair):
            y, linesp = linespair

            xsize = None
            blockdata = None
            for z, line in linesp:
                if xsize is None:
                    _, xsize = line.shape
                    blockdata = numpy.zeros((iterslices, blocksize, xsize), numpy.uint8)

                blockdata[(z - minslice) % iterslices, :, :] = line

            return y, blockdata

        yblocks = groupedlines.map(lines2blocks)

        # map multilayer of blocks to an array of single layer blocks
        def multi2single(yblocks):
            ybindex, blocks = yblocks
            blockarr = []
            num_layers = iterslices // blocksize
            for layer in range(0, num_layers):
                blockarr.append(((ybindex, layer),
                                 blocks[layer*blocksize:(layer*blocksize + blocksize), :, :]))
            return blockarr

        yblockssplit = yblocks.flatMap(multi2single)

        if "output-dir" in self.config_data and self.config_data["output-dir"] != "":
            # write blocks to disk for separate post-process -- write directly to DVID eventually?
            output_dir = self.config_data["output-dir"]

            def write2disk(yblocks):
                zbindex = slice // blocksize
                (ybindex, layer), blocks = yblocks
                zbindex += layer

                zsize, ysize, xsize = blocks.shape

                outdir = output_dir
                outdir += "/" + ("%05d" % zbindex) + ".z/"
                filename = outdir + ("%05d" % ybindex) + "-" + str(xsize // blocksize) + ".blocks"

                try:
                    os.makedirs(outdir)
                except Exception as e:
                    # directory already exists
                    pass

                # extract blocks from buffer and write to disk
                fout = open(filename, 'wb')
                for iterx in range(0, xsize, blocksize):
                    block = blocks[:, :, iterx:iterx+blocksize].copy()
                    fout.write(block)
                fout.close()

            yblockssplit.foreach(write2disk)
        else:
            # write to dvid
            server = self.config_data["dvid-info"]["dvid-server"]
            uuid = self.config_data["dvid-info"]["uuid"]
            grayname = self.config_data["dvid-info"]["grayname"]
            appname = self.APPNAME
            delimiter = self.config_data["options"]["blankdelimiter"]

            def write2dvid(yblocks):
                from libdvid import ConnectionMethod
                import numpy
                node_service = retrieve_node_service(server, uuid, resource_server,
                                                     resource_port, appname)

                # get block coordinates
                zbindex = slice // blocksize
                (ybindex, layer), blocks = yblocks
                zbindex += layer
                zsize, ysize, xsize = blocks.shape

                # retrieve blocks
                blockbuffer = b""  # must be bytes: block.tobytes() is appended below

                # skip blank blocks
                startblock = False
                xrun = 0
                xbindex = 0  # assume x starts at 0!!

                for iterx in range(0, xsize, blocksize):
                    block = blocks[:, :, iterx:iterx+blocksize].copy()
                    vals = numpy.unique(block)
                    if len(vals) == 1 and vals[0] == delimiter:
                        # the block is blank: if the previous block has data,
                        # push the blocks in the current queue
                        if startblock:
                            node_service.custom_request(str((grayname + "/blocks/%d_%d_%d/%d")
                                                            % (xbindex+xoffset, ybindex+yoffset, zbindex+zoffset, xrun)),
                                                        blockbuffer, ConnectionMethod.POST)
                            startblock = False
                            xrun = 0
                            blockbuffer = b""
                    else:
                        if not startblock:
                            xbindex = iterx // blocksize
                            startblock = True
                        blockbuffer += block.tobytes()
                        xrun += 1

                        if blocklimit > 0 and xrun >= blocklimit:
                            # hit the block limit: push the blocks in the current queue
                            node_service.custom_request(str((grayname + "/blocks/%d_%d_%d/%d")
                                                            % (xbindex+xoffset, ybindex+yoffset, zbindex+zoffset, xrun)),
                                                        blockbuffer, ConnectionMethod.POST)
                            startblock = False
                            xrun = 0
                            blockbuffer = b""

                # write-out leftover blocks
                if xrun > 0:
                    node_service.custom_request(str((grayname + "/blocks/%d_%d_%d/%d")
                                                    % (xbindex+xoffset, ybindex+yoffset, zbindex+zoffset, xrun)),
                                                blockbuffer, ConnectionMethod.POST)

            yblockssplit.foreach(write2dvid)

        self.workflow_entry_exit_printer.write_data("Ingested %d slices" % iterslices)

    # just fetch one image at driver to get dims
    width = height = 1
    try:
        img = None
        if gbucketname == "":
            img = Image.open(basename % minslice)
            width, height = img.width, img.height
        else:
            from gcloud import storage
            from io import BytesIO
            client = storage.Client()
            gbucket = client.get_bucket(gbucketname)
            gblob = gbucket.get_blob(gpath % minslice)

            # write to bytes which implements file interface
            gblobfile = BytesIO()
            gblob.download_to_file(gblobfile)
            gblobfile.seek(0)
            img = Image.open(gblobfile)
            width, height = img.width, img.height
    except Exception as e:
        # just set size to 1
        pass

    if "output-dir" not in self.config_data or self.config_data["output-dir"] == "":
        # update metadata
        grayext = {}
        grayext["MinPoint"] = [xoffset*self.BLKSIZE,
                               yoffset*self.BLKSIZE,
                               zoffset*self.BLKSIZE + minslice]
        grayext["MaxPoint"] = [xoffset*self.BLKSIZE + width - 1,
                               yoffset*self.BLKSIZE + height - 1,
                               zoffset*self.BLKSIZE + minslice + self.config_data["maxslice"]]
        if not server.startswith("http://"):
            server = "http://" + server
        session = default_dvid_session()
        session.post(server + "/api/node/" + uuid + "/" + grayname + "/extents", json=grayext)
def findindexerrors(bodies):
    index, bodylist = bodies
    bodymappings = {}
    rangequery = []
    for (body, bids) in bodylist:
        bodymappings[body] = bids
        rangequery.append(body)

    # call block index DVID API
    from libdvid import ConnectionMethod
    import numpy as np
    rangequery.sort()
    b1 = rangequery[0]
    b2 = rangequery[-1]

    ns = retrieve_node_service(server, uuid, resource_server, resource_port, appname)
    addr = str(labelname + "/sparsevols-coarse/" + str(b1) + "/" + str(b2))
    res = ns.custom_request(addr, None, ConnectionMethod.GET)

    # decode the run-length encoded block coordinates
    bodyblockrle = np.frombuffer(res, dtype=np.int32)
    currindex = 0
    bodymappingsdvid = {}
    while currindex < len(bodyblockrle):
        # retrieve body id (64 bits split across two 32-bit words)
        hb = bodyblockrle[currindex]
        lb = bodyblockrle[currindex+1]
        currbody = hb | lb << 32
        currindex += 2

        # retrieve runlengths
        numspans = bodyblockrle[currindex]
        currindex += 1
        blockarray = []

        for spaniter in range(numspans):
            dimx = bodyblockrle[currindex]
            currindex += 1
            dimy = bodyblockrle[currindex]
            currindex += 1
            dimz = bodyblockrle[currindex]
            currindex += 1
            runx = bodyblockrle[currindex]
            currindex += 1

            # create body mappings
            for xblock in range(dimx, dimx+runx):
                blockarray.append((dimz, dimy, xblock))

        bodymappingsdvid[currbody] = blockarray

    allerrors = []
    # find differences
    for body, blocklist in bodymappings.items():
        if body not in bodymappingsdvid:
            allerrors.append([True, body, blocklist])
            continue

        # false negatives
        bset = set(blocklist)
        bsetdvid = set(bodymappingsdvid[body])
        errors = list(bset - bsetdvid)
        if len(errors) > 0:
            allerrors.append([True, body, errors])

        # false positives
        errors2 = list(bsetdvid - bset)
        if len(errors2) > 0:
            allerrors.append([False, body, errors2])

    return allerrors
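# Standalone sketch of the sparsevols-coarse wire format parsed above, using
# a synthetic int32 buffer (not real DVID output): the body id arrives as
# two 32-bit words, then a span count, then (x, y, z, xrun) per span.
import numpy as np

buf = np.array([7, 0,         # body id 7 (low word, high word)
                2,            # two spans follow
                1, 2, 3, 2,   # span 1: blocks (z=3, y=2, x=1..2)
                5, 2, 3, 1],  # span 2: block  (z=3, y=2, x=5)
               dtype=np.int32).tobytes()

words = np.frombuffer(buf, dtype=np.int32)
body = int(words[0]) | int(words[1]) << 32
blocks = []
i, nspans = 3, words[2]
for _ in range(nspans):
    x, y, z, xrun = words[i:i+4]
    i += 4
    blocks += [(z, y, xb) for xb in range(x, x + xrun)]
print(body, blocks)  # 7 [(3, 2, 1), (3, 2, 2), (3, 2, 5)]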
def execute(self): # imports here so that schema can be retrieved without installation from DVIDSparkServices.reconutils.metrics import Evaluate from pyspark import SparkContext from pyspark import StorageLevel import time import datetime import json starttime = time.time() node_service = retrieve_node_service(self.config_data["dvid-info"]["dvid-server"], self.config_data["dvid-info"]["uuid"], self.resource_server, self.resource_port) if "chunk-size" in self.config_data["options"]: self.chunksize = self.config_data["options"]["chunk-size"] # check if downsampling possible downsample_level = self.config_data["options"]["downsample-level"] # do not allow downsampling by more than 32x in each dim assert 0 <= downsample_level <= 5 if downsample_level > 0: # check if labelmap or labelarray and grab the max downres level datameta = node_service.get_typeinfo(str(self.config_data["dvid-info"]["label-name"])) labeltype = datameta["Base"]["TypeName"] assert labeltype in ("labelarray", "labelmap") maxlevel = datameta["Extended"]["MaxDownresLevel"] assert maxlevel >= downsample_level if "dvid-info-comp" in self.config_data: node_service2 = retrieve_node_service(self.config_data["dvid-info-comp"]["dvid-server"], self.config_data["dvid-info-comp"]["uuid"], self.resource_server, self.resource_port) datameta = node_service2.get_typeinfo(str(self.config_data["dvid-info-comp"]["label-name"])) labeltype = datameta["Base"]["TypeName"] assert labeltype in ("labelarray", "labelmap") maxlevel = datameta["Extended"]["MaxDownresLevel"] assert maxlevel >= downsample_level # grab ROI (no overlap and no neighbor checking) distrois = self.sparkdvid_context.parallelize_roi(self.config_data["dvid-info"]["roi"], self.chunksize, border=1, partition_method="grid-aligned-" + str(self.config_data["options"]["chunk-size"])) def setBorderHack(subvolume): subvolume.border = 0 return subvolume distrois = distrois.mapValues(setBorderHack) # modify substack extents and roi if downsample_level > 0: def downsampleROIs(subvolume): z1 = subvolume.box.z1 y1 = subvolume.box.y1 x1 = subvolume.box.x1 z2 = subvolume.box.z2 y2 = subvolume.box.y2 x2 = subvolume.box.x2 for level in range(0, downsample_level): subvolume.roi_blocksize = subvolume.roi_blocksize // 2 z1 = z1 // 2 y1 = y1 // 2 x1 = x1 // 2 z2 = z2 // 2 y2 = y2 // 2 x2 = x2 // 2 subvolume.box = SubvolumeNamedTuple(z1,y1,x1,z2,y2,x2) return subvolume distrois = distrois.mapValues(downsampleROIs) # check for self mode selfcompare = False dvidserver2 = "" dviduuid2 = "" dvidlname2 = "" if "dvid-info-comp" in self.config_data: dvidserver2 = self.config_data["dvid-info-comp"]["dvid-server"] dviduuid2 = self.config_data["dvid-info-comp"]["uuid"] dvidlname2 = self.config_data["dvid-info-comp"]["label-name"] # map ROI to two label volumes (0 overlap) # this will be used for all volume and point overlaps # (preserves partitioner) # (key, (subvolume, seggt, seg2)) # creates a dummy volume if no second server is available lpairs = self.sparkdvid_context.map_labels64_pair( distrois, self.config_data["dvid-info"]["label-name"], dvidserver2, dviduuid2, dvidlname2, self.config_data["dvid-info"]["roi"], downsample_level) # TODO ??
how to handle debug coords # filter bodies if there is a body list from GT important_bodies = self.config_data["options"]["important-bodies"] if self.config_data["options"]["enable-sparse"]: # if sparse mode is enable there should be a body list assert (len(important_bodies) > 0) else: # should only filter bodies for non-sparse mode # if the bodies densely cover the volume def filter_bodies(label_pairs): from DVIDSparkServices.sparkdvid.CompressedNumpyArray import CompressedNumpyArray import numpy subvolume, labelgtc, label2c = label_pairs # extract numpy arrays labelgt = labelgtc.deserialize() # filter bodies from gt bodylist = numpy.unique(labelgt) intersecting_bodies = set(bodylist).intersection(set(important_bodies)) mask = numpy.zeros(labelgt.shape) for body in intersecting_bodies: mask[labelgt==body] = 1 labelgt[mask==0] = 0 # compress results return (subvolume, CompressedNumpyArray(labelgt), label2c) if len(important_bodies) > 0: lpairs = lpairs.mapValues(filter_bodies) def _split_disjoint_labels(label_pairs): """Helper function: map subvolumes so disconnected bodies are different labels. Function preserves partitioner. Args: label_pairs (rdd): RDD is of (subvolume id, data) Returns: Original RDD including mappings for gt and the test seg. """ from DVIDSparkServices.reconutils.morpho import split_disconnected_bodies subvolume, labelgt, label2 = label_pairs # split bodies up labelgt_split, labelgt_map = split_disconnected_bodies(labelgt) label2_split, label2_map = split_disconnected_bodies(label2) # compress results return (subvolume, labelgt_map, label2_map, labelgt_split, label2_split) # split bodies that are merged outside of the subvolume # (preserves partitioner) # => (key, (subvolume, seggt-split, seg2-split, seggt-map, seg2-map)) lpairs_split = lpairs.mapValues(_split_disjoint_labels) if self.config_data["options"]["run-cc"]: # save current segmentation state lpairs_split.persist() # apply connected components def _extractfaces(label_pairs): """Extracts 6 sides from each cube. 
""" key, (subvolume, gtmap, segmap, gtvol, segvol) = label_pairs # extract unique bodies not remapped allgt = set(numpy.unique(gtvol)) allseg = set(numpy.unique(segvol)) gtmapbodies = set() for key2, body in gtmap.items(): gtmapbodies.add(key2) segmapbodies = set() for key2, body in segmap.items(): segmapbodies.add(key2) allgt = allgt.difference(gtmapbodies) if 0 in allgt: allgt.remove(0) allseg = allseg.difference(segmapbodies) if 0 in allseg: allseg.remove(0) """ if 0 in allseg: allseg.remove(0) if 0 in allgt: allgt.remove(0) """ zmax,ymax,xmax = gtvol.shape start = (subvolume.box.z1, subvolume.box.y1, subvolume.box.x1) mappedfaces = [] # grab 6 faces for gt slicex0 = gtvol[:,:,0] slicexmax = gtvol[:,:,xmax-1] slicey0 = gtvol[:,0,:] sliceymax = gtvol[:,ymax-1,:] slicez0 = gtvol[0,:,:] slicezmax = gtvol[zmax-1,:,:] mappedfaces.append(( (start, (start[0]+zmax, start[1]+ymax, start[2]+1), True), [(slicex0, gtmap, key, True, allgt)] )) mappedfaces.append(( ((start[0], start[1], start[2]+xmax), (start[0]+zmax, start[1]+ymax, start[2]+xmax+1), True), [(slicexmax, gtmap, key, False, set())] )) mappedfaces.append(( (start, (start[0]+zmax, start[1]+1, start[2]+xmax), True), [(slicey0, gtmap, key, False, set())] )) mappedfaces.append(( ((start[0], start[1]+ymax, start[2]), (start[0]+zmax, start[1]+ymax+1, start[2]+xmax), True), [(sliceymax, gtmap, key, False, set())] )) mappedfaces.append(( (start, (start[0]+1, start[1]+ymax, start[2]+xmax), True), [(slicez0, gtmap, key, False, set())] )) mappedfaces.append(( ((start[0]+zmax, start[1], start[2]), (start[0]+zmax+1, start[1]+ymax, start[2]+xmax), True), [(slicezmax, gtmap, key, False, set())] )) # grab 6 faces for seg segslicex0 = segvol[:,:,0] segslicexmax = segvol[:,:,xmax-1] segslicey0 = segvol[:,0,:] segsliceymax = segvol[:,ymax-1,:] segslicez0 = segvol[0,:,:] segslicezmax = segvol[zmax-1,:,:] mappedfaces.append(( (start, (start[0]+zmax, start[1]+ymax, start[2]+1), False), [(segslicex0, segmap, key, True, allseg)] )) mappedfaces.append(( ((start[0], start[1], start[2]+xmax), (start[0]+zmax, start[1]+ymax, start[2]+xmax+1), False), [(segslicexmax, segmap, key, False, set())] )) mappedfaces.append(( (start, (start[0]+zmax, start[1]+1, start[2]+xmax), False), [(segslicey0, segmap, key, False, set())] )) mappedfaces.append(( ((start[0], start[1]+ymax, start[2]), (start[0]+zmax, start[1]+ymax+1, start[2]+xmax), False), [(segsliceymax, segmap, key, False, set())] )) mappedfaces.append(( (start, (start[0]+1, start[1]+ymax, start[2]+xmax), False), [(segslicez0, segmap, key, False, set())] )) mappedfaces.append(( ((start[0]+zmax, start[1], start[2]), (start[0]+zmax+1, start[1]+ymax, start[2]+xmax), False), [(segslicezmax, segmap, key, False, set())] )) return mappedfaces # assume there could be only one possible match def _reducematches(faces1, faces2): faces1.extend(faces2) return faces1 def _extractmatches(keyfaces): """Finds matching segments that have the same body id. 
""" key, faces = keyfaces # no match found if len(faces) == 1: start, end, isgt = key seg1, segmap, sid, hack1, segbodies = faces[0] bodymatches = [] if hack1: for label, body in segmap.items(): bodymatches.append(((body, isgt), [(label, sid, True)])) for body in segbodies: bodymatches.append(((body, isgt), [(body, sid, True)])) return bodymatches assert(len(faces) == 2) start, end, isgt = key seg1, segmap, sid, hack1, segbodies = faces[0] seg2, segmap2, sid2, hack2, segbodies2 = faces[1] seg1 = seg1.flatten() seg2 = seg2.flatten() seg1seg2 = numpy.column_stack((seg1, seg2)) unique_pairs = numpy.unique(seg1seg2, axis=0) bodymatches = [] for val in unique_pairs: if val[0] == 0 or val[1] == 0: continue mapped1 = val[0] if mapped1 in segmap: mapped1 = segmap[mapped1] mapped2 = val[1] if mapped2 in segmap2: mapped2 = segmap2[mapped2] if mapped1 == mapped2: bodymatches.append(((mapped1, isgt), [((val[0], sid), (val[1], sid2))])) # hack: send all bodies that have new labels # assume 1) disjoint bodies will always include implicit identity mapping # and 2) each subvolume will be represented at least 6 times if hack1: for label, body in segmap.items(): bodymatches.append(((body, isgt), [(label, sid, True)])) for body in segbodies: bodymatches.append(((body, isgt), [(body, sid, True)])) if hack2: for label, body in segmap2.items(): bodymatches.append(((body, isgt), [(label, sid2, True)])) for body in segbodies2: bodymatches.append(((body, isgt), [(body, sid2, True)])) return bodymatches def _reduce_bodies(bodies1, bodies2): """Group all bodies maps together. """ bodies1.extend(bodies2) return bodies1 flatmatches = lpairs_split.flatMap(_extractfaces).reduceByKey(_reducematches).flatMap(_extractmatches) matches = flatmatches.reduceByKey(_reduce_bodies) # should be small enough that the list can be global def _find_disjoint_bodies(matches): """Extract bodies that should be split into more than one piece. 
""" (bodyid, isgt), matchlist = matches merges = {} mergeset = {} for match in matchlist: # handle original mapping disjoint ids if len(match) == 3: val = (match[0], match[1]) if val not in merges: merges[val] = val mergeset[val] = set([val]) continue val, val2 = match if val2 < val: val, val2 = val2, val mappedval = val if mappedval in merges: mappedval = merges[mappedval] else: merges[val] = val if mappedval not in mergeset: mergeset[mappedval] = set([val]) else: mergeset[mappedval].add(val) mappedval2 = val2 if mappedval2 in merges: mappedval2 = merges[mappedval2] if mappedval2 not in mergeset: mergeset[mappedval2] = set([val2]) else: mergeset[mappedval2].add(val2) # if the mapped value is equal, no need for further processing if mappedval2 == mappedval: continue merges[mappedval2] = mappedval for iterval in mergeset[mappedval2]: merges[iterval] = mappedval mergeset[mappedval] = mergeset[mappedval].union(mergeset[mappedval2]) del mergeset[mappedval2] if len(mergeset) == 1: return [] bodygroups = [] for (dummy, group) in mergeset.items(): bodygroups.append(((bodyid, isgt), group)) return bodygroups # choose very large arbitary index for simplicity (but below js 2^53 limit) ccstartbodyindex = 2**51 # find disjoint mappings disjoint_bodies = matches.flatMap(_find_disjoint_bodies) mapped_bodies = disjoint_bodies.zipWithIndex() mapped_bodies.persist() # send changes to substacks def cc2sid(mapped_body): (((bodyid, isgt), group), rid) = mapped_body sidbodies = [] for (subval, sid) in group: sidbodies.append((sid, [(isgt, subval, rid+ccstartbodyindex)])) return sidbodies def groupsids(sid1, sid2): sid1.extend(sid2) return sid1 sidccbodies = mapped_bodies.flatMap(cc2sid).reduceByKey(groupsids, lpairs_split.getNumPartitions()) # shuffle mappings to substacks (does this cause a shuffle) lpairs_split_j = lpairs_split.leftOuterJoin(sidccbodies, lpairs_split.getNumPartitions()) # give new ids for subvolumes def _insertccmappings(label_pairs): ((subvolume, labelgt_map, label2_map, labelgt_split, label2_split), ccbodies) = label_pairs if ccbodies is not None: for (isgt, subval, bodyid) in ccbodies: if isgt: labelgt_map[subval] = bodyid else: label2_map[subval] = bodyid return (subvolume, labelgt_map, label2_map, labelgt_split, label2_split) lpairs_split = lpairs_split_j.mapValues(_insertccmappings) # evaluation tool (support RAND, VI, per body, graph, and # histogram stats over different sets of points) evaluator = Evaluate.Evaluate(self.config_data) ### VOLUMETRIC ANALYSIS ### # TODO: !! Grab number of intersecting disjoint faces # (might need +1 border) for split edit distance # grab volumetric body overlap ignoring boundaries as specified # and generate overlap stats for substack (compute local) # => (key, (subvolume, stats, seggt-split, seg2-split, seggt-map, seg2-map)) # (preserve partitioner) lpairs_proc = evaluator.calcoverlap(lpairs_split, self.config_data["options"]["boundary-size"]) point_data = {} ### POINT ANALYSIS ### for point_list_name in self.config_data["dvid-info"]["point-lists"]: # grab point list from DVID keyvalue = point_list_name.split('/') pointname = "" if len(keyvalue) == 2: # is this too large to broadcast?? 
-- default lz4 should help quite a bit # TODO: send only necessary data to each job through join might help point_data[keyvalue[1]] = node_service.get_json(str(keyvalue[0]), str(keyvalue[1])) pointname = keyvalue[1] elif len(keyvalue) == 1: # assume dvid annotation datatype and always treat as a synapse type # TODO: split this up into many small calls so that it scales syndata = node_service.custom_request(str(keyvalue[0]) + "/roi/" + str(self.config_data["dvid-info"]["roi"]), "".encode(), ConnectionMethod.GET) synjson = json.loads(syndata) synindex = {} synspot = 0 # grab index positions for synapse in synjson: synindex[tuple(synapse["Pos"])] = synspot synspot += 1 # load point data pointlist = [] for synapse in synjson: pointrel = synapse["Pos"] if synapse["Rels"] is not None: for rel in synapse["Rels"]: if rel["Rel"] == "PreSynTo": # only add relations within ROI if tuple(rel["To"]) in synindex: index = synindex[tuple(rel["To"])] pointrel.append(index) pointlist.append(pointrel) pointinfo = {"type": "synapse", "sparse": False, "point-list": pointlist} point_data[keyvalue[0]] = pointinfo pointname = keyvalue[0] else: raise Exception(str(point_list_name) + "point list key value not properly specified") # Generate per substack and global stats for given points. # Querying will just be done on the local labels stored. # (preserve partitioner) lpairs_proc = evaluator.calcoverlap_pts(lpairs_proc, pointname, point_data[pointname]) # Extract stats by retrieving substacks and stats info and # loading into data structures on the driver. stats = evaluator.calculate_stats(lpairs_proc) if self.config_data["options"]["run-cc"]: # make a global remap function def extract_disjoint_bodies(mapped_body): (((bodyid, isgt), group), rid) = mapped_body return (bodyid, rid+ccstartbodyindex) bodies_remap = mapped_bodies.map(extract_disjoint_bodies).collect() # global map of cc bodies to original body (unique across GT and seg) cc2body = {} for (bodyid, rid) in bodies_remap: cc2body[rid] = bodyid """ # map temporary CC body index to original body index for body stats # for convenience (not very necessary since # CC mappings are also provided) for bodystat in stats["bodystats"]: delkeys = [] newbodies = {} # rename bodyid -> bodyid-<num> for CC bodies for (tbody, val) in bodystat["bodies"].items(): if tbody in cc2body: delkeys.append(tbody) iter1 = 0 while (str(tbody) + "-" + str(iter1)) in newbodies: iter1 += 1 newbodies[str(tbody) + "-" + str(iter1)] = val for key in delkeys: del bodystat["bodies"][key] for (body, val) in newbodies.items(): bodystat["bodies"][body] = val """ # expand subvolume to original size if downsampled if downsample_level > 0: for sid, subvolumestats in stats["subvolumes"].items(): for stat in subvolumestats: if stat["name"] == "bbox": stat["val"] = list(stat["val"]) for pos in range(6): for level in range(downsample_level): stat["val"][pos] = stat["val"][pos]*2 # dump CC mappings for use in debugging if self.config_data["options"]["run-cc"]: stats["connected-components"] = cc2body # none or false debug = False if "debug" in self.config_data: debug = self.config_data["debug"] if debug: print("DEBUG:", json.dumps(stats, cls=NumpyConvertingEncoder)) # TODO: !! 
maybe generate a summary view from stats, write that back # with simplified output, dump the more complicated file to keyvalue as well # write stats and config back to DVID with time stamp # (@ name + user name + time stamp) # client should use '--' delimiter to parse name stats["time-analyzed"] = datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S') stats["runtime"] = time.time() - starttime stats["config-file"] = self.config_data current_time = int(time.time()) username = str(self.config_data["options"]["user-name"]) username = "__".join(username.split('.')) location = str(self.config_data["dvid-info"]["stats-location"]) location = "__".join(location.split('.')) fileloc = str(location + "--" + username + "--" + str(current_time)) node_service.create_keyvalue(self.writelocation) node_service.put(self.writelocation, fileloc, json.dumps(stats, cls=NumpyConvertingEncoder).encode('utf-8'))
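# The keyvalue name written above encodes location, user, and timestamp in
# one string. A tiny sketch of that convention (stats_key is an
# illustrative helper, not part of the workflow; it assumes '.' characters
# in the user name are replaced with '__' the same way the stats location
# is handled) so a client can split the final name on the '--' delimiter
# without ambiguity:
import time

def stats_key(location, username, timestamp=None):
    if timestamp is None:
        timestamp = int(time.time())
    location = "__".join(location.split('.'))
    username = "__".join(username.split('.'))
    return location + "--" + username + "--" + str(timestamp)

assert stats_key("proj.stats", "jane.doe", 1500000000) == "proj__stats--jane__doe--1500000000"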
def execute(self): from pyspark import SparkContext from pyspark import StorageLevel from DVIDSparkServices.reconutils.Segmentor import Segmentor resource_server = self.resource_server resource_port = self.resource_port self.chunksize = self.config_data["options"]["chunk-size"] # create datatype in the beginning mutateseg = self.config_data["options"]["mutateseg"] node_service = retrieve_node_service( self.config_data["dvid-info"]["dvid-server"], self.config_data["dvid-info"]["uuid"], resource_server, resource_port) success = node_service.create_labelblk( str(self.config_data["dvid-info"]["segmentation-name"])) # check whether seg should be mutated if (not success and mutateseg == "auto") or mutateseg == "yes": mutateseg = "yes" else: mutateseg = "no" # grab ROI subvolumes and find neighbors distsubvolumes = self.sparkdvid_context.parallelize_roi( self.config_data["dvid-info"]["roi"], self.chunksize, self.overlap // 2, True, self.config_data["dvid-info"]["partition-method"], self.config_data["dvid-info"]["partition-filter"]) # do not recompute ROI for each iteration distsubvolumes.persist() num_parts = len(distsubvolumes.collect()) # Instantiate the correct Segmentor subclass (must be installed) import importlib full_segmentor_classname = self.config_data["options"]["segmentor"][ "class"] segmentor_classname = full_segmentor_classname.split('.')[-1] module_name = '.'.join(full_segmentor_classname.split('.')[:-1]) segmentor_mod = importlib.import_module(module_name) segmentor_class = getattr(segmentor_mod, segmentor_classname) segmentor = segmentor_class(self.sparkdvid_context, self) # determine number of iterations iteration_size = self.config_data["options"]["iteration-size"] if iteration_size == 0: iteration_size = num_parts num_iters = num_parts // iteration_size if num_parts % iteration_size > 0: num_iters += 1 seg_chunks_list = [] # enable checkpointing if not empty checkpoint_dir = self.config_data["options"]["checkpoint-dir"] # enable rollback of iterations if necessary rollback_seg = ( self.config_data["options"]["checkpoint"] == "segmentation") # enable rollback of boundary prediction if necessary rollback_pred = (rollback_seg or self.config_data["options"]["checkpoint"] == "voxel") for iternum in range(0, num_iters): # Disable rollback by setting checkpoint dirs to empty gray_checkpoint_dir = mask_checkpoint_dir = pred_checkpoint_dir = sp_checkpoint_dir = seg_checkpoint_dir = "" if checkpoint_dir != "": pred_checkpoint_dir = checkpoint_dir + "/prediter-" + str( iternum) seg_checkpoint_dir = checkpoint_dir + "/segiter-" + str( iternum) # Grayscale and SP caches are only written to as a "debug" feature if self.config_data["options"]["debug"]: gray_checkpoint_dir = checkpoint_dir + "/grayiter-" + str( iternum) mask_checkpoint_dir = checkpoint_dir + "/maskiter-" + str( iternum) sp_checkpoint_dir = checkpoint_dir + "/spiter-" + str( iternum) roi = self.config_data["dvid-info"]["roi"] method = self.config_data["dvid-info"]["partition-method"] roi_description = roi if method != "ask-dvid": roi_description += "-" + method roi_filter = self.config_data["dvid-info"][ "partition-filter"] if roi_filter != "all": roi_description += "-" + roi_filter # Spit out a JSON of the Subvolume list boxes ids_and_subvols = distsubvolumes.collect() subvols = [v for (_k, v) in ids_and_subvols] subvol_bounds_json = Subvolume.subvol_list_to_json(subvols) mkdir_p(checkpoint_dir) with open( checkpoint_dir + "/{}-subvol-bounds.json".format(roi_description), 'w') as f: f.write(subvol_bounds_json) # Also spit out 
JSON RLE for writing the modified ROI directly to DVID, in case that's useful all_blocks = Subvolume.subvol_list_all_blocks(subvols) rle = runlength_encode(all_blocks, assume_sorted=False) with open( checkpoint_dir + "/{}-dvid-blocks.json".format(roi_description), 'w') as f: json.dump(rle.tolist(), f) # it might make sense to randomly map partitions for selection # in case something pathological is happening -- if original partitioner # is randomish than this should be fine def subset_part(sid_data): (sid, _data) = sid_data if (sid % num_iters) == iternum: return True return False # should preserve partitioner distsubvolumes_part = distsubvolumes.filter(subset_part) if rollback_seg: readable_seg_checkpoint_dir = seg_checkpoint_dir else: readable_seg_checkpoint_dir = "" subvols_with_seg_cache, subvols_without_seg_cache = \ CreateSegmentation._split_subvols_by_cache_status( readable_seg_checkpoint_dir, distsubvolumes_part.values().collect() ) ## ## CACHED SUBVOLS ## cached_subvols_rdd = self.sparkdvid_context.sc.parallelize( subvols_with_seg_cache, len(subvols_with_seg_cache) or None) # Load as many seg blocks from cache as possible if subvols_with_seg_cache: def retrieve_seg_from_cache(subvol): z1, y1, x1, z2, y2, x2 = subvol.box_with_border block_bounds = ((z1, y1, x1), (z2, y2, x2)) block_store = H5BlockStore(seg_checkpoint_dir, mode='r') h5_block = block_store.get_block(block_bounds) return h5_block[:] cached_seg_chunks = cached_subvols_rdd.map( retrieve_seg_from_cache) else: cached_seg_chunks = self.sparkdvid_context.sc.parallelize( []) # empty rdd cached_seg_chunks.persist() cached_seg_max_ids = cached_seg_chunks.map(np.max) # (subvol, (seg, max_id)) cached_seg_chunks_kv = cached_subvols_rdd.zip( cached_seg_chunks.zip(cached_seg_max_ids)) ## ## UNCACHED SUBVOLS ## uncached_subvols = self.sparkdvid_context.sc.parallelize( subvols_without_seg_cache, len(subvols_without_seg_cache) or None) uncached_subvols.persist() def prepend_sv_index(subvol): return (subvol.sv_index, subvol) uncached_subvols_kv_rdd = uncached_subvols.map(prepend_sv_index) # get grayscale chunks with specified overlap uncached_sv_and_gray = self.sparkdvid_context.map_grayscale8( uncached_subvols_kv_rdd, self.config_data["dvid-info"]["grayscale"]) uncached_gray_vols = select_item(uncached_sv_and_gray, 1, 1) # small hack since segmentor is unaware for current iteration # perhaps just declare the segment function to have an arbitrary number of parameters if type(segmentor) == Segmentor: computed_seg_chunks = segmentor.segment( uncached_subvols, uncached_gray_vols, gray_checkpoint_dir, mask_checkpoint_dir, pred_checkpoint_dir, sp_checkpoint_dir, seg_checkpoint_dir, rollback_pred, False, rollback_seg) else: computed_seg_chunks = segmentor.segment( uncached_subvols, uncached_gray_vols) computed_seg_chunks.persist() computed_seg_max_ids = computed_seg_chunks.map(np.max) # (subvol, (seg, max_id)) computed_seg_chunks_kv = uncached_subvols.zip( computed_seg_chunks.zip(computed_seg_max_ids)) ## ## FINAL LIST: COMBINED CACHED+UNCACHED ## # (subvol, (seg, max_id)) seg_chunks = cached_seg_chunks_kv.union(computed_seg_chunks_kv) seg_chunks.persist(StorageLevel.MEMORY_AND_DISK) seg_chunks_list.append(seg_chunks) seg_chunks = seg_chunks_list[0] for iter1 in range(1, len(seg_chunks_list)): # ?? 
does this preserve the partitioner (yes, if num partitions is the same) # this could cause serialization problems if there are a large number of iterations (>100) seg_chunks = seg_chunks.union(seg_chunks_list[iter1]) del seg_chunks_list # persist through stitch # any forced persistence will result in costly # pickling, lz4 compressed numpy array should help seg_chunks.persist(StorageLevel.MEMORY_AND_DISK) # stitch the segmentation chunks # (preserves initial partitioning) mapped_seg_chunks = segmentor.stitch(seg_chunks) def prepend_key(item): subvol, _ = item return (subvol.sv_index, item) mapped_seg_chunks = mapped_seg_chunks.map(prepend_key) if self.config_data["options"]["parallelwrites"] > 0: # repartition to fewer partitions if there are write bandwidth limits to DVID # (coalesce() doesn't balance the partitions, so we opt for a full shuffle.) mapped_seg_chunks = mapped_seg_chunks.repartition( self.config_data["options"]["parallelwrites"]) # write data to DVID self.sparkdvid_context.foreach_write_labels3d( self.config_data["dvid-info"]["segmentation-name"], mapped_seg_chunks, self.config_data["dvid-info"]["roi"], mutateseg) self.workflow_entry_exit_printer.write_data( "Wrote DVID labels") # write to logger after spark job if self.config_data["options"]["debug"]: # grab 256 cube from ROI node_service = retrieve_node_service( self.config_data["dvid-info"]["dvid-server"], self.config_data["dvid-info"]["uuid"], resource_server, resource_port) substacks, packing_factor = node_service.get_roi_partition( str(self.config_data["dvid-info"]["roi"]), 256 // self.blocksize) if self.resource_server != "": label_volume = node_service.get_labels3D( str(self.config_data["dvid-info"]["segmentation-name"]), (256, 256, 256), (substacks[0].z, substacks[0].y, substacks[0].x), compress=True, throttle=False) else: label_volume = node_service.get_labels3D( str(self.config_data["dvid-info"]["segmentation-name"]), (256, 256, 256), (substacks[0].z, substacks[0].y, substacks[0].x), compress=True) # dump checksum import hashlib md5 = hashlib.md5() md5.update(label_volume) print("DEBUG: ", md5.hexdigest())
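# subset_part above selects work for the current iteration with a simple
# round-robin rule: subvolume sid is handled on iteration (sid % num_iters).
# A minimal sketch (iteration_groups is illustrative, not a workflow helper)
# showing that the rule covers every subvolume exactly once:
def iteration_groups(sids, num_iters):
    groups = [[] for _ in range(num_iters)]
    for sid in sids:
        groups[sid % num_iters].append(sid)
    return groups

groups = iteration_groups(range(7), 3)
assert groups == [[0, 3, 6], [1, 4], [2, 5]]
assert sorted(sid for g in groups for sid in g) == list(range(7))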
def execute(self):
    from PIL import Image
    import numpy
    import os

    iterslices = self.BLKSIZE * self.config_data["options"]["numblocklayers"]

    minslice = self.config_data["minslice"]
    # map file to numpy array
    basename = self.config_data["basename"]

    # format should be gs://<bucket>/path
    gbucketname = ""
    gpath = ""
    if basename.startswith('gs://'):
        # parse google bucket names
        tempgs = basename.split('//')
        bucketpath = tempgs[1].split('/')
        gbucketname = bucketpath[0]
        gpath = '/'.join(bucketpath[1:])

    server = None

    xoffset = yoffset = zoffset = 0
    if "offset" in self.config_data["options"]:
        xoffset = self.config_data["options"]["offset"][0]
        yoffset = self.config_data["options"]["offset"][1]
        zoffset = self.config_data["options"]["offset"][2]

        if xoffset % self.BLKSIZE != 0:
            raise Exception("offset not block aligned")
        if yoffset % self.BLKSIZE != 0:
            raise Exception("offset not block aligned")
        if zoffset % self.BLKSIZE != 0:
            raise Exception("offset not block aligned")

        # integer division so the block indices stay ints
        xoffset //= self.BLKSIZE
        yoffset //= self.BLKSIZE
        zoffset //= self.BLKSIZE

    # this will start the Z block writing at the specified offset
    # (changes default behavior when loading nonzero starting image slice)
    zoffset -= (minslice // self.BLKSIZE)

    # create metadata before workers start if using DVID
    if "output-dir" not in self.config_data or self.config_data["output-dir"] == "":
        # write to dvid
        server = self.config_data["dvid-info"]["dvid-server"]
        uuid = self.config_data["dvid-info"]["uuid"]
        grayname = self.config_data["dvid-info"]["grayname"]
        resource_server = str(self.resource_server)
        resource_port = self.resource_port

        # create grayscale type
        node_service = retrieve_node_service(server, uuid, resource_server, resource_port, self.APPNAME)
        node_service.create_grayscale8(str(grayname), self.BLKSIZE)

    for slice in range(self.config_data["minslice"], self.config_data["maxslice"]+1, iterslices):
        # parallelize images across many machines
        imgs = self.sc.parallelize(list(range(slice, slice+iterslices)), iterslices)

        def img2npy(slicenum):
            try:
                img = None
                if gbucketname == "":
                    img = Image.open(basename % slicenum)
                else:
                    from gcloud import storage
                    from io import BytesIO
                    client = storage.Client()
                    gbucket = client.get_bucket(gbucketname)
                    gblob = gbucket.get_blob(gpath % slicenum)

                    # write to bytes which implements file interface
                    gblobfile = BytesIO()
                    gblob.download_to_file(gblobfile)
                    gblobfile.seek(0)
                    img = Image.open(gblobfile)
                return slicenum, numpy.array(img)
            except Exception as e:
                # just return a blank slice -- will be handled downstream
                return slicenum, numpy.zeros((0,0), numpy.uint8)

        npy_images = imgs.map(img2npy)

        # map numpy array into y lines of block height
        blocksize = self.BLKSIZE
        blocklimit = self.BLOCKLIMIT

        def npy2lines(arrpair):
            z, arr = arrpair
            ysize, xsize = arr.shape
            npylines = []
            for itery in range(0, ysize, blocksize):
                line = numpy.zeros((blocksize, ((xsize-1) // blocksize + 1)*blocksize), numpy.uint8)
                uppery = blocksize
                if (itery + blocksize) > ysize:
                    uppery = ysize - itery
                line[0:uppery, 0:xsize] = arr[itery:itery+blocksize, 0:xsize]
                npylines.append((itery // blocksize, (z, line)))
            return npylines

        npy_lines = npy_images.flatMap(npy2lines)

        # reduce y lines into DVID blocks
        groupedlines = npy_lines.groupByKey()

        # map y lines => (y, blocks)
        def lines2blocks(linespair):
            y, linesp = linespair
            xsize = None
            blockdata = None
            for z, line in linesp:
                if xsize is None:
                    _, xsize = line.shape
                    blockdata = numpy.zeros((iterslices, blocksize, xsize), numpy.uint8)
                blockdata[(z - minslice)%iterslices, :, :] = line
            return y, blockdata

        yblocks = groupedlines.map(lines2blocks)

        # map multilayer of blocks to an array of single layer blocks
        def multi2single(yblocks):
            ybindex, blocks = yblocks
            blockarr = []
            num_layers = iterslices // blocksize
            for layer in range(0, num_layers):
                blockarr.append(((ybindex, layer), blocks[layer*blocksize:(layer*blocksize+blocksize),:,:]))
            return blockarr

        yblockssplit = yblocks.flatMap(multi2single)

        if "output-dir" in self.config_data and self.config_data["output-dir"] != "":
            # write blocks to disk for separate post-processing -- write directly to DVID eventually?
            output_dir = self.config_data["output-dir"]

            def write2disk(yblocks):
                zbindex = slice // blocksize
                (ybindex, layer), blocks = yblocks
                zbindex += layer
                zsize, ysize, xsize = blocks.shape
                outdir = output_dir
                outdir += "/" + ("%05d" % zbindex) + ".z/"
                filename = outdir + ("%05d" % ybindex) + "-" + str(xsize // blocksize) + ".blocks"
                try:
                    os.makedirs(outdir)
                except Exception as e:
                    pass

                # extract blocks from buffer and write to disk
                fout = open(filename, 'wb')
                for iterx in range(0, xsize, blocksize):
                    block = blocks[:,:,iterx:iterx+blocksize].copy()
                    fout.write(block)
                fout.close()

            yblockssplit.foreach(write2disk)
        else:
            # write to dvid
            server = self.config_data["dvid-info"]["dvid-server"]
            uuid = self.config_data["dvid-info"]["uuid"]
            grayname = self.config_data["dvid-info"]["grayname"]
            appname = self.APPNAME
            delimiter = self.config_data["options"]["blankdelimiter"]

            def write2dvid(yblocks):
                from libdvid import ConnectionMethod
                import numpy
                node_service = retrieve_node_service(server, uuid, resource_server, resource_port, appname)

                # get block coordinates
                zbindex = slice // blocksize
                (ybindex, layer), blocks = yblocks
                zbindex += layer
                zsize, ysize, xsize = blocks.shape
                xbindex = 0  # assume x starts at 0!!

                # retrieve blocks
                blockbuffer = b""  # payload must be bytes, not str

                # skip blank blocks
                startblock = False
                xrun = 0
                for iterx in range(0, xsize, blocksize):
                    block = blocks[:,:,iterx:iterx+blocksize].copy()
                    vals = numpy.unique(block)
                    if len(vals) == 1 and vals[0] == delimiter:
                        # check if the block is blank
                        if startblock:
                            # if the previous block has data, push blocks in current queue
                            node_service.custom_request(str((grayname + "/blocks/%d_%d_%d/%d") % (xbindex+xoffset, ybindex+yoffset, zbindex+zoffset, xrun)), blockbuffer, ConnectionMethod.POST)
                            startblock = False
                            xrun = 0
                            blockbuffer = b""
                    else:
                        if startblock == False:
                            xbindex = iterx // blocksize
                            startblock = True
                        blockbuffer += block.tobytes()
                        xrun += 1
                        if blocklimit > 0 and xrun >= blocklimit:
                            # if the current run hits the limit, push blocks in current queue
                            node_service.custom_request(str((grayname + "/blocks/%d_%d_%d/%d") % (xbindex+xoffset, ybindex+yoffset, zbindex+zoffset, xrun)), blockbuffer, ConnectionMethod.POST)
                            startblock = False
                            xrun = 0
                            blockbuffer = b""

                # write-out leftover blocks
                if xrun > 0:
                    node_service.custom_request(str((grayname + "/blocks/%d_%d_%d/%d") % (xbindex+xoffset, ybindex+yoffset, zbindex+zoffset, xrun)), blockbuffer, ConnectionMethod.POST)

            yblockssplit.foreach(write2dvid)

        self.workflow_entry_exit_printer.write_data("Ingested %d slices" % iterslices)

    # just fetch one image at driver to get dims
    width = height = 1
    try:
        img = None
        if gbucketname == "":
            img = Image.open(basename % minslice)
            width, height = img.width, img.height
        else:
            from gcloud import storage
            from io import BytesIO
            client = storage.Client()
            gbucket = client.get_bucket(gbucketname)
            gblob = gbucket.get_blob(gpath % minslice)

            # write to bytes which implements file interface
            gblobfile = BytesIO()
            gblob.download_to_file(gblobfile)
            gblobfile.seek(0)
            img = Image.open(gblobfile)
            width, height = img.width, img.height
    except Exception as e:
        # just set size to 1
        pass

    if "output-dir" not in self.config_data or self.config_data["output-dir"] == "":
        # update metadata
        grayext = {}
        grayext["MinPoint"] = [xoffset*self.BLKSIZE, yoffset*self.BLKSIZE, zoffset*self.BLKSIZE+minslice]
        grayext["MaxPoint"] = [xoffset*self.BLKSIZE + width-1, yoffset*self.BLKSIZE + height-1, zoffset*self.BLKSIZE+minslice + self.config_data["maxslice"]]
        if not server.startswith("http://"):
            server = "http://" + server
        session = default_dvid_session()
        session.post(server + "/api/node/" + uuid + "/" + grayname + "/extents", json=grayext)
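# write2dvid above batches non-blank blocks along x into contiguous runs and
# POSTs each run as <grayname>/blocks/<xbindex>_<ybindex>_<zbindex>/<xrun>.
# A self-contained sketch of just that grouping (block_runs is an
# illustrative helper, not part of the workflow), easy to test without a
# DVID server:
import numpy as np

def block_runs(line, blocksize, delimiter, blocklimit=0):
    runs = []                       # (xbindex, xrun, payload)
    start, xrun, buf = None, 0, b""
    for bi in range(line.shape[2] // blocksize):
        block = line[:, :, bi * blocksize:(bi + 1) * blocksize]
        if (block == delimiter).all():
            # blank block: flush any open run
            if xrun:
                runs.append((start, xrun, buf))
            start, xrun, buf = None, 0, b""
            continue
        if xrun == 0:
            start = bi
        buf += block.tobytes()
        xrun += 1
        if blocklimit and xrun >= blocklimit:
            # cap the run so request bodies stay bounded
            runs.append((start, xrun, buf))
            start, xrun, buf = None, 0, b""
    if xrun:                        # leftover run
        runs.append((start, xrun, buf))
    return runs

# blocks 0 and 2-3 contain data; block 1 is blank (delimiter 0)
line = np.zeros((2, 2, 8), np.uint8)
line[..., 0:2] = 1
line[..., 4:8] = 2
assert [(s, n) for s, n, _ in block_runs(line, 2, 0)] == [(0, 1), (2, 2)]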
def execute(self): # TODO: handle 64 bit segmentation from pyspark import SparkContext from pyspark import StorageLevel from DVIDSparkServices.reconutils.Segmentor import Segmentor self.chunksize = self.config_data["options"]["chunk-size"] # create datatype in the beginning node_service = retrieve_node_service( self.config_data["dvid-info"]["dvid-server"], self.config_data["dvid-info"]["uuid"], self.resource_server, self.resource_port) # grab ROI subvolumes and find neighbors distsubvolumes = self.sparkdvid_context.parallelize_roi( self.config_data["dvid-info"]["roi"], self.chunksize, self.contextbuffer, True) contextbuffer = self.contextbuffer # do not recompute ROI for each iteration distsubvolumes.persist() # instantiate the voxel prediction plugin import importlib full_function_name = self.config_data["options"]["predict-voxels"][ "function"] module_name = '.'.join(full_function_name.split('.')[:-1]) function_name = full_function_name.split('.')[-1] module = importlib.import_module(module_name) parameters = self.config_data["options"]["predict-voxels"][ "parameters"] vprediction_function = partial(getattr(module, function_name), **parameters) # determine number of iterations num_parts = len(distsubvolumes.collect()) iteration_size = self.config_data["options"]["iteration-size"] if iteration_size == 0: iteration_size = num_parts num_iters = num_parts // iteration_size if num_parts % iteration_size > 0: num_iters += 1 feature_chunk_list = [] # enable checkpointing if not empty checkpoint_dir = self.config_data["options"]["checkpoint-dir"] # enable rollback of iterations if necessary rollback = False if self.config_data["options"]["checkpoint"]: rollback = True for iternum in range(0, num_iters): # it might make sense to randomly map partitions for selection # in case something pathological is happening -- if original partitioner # is randomish than this should be fine def subset_part(sid_data): (s_id, _data) = sid_data if (s_id % num_iters) == iternum: return True return False # should preserve partitioner distsubvolumes_part = distsubvolumes.filter(subset_part) # get grayscale chunks with specified overlap gray_chunks = self.sparkdvid_context.map_grayscale8( distsubvolumes_part, self.config_data["dvid-info"]["grayscale"]) pred_checkpoint_dir = "" if checkpoint_dir: pred_checkpoint_dir = checkpoint_dir + "/prediter-" + str( iternum) # For now, we always read predictions if available, and always write them if not. # TODO: Add config settings to control read/write behavior. @Segmentor.use_block_cache(pred_checkpoint_dir, allow_read=True, allow_write=True) def predict_voxels(sv_gray): (_subvolume, gray) = sv_gray return vprediction_function(gray, None) vox_preds = gray_chunks.values().map( predict_voxels) # predictions only vox_preds = distsubvolumes_part.values().zip( vox_preds) # (subvolume, predictions) pdconf = self.config_data["dvid-info"] resource_server = self.resource_server resource_port = self.resource_port # retrieve segmentation and generate features def generate_features(vox_pred): import numpy (subvolume, pred) = vox_pred pred = numpy.ascontiguousarray(pred) # extract labelblks border = 1 # only one pixel needed to find edges # get sizes of box size_z = subvolume.box.z2 + 2 * border - subvolume.box.z1 size_y = subvolume.box.y2 + 2 * border - subvolume.box.y1 size_x = subvolume.box.x2 + 2 * border - subvolume.box.x1 # retrieve data from box start position considering border # !! 
technically ROI is not respected but unwritten segmentation will be ignored since it will have 0-valued pixels. @auto_retry(3, pause_between_tries=60.0, logging_name=__name__) def get_seg(): node_service = retrieve_node_service( pdconf["dvid-server"], pdconf["uuid"], resource_server, resource_port) # retrieve data from box start position # Note: libdvid uses zyx order for python functions if resource_server != "": return node_service.get_labels3D( str(pdconf["segmentation-name"]), (size_z, size_y, size_x), (subvolume.box.z1 - border, subvolume.box.y1 - border, subvolume.box.x1 - border)) else: return node_service.get_labels3D( str(pdconf["segmentation-name"]), (size_z, size_y, size_x), (subvolume.box.z1 - border, subvolume.box.y1 - border, subvolume.box.x1 - border)) initial_seg = get_seg() # !!! potentially dangerous but needed for now initial_seg = initial_seg.astype(numpy.uint32) pred2 = pred[(contextbuffer - border):-(contextbuffer - border), (contextbuffer - border):-(contextbuffer - border), (contextbuffer - border):-(contextbuffer - border), :].copy() z, y, x, num_chans = pred2.shape # call neuroproof and generate features from neuroproof import FocusedProofreading # "edges": [ edge ] where edge = [node1, node2, edgesize, all features...] # "vertices": [vertex ] where vertex = [id, size, all features...] features = FocusedProofreading.extract_features( initial_seg, pred2) element_list = [] # iterate edges and create ((node1, node2), features) if "Edges" in features: # could have only one vertex in a partition and no edges for edge in features["Edges"]: n1 = edge["Id1"] n2 = edge["Id2"] edge["Loc1"][0] += subvolume.box.x1 edge["Loc1"][1] += subvolume.box.y1 edge["Loc1"][2] += subvolume.box.z1 edge["Loc2"][0] += subvolume.box.x1 edge["Loc2"][1] += subvolume.box.y1 edge["Loc2"][2] += subvolume.box.z1 if n1 > n2: n1, n2 = n2, n1 element_list.append(((n1, n2), (num_chans, edge))) for node in features["Vertices"]: n1 = node["Id"] element_list.append(((n1, -1), (num_chans, node))) return element_list features = vox_preds.flatMap(generate_features) # retrieve previously computed RDD or save current RDD if checkpoint_dir != "": features = self.sparkdvid_context.checkpointRDD( features, checkpoint_dir + "/featureiter-" + str(iternum), rollback) # any forced persistence will result in costly # pickling, lz4 compressed numpy array should help features.persist(StorageLevel.MEMORY_AND_DISK_SER) feature_chunk_list.append(features) features = feature_chunk_list[0] for iter1 in range(1, len(feature_chunk_list)): # this could cause serialization problems if there are a large number of iterations (>100) features = features.union(feature_chunk_list[iter1]) # grab num channels from boundary prediction features.persist(StorageLevel.MEMORY_AND_DISK_SER) first_feature = features.first() (key1, key2), (num_channels, foo) = first_feature # remove num channels from features def remove_num_channels(featurepair): foo, feature = featurepair return feature features = features.mapValues(remove_num_channels) import json # merge edge and node features -- does not require reading classifier # node features are encoded as (vertex id, -1) def combine_edge_features(element1, element2): from neuroproof import FocusedProofreading if "Id2" in element1: # are edges return FocusedProofreading.combine_edge_features( json.dumps(element1,
cls=NumpyConvertingEncoder), json.dumps(element2, cls=NumpyConvertingEncoder), num_channels) features_combined = features.reduceByKey(combine_edge_features) #features_combined.persist() # TODO: option to serialize features to enable other analyses # join node and edge probs def retrieve_nodes(val): (n1, n2), features = val if n2 == -1: return True return False def retrieve_edges(val): (n1, n2), features = val if n2 == -1: return False return True node_features = features_combined.filter(retrieve_nodes) edge_features = features_combined.filter(retrieve_edges) node_features = node_features.map(lambda x: (x[0][0], x[1])) edge1_features = edge_features.map(lambda x: (x[0][0], x[1])) edge2_features = edge_features.map(lambda x: (x[0][1], x[1])) # multiple edges with the same key edge1_node_features = edge1_features.leftOuterJoin(node_features) edge2_node_features = edge2_features.leftOuterJoin(node_features) def reset_edgekey(val): key, (edge, node) = val n1 = edge["Id1"] n2 = edge["Id2"] if n1 > n2: n1, n2 = n2, n1 return ((n1, n2), (edge, node)) edge1_node_features = edge1_node_features.map(reset_edgekey) edge2_node_features = edge2_node_features.map(reset_edgekey) edge_node_features = edge1_node_features.join(edge2_node_features) # generate prob for each edge (JSON: body sizes, edge list with prob) classifierlocation = self.config_data["options"]["segment-classifier"] def compute_prob(edge_node_features): from neuroproof import FocusedProofreading classifier = FocusedProofreading.ComputeProb( str(classifierlocation), num_channels) res_list = [] for edge_node_edge_node in edge_node_features: edge_key, ((edge, node1), (edge_dummy, node2)) = edge_node_edge_node weight = classifier.compute_prob( json.dumps(edge, cls=NumpyConvertingEncoder), json.dumps(node1, cls=NumpyConvertingEncoder), json.dumps(node2, cls=NumpyConvertingEncoder)) # node1, node2 res_list.append( (int(node1["Id"]), int(node2["Id"]), int(node1["Weight"]), int(node2["Weight"]), int(edge["Weight"]), weight, edge["Loc1"], edge["Loc2"])) return res_list # avoid loading large classifier for each small edge allprobs = edge_node_features.mapPartitions(compute_prob) # collect all edges and send to DVID (TODO: add option to dump to disk) allprobs_combined = allprobs.collect() bodyinfo = {} edges = [] for edge_info in allprobs_combined: node1, node2, node1_size, node2_size, edge_size, weight, loc1, loc2 = edge_info bodyinfo[node1] = node1_size bodyinfo[node2] = node2_size edges.append({ "Id1": node1, "Id2": node2, "Weight": weight, "Loc1": loc1, "Loc2": loc2 }) bodies = [] for (key, val) in bodyinfo.items(): bodies.append({"Id": key, "Weight": val}) graph = {} graph["Vertices"] = bodies graph["Edges"] = edges SAVE_TO_FILE = False if SAVE_TO_FILE: graph_filepath = '/tmp/graph-output.json' with open(graph_filepath, 'w') as f: self.workflow_entry_exit_printer.warn( "Writing graph json to file:\n{}".format(graph_filepath)) import json json.dump(graph, f, indent=4, separators=(',', ': '), cls=NumpyConvertingEncoder) self.workflow_entry_exit_printer.write_data( "Wrote graph to disk") # write to logger after spark job UPLOAD_TO_DVID = True if UPLOAD_TO_DVID: # load entire graph into DVID node_service.create_graph( str(self.config_data["dvid-info"]["graph-name"])) server = str(self.config_data["dvid-info"]["dvid-server"]) #if not server.startswith("http://"): # server = "http://" + server #session = default_dvid_session() #session.post(server + "/api/node/" + str(self.config_data["dvid-info"]["uuid"]) + "/" + 
str(self.config_data["dvid-info"]["graph-name"]) + "/subgraph", json=graph) #self.workflow_entry_exit_printer.write_data("Wrote DVID graph") # write to logger after spark job if self.config_data["options"]["debug"]: import json print("DEBUG:", json.dumps(graph, cls=NumpyConvertingEncoder)) # write dvid to specified file (if provided) if "output-file" in self.config_data["options"] and self.config_data[ "options"]["output-file"] != "": filename = self.config_data["options"]["output-file"] edgelist = [] for edge in graph["Edges"]: edgelist.append({ "node1": edge["Id1"], "node2": edge["Id2"], "weight": edge["Weight"], "loc1": edge["Loc1"], "loc2": edge["Loc2"] }) npgraph = {} npgraph["edge_list"] = edgelist fout = open(filename, 'w') fout.write(json.dumps(npgraph, cls=NumpyConvertingEncoder))
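# The join pipeline above depends on every edge reducing to one canonical
# key, no matter which subvolume produced it: endpoints are ordered so the
# key is always (min id, max id), with node records riding along under a
# sentinel second id of -1. A minimal sketch of that normalization,
# mirroring reset_edgekey above (the record shape is illustrative):
def canonical_edgekey(val):
    _key, (edge, node) = val
    n1, n2 = edge["Id1"], edge["Id2"]
    if n1 > n2:
        n1, n2 = n2, n1
    return ((n1, n2), (edge, node))

rec = (9, ({"Id1": 9, "Id2": 4}, {"Id": 9}))
assert canonical_edgekey(rec)[0] == (4, 9)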
def write_blocks(part_vol):
    logger = logging.getLogger(__name__)
    part, data = part_vol
    offset = part.get_offset()
    reloffset = part.get_reloffset()
    _, _, x_size = data.shape
    if x_size % blksize != 0:
        # check if padded
        raise ValueError("Data is not block aligned")

    shiftedoffset = (offset.z+reloffset.z, offset.y+reloffset.y, offset.x+reloffset.x)
    logger.info("Starting WRITE of partition at: {} size: {}".format(shiftedoffset, data.shape))
    node_service = retrieve_node_service(server, uuid, resource_server, resource_port, appname)

    # Find all non-zero blocks (and record by block index)
    block_coords = []
    for block_index, block_x in enumerate(range(0, x_size, blksize)):
        if not (data[:, :, block_x:block_x+blksize] == delimiter).all():
            # (Don't care about Z,Y indexes, just X-index)
            block_coords.append( (0, 0, block_index) )

    # Find *runs* of non-zero blocks
    block_runs = runlength_encode(block_coords, True)  # returns [[Z,Y,X1,X2], [Z,Y,X1,X2], ...]

    # Convert stop indexes from inclusive to exclusive
    block_runs[:,-1] += 1

    # Discard Z,Y indexes and convert from indexes to pixels
    ranges = blksize * block_runs[:, 2:4]

    # iterate through contiguous blocks and write to DVID
    # TODO: write compressed data directly into DVID
    for (data_x_start, data_x_end) in ranges:
        with Timer() as copy_timer:
            datacrop = data[:,:,data_x_start:data_x_end].copy()
        logger.info("Copied {}:{} in {:.3f} seconds".format(data_x_start, data_x_end, copy_timer.seconds))

        data_offset_zyx = (shiftedoffset[0], shiftedoffset[1], shiftedoffset[2] + data_x_start)

        if dataname is not None:
            with Timer() as put_timer:
                if not israw:
                    logger.info("STARTING Put: labels block {}".format(data_offset_zyx))
                    if resource_server != "" or dvid_info["dvid-server"].startswith("http://127.0.0.1"):
                        node_service.put_labels3D(dataname, datacrop, data_offset_zyx, compress=True, throttle=False)
                    else:
                        node_service.put_labels3D(dataname, datacrop, data_offset_zyx, compress=True)
                else:
                    logger.info("STARTING Put: raw block {}".format(data_offset_zyx))
                    if resource_server != "" or dvid_info["dvid-server"].startswith("http://127.0.0.1"):
                        node_service.put_gray3D(dataname, datacrop, data_offset_zyx, compress=False, throttle=False)
                    else:
                        node_service.put_gray3D(dataname, datacrop, data_offset_zyx, compress=False)
            logger.info("Put block {} in {:.3f} seconds".format(data_offset_zyx, put_timer.seconds))

        if dataname_lossy is not None:
            logger.info("STARTING Put: lossy block {}".format(data_offset_zyx))
            with Timer() as put_lossy_timer:
                if resource_server != "" or dvid_info["dvid-server"].startswith("http://127.0.0.1"):
                    node_service.put_gray3D(dataname_lossy, datacrop, data_offset_zyx, compress=False, throttle=False)
                else:
                    node_service.put_gray3D(dataname_lossy, datacrop, data_offset_zyx, compress=False)
            logger.info("Put lossy block {} in {:.3f} seconds".format(data_offset_zyx, put_lossy_timer.seconds))
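# write_blocks above relies on the repo's runlength_encode helper to turn
# non-blank block indices into contiguous x-ranges. A self-contained
# equivalent of that run detection (nonblank_x_ranges is an illustrative
# stand-in, not the repo helper), returning [start, stop) pixel ranges:
import numpy as np

def nonblank_x_ranges(data, blksize, delimiter):
    _, _, x_size = data.shape
    assert x_size % blksize == 0, "data must be block aligned"
    nonblank = [not (data[:, :, x:x + blksize] == delimiter).all()
                for x in range(0, x_size, blksize)]
    ranges, start = [], None
    for bi, keep in enumerate(nonblank + [False]):  # sentinel closes the last run
        if keep and start is None:
            start = bi
        elif not keep and start is not None:
            ranges.append((start * blksize, bi * blksize))
            start = None
    return ranges

vol = np.zeros((4, 4, 16), np.uint8)
vol[..., 0:8] = 1     # blocks 0-1 non-blank
vol[..., 12:16] = 3   # block 3 non-blank
assert nonblank_x_ranges(vol, 4, 0) == [(0, 8), (12, 16)]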