def testFailedProcessing(self):
    op = OpArrayPiper(graph=Graph())
    inputData = numpy.indices((100, 100)).sum(0)
    op.Input.setValue(inputData)

    roiList = []
    block_starts = getIntersectingBlocks([10, 10], ([0, 0], [100, 100]))
    for block_start in block_starts:
        roiList.append(getBlockBounds([100, 100], [10, 10], block_start))

    class SpecialException(Exception):
        pass

    def handleResult(roi, result):
        raise SpecialException("Intentional Exception: raised while handling the result")

    totalVolume = numpy.prod(inputData.shape)
    batch = RoiRequestBatch(op.Output, roiList.__iter__(), totalVolume, batchSize=10, allowParallelResults=False)
    batch.resultSignal.subscribe(handleResult)

    # FIXME: There are multiple places where the RoiRequestBatch tool should be prepared to handle exceptions.
    #        This only tests one of them (in the notify_finished() handler)
    try:
        batch.execute()
    except SpecialException:
        pass
    else:
        assert False, "Expected exception to be propagated out of the RoiRequestBatch."
class BigRequestStreamer(object):
    """
    Execute a big request by breaking it up into smaller requests.

    This class encapsulates the logic for dividing big rois into smaller ones to be executed separately.
    It relies on a RoiRequestBatch object, which is responsible for creating and scheduling the request for each roi.
    """
    def __init__(self, outputSlot, roi, minBlockShape, batchSize=None):
        self._outputSlot = outputSlot
        self._bigRoi = roi
        self._minBlockShape = minBlockShape

        if batchSize is None:
            batchSize = 2

        # Align the blocking with the start of the roi
        offsetRoi = ([0] * len(roi[0]), numpy.subtract(roi[1], roi[0]))
        self._minBlockStarts = getIntersectingBlocks(minBlockShape, offsetRoi)
        self._minBlockStarts += roi[0]  # Un-offset

        totalVolume = numpy.prod(numpy.subtract(roi[1], roi[0]))

        # For now, simply iterate over the min blocks
        # TODO: Auto-dilate block sizes based on CPU/RAM usage.
        def roiGen():
            block_iter = self._minBlockStarts.__iter__()
            while True:
                block_start = block_iter.next()

                # Use offset blocking
                offset_block_start = block_start - self._bigRoi[0]
                offset_data_shape = numpy.subtract(self._bigRoi[1], self._bigRoi[0])
                offset_block_bounds = getBlockBounds(offset_data_shape, minBlockShape, offset_block_start)

                # Un-offset
                block_bounds = (offset_block_bounds[0] + self._bigRoi[0],
                                offset_block_bounds[1] + self._bigRoi[0])
                logger.debug("Requesting Roi: {}".format(block_bounds))
                yield block_bounds

        self._requestBatch = RoiRequestBatch(self._outputSlot, roiGen(), totalVolume, batchSize)

    @property
    def progressSignal(self):
        return self._requestBatch.progressSignal

    @property
    def resultSignal(self):
        return self._requestBatch.resultSignal

    def execute(self):
        self._requestBatch.execute()
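# This early revision has no usage example in its docstring. The following is a minimal
# usage sketch (mirroring the doctest that later revisions of this class include); `op`
# is assumed to be any operator whose Output slot can serve the requested data.
total_roi = [(25, 65), (45, 95)]
streamer = BigRequestStreamer(op.Output, total_roi, minBlockShape=(10, 10), batchSize=2)

collected = []

def handle_block_result(roi, result):
    # Called once per completed block; no locking needed because result callbacks are serialized.
    collected.append((roi, result.sum()))

streamer.resultSignal.subscribe(handle_block_result)
streamer.progressSignal.subscribe(lambda progress: logger.debug("progress: {}".format(progress)))

# Blocks until every sub-request has completed; all data is delivered via resultSignal.
streamer.execute()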
def testBasic(self):
    op = OpArrayPiper(graph=Graph())
    inputData = numpy.indices((100, 100)).sum(0)
    op.Input.setValue(inputData)

    roiList = []
    block_starts = getIntersectingBlocks([10, 10], ([0, 0], [100, 100]))
    for block_start in block_starts:
        roiList.append(getBlockBounds([100, 100], [10, 10], block_start))

    results = numpy.zeros((100, 100), dtype=numpy.int32)
    resultslock = threading.Lock()

    resultsCount = [0]

    def handleResult(roi, result):
        acquired = resultslock.acquire(False)
        assert acquired, "resultslock is contested! Access to callback is supposed to be automatically serialized."
        results[roiToSlice(*roi)] = result
        logger.debug("Got result for {}".format(roi))
        resultslock.release()
        resultsCount[0] += 1

    progressList = []

    def handleProgress(progress):
        progressList.append(progress)
        logger.debug("Progress update: {}".format(progress))

    totalVolume = numpy.prod(inputData.shape)
    batch = RoiRequestBatch(op.Output, roiList.__iter__(), totalVolume, batchSize=10, allowParallelResults=False)
    batch.resultSignal.subscribe(handleResult)
    batch.progressSignal.subscribe(handleProgress)

    batch.execute()
    logger.debug("Got {} results".format(resultsCount[0]))
    assert (results == inputData).all()

    # Progress reporting MUST start with 0 and end with 100
    assert progressList[0] == 0, "Invalid progress reporting."
    assert progressList[-1] == 100, "Invalid progress reporting."

    # There should be some intermediate progress reporting, but exactly how much is unspecified.
    assert len(progressList) >= 10

    logger.debug("FINISHED")
def run_export(self) -> None:
    """Export an image from Input to Filepath."""
    path = pathlib.Path(self.Filepath.value)
    if path.exists():
        path.unlink()

    self._page_buf = _NdBuf(self._opReorderAxes.Output.meta.shape[:-2])

    batch = RoiRequestBatch(
        outputSlot=self._opReorderAxes.Output,
        roiIterator=_page_rois(*self._opReorderAxes.Output.meta.shape),
        totalVolume=np.prod(self._opReorderAxes.Output.meta.shape),
        batchSize=self._batch_size,
    )
    batch.progressSignal.subscribe(self.progressSignal)
    batch.resultSignal.subscribe(self._write_buffered_pages)
    batch.execute()
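# The _page_rois helper used above is defined elsewhere and not shown in this section.
# Purely as a hypothetical sketch (not the real helper), a page-wise roi generator that
# yields one (start, stop) roi per 2D page, iterating over all leading axes, could look
# roughly like this:
import itertools
from typing import Iterator, Tuple


def page_rois_sketch(*shape: int) -> Iterator[Tuple[Tuple[int, ...], Tuple[int, ...]]]:
    """Yield one roi per (y, x) page, assuming the last two axes are the in-page axes."""
    *outer_shape, h, w = shape
    for outer_index in itertools.product(*(range(n) for n in outer_shape)):
        start = tuple(outer_index) + (0, 0)
        stop = tuple(i + 1 for i in outer_index) + (h, w)
        yield (start, stop)


# e.g. for a (2, 3, 100, 200) volume this yields 6 rois, each covering one 100x200 page.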
def testPropagatesProcessingException(self, op_raising_at_3):
    roiList = [
        ((0, 0, 0), (4, 4, 4)),
        ((1, 1, 1), (4, 4, 4)),
        ((2, 2, 2), (4, 4, 4)),
        ((3, 3, 3), (4, 4, 4)),
    ]
    totalVolume = numpy.prod(op_raising_at_3.Output.meta.shape)
    batch = RoiRequestBatch(
        op_raising_at_3.Output, roiList.__iter__(), totalVolume, batchSize=1, allowParallelResults=False
    )

    with pytest.raises(ProcessingException):
        batch.execute()
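# The op_raising_at_3 fixture is defined elsewhere and not shown in this section. The
# sketch below is only a hypothetical illustration of the kind of fixture such a test
# might use: an operator that pipes its input through but raises on its third request.
# ProcessingException is assumed to be imported the same way the test module imports it;
# the real fixture may differ.
import numpy
import pytest
from lazyflow.graph import Graph, InputSlot, Operator, OutputSlot


class _OpRaisingAt3(Operator):
    Input = InputSlot()
    Output = OutputSlot()

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._execute_count = 0

    def setupOutputs(self):
        self.Output.meta.assignFrom(self.Input.meta)

    def execute(self, slot, subindex, roi, result):
        # Fail deliberately on the third request; otherwise pass the data through.
        self._execute_count += 1
        if self._execute_count == 3:
            raise ProcessingException("Intentional failure on the third request")
        result[...] = self.Input(roi.start, roi.stop).wait()

    def propagateDirty(self, slot, subindex, roi):
        self.Output.setDirty(roi)


@pytest.fixture
def op_raising_at_3():
    op = _OpRaisingAt3(graph=Graph())
    op.Input.setValue(numpy.zeros((4, 4, 4), dtype=numpy.uint8))
    return op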
class BigRequestStreamer(object):
    """
    Execute a big request by breaking it up into smaller requests.

    This class encapsulates the logic for dividing big rois into smaller ones to be executed separately.
    It relies on a :py:class:`RoiRequestBatch<lazyflow.utility.roiRequestBatch.RoiRequestBatch>` object,
    which is responsible for creating and scheduling the request for each roi.

    Example:

    >>> import sys
    >>> import vigra
    >>> from lazyflow.graph import Graph
    >>> from lazyflow.operators.operators import OpArrayCache

    >>> # Example data
    >>> data = numpy.indices( (100,100) ).sum(0)
    >>> data = vigra.taggedView( data, vigra.defaultAxistags('xy') )

    >>> op = OpArrayCache( graph=Graph() )
    >>> op.Input.setValue( data )

    >>> total_roi = [(25, 65), (45, 95)]

    >>> # Init with our output slot and roi to request.
    >>> # batchSize indicates the number of requests to spawn in parallel.
    >>> streamer = BigRequestStreamer( op.Output, total_roi, (10,10), batchSize=2, blockAlignment='relative' )

    >>> # Use a callback to handle sub-results one at a time.
    >>> result_count = [0]
    >>> result_total_sum = [0]
    >>> def handle_block_result(roi, result):
    ...     # No need for locking here if allowParallelResults=True.
    ...     result_count[0] += 1
    ...     result_total_sum[0] += result.sum()
    >>> streamer.resultSignal.subscribe( handle_block_result )

    >>> # Optional: Subscribe to progress updates
    >>> def handle_progress(progress):
    ...     if progress == 0:
    ...         sys.stdout.write("Progress: ")
    ...     sys.stdout.write( "{} ".format( progress ) )
    >>> streamer.progressSignal.subscribe( handle_progress )

    >>> # Execute the batch of requests, and block for the result.
    >>> streamer.execute()
    Progress: 0 16 33 50 66 83 100 100
    >>> print "Processed {} result blocks with a total sum of: {}".format( result_count[0], result_total_sum[0] )
    Processed 6 result blocks with a total sum of: 68400
    """
    def __init__(self, outputSlot, roi, blockshape=None, batchSize=None,
                 blockAlignment='absolute', allowParallelResults=False):
        """
        Constructor.

        :param outputSlot: The slot to request data from.
        :param roi: The roi `(start, stop)` of interest.  Will be broken up and requested via smaller requests.
        :param blockshape: The amount of data to request in each request.  If omitted, a default blockshape
                           is chosen by inspecting the metadata of the given slot.
        :param batchSize: The maximum number of requests to launch in parallel.  This should not be necessary
                          if the blockshape is small enough that you won't run out of RAM.
        :param blockAlignment: Determines how to align the blocks of the requests.
                               Choices are 'absolute' or 'relative'.
        :param allowParallelResults: If False, the resultSignal will not be called in parallel.
                                     In that case, your handler function has no need for locks.
        """
        self._outputSlot = outputSlot
        self._bigRoi = roi

        totalVolume = numpy.prod(numpy.subtract(roi[1], roi[0]))

        if batchSize is None:
            batchSize = 1000

        if blockshape is None:
            blockshape = self._determine_blockshape(outputSlot)

        assert blockAlignment in ['relative', 'absolute']
        if blockAlignment == 'relative':
            # Align the blocking with the start of the roi
            offsetRoi = ([0] * len(roi[0]), numpy.subtract(roi[1], roi[0]))
            block_starts = getIntersectingBlocks(blockshape, offsetRoi)
            block_starts += roi[0]  # Un-offset

            # For now, simply iterate over the min blocks
            # TODO: Auto-dilate block sizes based on CPU/RAM usage.
            def roiGen():
                block_iter = block_starts.__iter__()
                while True:
                    block_start = block_iter.next()

                    # Use offset blocking
                    offset_block_start = block_start - self._bigRoi[0]
                    offset_data_shape = numpy.subtract(self._bigRoi[1], self._bigRoi[0])
                    offset_block_bounds = getBlockBounds(offset_data_shape, blockshape, offset_block_start)

                    # Un-offset
                    block_bounds = (offset_block_bounds[0] + self._bigRoi[0],
                                    offset_block_bounds[1] + self._bigRoi[0])
                    logger.debug("Requesting Roi: {}".format(block_bounds))
                    yield block_bounds
        else:
            # Absolute blocking.
            # Blocks are simply relative to (0,0,0,...)
            # But we still clip the requests to the overall roi bounds.
            block_starts = getIntersectingBlocks(blockshape, roi)

            def roiGen():
                block_iter = block_starts.__iter__()
                while True:
                    block_start = block_iter.next()
                    block_bounds = getBlockBounds(outputSlot.meta.shape, blockshape, block_start)
                    block_intersecting_portion = getIntersection(block_bounds, roi)

                    logger.debug("Requesting Roi: {}".format(block_bounds))
                    yield block_intersecting_portion

        self._requestBatch = RoiRequestBatch(self._outputSlot, roiGen(), totalVolume, batchSize, allowParallelResults)

    def _determine_blockshape(self, outputSlot):
        """
        Choose a blockshape using the slot metadata (if available) or an arbitrary guess otherwise.
        """
        input_shape = outputSlot.meta.shape
        max_blockshape = input_shape
        ideal_blockshape = outputSlot.meta.ideal_blockshape
        ram_usage_per_requested_pixel = outputSlot.meta.ram_usage_per_requested_pixel

        num_threads = max(1, Request.global_thread_pool.num_workers)

        if lazyflow.AVAILABLE_RAM_MB != 0:
            available_ram = lazyflow.AVAILABLE_RAM_MB * 1e6
        else:
            available_ram = psutil.virtual_memory().available

        if ram_usage_per_requested_pixel is None:
            # Make a conservative guess: 2*(bytes for dtype) * (num channels) + (fudge factor=4)
            ram_usage_per_requested_pixel = 2 * outputSlot.meta.dtype().nbytes * outputSlot.meta.shape[-1] + 4
            logger.warn("Unknown per-pixel RAM requirement.  Making a guess.")

        # Safety factor (fudge factor): Double the estimated RAM usage per pixel
        safety_factor = 2.0
        logger.info("Estimated RAM usage per pixel is {} bytes * safety factor ({})"
                    .format(ram_usage_per_requested_pixel, safety_factor))
        ram_usage_per_requested_pixel *= safety_factor

        if ideal_blockshape is None:
            blockshape = determineBlockShape(input_shape,
                                             available_ram / (num_threads * ram_usage_per_requested_pixel))
            logger.warn("Chose an arbitrary request blockshape {}".format(blockshape))
        else:
            logger.info("determining blockshape assuming available_ram is {} GB, split between {} threads"
                        .format(available_ram / 1e9, num_threads))

            # By convention, ram_usage_per_requested_pixel refers to the ram used when requesting ALL channels of a 'pixel'
            # Therefore, we do not include the channel dimension in the blockshapes here.
            blockshape = determine_optimal_request_blockshape(max_blockshape[:-1],
                                                              ideal_blockshape[:-1],
                                                              ram_usage_per_requested_pixel,
                                                              num_threads,
                                                              available_ram)
            blockshape += (outputSlot.meta.shape[-1],)
            logger.info("Chose blockshape: {}".format(blockshape))
            logger.info("Estimated RAM usage per block is {} GB"
                        .format(ram_usage_per_requested_pixel * numpy.prod(blockshape[:-1]) / 1e9))

        return blockshape

    @property
    def resultSignal(self):
        """
        Results signal. Signature: ``f(roi, result)``.
        Guaranteed not to be called from multiple threads in parallel.
        """
        return self._requestBatch.resultSignal

    @property
    def progressSignal(self):
        """
        Progress Signal. Signature: ``f(progress_percent)``
        """
        return self._requestBatch.progressSignal

    def execute(self):
        """
        Request the data for the entire roi by breaking it up into many smaller requests,
        and wait for all of them to complete.
        A batch of N requests is launched, and subsequent requests are
        launched one-by-one as the earlier requests complete.  Thus, there
        will be N requests executing in parallel at all times.

        This method returns ``None``.  All results must be handled via the
        :py:obj:`resultSignal`.
        """
        self._requestBatch.execute()
class BigRequestStreamer(object):
    """
    Execute a big request by breaking it up into smaller requests.

    This class encapsulates the logic for dividing big rois into smaller ones to be executed separately.
    It relies on a :py:class:`RoiRequestBatch<lazyflow.utility.roiRequestBatch.RoiRequestBatch>` object,
    which is responsible for creating and scheduling the request for each roi.

    Example:

    >>> import sys
    >>> import vigra
    >>> from lazyflow.graph import Graph
    >>> from lazyflow.operators.operators import OpArrayCache

    >>> # Example data
    >>> data = numpy.indices( (100,100) ).sum(0)
    >>> data = vigra.taggedView( data, vigra.defaultAxistags('xy') )

    >>> op = OpArrayCache( graph=Graph() )
    >>> op.Input.setValue( data )

    >>> total_roi = [(25, 65), (45, 95)]

    >>> # Init with our output slot and roi to request.
    >>> # batchSize indicates the number of requests to spawn in parallel.
    >>> streamer = BigRequestStreamer( op.Output, total_roi, (10,10), batchSize=2, blockAlignment='relative' )

    >>> # Use a callback to handle sub-results one at a time.
    >>> result_count = [0]
    >>> result_total_sum = [0]
    >>> def handle_block_result(roi, result):
    ...     # No need for locking here if allowParallelResults=True.
    ...     result_count[0] += 1
    ...     result_total_sum[0] += result.sum()
    >>> streamer.resultSignal.subscribe( handle_block_result )

    >>> # Optional: Subscribe to progress updates
    >>> def handle_progress(progress):
    ...     if progress == 0:
    ...         sys.stdout.write("Progress: ")
    ...     sys.stdout.write( "{} ".format( progress ) )
    >>> streamer.progressSignal.subscribe( handle_progress )

    >>> # Execute the batch of requests, and block for the result.
    >>> streamer.execute()
    Progress: 0 16 33 50 66 83 100 100
    >>> print "Processed {} result blocks with a total sum of: {}".format( result_count[0], result_total_sum[0] )
    Processed 6 result blocks with a total sum of: 68400
    """
    def __init__(self, outputSlot, roi, blockshape=None, batchSize=None,
                 blockAlignment='absolute', allowParallelResults=False):
        """
        Constructor.

        :param outputSlot: The slot to request data from.
        :param roi: The roi `(start, stop)` of interest.  Will be broken up and requested via smaller requests.
        :param blockshape: The amount of data to request in each request.  If omitted, a default blockshape
                           is chosen by inspecting the metadata of the given slot.
        :param batchSize: The maximum number of requests to launch in parallel.  This should not be necessary
                          if the blockshape is small enough that you won't run out of RAM.
        :param blockAlignment: Determines how to align the blocks of the requests.
                               Choices are 'absolute' or 'relative'.
        :param allowParallelResults: If False, the resultSignal will not be called in parallel.
                                     In that case, your handler function has no need for locks.
        """
        self._outputSlot = outputSlot
        self._bigRoi = roi

        totalVolume = numpy.prod(numpy.subtract(roi[1], roi[0]))

        if batchSize is None:
            batchSize = 1000

        if blockshape is None:
            blockshape = self._determine_blockshape(outputSlot)

        assert blockAlignment in ['relative', 'absolute']
        if blockAlignment == 'relative':
            # Align the blocking with the start of the roi
            offsetRoi = ([0] * len(roi[0]), numpy.subtract(roi[1], roi[0]))
            block_starts = getIntersectingBlocks(blockshape, offsetRoi)
            block_starts += roi[0]  # Un-offset

            # For now, simply iterate over the min blocks
            # TODO: Auto-dilate block sizes based on CPU/RAM usage.
            def roiGen():
                block_iter = block_starts.__iter__()
                while True:
                    block_start = block_iter.next()

                    # Use offset blocking
                    offset_block_start = block_start - self._bigRoi[0]
                    offset_data_shape = numpy.subtract(self._bigRoi[1], self._bigRoi[0])
                    offset_block_bounds = getBlockBounds(offset_data_shape, blockshape, offset_block_start)

                    # Un-offset
                    block_bounds = (offset_block_bounds[0] + self._bigRoi[0],
                                    offset_block_bounds[1] + self._bigRoi[0])
                    logger.debug("Requesting Roi: {}".format(block_bounds))
                    yield block_bounds
        else:
            # Absolute blocking.
            # Blocks are simply relative to (0,0,0,...)
            # But we still clip the requests to the overall roi bounds.
            block_starts = getIntersectingBlocks(blockshape, roi)

            def roiGen():
                block_iter = block_starts.__iter__()
                while True:
                    block_start = block_iter.next()
                    block_bounds = getBlockBounds(outputSlot.meta.shape, blockshape, block_start)
                    block_intersecting_portion = getIntersection(block_bounds, roi)

                    logger.debug("Requesting Roi: {}".format(block_bounds))
                    yield block_intersecting_portion

        self._requestBatch = RoiRequestBatch(self._outputSlot, roiGen(), totalVolume, batchSize, allowParallelResults)

    def _determine_blockshape(self, outputSlot):
        """
        Choose a blockshape using the slot metadata (if available) or an arbitrary guess otherwise.
        """
        input_shape = outputSlot.meta.shape
        max_blockshape = input_shape
        ideal_blockshape = outputSlot.meta.ideal_blockshape
        ram_usage_per_requested_pixel = outputSlot.meta.ram_usage_per_requested_pixel

        num_threads = Request.global_thread_pool.num_workers
        available_ram = psutil.virtual_memory().available

        # Fudge factor: Reduce RAM usage by a bit
        available_ram *= 0.5

        if ram_usage_per_requested_pixel is None:
            # Make a conservative guess: 2*(bytes for dtype) * (num channels) + (fudge factor=4)
            ram_usage_per_requested_pixel = 4 + 2 * outputSlot.meta.dtype().nbytes * outputSlot.meta.shape[-1]
            logger.warn("Unknown RAM usage.  Making a guess.")
        else:
            logger.info("Estimated RAM usage per pixel is {} bytes"
                        .format(ram_usage_per_requested_pixel))

        if ideal_blockshape is None:
            blockshape = determineBlockShape(input_shape,
                                             available_ram / (num_threads * ram_usage_per_requested_pixel))
            logger.warn("Chose an arbitrary request blockshape {}".format(blockshape))
        else:
            logger.info("determining blockshape assuming available_ram is {} GB, split between {} threads"
                        .format(available_ram / 1e9, num_threads))

            # By convention, ram_usage_per_requested_pixel refers to the ram used when requesting ALL channels of a 'pixel'
            # Therefore, we do not include the channel dimension in the blockshapes here.
            blockshape = determine_optimal_request_blockshape(max_blockshape[:-1],
                                                              ideal_blockshape[:-1],
                                                              ram_usage_per_requested_pixel,
                                                              num_threads,
                                                              available_ram)
            blockshape += (outputSlot.meta.shape[-1],)
            logger.info("Chose blockshape: {}".format(blockshape))
            logger.info("Estimated RAM usage per block is {} GB"
                        .format(ram_usage_per_requested_pixel * numpy.prod(blockshape[:-1]) / 1e9))

        return blockshape

    @property
    def resultSignal(self):
        """
        Results signal. Signature: ``f(roi, result)``.
        Guaranteed not to be called from multiple threads in parallel.
        """
        return self._requestBatch.resultSignal

    @property
    def progressSignal(self):
        """
        Progress Signal. Signature: ``f(progress_percent)``
        """
        return self._requestBatch.progressSignal

    def execute(self):
        """
        Request the data for the entire roi by breaking it up into many smaller requests,
        and wait for all of them to complete.
        A batch of N requests is launched, and subsequent requests are
        launched one-by-one as the earlier requests complete.  Thus, there
        will be N requests executing in parallel at all times.

        This method returns ``None``.  All results must be handled via the
        :py:obj:`resultSignal`.
        """
        self._requestBatch.execute()
class BigRequestStreamer(object):
    """
    Execute a big request by breaking it up into smaller requests.

    This class encapsulates the logic for dividing big rois into smaller ones to be executed separately.
    It relies on a :py:class:`RoiRequestBatch<lazyflow.utility.roiRequestBatch.RoiRequestBatch>` object,
    which is responsible for creating and scheduling the request for each roi.

    Example:

    >>> import sys
    >>> import vigra
    >>> from lazyflow.graph import Graph
    >>> from lazyflow.operators.operators import OpArrayCache

    >>> # Example data
    >>> data = numpy.indices( (100,100) ).sum(0)
    >>> data = vigra.taggedView( data, vigra.defaultAxistags('xy') )

    >>> op = OpArrayCache( graph=Graph() )
    >>> op.Input.setValue( data )

    >>> total_roi = [(25, 65), (45, 95)]

    >>> # Init with our output slot and roi to request.
    >>> # batchSize indicates the number of requests to spawn in parallel.
    >>> streamer = BigRequestStreamer( op.Output, total_roi, (10,10), batchSize=2 )

    >>> # Use a callback to handle sub-results one at a time.
    >>> result_count = [0]
    >>> result_total_sum = [0]
    >>> def handle_block_result(roi, result):
    ...     # No need for locking here.
    ...     result_count[0] += 1
    ...     result_total_sum[0] += result.sum()
    >>> streamer.resultSignal.subscribe( handle_block_result )

    >>> # Optional: Subscribe to progress updates
    >>> def handle_progress(progress):
    ...     if progress == 0:
    ...         sys.stdout.write("Progress: ")
    ...     sys.stdout.write( "{} ".format( progress ) )
    >>> streamer.progressSignal.subscribe( handle_progress )

    >>> # Execute the batch of requests, and block for the result.
    >>> streamer.execute()
    Progress: 0 16 33 50 66 83 100 100
    >>> print "Processed {} result blocks with a total sum of: {}".format( result_count[0], result_total_sum[0] )
    Processed 6 result blocks with a total sum of: 68400
    """
    def __init__(self, outputSlot, roi, minBlockShape, batchSize=None):
        """
        Constructor.

        :param outputSlot: The slot to request data from.
        :param roi: The roi `(start, stop)` of interest.  Will be broken up and requested via smaller requests.
        :param minBlockShape: The minimum amount of data to request in each request.
                              Note: The current implementation breaks the big request into smaller requests of
                              exactly ``minBlockShape`` size.  Future implementations could concatenate smaller
                              requests if it appears the system is not being overloaded by the smaller requests.
        :param batchSize: The maximum number of requests to launch in parallel.
        """
        self._outputSlot = outputSlot
        self._bigRoi = roi
        self._minBlockShape = minBlockShape

        if batchSize is None:
            batchSize = 2

        # Align the blocking with the start of the roi
        offsetRoi = ([0] * len(roi[0]), numpy.subtract(roi[1], roi[0]))
        self._minBlockStarts = getIntersectingBlocks(minBlockShape, offsetRoi)
        self._minBlockStarts += roi[0]  # Un-offset

        totalVolume = numpy.prod(numpy.subtract(roi[1], roi[0]))

        # For now, simply iterate over the min blocks
        # TODO: Auto-dilate block sizes based on CPU/RAM usage.
        def roiGen():
            block_iter = self._minBlockStarts.__iter__()
            while True:
                block_start = block_iter.next()

                # Use offset blocking
                offset_block_start = block_start - self._bigRoi[0]
                offset_data_shape = numpy.subtract(self._bigRoi[1], self._bigRoi[0])
                offset_block_bounds = getBlockBounds(offset_data_shape, minBlockShape, offset_block_start)

                # Un-offset
                block_bounds = (offset_block_bounds[0] + self._bigRoi[0],
                                offset_block_bounds[1] + self._bigRoi[0])
                logger.debug("Requesting Roi: {}".format(block_bounds))
                yield block_bounds

        self._requestBatch = RoiRequestBatch(self._outputSlot, roiGen(), totalVolume, batchSize)

    @property
    def resultSignal(self):
        """
        Results signal. Signature: ``f(roi, result)``.
        Guaranteed not to be called from multiple threads in parallel.
        """
        return self._requestBatch.resultSignal

    @property
    def progressSignal(self):
        """
        Progress Signal. Signature: ``f(progress_percent)``
        """
        return self._requestBatch.progressSignal

    def execute(self):
        """
        Request the data for the entire roi by breaking it up into many smaller requests,
        and wait for all of them to complete.
        A batch of N requests is launched, and subsequent requests are
        launched one-by-one as the earlier requests complete.  Thus, there
        will be N requests executing in parallel at all times.

        This method returns ``None``.  All results must be handled via the
        :py:obj:`resultSignal`.
        """
        self._requestBatch.execute()
class BigRequestStreamer(object):
    """
    Execute a big request by breaking it up into smaller requests.

    This class encapsulates the logic for dividing big rois into smaller ones to be executed separately.
    It relies on a :py:class:`RoiRequestBatch<lazyflow.utility.roiRequestBatch.RoiRequestBatch>` object,
    which is responsible for creating and scheduling the request for each roi.

    Example:

    >>> import sys
    >>> import vigra
    >>> from lazyflow.graph import Graph
    >>> from lazyflow.operators import OpBlockedArrayCache

    >>> # Example data
    >>> data = numpy.indices( (100,100) ).sum(0)
    >>> data = vigra.taggedView( data, vigra.defaultAxistags('xy') )

    >>> op = OpBlockedArrayCache( graph=Graph() )
    >>> op.Input.setValue( data )

    >>> total_roi = [(25, 65), (45, 95)]

    >>> # Init with our output slot and roi to request.
    >>> # batchSize indicates the number of requests to spawn in parallel.
    >>> streamer = BigRequestStreamer( op.Output, total_roi, (10,10), batchSize=2, blockAlignment='relative' )

    >>> # Use a callback to handle sub-results one at a time.
    >>> result_count = [0]
    >>> result_total_sum = [0]
    >>> def handle_block_result(roi, result):
    ...     # No need for locking here if allowParallelResults=True.
    ...     result_count[0] += 1
    ...     result_total_sum[0] += result.sum()
    >>> streamer.resultSignal.subscribe( handle_block_result )

    >>> # Optional: Subscribe to progress updates
    >>> def handle_progress(progress):
    ...     if progress == 0:
    ...         sys.stdout.write("Progress:")
    ...     sys.stdout.write(f" {progress}")
    >>> streamer.progressSignal.subscribe( handle_progress )

    >>> # Execute the batch of requests, and block for the result.
    >>> streamer.execute()
    Progress: 0 16 33 50 66 83 100 100
    >>> print(f"Processed {result_count[0]} result blocks with a total sum of: {result_total_sum[0]}")
    Processed 6 result blocks with a total sum of: 68400
    """

    def __init__(self, outputSlot, roi, blockshape=None, batchSize=None,
                 blockAlignment="absolute", allowParallelResults=False):
        """
        Constructor.

        :param outputSlot: The slot to request data from.
        :param roi: The roi `(start, stop)` of interest.  Will be broken up and requested via smaller requests.
        :param blockshape: The amount of data to request in each request.  If omitted, a default blockshape
                           is chosen by inspecting the metadata of the given slot.
        :param batchSize: The maximum number of requests to launch in parallel.  This should not be necessary
                          if the blockshape is small enough that you won't run out of RAM.
        :param blockAlignment: Determines how to align the blocks of the requests.
                               Choices are 'absolute' or 'relative'.
        :param allowParallelResults: If False, the resultSignal will not be called in parallel.
                                     In that case, your handler function has no need for locks.
        """
        self._outputSlot = outputSlot
        self._bigRoi = roi
        self._num_threads = max(1, Request.global_thread_pool.num_workers)

        totalVolume = numpy.prod(numpy.subtract(roi[1], roi[0]))

        if batchSize is None:
            batchSize = self._num_threads

        if blockshape is None:
            blockshape = self._determine_blockshape(outputSlot)

        assert blockAlignment in ["relative", "absolute"]
        if blockAlignment == "relative":
            # Align the blocking with the start of the roi
            offsetRoi = ([0] * len(roi[0]), numpy.subtract(roi[1], roi[0]))
            block_starts = getIntersectingBlocks(blockshape, offsetRoi)
            block_starts += roi[0]  # Un-offset

            # For now, simply iterate over the min blocks
            # TODO: Auto-dilate block sizes based on CPU/RAM usage.
            def roiGen():
                block_iter = block_starts.__iter__()
                while True:
                    try:
                        block_start = next(block_iter)
                    except StopIteration:
                        # As of Python 3.7, not allowed to let StopIteration exceptions escape a generator
                        # https://www.python.org/dev/peps/pep-0479
                        break
                    else:
                        # Use offset blocking
                        offset_block_start = block_start - self._bigRoi[0]
                        offset_data_shape = numpy.subtract(self._bigRoi[1], self._bigRoi[0])
                        offset_block_bounds = getBlockBounds(offset_data_shape, blockshape, offset_block_start)

                        # Un-offset
                        block_bounds = (
                            offset_block_bounds[0] + self._bigRoi[0],
                            offset_block_bounds[1] + self._bigRoi[0],
                        )
                        logger.debug("Requesting Roi: {}".format(block_bounds))
                        yield block_bounds
        else:
            # Absolute blocking.
            # Blocks are simply relative to (0,0,0,...)
            # But we still clip the requests to the overall roi bounds.
            block_starts = getIntersectingBlocks(blockshape, roi)

            def roiGen():
                block_iter = block_starts.__iter__()
                while True:
                    try:
                        block_start = next(block_iter)
                    except StopIteration:
                        # As of Python 3.7, not allowed to let StopIteration exceptions escape a generator
                        # https://www.python.org/dev/peps/pep-0479
                        break
                    else:
                        block_bounds = getBlockBounds(outputSlot.meta.shape, blockshape, block_start)
                        block_intersecting_portion = getIntersection(block_bounds, roi)

                        logger.debug("Requesting Roi: {}".format(block_bounds))
                        yield block_intersecting_portion

        self._requestBatch = RoiRequestBatch(self._outputSlot, roiGen(), totalVolume, batchSize, allowParallelResults)

    def _determine_blockshape(self, outputSlot):
        """
        Choose a blockshape using the slot metadata (if available) or an arbitrary guess otherwise.
        """
        input_shape = outputSlot.meta.shape
        ideal_blockshape = outputSlot.meta.ideal_blockshape
        ram_usage_per_requested_pixel = outputSlot.meta.ram_usage_per_requested_pixel
        max_blockshape = outputSlot.meta.max_blockshape or input_shape

        num_channels = 1
        tagged_shape = outputSlot.meta.getTaggedShape()
        available_ram = Memory.getAvailableRamComputation()

        # Generally, we don't want to split requests across channels.
        if "c" in list(tagged_shape.keys()):
            num_channels = tagged_shape["c"]
            channel_index = list(tagged_shape.keys()).index("c")
            input_shape = input_shape[:channel_index] + input_shape[channel_index + 1:]
            max_blockshape = max_blockshape[:channel_index] + max_blockshape[channel_index + 1:]
            if ideal_blockshape:
                # Never enlarge 'ideal' in the channel dimension.
                num_channels = ideal_blockshape[channel_index]
                ideal_blockshape = ideal_blockshape[:channel_index] + ideal_blockshape[channel_index + 1:]
            del tagged_shape["c"]

        # Generally, we don't want to join time slices
        if "t" in tagged_shape.keys():
            blockshape_time_steps = 1
            time_index = list(tagged_shape.keys()).index("t")
            input_shape = input_shape[:time_index] + input_shape[time_index + 1:]
            max_blockshape = max_blockshape[:time_index] + max_blockshape[time_index + 1:]
            if ideal_blockshape:
                # Never enlarge 'ideal' in the time dimension.
                blockshape_time_steps = ideal_blockshape[time_index]
                ideal_blockshape = ideal_blockshape[:time_index] + ideal_blockshape[time_index + 1:]
                available_ram /= blockshape_time_steps
            del tagged_shape["t"]

        if ram_usage_per_requested_pixel is None:
            # Make a conservative guess: 2*(bytes for dtype) * (num channels) + (fudge factor=4)
            ram_usage_per_requested_pixel = 2 * outputSlot.meta.dtype().nbytes * num_channels + 4
            warnings.warn("Unknown per-pixel RAM requirement.  Making a guess.")

        # Safety factor (fudge factor): Double the estimated RAM usage per pixel
        safety_factor = 2.0
        logger.info("Estimated RAM usage per pixel is {} * safety factor ({})"
                    .format(Memory.format(ram_usage_per_requested_pixel), safety_factor))
        ram_usage_per_requested_pixel *= safety_factor

        if ideal_blockshape is None:
            blockshape = determineBlockShape(
                input_shape, (available_ram // (self._num_threads * ram_usage_per_requested_pixel)))
            blockshape = tuple(numpy.minimum(max_blockshape, blockshape))
            warnings.warn("Chose an arbitrary request blockshape")
        else:
            logger.info("determining blockshape assuming available_ram is {}"
                        ", split between {} threads".format(Memory.format(available_ram), self._num_threads))

            # By convention, ram_usage_per_requested_pixel refers to the ram used when requesting ALL channels of a 'pixel'
            # Therefore, we do not include the channel dimension in the blockshapes here.
            #
            # Also, it rarely makes sense to request more than one time slice, so we omit that, too. (See above.)
            blockshape = determine_optimal_request_blockshape(
                max_blockshape, ideal_blockshape, ram_usage_per_requested_pixel, self._num_threads, available_ram)

        # compute the RAM size of the block before adding back t and c dimensions
        fmt = Memory.format(ram_usage_per_requested_pixel * numpy.prod(blockshape))

        # If we removed time and channel from consideration, add them back now before returning
        if "t" in outputSlot.meta.getAxisKeys():
            blockshape = blockshape[:time_index] + (blockshape_time_steps,) + blockshape[time_index:]
        if "c" in outputSlot.meta.getAxisKeys():
            blockshape = blockshape[:channel_index] + (num_channels,) + blockshape[channel_index:]

        logger.info("Chose blockshape: {}".format(blockshape))
        logger.info("Estimated RAM usage per block is {}".format(fmt))
        return blockshape

    @property
    def resultSignal(self):
        """
        Results signal. Signature: ``f(roi, result)``.
        Guaranteed not to be called from multiple threads in parallel.
        """
        return self._requestBatch.resultSignal

    @property
    def progressSignal(self):
        """
        Progress Signal. Signature: ``f(progress_percent)``
        """
        return self._requestBatch.progressSignal

    def execute(self):
        """
        Request the data for the entire roi by breaking it up into many smaller requests,
        and wait for all of them to complete.
        A batch of N requests is launched, and subsequent requests are
        launched one-by-one as the earlier requests complete.  Thus, there
        will be N requests executing in parallel at all times.

        This method returns ``None``.  All results must be handled via the
        :py:obj:`resultSignal`.
        """
        self._requestBatch.execute()
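# Rough worked example of the RAM budgeting used in _determine_blockshape above. The
# numbers here are illustrative assumptions, not values taken from the code: with 8 GB
# available for computation, 8 worker threads, and ~12 bytes per requested pixel after
# the 2x safety factor, each parallel request should stay below ~83 million pixels.
available_ram_example = 8e9       # bytes available for computation (assumed)
num_threads_example = 8           # parallel requests (assumed)
bytes_per_pixel_example = 12      # estimated RAM per requested pixel, incl. safety factor (assumed)

pixels_per_request = available_ram_example / (num_threads_example * bytes_per_pixel_example)
cube_edge = int(pixels_per_request ** (1 / 3.0))   # ~436 voxels per side for a cubic 3D block
print(int(pixels_per_request), cube_edge)          # 83333333 436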
class BigRequestStreamer(object):
    """
    Execute a big request by breaking it up into smaller requests.

    This class encapsulates the logic for dividing big rois into smaller ones to be executed separately.
    It relies on a :py:class:`RoiRequestBatch<lazyflow.utility.roiRequestBatch.RoiRequestBatch>` object,
    which is responsible for creating and scheduling the request for each roi.

    Example:

    >>> import sys
    >>> import vigra
    >>> from lazyflow.graph import Graph
    >>> from lazyflow.operators import OpBlockedArrayCache

    >>> # Example data
    >>> data = numpy.indices( (100,100) ).sum(0)
    >>> data = vigra.taggedView( data, vigra.defaultAxistags('xy') )

    >>> op = OpBlockedArrayCache( graph=Graph() )
    >>> op.Input.setValue( data )

    >>> total_roi = [(25, 65), (45, 95)]

    >>> # Init with our output slot and roi to request.
    >>> # batchSize indicates the number of requests to spawn in parallel.
    >>> streamer = BigRequestStreamer( op.Output, total_roi, (10,10), batchSize=2, blockAlignment='relative' )

    >>> # Use a callback to handle sub-results one at a time.
    >>> result_count = [0]
    >>> result_total_sum = [0]
    >>> def handle_block_result(roi, result):
    ...     # No need for locking here if allowParallelResults=True.
    ...     result_count[0] += 1
    ...     result_total_sum[0] += result.sum()
    >>> streamer.resultSignal.subscribe( handle_block_result )

    >>> # Optional: Subscribe to progress updates
    >>> def handle_progress(progress):
    ...     if progress == 0:
    ...         sys.stdout.write("Progress: ")
    ...     sys.stdout.write( "{} ".format( progress ) )
    >>> streamer.progressSignal.subscribe( handle_progress )

    >>> # Execute the batch of requests, and block for the result.
    >>> streamer.execute()
    Progress: 0 16 33 50 66 83 100 100
    >>> print("Processed {} result blocks with a total sum of: {}".format( result_count[0], result_total_sum[0] ))
    Processed 6 result blocks with a total sum of: 68400
    """

    def __init__(
        self, outputSlot, roi, blockshape=None, batchSize=None, blockAlignment="absolute", allowParallelResults=False
    ):
        """
        Constructor.

        :param outputSlot: The slot to request data from.
        :param roi: The roi `(start, stop)` of interest.  Will be broken up and requested via smaller requests.
        :param blockshape: The amount of data to request in each request.  If omitted, a default blockshape
                           is chosen by inspecting the metadata of the given slot.
        :param batchSize: The maximum number of requests to launch in parallel.  This should not be necessary
                          if the blockshape is small enough that you won't run out of RAM.
        :param blockAlignment: Determines how to align the blocks of the requests.
                               Choices are 'absolute' or 'relative'.
        :param allowParallelResults: If False, the resultSignal will not be called in parallel.
                                     In that case, your handler function has no need for locks.
        """
        self._outputSlot = outputSlot
        self._bigRoi = roi
        self._num_threads = max(1, Request.global_thread_pool.num_workers)

        totalVolume = numpy.prod(numpy.subtract(roi[1], roi[0]))

        if batchSize is None:
            batchSize = self._num_threads

        if blockshape is None:
            blockshape = self._determine_blockshape(outputSlot)

        assert blockAlignment in ["relative", "absolute"]
        if blockAlignment == "relative":
            # Align the blocking with the start of the roi
            offsetRoi = ([0] * len(roi[0]), numpy.subtract(roi[1], roi[0]))
            block_starts = getIntersectingBlocks(blockshape, offsetRoi)
            block_starts += roi[0]  # Un-offset

            # For now, simply iterate over the min blocks
            # TODO: Auto-dilate block sizes based on CPU/RAM usage.
            def roiGen():
                block_iter = block_starts.__iter__()
                while True:
                    try:
                        block_start = next(block_iter)
                    except StopIteration:
                        # As of Python 3.7, not allowed to let StopIteration exceptions escape a generator
                        # https://www.python.org/dev/peps/pep-0479
                        break
                    else:
                        # Use offset blocking
                        offset_block_start = block_start - self._bigRoi[0]
                        offset_data_shape = numpy.subtract(self._bigRoi[1], self._bigRoi[0])
                        offset_block_bounds = getBlockBounds(offset_data_shape, blockshape, offset_block_start)

                        # Un-offset
                        block_bounds = (
                            offset_block_bounds[0] + self._bigRoi[0],
                            offset_block_bounds[1] + self._bigRoi[0],
                        )
                        logger.debug("Requesting Roi: {}".format(block_bounds))
                        yield block_bounds
        else:
            # Absolute blocking.
            # Blocks are simply relative to (0,0,0,...)
            # But we still clip the requests to the overall roi bounds.
            block_starts = getIntersectingBlocks(blockshape, roi)

            def roiGen():
                block_iter = block_starts.__iter__()
                while True:
                    try:
                        block_start = next(block_iter)
                    except StopIteration:
                        # As of Python 3.7, not allowed to let StopIteration exceptions escape a generator
                        # https://www.python.org/dev/peps/pep-0479
                        break
                    else:
                        block_bounds = getBlockBounds(outputSlot.meta.shape, blockshape, block_start)
                        block_intersecting_portion = getIntersection(block_bounds, roi)

                        logger.debug("Requesting Roi: {}".format(block_bounds))
                        yield block_intersecting_portion

        self._requestBatch = RoiRequestBatch(self._outputSlot, roiGen(), totalVolume, batchSize, allowParallelResults)

    def _determine_blockshape(self, outputSlot):
        """
        Choose a blockshape using the slot metadata (if available) or an arbitrary guess otherwise.
        """
        input_shape = outputSlot.meta.shape
        ideal_blockshape = outputSlot.meta.ideal_blockshape
        ram_usage_per_requested_pixel = outputSlot.meta.ram_usage_per_requested_pixel
        max_blockshape = outputSlot.meta.max_blockshape or input_shape

        num_channels = 1
        tagged_shape = outputSlot.meta.getTaggedShape()
        available_ram = Memory.getAvailableRamComputation()

        # Generally, we don't want to split requests across channels.
        if "c" in list(tagged_shape.keys()):
            num_channels = tagged_shape["c"]
            channel_index = list(tagged_shape.keys()).index("c")
            input_shape = input_shape[:channel_index] + input_shape[channel_index + 1:]
            max_blockshape = max_blockshape[:channel_index] + max_blockshape[channel_index + 1:]
            if ideal_blockshape:
                # Never enlarge 'ideal' in the channel dimension.
                num_channels = ideal_blockshape[channel_index]
                ideal_blockshape = ideal_blockshape[:channel_index] + ideal_blockshape[channel_index + 1:]
            del tagged_shape["c"]

        # Generally, we don't want to join time slices
        if "t" in tagged_shape.keys():
            blockshape_time_steps = 1
            time_index = list(tagged_shape.keys()).index("t")
            input_shape = input_shape[:time_index] + input_shape[time_index + 1:]
            max_blockshape = max_blockshape[:time_index] + max_blockshape[time_index + 1:]
            if ideal_blockshape:
                # Never enlarge 'ideal' in the time dimension.
                blockshape_time_steps = ideal_blockshape[time_index]
                ideal_blockshape = ideal_blockshape[:time_index] + ideal_blockshape[time_index + 1:]
                available_ram /= blockshape_time_steps
            del tagged_shape["t"]

        if ram_usage_per_requested_pixel is None:
            # Make a conservative guess: 2*(bytes for dtype) * (num channels) + (fudge factor=4)
            ram_usage_per_requested_pixel = 2 * outputSlot.meta.dtype().nbytes * num_channels + 4
            warnings.warn("Unknown per-pixel RAM requirement.  Making a guess.")

        # Safety factor (fudge factor): Double the estimated RAM usage per pixel
        safety_factor = 2.0
        logger.info(
            "Estimated RAM usage per pixel is {} * safety factor ({})".format(
                Memory.format(ram_usage_per_requested_pixel), safety_factor
            )
        )
        ram_usage_per_requested_pixel *= safety_factor

        if ideal_blockshape is None:
            blockshape = determineBlockShape(
                input_shape, (available_ram // (self._num_threads * ram_usage_per_requested_pixel))
            )
            blockshape = tuple(numpy.minimum(max_blockshape, blockshape))
            warnings.warn("Chose an arbitrary request blockshape")
        else:
            logger.info(
                "determining blockshape assuming available_ram is {}"
                ", split between {} threads".format(Memory.format(available_ram), self._num_threads)
            )

            # By convention, ram_usage_per_requested_pixel refers to the ram used when requesting ALL channels of a 'pixel'
            # Therefore, we do not include the channel dimension in the blockshapes here.
            #
            # Also, it rarely makes sense to request more than one time slice, so we omit that, too. (See above.)
            blockshape = determine_optimal_request_blockshape(
                max_blockshape, ideal_blockshape, ram_usage_per_requested_pixel, self._num_threads, available_ram
            )

        # If we removed time and channel from consideration, add them back now before returning
        if "t" in outputSlot.meta.getAxisKeys():
            blockshape = blockshape[:time_index] + (blockshape_time_steps,) + blockshape[time_index:]
        if "c" in outputSlot.meta.getAxisKeys():
            blockshape = blockshape[:channel_index] + (num_channels,) + blockshape[channel_index:]

        logger.info("Chose blockshape: {}".format(blockshape))
        fmt = Memory.format(ram_usage_per_requested_pixel * numpy.prod(blockshape[:-1]))
        logger.info("Estimated RAM usage per block is {}".format(fmt))
        return blockshape

    @property
    def resultSignal(self):
        """
        Results signal. Signature: ``f(roi, result)``.
        Guaranteed not to be called from multiple threads in parallel.
        """
        return self._requestBatch.resultSignal

    @property
    def progressSignal(self):
        """
        Progress Signal. Signature: ``f(progress_percent)``
        """
        return self._requestBatch.progressSignal

    def execute(self):
        """
        Request the data for the entire roi by breaking it up into many smaller requests,
        and wait for all of them to complete.
        A batch of N requests is launched, and subsequent requests are
        launched one-by-one as the earlier requests complete.  Thus, there
        will be N requests executing in parallel at all times.

        This method returns ``None``.  All results must be handled via the
        :py:obj:`resultSignal`.
        """
        self._requestBatch.execute()
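# Quick illustration of the 'relative' vs 'absolute' blockAlignment modes above, using
# the roi and blockshape from the docstring example. This is a plain-arithmetic sketch,
# not a call into the lazyflow helpers (getIntersectingBlocks / getBlockBounds).
import itertools

roi_start, roi_stop = (25, 65), (45, 95)
blockshape = (10, 10)


def clipped_blocks(grid_starts_per_axis):
    """Clip each grid-aligned block to the roi and return its (start, stop) bounds."""
    bounds = []
    for grid_start in itertools.product(*grid_starts_per_axis):
        start = tuple(max(r, g) for r, g in zip(roi_start, grid_start))
        stop = tuple(min(r, g + b) for r, g, b in zip(roi_stop, grid_start, blockshape))
        bounds.append((start, stop))
    return bounds


# 'relative': the block grid is anchored at the roi start.
relative = clipped_blocks([range(s, e, b) for s, e, b in zip(roi_start, roi_stop, blockshape)])

# 'absolute': the block grid is anchored at the global origin, and each block is clipped to the roi.
absolute = clipped_blocks([range((s // b) * b, e, b) for s, e, b in zip(roi_start, roi_stop, blockshape)])

print(len(relative), relative[0])  # 6 ((25, 65), (35, 75))
print(len(absolute), absolute[0])  # 12 ((25, 65), (30, 70))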