Example #1
0
 def testFailedProcessing(self):
     """
     Check that an exception raised inside a result handler is propagated
     out of ``RoiRequestBatch.execute()`` to the caller.
     """
     piper = OpArrayPiper( graph=Graph() )
     data = numpy.indices( (100,100) ).sum(0)
     piper.Input.setValue( data )

     # Tile the 100x100 input with 10x10 blocks: one roi per block.
     rois = [ getBlockBounds( [100,100], [10,10], start )
              for start in getIntersectingBlocks( [10,10], ([0,0], [100, 100]) ) ]

     class SpecialException(Exception):
         pass

     def handleResult(roi, result):
         # Every result triggers the failure we expect to see re-raised below.
         raise SpecialException("Intentional Exception: raised while handling the result")

     batch = RoiRequestBatch( piper.Output,
                              iter(rois),
                              numpy.prod( data.shape ),
                              batchSize=10,
                              allowParallelResults=False )
     batch.resultSignal.subscribe( handleResult )

     # FIXME: There are multiple places where the RoiRequestBatch tool should be prepared to handle exceptions.
     #        This only tests one of them (in the notify_finished() handler)
     try:
         batch.execute()
     except SpecialException:
         # This is the expected outcome.
         return

     assert False, "Expected exception to be propagated out of the RoiRequestBatch."
Example #2
0
class BigRequestStreamer(object):
    """
    Execute a big request by breaking it up into smaller requests.
    
    This class encapsulates the logic for dividing big rois into smaller ones to be executed separately.
    It relies on a RoiRequestBatch object, which is responsible for creating and scheduling the request for each roi.
    """
    def __init__(self, outputSlot, roi, minBlockShape, batchSize=None):
        """
        Constructor.

        :param outputSlot: The slot to request data from.
        :param roi: The roi `(start, stop)` of interest.  It is split into blocks of ``minBlockShape``,
                    with the block grid aligned to ``roi[0]``.
        :param minBlockShape: The shape of each block to request.
        :param batchSize: Passed on to the RoiRequestBatch.  Defaults to 2 when omitted.
        """
        self._outputSlot = outputSlot
        self._bigRoi = roi
        self._minBlockShape = minBlockShape
        
        if batchSize is None:
            batchSize = 2

        # Align the blocking with the start of the roi
        offsetRoi = ([0] * len(roi[0]), numpy.subtract(roi[1], roi[0]))
        self._minBlockStarts = getIntersectingBlocks(minBlockShape, offsetRoi)
        self._minBlockStarts += roi[0] # Un-offset

        totalVolume = numpy.prod( numpy.subtract(roi[1], roi[0]) )

        # For now, simply iterate over the min blocks
        # TODO: Auto-dilate block sizes based on CPU/RAM usage.
        def roiGen():
            # The roi extent does not change between blocks, so compute it once.
            offset_data_shape = numpy.subtract(self._bigRoi[1], self._bigRoi[0])

            # Use a plain for-loop: calling ``.next()`` by hand only works on Python 2,
            # and a StopIteration escaping a generator body is a RuntimeError under PEP 479.
            for block_start in self._minBlockStarts:
                # Use offset blocking
                offset_block_start = block_start - self._bigRoi[0]
                offset_block_bounds = getBlockBounds( offset_data_shape, minBlockShape, offset_block_start )
                
                # Un-offset
                block_bounds = ( offset_block_bounds[0] + self._bigRoi[0],
                                 offset_block_bounds[1] + self._bigRoi[0] )
                logger.debug( "Requesting Roi: {}".format( block_bounds ) )
                yield block_bounds
        
        self._requestBatch = RoiRequestBatch( self._outputSlot, roiGen(), totalVolume, batchSize )

    @property
    def progressSignal(self):
        """
        The progress signal of the underlying RoiRequestBatch.
        """
        return self._requestBatch.progressSignal

    @property
    def resultSignal(self):
        """
        The result signal of the underlying RoiRequestBatch.
        """
        return self._requestBatch.resultSignal

    def execute(self):
        """
        Run the underlying RoiRequestBatch.  Returns ``None``.
        """
        self._requestBatch.execute()
Example #3
0
    def __init__(self, outputSlot, roi, minBlockShape, batchSize=None):
        """
        Constructor.

        :param outputSlot: The slot to request data from.
        :param roi: The roi `(start, stop)` of interest.  It is split into blocks of ``minBlockShape``,
                    with the block grid aligned to ``roi[0]``.
        :param minBlockShape: The shape of each block to request.
        :param batchSize: Passed on to the RoiRequestBatch.  Defaults to 2 when omitted.
        """
        self._outputSlot = outputSlot
        self._bigRoi = roi
        self._minBlockShape = minBlockShape
        
        if batchSize is None:
            batchSize = 2

        # Align the blocking with the start of the roi
        offsetRoi = ([0] * len(roi[0]), numpy.subtract(roi[1], roi[0]))
        self._minBlockStarts = getIntersectingBlocks(minBlockShape, offsetRoi)
        self._minBlockStarts += roi[0] # Un-offset

        totalVolume = numpy.prod( numpy.subtract(roi[1], roi[0]) )

        # For now, simply iterate over the min blocks
        # TODO: Auto-dilate block sizes based on CPU/RAM usage.
        def roiGen():
            # The roi extent does not change between blocks, so compute it once.
            offset_data_shape = numpy.subtract(self._bigRoi[1], self._bigRoi[0])

            # Use a plain for-loop: calling ``.next()`` by hand only works on Python 2,
            # and a StopIteration escaping a generator body is a RuntimeError under PEP 479.
            for block_start in self._minBlockStarts:
                # Use offset blocking
                offset_block_start = block_start - self._bigRoi[0]
                offset_block_bounds = getBlockBounds( offset_data_shape, minBlockShape, offset_block_start )
                
                # Un-offset
                block_bounds = ( offset_block_bounds[0] + self._bigRoi[0],
                                 offset_block_bounds[1] + self._bigRoi[0] )
                logger.debug( "Requesting Roi: {}".format( block_bounds ) )
                yield block_bounds
        
        self._requestBatch = RoiRequestBatch( self._outputSlot, roiGen(), totalVolume, batchSize )
    def run_export(self) -> None:
        """Export the image from Input to Filepath, replacing any existing file.

        The reordered output is requested through a RoiRequestBatch; each result
        is handed to ``_write_buffered_pages`` and batch progress is forwarded
        to ``progressSignal``.
        """
        target = pathlib.Path(self.Filepath.value)
        if target.exists():
            # Start from a clean slate: drop whatever a previous export left behind.
            target.unlink()

        source_slot = self._opReorderAxes.Output
        full_shape = source_slot.meta.shape

        # The page buffer is sized by every axis except the last two.
        self._page_buf = _NdBuf(full_shape[:-2])

        request_batch = RoiRequestBatch(
            outputSlot=source_slot,
            roiIterator=_page_rois(*full_shape),
            totalVolume=np.prod(full_shape),
            batchSize=self._batch_size,
        )
        request_batch.progressSignal.subscribe(self.progressSignal)
        request_batch.resultSignal.subscribe(self._write_buffered_pages)
        request_batch.execute()
    def testBasic(self):
        """
        Request a 100x100 array in 10x10 blocks via RoiRequestBatch and check that
        the reassembled result matches the input and that progress runs from 0 to 100.
        """
        piper = OpArrayPiper( graph=Graph() )
        data = numpy.indices( (100,100) ).sum(0)
        piper.Input.setValue( data )

        # One roi per 10x10 block of the input.
        rois = [ getBlockBounds( [100,100], [10,10], start )
                 for start in getIntersectingBlocks( [10,10], ([0,0], [100, 100]) ) ]

        assembled = numpy.zeros( (100,100), dtype=numpy.int32 )
        callback_lock = threading.Lock()

        # Single-element list so the nested callback can update the count.
        num_results = [0]

        def handleResult(roi, result):
            # Non-blocking acquire: if it fails, two callbacks are running at once.
            got_lock = callback_lock.acquire(False)
            assert got_lock, "resultslock is contested! Access to callback is supposed to be automatically serialized."
            assembled[ roiToSlice( *roi ) ] = result
            logger.debug( "Got result for {}".format(roi) )
            callback_lock.release()
            num_results[0] += 1

        reported_progress = []

        def handleProgress( progress ):
            reported_progress.append( progress )
            logger.debug( "Progress update: {}".format(progress) )

        batch = RoiRequestBatch( piper.Output, iter(rois), numpy.prod( data.shape ), batchSize=10 )
        batch.resultSignal.subscribe( handleResult )
        batch.progressSignal.subscribe( handleProgress )

        batch.execute()
        logger.debug( "Got {} results".format( num_results[0] ) )
        assert (assembled == data).all()

        # Progress reporting MUST start with 0 and end with 100
        assert reported_progress[0] == 0, "Invalid progress reporting."
        assert reported_progress[-1] == 100, "Invalid progress reporting."

        # There should be some intermediate progress reporting, but exactly how much is unspecified.
        assert len(reported_progress) >= 10

        logger.debug( "FINISHED" )
    def __init__(self, outputSlot, roi, minBlockShape, batchSize=None):
        """
        Constructor.
        
        :param outputSlot: The slot to request data from.
        :param roi: The roi `(start, stop)` of interest.  Will be broken up and requested via smaller requests.
        :param minBlockShape: The minimum amount of data to request in each request.
                              Note: The current implementation breaks the big request into smaller 
                              requests of exactly ``minBlockShape`` size. Future implementations could 
                              concatenate smaller requests if it appears the system is not being overloaded by the smaller requests.
        :param batchSize: The maximum number of requests to launch in parallel.  Defaults to 2 when omitted.
        """
        self._outputSlot = outputSlot
        self._bigRoi = roi
        self._minBlockShape = minBlockShape
        
        if batchSize is None:
            batchSize = 2

        # Align the blocking with the start of the roi
        offsetRoi = ([0] * len(roi[0]), numpy.subtract(roi[1], roi[0]))
        self._minBlockStarts = getIntersectingBlocks(minBlockShape, offsetRoi)
        self._minBlockStarts += roi[0] # Un-offset

        totalVolume = numpy.prod( numpy.subtract(roi[1], roi[0]) )

        # For now, simply iterate over the min blocks
        # TODO: Auto-dilate block sizes based on CPU/RAM usage.
        def roiGen():
            # The roi extent does not change between blocks, so compute it once.
            offset_data_shape = numpy.subtract(self._bigRoi[1], self._bigRoi[0])

            # Use a plain for-loop: calling ``.next()`` by hand only works on Python 2,
            # and a StopIteration escaping a generator body is a RuntimeError under PEP 479.
            for block_start in self._minBlockStarts:
                # Use offset blocking
                offset_block_start = block_start - self._bigRoi[0]
                offset_block_bounds = getBlockBounds( offset_data_shape, minBlockShape, offset_block_start )
                
                # Un-offset
                block_bounds = ( offset_block_bounds[0] + self._bigRoi[0],
                                 offset_block_bounds[1] + self._bigRoi[0] )
                logger.debug( "Requesting Roi: {}".format( block_bounds ) )
                yield block_bounds
        
        self._requestBatch = RoiRequestBatch( self._outputSlot, roiGen(), totalVolume, batchSize )
Example #7
0
    def __init__(self, outputSlot, roi, blockshape=None, batchSize=None, blockAlignment='absolute', allowParallelResults=False):
        """
        Constructor.
        
        :param outputSlot: The slot to request data from.
        :param roi: The roi `(start, stop)` of interest.  Will be broken up and requested via smaller requests.
        :param blockshape: The amount of data to request in each request. If omitted, a default blockshape is chosen by inspecting the metadata of the given slot.
        :param batchSize: The maximum number of requests to launch in parallel (1000 if omitted).  This should not be necessary if the blockshape is small enough that you won't run out of RAM.
        :param blockAlignment: Determines how to block the requests. Choices are 'absolute' or 'relative'.
                               'relative' aligns the block grid with the start of ``roi``;
                               'absolute' aligns it with the origin and clips each block to ``roi``.
        :param allowParallelResults: If False, the resultSignal will not be called in parallel.
                                     In that case, your handler function has no need for locks.
        """
        self._outputSlot = outputSlot
        self._bigRoi = roi

        totalVolume = numpy.prod( numpy.subtract(roi[1], roi[0]) )
        
        if batchSize is None:
            batchSize = 1000
        
        if blockshape is None:
            blockshape = self._determine_blockshape(outputSlot)

        assert blockAlignment in ['relative', 'absolute']
        if blockAlignment == 'relative':
            # Align the blocking with the start of the roi
            offsetRoi = ([0] * len(roi[0]), numpy.subtract(roi[1], roi[0]))
            block_starts = getIntersectingBlocks(blockshape, offsetRoi)
            block_starts += roi[0] # Un-offset

            # For now, simply iterate over the min blocks
            # TODO: Auto-dilate block sizes based on CPU/RAM usage.
            def roiGen():
                # The roi extent does not change between blocks, so compute it once.
                offset_data_shape = numpy.subtract(self._bigRoi[1], self._bigRoi[0])

                # Use a plain for-loop: calling ``.next()`` by hand only works on Python 2,
                # and a StopIteration escaping a generator body is a RuntimeError under PEP 479.
                for block_start in block_starts:
                    # Use offset blocking
                    offset_block_start = block_start - self._bigRoi[0]
                    offset_block_bounds = getBlockBounds( offset_data_shape, blockshape, offset_block_start )
                    
                    # Un-offset
                    block_bounds = ( offset_block_bounds[0] + self._bigRoi[0],
                                     offset_block_bounds[1] + self._bigRoi[0] )
                    logger.debug( "Requesting Roi: {}".format( block_bounds ) )
                    yield block_bounds
            
        else:
            # Absolute blocking.
            # Blocks are simply relative to (0,0,0,...)
            # But we still clip the requests to the overall roi bounds.
            block_starts = getIntersectingBlocks(blockshape, roi)
            def roiGen():
                # Plain for-loop for the same Python 3 / PEP 479 reasons as the 'relative' case.
                for block_start in block_starts:
                    block_bounds = getBlockBounds( outputSlot.meta.shape, blockshape, block_start )
                    block_intersecting_portion = getIntersection( block_bounds, roi )
    
                    # NOTE: the logged roi is the unclipped block; the clipped portion is what gets requested.
                    logger.debug( "Requesting Roi: {}".format( block_bounds ) )
                    yield block_intersecting_portion
                
        self._requestBatch = RoiRequestBatch( self._outputSlot, roiGen(), totalVolume, batchSize, allowParallelResults )
Example #8
0
class BigRequestStreamer(object):
    """
    Execute a big request by breaking it up into smaller requests.
    
    This class encapsulates the logic for dividing big rois into smaller ones to be executed separately.
    It relies on a :py:class:`RoiRequestBatch<lazyflow.utility.roiRequestBatch.RoiRequestBatch>` object,
    which is responsible for creating and scheduling the request for each roi.
    
    Example:
    
    >>> import sys
    >>> import numpy
    >>> import vigra
    >>> from lazyflow.graph import Graph
    >>> from lazyflow.operators.operators import OpArrayCache

    >>> # Example data
    >>> data = numpy.indices( (100,100) ).sum(0)
    >>> data = vigra.taggedView( data, vigra.defaultAxistags('xy') )

    >>> op = OpArrayCache( graph=Graph() )
    >>> op.Input.setValue( data )

    >>> total_roi = [(25, 65), (45, 95)]

    >>> # Init with our output slot and roi to request.
    >>> # batchSize indicates the number of requests to spawn in parallel.
    >>> streamer = BigRequestStreamer( op.Output, total_roi, (10,10), batchSize=2, blockAlignment='relative' )

    >>> # Use a callback to handle sub-results one at a time.
    >>> result_count = [0]
    >>> result_total_sum = [0]
    >>> def handle_block_result(roi, result):
    ...     # No need for locking here if allowParallelResults=False (the default).
    ...     result_count[0] += 1
    ...     result_total_sum[0] += result.sum()
    >>> streamer.resultSignal.subscribe( handle_block_result )

    >>> # Optional: Subscribe to progress updates
    >>> def handle_progress(progress):
    ...     if progress == 0:
    ...         sys.stdout.write("Progress: ")
    ...     sys.stdout.write( "{} ".format( progress ) )
    >>> streamer.progressSignal.subscribe( handle_progress )

    >>> # Execute the batch of requests, and block for the result.
    >>> streamer.execute()
    Progress: 0 16 33 50 66 83 100 100 
    >>> print("Processed {} result blocks with a total sum of: {}".format( result_count[0], result_total_sum[0] ))
    Processed 6 result blocks with a total sum of: 68400
    """
    def __init__(self, outputSlot, roi, blockshape=None, batchSize=None, blockAlignment='absolute', allowParallelResults=False):
        """
        Constructor.
        
        :param outputSlot: The slot to request data from.
        :param roi: The roi `(start, stop)` of interest.  Will be broken up and requested via smaller requests.
        :param blockshape: The amount of data to request in each request. If omitted, a default blockshape is chosen by inspecting the metadata of the given slot.
        :param batchSize: The maximum number of requests to launch in parallel (1000 if omitted).  This should not be necessary if the blockshape is small enough that you won't run out of RAM.
        :param blockAlignment: Determines how to block the requests. Choices are 'absolute' or 'relative'.
                               'relative' aligns the block grid with the start of ``roi``;
                               'absolute' aligns it with the origin and clips each block to ``roi``.
        :param allowParallelResults: If False, the resultSignal will not be called in parallel.
                                     In that case, your handler function has no need for locks.
        """
        self._outputSlot = outputSlot
        self._bigRoi = roi

        totalVolume = numpy.prod( numpy.subtract(roi[1], roi[0]) )
        
        if batchSize is None:
            batchSize = 1000
        
        if blockshape is None:
            blockshape = self._determine_blockshape(outputSlot)

        assert blockAlignment in ['relative', 'absolute']
        if blockAlignment == 'relative':
            # Align the blocking with the start of the roi
            offsetRoi = ([0] * len(roi[0]), numpy.subtract(roi[1], roi[0]))
            block_starts = getIntersectingBlocks(blockshape, offsetRoi)
            block_starts += roi[0] # Un-offset

            # For now, simply iterate over the min blocks
            # TODO: Auto-dilate block sizes based on CPU/RAM usage.
            def roiGen():
                # The roi extent does not change between blocks, so compute it once.
                offset_data_shape = numpy.subtract(self._bigRoi[1], self._bigRoi[0])

                # Use a plain for-loop: calling ``.next()`` by hand only works on Python 2,
                # and a StopIteration escaping a generator body is a RuntimeError under PEP 479.
                for block_start in block_starts:
                    # Use offset blocking
                    offset_block_start = block_start - self._bigRoi[0]
                    offset_block_bounds = getBlockBounds( offset_data_shape, blockshape, offset_block_start )
                    
                    # Un-offset
                    block_bounds = ( offset_block_bounds[0] + self._bigRoi[0],
                                     offset_block_bounds[1] + self._bigRoi[0] )
                    logger.debug( "Requesting Roi: {}".format( block_bounds ) )
                    yield block_bounds
            
        else:
            # Absolute blocking.
            # Blocks are simply relative to (0,0,0,...)
            # But we still clip the requests to the overall roi bounds.
            block_starts = getIntersectingBlocks(blockshape, roi)
            def roiGen():
                # Plain for-loop for the same Python 3 / PEP 479 reasons as the 'relative' case.
                for block_start in block_starts:
                    block_bounds = getBlockBounds( outputSlot.meta.shape, blockshape, block_start )
                    block_intersecting_portion = getIntersection( block_bounds, roi )
    
                    # NOTE: the logged roi is the unclipped block; the clipped portion is what gets requested.
                    logger.debug( "Requesting Roi: {}".format( block_bounds ) )
                    yield block_intersecting_portion
                
        self._requestBatch = RoiRequestBatch( self._outputSlot, roiGen(), totalVolume, batchSize, allowParallelResults )

    def _determine_blockshape(self, outputSlot):
        """
        Choose a blockshape using the slot metadata (if available) or an arbitrary guess otherwise.

        :param outputSlot: The slot whose ``meta`` (shape, dtype, ideal_blockshape,
                           ram_usage_per_requested_pixel) is inspected.
        :returns: The blockshape to use for each request.
        """
        input_shape = outputSlot.meta.shape
        max_blockshape = input_shape
        ideal_blockshape = outputSlot.meta.ideal_blockshape
        ram_usage_per_requested_pixel = outputSlot.meta.ram_usage_per_requested_pixel
        
        num_threads = Request.global_thread_pool.num_workers
        available_ram = psutil.virtual_memory().available
        
        # Fudge factor: Reduce RAM usage by a bit
        available_ram *= 0.5

        if ram_usage_per_requested_pixel is None:
            # Make a conservative guess: 2 * (bytes for dtype) * (num channels) + (fudge factor=4)
            ram_usage_per_requested_pixel = 4 + 2*outputSlot.meta.dtype().nbytes*outputSlot.meta.shape[-1]
            # Logger.warn() is a deprecated alias; warning() is the supported spelling.
            logger.warning( "Unknown RAM usage.  Making a guess." )
        else:
            logger.info( "Estimated RAM usage per pixel is {} bytes"
                               .format( ram_usage_per_requested_pixel ) )
        
        if ideal_blockshape is None:
            blockshape = determineBlockShape( input_shape, available_ram/(num_threads*ram_usage_per_requested_pixel) )
            logger.warning( "Chose an arbitrary request blockshape {}".format( blockshape ) )
        else:
            logger.info( "determining blockshape assuming available_ram is {} GB, split between {} threads"
                               .format( available_ram/1e9, num_threads ) )
            
            # By convention, ram_usage_per_requested_pixel refers to the ram used when requesting ALL channels of a 'pixel'
            # Therefore, we do not include the channel dimension in the blockshapes here.
            blockshape = determine_optimal_request_blockshape( max_blockshape[:-1], 
                                                               ideal_blockshape[:-1], 
                                                               ram_usage_per_requested_pixel, 
                                                               num_threads, 
                                                               available_ram )
            # Re-append the full channel dimension that was left out above.
            blockshape += (outputSlot.meta.shape[-1],)
            logger.info( "Chose blockshape: {}".format( blockshape ) )
            logger.info( "Estimated RAM usage per block is {} GB"
                         .format( ram_usage_per_requested_pixel * numpy.prod( blockshape[:-1] ) / 1e9 ) )

        return blockshape
        
    @property
    def resultSignal(self):
        """
        Results signal. Signature: ``f(roi, result)``.
        Not called from multiple threads in parallel unless ``allowParallelResults=True`` was given.
        """
        return self._requestBatch.resultSignal

    @property
    def progressSignal(self):
        """
        Progress Signal Signature: ``f(progress_percent)``
        """
        return self._requestBatch.progressSignal

    def execute(self):
        """
        Request the data for the entire roi by breaking it up into many smaller requests,
        and wait for all of them to complete.
        A batch of N requests is launched, and subsequent requests are 
        launched one-by-one as the earlier requests complete.  Thus, there 
        will be N requests executing in parallel at all times.
        
        This method returns ``None``.  All results must be handled via the 
        :py:obj:`resultSignal`.
        """
        self._requestBatch.execute()
class BigRequestStreamer(object):
    """
    Execute a big request by breaking it up into smaller requests.
    
    This class encapsulates the logic for dividing big rois into smaller ones to be executed separately.
    It relies on a :py:class:`RoiRequestBatch<lazyflow.utility.roiRequestBatch.RoiRequestBatch>` object,
    which is responsible for creating and scheduling the request for each roi.
    
    Example:
    
    >>> import sys
    >>> import numpy
    >>> import vigra
    >>> from lazyflow.graph import Graph
    >>> from lazyflow.operators.operators import OpArrayCache

    >>> # Example data
    >>> data = numpy.indices( (100,100) ).sum(0)
    >>> data = vigra.taggedView( data, vigra.defaultAxistags('xy') )

    >>> op = OpArrayCache( graph=Graph() )
    >>> op.Input.setValue( data )

    >>> total_roi = [(25, 65), (45, 95)]

    >>> # Init with our output slot and roi to request.
    >>> # batchSize indicates the number of requests to spawn in parallel.
    >>> streamer = BigRequestStreamer( op.Output, total_roi, (10,10), batchSize=2 )

    >>> # Use a callback to handle sub-results one at a time.
    >>> result_count = [0]
    >>> result_total_sum = [0]
    >>> def handle_block_result(roi, result):
    ...     # No need for locking here.
    ...     result_count[0] += 1
    ...     result_total_sum[0] += result.sum()
    >>> streamer.resultSignal.subscribe( handle_block_result )

    >>> # Optional: Subscribe to progress updates
    >>> def handle_progress(progress):
    ...     if progress == 0:
    ...         sys.stdout.write("Progress: ")
    ...     sys.stdout.write( "{} ".format( progress ) )
    >>> streamer.progressSignal.subscribe( handle_progress )

    >>> # Execute the batch of requests, and block for the result.
    >>> streamer.execute()
    Progress: 0 16 33 50 66 83 100 100 
    >>> print("Processed {} result blocks with a total sum of: {}".format( result_count[0], result_total_sum[0] ))
    Processed 6 result blocks with a total sum of: 68400
    """
    def __init__(self, outputSlot, roi, minBlockShape, batchSize=None):
        """
        Constructor.
        
        :param outputSlot: The slot to request data from.
        :param roi: The roi `(start, stop)` of interest.  Will be broken up and requested via smaller requests.
        :param minBlockShape: The minimum amount of data to request in each request.
                              Note: The current implementation breaks the big request into smaller 
                              requests of exactly ``minBlockShape`` size. Future implementations could 
                              concatenate smaller requests if it appears the system is not being overloaded by the smaller requests.
        :param batchSize: The maximum number of requests to launch in parallel.  Defaults to 2 when omitted.
        """
        self._outputSlot = outputSlot
        self._bigRoi = roi
        self._minBlockShape = minBlockShape
        
        if batchSize is None:
            batchSize = 2

        # Align the blocking with the start of the roi
        offsetRoi = ([0] * len(roi[0]), numpy.subtract(roi[1], roi[0]))
        self._minBlockStarts = getIntersectingBlocks(minBlockShape, offsetRoi)
        self._minBlockStarts += roi[0] # Un-offset

        totalVolume = numpy.prod( numpy.subtract(roi[1], roi[0]) )

        # For now, simply iterate over the min blocks
        # TODO: Auto-dilate block sizes based on CPU/RAM usage.
        def roiGen():
            # The roi extent does not change between blocks, so compute it once.
            offset_data_shape = numpy.subtract(self._bigRoi[1], self._bigRoi[0])

            # Use a plain for-loop: calling ``.next()`` by hand only works on Python 2,
            # and a StopIteration escaping a generator body is a RuntimeError under PEP 479.
            for block_start in self._minBlockStarts:
                # Use offset blocking
                offset_block_start = block_start - self._bigRoi[0]
                offset_block_bounds = getBlockBounds( offset_data_shape, minBlockShape, offset_block_start )
                
                # Un-offset
                block_bounds = ( offset_block_bounds[0] + self._bigRoi[0],
                                 offset_block_bounds[1] + self._bigRoi[0] )
                logger.debug( "Requesting Roi: {}".format( block_bounds ) )
                yield block_bounds
        
        self._requestBatch = RoiRequestBatch( self._outputSlot, roiGen(), totalVolume, batchSize )

    @property
    def resultSignal(self):
        """
        Results signal. Signature: ``f(roi, result)``.
        Guaranteed not to be called from multiple threads in parallel.
        """
        return self._requestBatch.resultSignal

    @property
    def progressSignal(self):
        """
        Progress Signal Signature: ``f(progress_percent)``
        """
        return self._requestBatch.progressSignal

    def execute(self):
        """
        Request the data for the entire roi by breaking it up into many smaller requests,
        and wait for all of them to complete.
        A batch of N requests is launched, and subsequent requests are 
        launched one-by-one as the earlier requests complete.  Thus, there 
        will be N requests executing in parallel at all times.
        
        This method returns ``None``.  All results must be handled via the 
        :py:obj:`resultSignal`.
        """
        self._requestBatch.execute()
Example #10
0
class BigRequestStreamer(object):
    """
    Execute a big request by breaking it up into smaller requests.

    This class encapsulates the logic for dividing big rois into smaller ones to be executed separately.
    It relies on a :py:class:`RoiRequestBatch<lazyflow.utility.roiRequestBatch.RoiRequestBatch>` object,
    which is responsible for creating and scheduling the request for each roi.

    Example:

    >>> import sys
    >>> import vigra
    >>> from lazyflow.graph import Graph
    >>> from lazyflow.operators import OpBlockedArrayCache

    >>> # Example data
    >>> data = numpy.indices( (100,100) ).sum(0)
    >>> data = vigra.taggedView( data, vigra.defaultAxistags('xy') )

    >>> op = OpBlockedArrayCache( graph=Graph() )
    >>> op.Input.setValue( data )

    >>> total_roi = [(25, 65), (45, 95)]

    >>> # Init with our output slot and roi to request.
    >>> # batchSize indicates the number of requests to spawn in parallel.
    >>> streamer = BigRequestStreamer( op.Output, total_roi, (10,10), batchSize=2, blockAlignment='relative' )

    >>> # Use a callback to handle sub-results one at a time.
    >>> result_count = [0]
    >>> result_total_sum = [0]
    >>> def handle_block_result(roi, result):
    ...     # No need for locking here, because allowParallelResults=False (the default).
    ...     result_count[0] += 1
    ...     result_total_sum[0] += result.sum()
    >>> streamer.resultSignal.subscribe( handle_block_result )

    >>> # Optional: Subscribe to progress updates
    >>> def handle_progress(progress):
    ...     if progress == 0:
    ...         sys.stdout.write("Progress: ")
    ...     sys.stdout.write( "{} ".format( progress ) )
    >>> streamer.progressSignal.subscribe( handle_progress )

    >>> # Execute the batch of requests, and block for the result.
    >>> streamer.execute()
    Progress: 0 16 33 50 66 83 100 100
    >>> print("Processed {} result blocks with a total sum of: {}".format( result_count[0], result_total_sum[0] ))
    Processed 6 result blocks with a total sum of: 68400
    """

    def __init__(
        self, outputSlot, roi, blockshape=None, batchSize=None, blockAlignment="absolute", allowParallelResults=False
    ):
        """
        Constructor.

        :param outputSlot: The slot to request data from.
        :param roi: The roi `(start, stop)` of interest.  Will be broken up and requested via smaller requests.
        :param blockshape: The amount of data to request in each request. If omitted, a default blockshape is chosen by inspecting the metadata of the given slot.
        :param batchSize: The maximum number of requests to launch in parallel.  This should not be necessary if the blockshape is small enough that you won't run out of RAM.
                          If omitted, the number of workers in the global request thread pool is used (at least 1).
        :param blockAlignment: Determines how to block the requests. Choices are 'absolute' or 'relative'.
                               'relative' aligns the block grid with the start of ``roi``;
                               'absolute' aligns it with the origin and clips each block to ``roi``.
        :param allowParallelResults: If False, The resultSignal will not be called in parallel.
                                     In that case, your handler function has no need for locks.
        """
        self._outputSlot = outputSlot
        self._bigRoi = roi
        self._num_threads = max(1, Request.global_thread_pool.num_workers)

        totalVolume = numpy.prod(numpy.subtract(roi[1], roi[0]))

        if batchSize is None:
            batchSize = self._num_threads

        if blockshape is None:
            blockshape = self._determine_blockshape(outputSlot)

        assert blockAlignment in ["relative", "absolute"], "blockAlignment must be 'relative' or 'absolute', got {!r}".format(
            blockAlignment
        )
        if blockAlignment == "relative":
            # Align the blocking with the start of the roi
            offsetRoi = ([0] * len(roi[0]), numpy.subtract(roi[1], roi[0]))
            block_starts = getIntersectingBlocks(blockshape, offsetRoi)
            block_starts += roi[0]  # Un-offset

            # For now, simply iterate over the min blocks
            # TODO: Auto-dilate block sizes based on CPU/RAM usage.
            def roiGen():
                # The shape of the big roi does not change from block to block.
                offset_data_shape = numpy.subtract(self._bigRoi[1], self._bigRoi[0])

                # A plain for-loop ends the generator cleanly when the block starts
                # are exhausted, so no StopIteration can escape (PEP 479).
                for block_start in block_starts:
                    # Use offset blocking
                    offset_block_start = block_start - self._bigRoi[0]
                    offset_block_bounds = getBlockBounds(offset_data_shape, blockshape, offset_block_start)

                    # Un-offset
                    block_bounds = (
                        offset_block_bounds[0] + self._bigRoi[0],
                        offset_block_bounds[1] + self._bigRoi[0],
                    )
                    logger.debug("Requesting Roi: {}".format(block_bounds))
                    yield block_bounds

        else:
            # Absolute blocking.
            # Blocks are simply relative to (0,0,0,...)
            # But we still clip the requests to the overall roi bounds.
            block_starts = getIntersectingBlocks(blockshape, roi)

            def roiGen():
                for block_start in block_starts:
                    block_bounds = getBlockBounds(outputSlot.meta.shape, blockshape, block_start)
                    block_intersecting_portion = getIntersection(block_bounds, roi)

                    # Log the clipped roi, since that is what actually gets requested.
                    logger.debug("Requesting Roi: {}".format(block_intersecting_portion))
                    yield block_intersecting_portion

        self._requestBatch = RoiRequestBatch(self._outputSlot, roiGen(), totalVolume, batchSize, allowParallelResults)

    def _determine_blockshape(self, outputSlot):
        """
        Choose a blockshape using the slot metadata (if available) or an arbitrary guess otherwise.

        The channel ('c') and time ('t') axes are taken out of consideration while the
        blockshape is computed and are re-inserted before returning.

        :param outputSlot: The slot whose ``meta`` (shape, ideal_blockshape, max_blockshape,
                           ram_usage_per_requested_pixel, dtype, axis keys) is inspected.
        :returns: The chosen blockshape as a tuple, with one entry per axis of the slot.

        Issues a warning if the per-pixel RAM requirement is unknown, and another if no
        ideal blockshape is available and an arbitrary one is chosen.
        """
        input_shape = outputSlot.meta.shape
        ideal_blockshape = outputSlot.meta.ideal_blockshape
        ram_usage_per_requested_pixel = outputSlot.meta.ram_usage_per_requested_pixel
        max_blockshape = outputSlot.meta.max_blockshape or input_shape

        num_channels = 1
        tagged_shape = outputSlot.meta.getTaggedShape()

        available_ram = Memory.getAvailableRamComputation()

        # Generally, we don't want to split requests across channels.
        if "c" in tagged_shape.keys():
            num_channels = tagged_shape["c"]
            channel_index = list(tagged_shape.keys()).index("c")
            input_shape = input_shape[:channel_index] + input_shape[channel_index + 1 :]
            max_blockshape = max_blockshape[:channel_index] + max_blockshape[channel_index + 1 :]
            if ideal_blockshape:
                # Never enlarge 'ideal' in the channel dimension.
                num_channels = ideal_blockshape[channel_index]
                ideal_blockshape = ideal_blockshape[:channel_index] + ideal_blockshape[channel_index + 1 :]
            del tagged_shape["c"]

        # Generally, we don't want to join time slices
        # Note: 'c' was already removed above, so time_index refers to the channel-less shapes.
        if "t" in tagged_shape.keys():
            blockshape_time_steps = 1
            time_index = list(tagged_shape.keys()).index("t")
            input_shape = input_shape[:time_index] + input_shape[time_index + 1 :]
            max_blockshape = max_blockshape[:time_index] + max_blockshape[time_index + 1 :]
            if ideal_blockshape:
                # Never enlarge 'ideal' in the time dimension.
                blockshape_time_steps = ideal_blockshape[time_index]
                ideal_blockshape = ideal_blockshape[:time_index] + ideal_blockshape[time_index + 1 :]
                available_ram /= blockshape_time_steps
            del tagged_shape["t"]

        if ram_usage_per_requested_pixel is None:
            # Make a conservative guess: 2*(bytes for dtype) * (num channels) + (fudge factor=4)
            ram_usage_per_requested_pixel = 2 * outputSlot.meta.dtype().nbytes * num_channels + 4
            warnings.warn("Unknown per-pixel RAM requirement.  Making a guess.")

        # Safety factor (fudge factor): Double the estimated RAM usage per pixel
        safety_factor = 2.0
        logger.info(
            "Estimated RAM usage per pixel is {} * safety factor ({})".format(
                Memory.format(ram_usage_per_requested_pixel), safety_factor
            )
        )
        ram_usage_per_requested_pixel *= safety_factor

        if ideal_blockshape is None:
            blockshape = determineBlockShape(
                input_shape, (available_ram // (self._num_threads * ram_usage_per_requested_pixel))
            )
            blockshape = tuple(numpy.minimum(max_blockshape, blockshape))
            warnings.warn("Chose an arbitrary request blockshape")
        else:
            logger.info(
                "determining blockshape assuming available_ram is {}"
                ", split between {} threads".format(Memory.format(available_ram), self._num_threads)
            )

            # By convention, ram_usage_per_requested_pixel refers to the ram used when requesting ALL channels of a 'pixel'
            # Therefore, we do not include the channel dimension in the blockshapes here.
            #
            # Also, it rarely makes sense to request more than one time slice, so we omit that, too. (See above.)
            blockshape = determine_optimal_request_blockshape(
                max_blockshape, ideal_blockshape, ram_usage_per_requested_pixel, self._num_threads, available_ram
            )

        # If we removed time and channel from consideration, add them back now before returning
        # (time first, because time_index refers to the shape without the channel axis).
        if "t" in outputSlot.meta.getAxisKeys():
            blockshape = blockshape[:time_index] + (blockshape_time_steps,) + blockshape[time_index:]

        # ram_usage_per_requested_pixel already covers all channels of a pixel, so the
        # pixel count for the RAM estimate is taken before the channel axis is re-inserted.
        # (This does not assume that the channel axis is the last one, or present at all.)
        pixels_per_block = numpy.prod(blockshape)

        if "c" in outputSlot.meta.getAxisKeys():
            blockshape = blockshape[:channel_index] + (num_channels,) + blockshape[channel_index:]

        logger.info("Chose blockshape: {}".format(blockshape))
        fmt = Memory.format(ram_usage_per_requested_pixel * pixels_per_block)
        logger.info("Estimated RAM usage per block is {}".format(fmt))

        return blockshape

    @property
    def resultSignal(self):
        """
        Results signal. Signature: ``f(roi, result)``.
        Not called from multiple threads in parallel unless this streamer
        was constructed with ``allowParallelResults=True``.
        """
        return self._requestBatch.resultSignal

    @property
    def progressSignal(self):
        """
        Progress Signal Signature: ``f(progress_percent)``
        """
        return self._requestBatch.progressSignal

    def execute(self):
        """
        Request the data for the entire roi by breaking it up into many smaller requests,
        and wait for all of them to complete.
        A batch of N requests is launched, and subsequent requests are
        launched one-by-one as the earlier requests complete.  Thus, there
        will be N requests executing in parallel at all times.

        This method returns ``None``.  All results must be handled via the
        :py:obj:`resultSignal`.
        """
        self._requestBatch.execute()