Example #1
    def write(self):
        """
        Requests the entire input and saves it to the file.
        This function executes synchronously.
        """
        # TODO: Use a lazyflow.utility.BigRequestStreamer to split up 
        #       this giant request into a series of streamed subrequests.
        
        logger.warning("The current implementation of NPY-format data export computes the entire dataset at once, which requires lots of RAM.")
        path = self.Filepath.value

        self.progressSignal(0)

        final_data = numpy.zeros( self.Input.meta.shape, self.Input.meta.dtype )

        def handle_block_result(roi, data):
            slicing = roiToSlice(*roi)
            final_data[slicing] = data
        requester = BigRequestStreamer( self.Input, roiFromShape( self.Input.meta.shape ) )
        requester.resultSignal.subscribe( handle_block_result )
        requester.progressSignal.subscribe( self.progressSignal )
        requester.execute()

        numpy.save(path, final_data)
        self.progressSignal(100)
Example #2
    def write(self):
        """
        Requests the entire input and saves it to the file.
        This function executes synchronously.
        """
        # TODO: Use a lazyflow.utility.BigRequestStreamer to split up
        #       this giant request into a series of streamed subrequests.

        logger.warning(
            "The current implementation of NPY-format data export computes the entire dataset at once, which requires lots of RAM."
        )
        path = self.Filepath.value

        self.progressSignal(0)

        final_data = numpy.zeros(self.Input.meta.shape, self.Input.meta.dtype)

        def handle_block_result(roi, data):
            slicing = roiToSlice(*roi)
            final_data[slicing] = data

        requester = BigRequestStreamer(self.Input,
                                       roiFromShape(self.Input.meta.shape))
        requester.resultSignal.subscribe(handle_block_result)
        requester.progressSignal.subscribe(self.progressSignal)
        requester.execute()

        numpy.save(path, final_data)
        self.progressSignal(100)
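Both versions above carry the same TODO and the same warning: the whole dataset is materialized in RAM before numpy.save is called. Below is a minimal sketch (not the project's implementation) of the streamed alternative, reusing the BigRequestStreamer wiring shown above but writing each block straight into a memory-mapped .npy file via numpy.lib.format.open_memmap, so only the in-flight blocks need to be resident; write_streamed is a hypothetical method name.

    def write_streamed(self):
        """
        Hedged sketch: stream blocks into a memory-mapped .npy file instead of
        an in-RAM buffer. Not part of the original operator.
        """
        path = self.Filepath.value
        self.progressSignal(0)

        # open_memmap writes the .npy header up front and maps the data region.
        final_data = numpy.lib.format.open_memmap(
            path, mode="w+", dtype=self.Input.meta.dtype, shape=self.Input.meta.shape
        )

        def handle_block_result(roi, data):
            # Each block lands in its own disjoint slice of the memmap.
            final_data[roiToSlice(*roi)] = data

        requester = BigRequestStreamer(self.Input, roiFromShape(self.Input.meta.shape))
        requester.resultSignal.subscribe(handle_block_result)
        requester.progressSignal.subscribe(self.progressSignal)
        requester.execute()

        final_data.flush()  # push mapped pages to disk before reporting completion
        self.progressSignal(100)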
Example #3
    def run_export(self):
        self.progressSignal(0)

        url = self.NodeDataUrl.value
        url_path = url.split('://')[1]
        hostname, api, node, uuid, dataname = url_path.split('/')
        assert api == 'api'
        assert node == 'node'
        
        axiskeys = self.Input.meta.getAxisKeys()
        shape = self.Input.meta.shape
        
        if self._transpose_axes:
            axiskeys = reversed(axiskeys)
            shape = tuple(reversed(shape))
        
        axiskeys = "".join( axiskeys )

        if self.OffsetCoord.ready():
            offset_start = self.OffsetCoord.value
        else:
            offset_start = (0,) * len( self.Input.meta.shape )

        self.progressSignal(5)
        
        # Get the dataset details
        try:
            metadata = VoxelsAccessor.get_metadata(hostname, uuid, dataname)
        except VoxelsAccessor.BadRequestError as ex:
            # Dataset doesn't exist yet.  Let's create it.
            metadata = VoxelsMetadata.create_default_metadata( shape, 
                                                               self.Input.meta.dtype, 
                                                               axiskeys, 
                                                               0.0, 
                                                               "" )
            VoxelsAccessor.create_new(hostname, uuid, dataname, metadata)

        # Since this class is generally used to push large blocks of data,
        #  we'll be nice and set throttle=True
        client = VoxelsAccessor( hostname, uuid, dataname, throttle=True )
        
        def handle_block_result(roi, data):
            # Send it to dvid
            roi = numpy.asarray(roi)
            roi += offset_start
            start, stop = roi
            if self._transpose_axes:
                data = data.transpose()
                start = tuple(reversed(start))
                stop = tuple(reversed(stop))
            client.post_ndarray( start, stop, data )
        requester = BigRequestStreamer( self.Input, roiFromShape( self.Input.meta.shape ) )
        requester.resultSignal.subscribe( handle_block_result )
        requester.progressSignal.subscribe( self.progressSignal )
        requester.execute()
        
        self.progressSignal(100)
    
        
예제 #4
0
    def run_export(self):
        self.progressSignal(0)

        url = self.NodeDataUrl.value
        url_path = url.split('://')[1]
        hostname, api, node, uuid, dataname = url_path.split('/')
        assert api == 'api'
        assert node == 'node'

        axiskeys = self.Input.meta.getAxisKeys()
        shape = self.Input.meta.shape

        if self._transpose_axes:
            axiskeys = reversed(axiskeys)
            shape = tuple(reversed(shape))

        axiskeys = "".join(axiskeys)

        if self.OffsetCoord.ready():
            offset_start = self.OffsetCoord.value
        else:
            offset_start = (0, ) * len(self.Input.meta.shape)

        self.progressSignal(5)

        # Get the dataset details
        try:
            metadata = VoxelsAccessor.get_metadata(hostname, uuid, dataname)
        except DVIDException as ex:
            if ex.status != 404:
                raise
            # Dataset doesn't exist yet.  Let's create it.
            metadata = VoxelsMetadata.create_default_metadata(
                shape, self.Input.meta.dtype, axiskeys, 0.0, "")
            VoxelsAccessor.create_new(hostname, uuid, dataname, metadata)

        # Since this class is generally used to push large blocks of data,
        #  we'll be nice and set throttle=True
        client = VoxelsAccessor(hostname, uuid, dataname, throttle=True)

        def handle_block_result(roi, data):
            # Send it to dvid
            roi = numpy.asarray(roi)
            roi += offset_start
            start, stop = roi
            if self._transpose_axes:
                data = data.transpose()
                start = tuple(reversed(start))
                stop = tuple(reversed(stop))
                client.post_ndarray(start, stop, data)

        requester = BigRequestStreamer(self.Input,
                                       roiFromShape(self.Input.meta.shape))
        requester.resultSignal.subscribe(handle_block_result)
        requester.progressSignal.subscribe(self.progressSignal)
        requester.execute()

        self.progressSignal(100)
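The URL handling at the top of run_export assumes a node-data URL of the form <protocol>://<hostname>/api/node/<uuid>/<dataname>. A small self-contained sketch of that parse, with a made-up example URL:

def parse_node_data_url(url):
    """Hypothetical helper mirroring the parsing in run_export above."""
    url_path = url.split('://')[1]
    hostname, api, node, uuid, dataname = url_path.split('/')
    assert api == 'api' and node == 'node', "Unexpected DVID URL layout: {}".format(url)
    return hostname, uuid, dataname

# Hypothetical example:
#   parse_node_data_url('http://localhost:8000/api/node/abc123/grayscale')
#   -> ('localhost:8000', 'abc123', 'grayscale')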
Example #5
    def execute(self, slot, subindex, ignored_roi, result):
        configFilePath = self.ConfigFilePath.value
        config = parseClusterConfigFile(configFilePath)

        blockwiseFileset = self._primaryBlockwiseFileset

        # Check axis compatibility
        inputAxes = self.Input.meta.getTaggedShape().keys()
        outputAxes = list(blockwiseFileset.description.axes)
        assert set(inputAxes) == set(outputAxes), \
            "Output dataset has the wrong set of axes.  Input axes: {}, Output axes: {}".format( "".join(inputAxes), "".join(outputAxes) )

        roiString = self.RoiString.value
        roi = Roi.loads(roiString)
        if len(roi.start) != len(self.Input.meta.shape):
            assert False, "Task roi: {} is not valid for this input.  Did the master launch this task correctly?".format(
                roiString)

        logger.info("Executing for roi: {}".format(roi))

        if config.use_node_local_scratch:
            assert False, "FIXME."

        assert (
            blockwiseFileset.getEntireBlockRoi(roi.start)[1] == roi.stop
        ).all(
        ), "Each task must execute exactly one full block.  ({},{}) is not a valid block roi.".format(
            roi.start, roi.stop)
        assert self.Input.ready()

        # Convert the task subrequest shape dict into a shape for this dataset (and axisordering)
        # Materialize as a list (map() would be a lazy, one-shot iterator on Python 3).
        subrequest_shape = [
            config.task_subrequest_shape[tag.key] for tag in self.Input.meta.axistags
        ]
        primary_subrequest_shape = self._primaryBlockwiseFileset.description.sub_block_shape
        if primary_subrequest_shape is not None:
            # If the output dataset specified a sub_block_shape, override the cluster config
            subrequest_shape = primary_subrequest_shape

        with Timer() as computeTimer:
            # Stream the data out to disk.
            streamer = BigRequestStreamer(self.Input, (roi.start, roi.stop),
                                          subrequest_shape,
                                          config.task_parallel_subrequests)
            streamer.progressSignal.subscribe(self.progressSignal)
            streamer.resultSignal.subscribe(self._handlePrimaryResultBlock)
            streamer.execute()

            # Now the block is ready.  Update the status.
            blockwiseFileset.setBlockStatus(roi.start,
                                            BlockwiseFileset.BLOCK_AVAILABLE)

        logger.info("Finished task in {} seconds".format(
            computeTimer.seconds()))
        result[0] = True
        return result
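The sub-request shape conversion above is easiest to see with concrete (hypothetical) values: the cluster config stores one edge length per axis key, and the comprehension reorders those lengths to match the dataset's own axis order.

# Hypothetical illustration of the conversion above: a config keyed by axis
# letter, applied to a dataset whose axes are ordered "zyx".
task_subrequest_shape = {'x': 256, 'y': 256, 'z': 1}
axis_order = "zyx"
subrequest_shape = [task_subrequest_shape[key] for key in axis_order]
# -> [1, 256, 256], i.e. one 256x256 z-slice per subrequest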
Example #6
    def testForMemoryLeaks(self):
        """
        If the BigRequestStreamer doesn't clean requests as they complete, they'll take up too much memory.
        
        Edit: This test attempts to find memory issues indirectly, via psutil.virtual_memory().
              That doesn't really work very well.  The new test below, test_results_discarded() is a better check.
        """

        gc.collect()

        vmem = psutil.virtual_memory()
        start_mem_usage_mb = (vmem.total - vmem.available) / (1000 * 1000)
        logger.debug("Starting test with memory usage at: {} MB".format(
            start_mem_usage_mb))

        op = OpNonsense(graph=Graph())

        def handleResult(roi, result):
            pass

        def handleProgress(progress):
            #gc.collect()
            logger.debug("Progress update: {}".format(progress))
            #vmem = psutil.virtual_memory()
            #finished_mem_usage_mb = (vmem.total - vmem.available) / (1000*1000)
            #difference_mb = finished_mem_usage_mb - start_mem_usage_mb
            #logger.debug( "Progress update: {} with memory usage at: {} MB ({} MB increase)".format( progress, finished_mem_usage_mb, difference_mb ) )

        batch = BigRequestStreamer(op.Output, [(0, 0, 0), (100, 1000, 1000)],
                                   (100, 100, 100))
        batch.resultSignal.subscribe(handleResult)
        batch.progressSignal.subscribe(handleProgress)
        batch.execute()

        vmem = psutil.virtual_memory()
        finished_mem_usage_mb = (vmem.total - vmem.available) / (1000 * 1000)
        difference_mb = finished_mem_usage_mb - start_mem_usage_mb
        logger.debug(
            "Finished execution with memory usage at: {} MB ({} MB increase)".
            format(finished_mem_usage_mb, difference_mb))

        # Collect
        gc.collect()

        vmem = psutil.virtual_memory()
        finished_mem_usage_mb = (vmem.total - vmem.available) / (1000 * 1000)
        difference_mb = finished_mem_usage_mb - start_mem_usage_mb
        logger.debug(
            "Finished test with memory usage at: {} MB ({} MB increase)".
            format(finished_mem_usage_mb, difference_mb))
        assert difference_mb < 200, "BigRequestStreamer seems to have memory leaks.  After executing, RAM usage increased by {}".format(
            difference_mb)
Example #7
    def run_export_to_array(self):
        # Allocate result
        final_result = numpy.ndarray( dtype=self.Input.meta.dtype, shape=self.Input.meta.shape )
        
        # Prepare streamer
        streamer = BigRequestStreamer( self.Input,
                                       roiFromShape(self.Input.meta.shape),
                                       allowParallelResults=True )
        def handle_block_result(roi, block_result):
            final_result[roiToSlice(*roi)] = block_result
        streamer.resultSignal.subscribe( handle_block_result )
        streamer.progressSignal.subscribe( self.progressSignal )

        # Perform export
        streamer.execute()
        return final_result
Example #8
    def run_export_to_array(self):
        # Allocate result
        final_result = numpy.ndarray(dtype=self.Input.meta.dtype, shape=self.Input.meta.shape)

        # Prepare streamer
        streamer = BigRequestStreamer(self.Input, roiFromShape(self.Input.meta.shape), allowParallelResults=True)

        def handle_block_result(roi, block_result):
            final_result[roiToSlice(*roi)] = block_result

        streamer.resultSignal.subscribe(handle_block_result)
        streamer.progressSignal.subscribe(self.progressSignal)

        # Perform export
        streamer.execute()
        return final_result
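Both versions pass allowParallelResults=True, which is safe here because every block is written into its own disjoint slice of the preallocated final_result, so the handler needs no locking. When the handler touches shared state instead, leaving allowParallelResults at its default keeps the result callbacks serialized (the testBasic example further below checks exactly that with a non-blocking lock). A hedged sketch of such an accumulating export, using the two-argument constructor form from the NPY examples; sum_over_input is a hypothetical method name:

    def sum_over_input(self):
        """
        Hedged sketch, not part of the original operator: reduce the whole
        input to a single sum while holding only a few blocks in RAM.
        """
        totals = [0.0]

        def handle_block_result(roi, block):
            # Serialized result handling (the default), so no lock is needed here.
            totals[0] += block.sum()

        streamer = BigRequestStreamer(self.Input, roiFromShape(self.Input.meta.shape))
        streamer.resultSignal.subscribe(handle_block_result)
        streamer.progressSignal.subscribe(self.progressSignal)
        streamer.execute()
        return totals[0]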
Example #9
    def execute(self, slot, subindex, ignored_roi, result):
        configFilePath = self.ConfigFilePath.value
        config = parseClusterConfigFile( configFilePath )        
        
        blockwiseFileset = self._primaryBlockwiseFileset
        
        # Check axis compatibility
        inputAxes = self.Input.meta.getTaggedShape().keys()
        outputAxes = list(blockwiseFileset.description.axes)
        assert set(inputAxes) == set(outputAxes), \
            "Output dataset has the wrong set of axes.  Input axes: {}, Output axes: {}".format( "".join(inputAxes), "".join(outputAxes) )
        
        roiString = self.RoiString.value
        roi = Roi.loads(roiString)
        if len( roi.start ) != len( self.Input.meta.shape ):
            assert False, "Task roi: {} is not valid for this input.  Did the master launch this task correctly?".format( roiString )

        logger.info( "Executing for roi: {}".format(roi) )

        if config.use_node_local_scratch:
            assert False, "FIXME."

        assert (blockwiseFileset.getEntireBlockRoi( roi.start )[1] == roi.stop).all(), "Each task must execute exactly one full block.  ({},{}) is not a valid block roi.".format( roi.start, roi.stop )
        assert self.Input.ready()

        # Convert the task subrequest shape dict into a shape for this dataset (and axisordering)
        subrequest_shape = map( lambda tag: config.task_subrequest_shape[tag.key], self.Input.meta.axistags )
        primary_subrequest_shape = self._primaryBlockwiseFileset.description.sub_block_shape
        if primary_subrequest_shape is not None:
            # If the output dataset specified a sub_block_shape, override the cluster config
            subrequest_shape = primary_subrequest_shape

        with Timer() as computeTimer:
            # Stream the data out to disk.
            streamer = BigRequestStreamer(self.Input, (roi.start, roi.stop), subrequest_shape, config.task_parallel_subrequests )
            streamer.progressSignal.subscribe( self.progressSignal )
            streamer.resultSignal.subscribe( self._handlePrimaryResultBlock )
            streamer.execute()

            # Now the block is ready.  Update the status.
            blockwiseFileset.setBlockStatus( roi.start, BlockwiseFileset.BLOCK_AVAILABLE )

        logger.info( "Finished task in {} seconds".format( computeTimer.seconds() ) )
        result[0] = True
        return result
Example #10
    def testBasic(self):
        op = OpArrayPiper(graph=Graph())
        inputData = numpy.indices((100, 100)).sum(0)
        op.Input.setValue(inputData)

        results = numpy.zeros((100, 100), dtype=numpy.int32)
        resultslock = threading.Lock()

        resultsCount = [0]

        def handleResult(roi, result):
            acquired = resultslock.acquire(False)
            assert acquired, "resultslock is contested! Access to callback is supposed to be automatically serialized."
            results[roiToSlice(*roi)] = result
            logger.debug("Got result for {}".format(roi))
            resultslock.release()
            resultsCount[0] += 1

        progressList = []

        def handleProgress(progress):
            progressList.append(progress)
            logger.debug("Progress update: {}".format(progress))

        totalVolume = numpy.prod(inputData.shape)
        batch = BigRequestStreamer(op.Output, [(0, 0), (100, 100)], (10, 10))
        batch.resultSignal.subscribe(handleResult)
        batch.progressSignal.subscribe(handleProgress)

        batch.execute()
        logger.debug("Got {} results".format(resultsCount[0]))
        assert (results == inputData).all()

        # Progress reporting MUST start with 0 and end with 100
        assert progressList[0] == 0, "Invalid progress reporting."
        assert progressList[-1] == 100, "Invalid progress reporting."

        # There should be some intermediate progress reporting, but exactly how much is unspecified.
        assert len(progressList) >= 10

        logger.debug("FINISHED")
Example #11
def test_pool_results_discarded():
    """
    This test checks to make sure that result arrays are discarded in turn as the BigRequestStreamer executes.
    (If they weren't discarded in real time, then it's possible to end up consuming a lot of RAM until the streamer finally finishes.)
    """
    result_refs = []

    def handle_result(roi, result):
        result_refs.append(weakref.ref(result))

        # In this test, all results are discarded immediately after the
        #  request exits.  Therefore, AT NO POINT IN TIME, should more than N requests be alive.
        live_result_refs = [w for w in result_refs if w() is not None]
        assert (
            len(live_result_refs) <= Request.global_thread_pool.num_workers
        ), "There should not be more than {} result references alive at one time!".format(
            Request.global_thread_pool.num_workers)

    def handle_progress(progress):
        logger.debug(
            "test_pool_results_discarded: progress: {}".format(progress))

    op = OpNonsense(graph=Graph())
    batch = BigRequestStreamer(op.Output, [(0, 0, 0), (100, 1000, 1000)],
                               (100, 100, 100))
    batch.resultSignal.subscribe(handle_result)
    batch.progressSignal.subscribe(handle_progress)
    batch.execute()

    # This test verifies that
    #  (1) references to all child requests have been discarded once the pool is complete, and
    #  (2) therefore, all references to the RESULTS in those child requests are also discarded.
    # There is a tiny window of time between a request being 'complete' (for all intents and purposes),
    #  but before its main execute function has exited back to the main ThreadPool._Worker loop.
    #  The request is not finally discarded until that loop discards it, so let's wait a tiny extra bit of time.
    time.sleep(0.01)

    # Now check that ALL results are truly lost.
    for ref in result_refs:
        assert ref() is None, "Some data was not discarded."
Example #12
def test_pool_results_discarded():
    """
    This test checks to make sure that result arrays are discarded in turn as the BigRequestStreamer executes.
    (If they weren't discarded in real time, then it's possible to end up consuming a lot of RAM until the streamer finally finishes.)
    """
    result_refs = []
    def handle_result(roi, result):
        result_refs.append( weakref.ref(result) )

        # In this test, all results are discarded immediately after the 
        #  request exits.  Therefore, AT NO POINT IN TIME, should more than N requests be alive.
        live_result_refs = filter(lambda w:w() is not None, result_refs)
        assert len(live_result_refs) <= Request.global_thread_pool.num_workers, \
            "There should not be more than {} result references alive at one time!"\
            .format( Request.global_thread_pool.num_workers )

    def handle_progress( progress ):
        logger.debug("test_pool_results_discarded: progress: {}".format(progress))
          
    op = OpNonsense( graph=Graph() )
    batch = BigRequestStreamer(op.Output, [(0,0,0), (100,1000,1000)], (100,100,100) )
    batch.resultSignal.subscribe( handle_result )
    batch.progressSignal.subscribe( handle_progress )
    batch.execute()
  
    # This test verifies that
    #  (1) references to all child requests have been discarded once the pool is complete, and
    #  (2) therefore, all references to the RESULTS in those child requests are also discarded.
    # There is a tiny window of time between a request being 'complete' (for all intents and purposes),
    #  but before its main execute function has exited back to the main ThreadPool._Worker loop.
    #  The request is not finally discarded until that loop discards it, so let's wait a tiny extra bit of time.
    time.sleep(0.01)
      
    # Now check that ALL results are truly lost.
    for ref in result_refs:
        assert ref() is None, "Some data was not discarded."
Example #13
    def testBadMemoryConditions(self):
        """
        TestCacheMemoryManager.testBadMemoryConditions

        This test is a proof of the proposition in
            https://github.com/ilastik/lazyflow/issue/185
        which states that, given certain memory constraints, the cache
        cleanup strategy in use is inefficient. An advanced strategy
        should pass the test.
        """

        mgr = CacheMemoryManager()
        mgr.setRefreshInterval(0.01)
        mgr.enable()

        d = 2
        tags = "xy"

        shape = (999,) * d
        blockshape = (333,) * d

        # restrict memory for computation to one block (including fudge
        # factor 2 of bigRequestStreamer)
        cacheMem = np.prod(shape)
        Memory.setAvailableRam(np.prod(blockshape) * 2 + cacheMem)

        # restrict cache memory to the whole volume
        Memory.setAvailableRamCaches(cacheMem)

        # to ease observation, do everything single threaded
        Request.reset_thread_pool(num_workers=1)

        x = np.zeros(shape, dtype=np.uint8)
        x = vigra.taggedView(x, axistags=tags)

        g = Graph()
        pipe = OpArrayPiperWithAccessCount(graph=g)
        pipe.Input.setValue(x)
        pipe.Output.meta.ideal_blockshape = blockshape

        # simulate BlockedArrayCache behaviour without caching
        # cache = OpSplitRequestsBlockwise(True, graph=g)
        # cache.BlockShape.setValue(blockshape)
        # cache.Input.connect(pipe.Output)

        cache = OpBlockedArrayCache(graph=g)
        cache.Input.connect(pipe.Output)
        cache.BlockShape.setValue(blockshape)

        op = OpEnlarge(graph=g)
        op.Input.connect(cache.Output)

        split = OpSplitRequestsBlockwise(True, graph=g)
        split.BlockShape.setValue(blockshape)
        split.Input.connect(op.Output)

        streamer = BigRequestStreamer(split.Output, [(0,) * len(shape), shape])
        streamer.execute()

        # in the worst case, we have 4*4 + 4*6 + 9 = 49 requests to pipe
        # in the best case, we have 9
        np.testing.assert_equal(pipe.accessCount, 9)
Example #14
    def testBadMemoryConditions(self):
        """
        TestCacheMemoryManager.testBadMemoryConditions

        This test is a proof of the proposition in
            https://github.com/ilastik/lazyflow/issue/185
        which states that, given certain memory constraints, the cache
        cleanup strategy in use is inefficient. An advanced strategy
        should pass the test.
        """

        mgr = _CacheMemoryManager()
        mgr.setRefreshInterval(0.01)
        mgr.enable()

        d = 2
        tags = "xy"

        shape = (999,) * d
        blockshape = (333,) * d

        # restrict memory for computation to one block (including fudge
        # factor 2 of bigRequestStreamer)
        cacheMem = np.prod(shape)
        Memory.setAvailableRam(np.prod(blockshape) * 2 + cacheMem)

        # restrict cache memory to the whole volume
        Memory.setAvailableRamCaches(cacheMem)

        # to ease observation, do everything single threaded
        Request.reset_thread_pool(num_workers=1)

        x = np.zeros(shape, dtype=np.uint8)
        x = vigra.taggedView(x, axistags=tags)

        g = Graph()
        pipe = OpArrayPiperWithAccessCount(graph=g)
        pipe.Input.setValue(x)
        pipe.Output.meta.ideal_blockshape = blockshape

        # simulate BlockedArrayCache behaviour without caching
        # cache = OpSplitRequestsBlockwise(True, graph=g)
        # cache.BlockShape.setValue(blockshape)
        # cache.Input.connect(pipe.Output)

        cache = OpBlockedArrayCache(graph=g)
        cache.Input.connect(pipe.Output)
        cache.BlockShape.setValue(blockshape)

        op = OpEnlarge(graph=g)
        op.Input.connect(cache.Output)

        split = OpSplitRequestsBlockwise(True, graph=g)
        split.BlockShape.setValue(blockshape)
        split.Input.connect(op.Output)
        streamer = BigRequestStreamer(split.Output, [(0,) * len(shape), shape])
        streamer.execute()

        # in the worst case, we have 4*4 + 4*6 + 9 = 49 requests to pipe
        # in the best case, we have 9
        np.testing.assert_equal(pipe.accessCount, 9)
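The memory budget in this test is easier to follow with the numbers written out: the volume is 999x999 uint8 voxels split into 3x3 blocks of 333x333, the cache is allowed to hold the entire volume, and the computation budget on top of that is two block-sized buffers (the streamer's fudge factor of 2). A small worked sketch of that arithmetic:

shape = (999, 999)        # whole uint8 volume, 1 byte per voxel
blockshape = (333, 333)   # cache / streaming block

cacheMem = np.prod(shape)               # 998001 bytes: room for every block in the cache
computeMem = 2 * np.prod(blockshape)    # 221778 bytes: ~2 in-flight blocks for the streamer
totalRam = computeMem + cacheMem        # 1219779 bytes passed to Memory.setAvailableRam

n_blocks = (shape[0] // blockshape[0]) * (shape[1] // blockshape[1])  # 9, the best-case access count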
Example #15
    def testForMemoryLeaks(self):
        """
        If the BigRequestStreamer doesn't clean requests as they complete, they'll take up too much memory.
        """
        class OpNonsense(Operator):
            """
            Provide nonsense data of the correct shape for each request.
            """
            Output = OutputSlot()

            def setupOutputs(self):
                self.Output.meta.dtype = numpy.float32
                self.Output.meta.shape = (2000, 2000, 2000)

            def execute(self, slot, subindex, roi, result):
                """
                Simulate a cascade of requests, to make sure that the entire cascade is properly freed.
                """
                roiShape = roi.stop - roi.start

                def getResults1():
                    return numpy.indices(roiShape,
                                         self.Output.meta.dtype).sum()

                def getResults2():
                    req = Request(getResults1)
                    req.submit()
                    result[:] = req.wait()
                    return result

                req = Request(getResults2)
                req.submit()
                result[:] = req.wait()
                return result

            def propagateDirty(self, slot, subindex, roi):
                pass

        gc.collect()

        vmem = psutil.virtual_memory()
        start_mem_usage_mb = (vmem.total - vmem.available) / (1000 * 1000)
        logger.debug("Starting test with memory usage at: {} MB".format(
            start_mem_usage_mb))

        op = OpNonsense(graph=Graph())

        def handleResult(roi, result):
            pass

        def handleProgress(progress):
            #gc.collect()
            logger.debug("Progress update: {}".format(progress))
            #vmem = psutil.virtual_memory()
            #finished_mem_usage_mb = (vmem.total - vmem.available) / (1000*1000)
            #difference_mb = finished_mem_usage_mb - start_mem_usage_mb
            #logger.debug( "Progress update: {} with memory usage at: {} MB ({} MB increase)".format( progress, finished_mem_usage_mb, difference_mb ) )

        batch = BigRequestStreamer(op.Output, [(0, 0, 0), (100, 1000, 1000)],
                                   (100, 100, 100))
        batch.resultSignal.subscribe(handleResult)
        batch.progressSignal.subscribe(handleProgress)
        batch.execute()

        vmem = psutil.virtual_memory()
        finished_mem_usage_mb = (vmem.total - vmem.available) / (1000 * 1000)
        difference_mb = finished_mem_usage_mb - start_mem_usage_mb
        logger.debug(
            "Finished execution with memory usage at: {} MB ({} MB increase)".
            format(finished_mem_usage_mb, difference_mb))

        # Collect
        gc.collect()

        vmem = psutil.virtual_memory()
        finished_mem_usage_mb = (vmem.total - vmem.available) / (1000 * 1000)
        difference_mb = finished_mem_usage_mb - start_mem_usage_mb
        logger.debug(
            "Finished test with memory usage at: {} MB ({} MB increase)".
            format(finished_mem_usage_mb, difference_mb))
        assert difference_mb < 200, "BigRequestStreamer seems to have memory leaks.  After executing, RAM usage increased by {}".format(
            difference_mb)
Example #16
    def testForMemoryLeaks(self):
        """
        If the BigRequestStreamer doesn't clean requests as they complete, they'll take up too much memory.
        """
        
        class OpNonsense( Operator ):
            """
            Provide nonsense data of the correct shape for each request.
            """
            Output = OutputSlot()

            def setupOutputs(self):
                self.Output.meta.dtype = numpy.float32
                self.Output.meta.shape = (2000, 2000, 2000)
    
            def execute(self, slot, subindex, roi, result):
                """
                Simulate a cascade of requests, to make sure that the entire cascade is properly freed.
                """
                roiShape = roi.stop - roi.start
                def getResults1():
                    return numpy.indices(roiShape, self.Output.meta.dtype).sum()
                def getResults2():
                    req = Request( getResults1 )
                    req.submit()
                    result[:] = req.wait()
                    return result

                req = Request( getResults2 )
                req.submit()
                result[:] = req.wait()
                return result
        
            def propagateDirty(self, slot, subindex, roi):
                pass

        gc.collect()

        vmem = psutil.virtual_memory()
        start_mem_usage_mb = (vmem.total - vmem.available) / (1000*1000)
        logger.debug( "Starting test with memory usage at: {} MB".format( start_mem_usage_mb ) )

        op = OpNonsense( graph=Graph() )
        def handleResult( roi, result ):
            pass

        def handleProgress( progress ):
            #gc.collect()
            logger.debug( "Progress update: {}".format( progress ) )
            #vmem = psutil.virtual_memory()
            #finished_mem_usage_mb = (vmem.total - vmem.available) / (1000*1000)
            #difference_mb = finished_mem_usage_mb - start_mem_usage_mb
            #logger.debug( "Progress update: {} with memory usage at: {} MB ({} MB increase)".format( progress, finished_mem_usage_mb, difference_mb ) )

        batch = BigRequestStreamer(op.Output, [(0,0,0), (100,1000,1000)], (100,100,100) )
        batch.resultSignal.subscribe( handleResult )
        batch.progressSignal.subscribe( handleProgress )
        batch.execute()

        vmem = psutil.virtual_memory()
        finished_mem_usage_mb = (vmem.total - vmem.available) / (1000*1000)
        difference_mb = finished_mem_usage_mb - start_mem_usage_mb
        logger.debug( "Finished execution with memory usage at: {} MB ({} MB increase)".format( finished_mem_usage_mb, difference_mb ) )

        # Collect
        gc.collect()

        vmem = psutil.virtual_memory()
        finished_mem_usage_mb = (vmem.total - vmem.available) / (1000*1000)
        difference_mb = finished_mem_usage_mb - start_mem_usage_mb
        logger.debug( "Finished test with memory usage at: {} MB ({} MB increase)".format( finished_mem_usage_mb, difference_mb ) )
        assert difference_mb < 200, "BigRequestStreamer seems to have memory leaks.  After executing, RAM usage increased by {}".format( difference_mb )
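The execute method of OpNonsense deliberately nests two Requests, so releasing the outer request must also release the inner one and its result. The same cascade can be reproduced outside an operator with the Request API used above; a minimal hedged sketch:

def inner():
    # Nonsense payload standing in for a real computation.
    return numpy.zeros((100, 100), dtype=numpy.float32)

def outer():
    req = Request(inner)
    req.submit()
    return req.wait()   # inner's result must become collectible once outer's does

req = Request(outer)
req.submit()
data = req.wait()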