Example #1
def test_RequestLock():
    assert Request.global_thread_pool.num_workers > 0, \
        "This test must be used with the real threadpool."

    lockA = RequestLock()
    lockB = RequestLock()

    def log_request_system_status():
        status = (
            "*************************\n" +
            'lockA.pending: {}\n'.format(len(lockA._pendingRequests)) +
            'lockB.pending: {}\n'.format(len(lockB._pendingRequests))
            #+ "suspended Requests: {}\n".format( len(Request.global_suspend_set) )
            + "global job queue: {}\n".format(
                len(Request.global_thread_pool.unassigned_tasks)))
        for worker in Request.global_thread_pool.workers:
            status += "{} queued tasks: {}\n".format(worker.name,
                                                     len(worker.job_queue))
        status += "*****************************************************"
        logger.debug(status)

    running = [True]

    def periodic_status():
        while running[0]:
            time.sleep(0.5)
            log_request_system_status()

    # Print periodic status in a background thread while the test runs.
    status_thread = threading.Thread(target=periodic_status)
    status_thread.daemon = True
    status_thread.start()

    try:
        _impl_test_lock(lockA, lockB, Request, 1000)
    except BaseException:
        log_request_system_status()
        running[0] = False
        status_thread.join()

        global paused
        paused = False

        Request.reset_thread_pool(Request.global_thread_pool.num_workers)

        if lockA.locked():
            lockA.release()
        if lockB.locked():
            lockB.release()

        raise

    log_request_system_status()
    running[0] = False
    status_thread.join()
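Taken together, these examples revolve around one contract, shown in the minimal sketch below. It assumes the lazyflow.request import path (not spelled out in the snippets above); RequestLock supports acquire()/release() and the with-statement, mirroring threading.Lock, and Request objects are created from callables, submitted, and waited on.

from functools import partial
from lazyflow.request import Request, RequestLock

shared_results = []
lock = RequestLock()

def append_value(v):
    # Context-manager usage, exactly as in the examples above.
    with lock:
        shared_results.append(v)

# One Request per value; the lock serializes access to the shared list.
reqs = [Request(partial(append_value, i)) for i in range(10)]
for req in reqs:
    req.submit()
for req in reqs:
    req.wait()
assert sorted(shared_results) == list(range(10))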
Example #2
    def __init__(self, *args, **kwargs):
        super(OpFeatureMatrixCache, self).__init__(*args, **kwargs)
        self._lock = RequestLock()

        self.progressSignal = OrderedSignal()
        self._progress_lock = RequestLock()

        self._blockshape = None
        self._dirty_blocks = set()
        self._blockwise_feature_matrices = {}
        self._block_locks = {}  # One lock per stored block

        self._init_blocks(None, None)
Example #3
    def __init__(self, *args, **kwargs):
        super(OpFeatureMatrixCache, self).__init__(*args, **kwargs)
        self._blockshape = None
        self._lock = RequestLock()

        self.progressSignal = OrderedSignal()
        self._progress_lock = RequestLock()

        # In these set/dict members, the block id (dict key)
        #  is simply the block's start coordinate (as a tuple)
        self._blockwise_feature_matrices = {}
        self._dirty_blocks = set()
        self._block_locks = {}  # One lock per stored block
Example #4
    def __init__(self, *args, **kwargs):
        super(OpUnmanagedCompressedCache, self).__init__(*args, **kwargs)
        self._lock = RequestLock()
        self._init_cache(None)
        # Used to ensure unique in-memory file names
        self._block_id_counter = itertools.count()
        self._ignore_ideal_blockshape = False
Example #5
    def predict_probabilities(self, X):
        logger.debug("Predicting with parallel vigra RF")
        X = numpy.asarray(X, dtype=numpy.float32)
        assert X.ndim == 2

        if self._feature_names is not None:
            # For some reason, vigra doesn't seem to check this for us...
            assert X.shape[1] == len(self._feature_names), \
                "Feature count doesn't match the training data."

        # As each forest completes, aggregate results in a shared array.
        # (Must put in a list so we can update it in this closure.)
        total_predictions = [None]
        prediction_lock = RequestLock()

        def update_predictions(forest, forest_predictions):
            forest_predictions *= forest.treeCount()
            with prediction_lock:
                if total_predictions[0] is None:
                    total_predictions[0] = forest_predictions
                else:
                    total_predictions[0] += forest_predictions

        # Create a request for each forest
        pool = RequestPool()
        for forest in self._forests:
            req = Request(partial(forest.predictProbabilities, X))
            req.notify_finished(partial(update_predictions, forest))
            pool.add(req)
        del req
        pool.wait()

        total_predictions[0] /= self._num_trees
        return total_predictions[0]
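The "(Must put in a list so we can update it in this closure.)" comment above is a Python 2 idiom worth spelling out: assigning to a bare name inside a nested function would create a new local, so the shared result is stored in a one-element list and mutated in place instead. A self-contained sketch, with the Python 3 nonlocal alternative:

def make_accumulator():
    total = [0]  # mutable cell: the closure mutates it instead of rebinding

    def add(x):
        total[0] += x  # no assignment to 'total' itself, so no new local

    return add, total

add, total = make_accumulator()
add(3)
add(4)
assert total[0] == 7

# On Python 3, 'nonlocal' makes the rebinding explicit instead:
def make_accumulator_py3():
    total = 0

    def add(x):
        nonlocal total
        total += x

    def current():
        return total

    return add, current

add3, current = make_accumulator_py3()
add3(5)
add3(6)
assert current() == 11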
Example #6
    def _update_block(self, block_start):
        if block_start not in self._block_locks:
            with self._lock:
                if block_start not in self._block_locks:
                    self._block_locks[block_start] = RequestLock()
        with self._block_locks[block_start]:
            if block_start not in self._dirty_blocks:
                # Nothing to do if this block isn't actually dirty
                # (For parallel requests, it's theoretically possible.)
                return
            block_roi = getBlockBounds(self.LabelImage.meta.shape,
                                       self._blockshape, block_start)
            # TODO: Shrink the requested roi using the nonzero blocks slot...
            #       ...or just get rid of the nonzero blocks slot...
            labels_and_features_matrix = self._extract_feature_matrix(block_roi)
            with self._lock:
                self._dirty_blocks.remove(block_start)
                if labels_and_features_matrix.shape[0] > 0:
                    self._blockwise_feature_matrices[block_start] = \
                        labels_and_features_matrix
                else:
                    try:
                        del self._blockwise_feature_matrices[block_start]
                    except KeyError:
                        pass
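Example #6 opens with the double-checked locking idiom: the dict membership test runs once outside self._lock (the cheap common case) and again inside it, because another request can create the entry between the first check and the acquire. A plain-threading sketch of just that idiom (threading.Lock stands in for RequestLock here):

import threading

_master_lock = threading.Lock()
_per_key_locks = {}

def get_key_lock(key):
    # Fast path: skip the master lock when the entry already exists.
    # (Reading a dict concurrently with writes is safe under CPython's GIL,
    #  and entries are only ever added, never replaced.)
    if key not in _per_key_locks:
        with _master_lock:
            # Second check, under the lock: another thread may have
            # created the entry since the check above.
            if key not in _per_key_locks:
                _per_key_locks[key] = threading.Lock()
    return _per_key_locks[key]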
Example #7
    def predict_probabilities(self, X):
        logger.debug("Predicting with parallel vigra RF")
        X = numpy.asarray(X, dtype=numpy.float32)

        # As each forest completes, aggregate results in a shared array.
        # (Must put in a list so we can update it in this closure.)
        total_predictions = [None]
        prediction_lock = RequestLock()

        def update_predictions(forest, forest_predictions):
            forest_predictions *= forest.treeCount()
            with prediction_lock:
                if total_predictions[0] is None:
                    total_predictions[0] = forest_predictions
                else:
                    total_predictions[0] += forest_predictions

        # Create a request for each forest
        pool = RequestPool()
        for forest in self._forests:
            req = Request(partial(forest.predictProbabilities, X))
            req.notify_finished(partial(update_predictions, forest))
            pool.add(req)
        del req
        pool.wait()

        total_predictions[0] /= self._num_trees
        return total_predictions[0]
Example #8
    def setupOutputs(self):
        self.Predictions.meta.shape = self.Features.meta.shape
        self.Predictions.meta.dtype = object
        self.Predictions.meta.axistags = None
        self.Predictions.meta.mapping_dtype = numpy.uint8

        self.Probabilities.meta.shape = self.Features.meta.shape
        self.Probabilities.meta.dtype = object
        self.Probabilities.meta.mapping_dtype = numpy.float32
        self.Probabilities.meta.axistags = None

        self.BadObjects.meta.shape = self.Features.meta.shape
        self.BadObjects.meta.dtype = object
        self.BadObjects.meta.mapping_dtype = numpy.uint8
        self.BadObjects.meta.axistags = None

        if self.LabelsCount.ready():
            nlabels = self.LabelsCount[:].wait()
            nlabels = int(nlabels[0])
            self.ProbabilityChannels.resize(nlabels)
            for oslot in self.ProbabilityChannels:
                oslot.meta.shape = self.Features.meta.shape
                oslot.meta.dtype = object
                oslot.meta.axistags = None
                oslot.meta.mapping_dtype = numpy.float32

        self.lock = RequestLock()
        self.prob_cache = dict()
        self.bad_objects = dict()
Example #9
    def _fetch_and_store_block(self, block_roi, out):
        if out is not None:
            roi_shape = numpy.array(block_roi[1]) - block_roi[0]
            assert (out.shape == roi_shape).all()

        # Get lock for this block (create first if necessary)
        with self._lock:
            if block_roi not in self._block_locks:
                self._block_locks[block_roi] = RequestLock()
            block_lock = self._block_locks[block_roi]

        # Handle identical simultaneous requests for the same block
        # without preventing parallel requests for different blocks.
        with block_lock:
            if block_roi in self._block_data:
                if out is None:
                    # Extra [:] here is in case we are decompressing from a chunkedarray
                    return self._block_data[block_roi][:]
                else:
                    # Extra [:] here is in case we are decompressing from a chunkedarray
                    self.Output.stype.copy_data(out,
                                                self._block_data[block_roi][:])
                    return out

            req = self.Input(*block_roi)
            if out is not None:
                req.writeInto(out)
            block_data = req.wait()
            self._store_block_data(block_roi, block_data)
        return block_data
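The comment in Example #9 states the goal precisely: duplicate requests for one block should wait for each other, while requests for different blocks proceed in parallel. A minimal plain-threading sketch of that per-block-lock cache (the class name and fetch_fn are illustrative, not lazyflow API):

import threading

class BlockCache(object):
    def __init__(self, fetch_fn):
        self._fetch_fn = fetch_fn      # the expensive per-block computation
        self._lock = threading.Lock()  # guards the two dicts below
        self._block_locks = {}
        self._block_data = {}

    def get(self, block_key):
        # Get this block's lock, creating it first if necessary.
        with self._lock:
            block_lock = self._block_locks.setdefault(block_key,
                                                      threading.Lock())
        # Only requests for *this* block serialize here.
        with block_lock:
            if block_key not in self._block_data:
                self._block_data[block_key] = self._fetch_fn(block_key)
            return self._block_data[block_key]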
Example #10
    def __init__(self, *args, **kwargs):
        super(OpCompressedCache, self).__init__(*args, **kwargs)
        self._blockshape = None
        self._cacheFiles = {}
        self._dirtyBlocks = set()
        self._lock = RequestLock()
        self._blockLocks = {}
Example #11
    def __init__(self, *args, **kwargs):
        super(OpUnblockedArrayCache, self).__init__(*args, **kwargs)
        self._lock = RequestLock()
        self._resetBlocks()

        # Now that we're initialized, it's safe to register with the memory manager
        self.registerWithMemoryManager()
Example #12
    def setInSlot(self, slot, subindex, roi, block_data):
        assert slot == self.Input
        block_roi = (tuple(roi.start), tuple(roi.stop))

        with self._lock:
            if block_roi not in self._block_locks:
                self._block_locks[block_roi] = RequestLock()
            block_lock = self._block_locks[block_roi]

        with block_lock:
            self._store_block_data(block_roi, block_data)
Example #13
    def _getCacheFile(self, entire_block_roi):
        """
        Get the cache file for the block whose roi is entire_block_roi.
        If it doesn't exist yet, create it first.
        """
        block_start = tuple(entire_block_roi[0])
        if block_start in self._cacheFiles:
            return self._cacheFiles[block_start]
        with self._lock:
            if block_start not in self._cacheFiles:
                # Create an in-memory hdf5 file with a unique name.
                # (The counter ensures that even blocks that have been deleted
                #  previously get a unique name when they are re-created.)
                logger.debug("Creating a cache file for block: {}".format(
                    list(block_start)))
                filename = (str(id(self)) + str(id(self._cacheFiles)) +
                            str(block_start) +
                            str(next(self._block_id_counter)))
                mem_file = h5py.File(filename,
                                     driver='core',
                                     backing_store=False,
                                     mode='w')

                # h5py will crash if the chunkshape is larger than the dataset shape.
                datashape = tuple(entire_block_roi[1] - entire_block_roi[0])
                chunkshape = numpy.minimum(numpy.array(datashape),
                                           self._chunkshape)
                chunkshape = tuple(chunkshape)

                # Make a compressed dataset.
                # (lzf should be faster than gzip, with a slightly worse
                #  compression ratio.)
                mem_file.create_dataset(
                    'data',
                    shape=datashape,
                    dtype=self.Output.meta.dtype,
                    chunks=chunkshape,
                    compression='lzf')

                # Add mask information if needed.
                if self.Output.meta.has_mask:
                    mem_file.create_dataset(
                        'mask',
                        shape=datashape,
                        dtype=bool,
                        chunks=chunkshape,
                        compression='lzf')
                    mem_file.create_dataset('fill_value',
                                            shape=tuple(),
                                            dtype=self.Output.meta.dtype)

                self._blockLocks[block_start] = RequestLock()
                self._cacheFiles[block_start] = mem_file
                self._dirtyBlocks.add(block_start)
            return self._cacheFiles[block_start]
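The core of Example #13 is h5py's in-memory 'core' driver: with backing_store=False the filename is only an identifier and nothing touches disk, which is what makes a compressed in-RAM block cache possible. A standalone sketch (shapes and dtype are arbitrary here):

import h5py
import numpy

datashape = (64, 64, 64)
chunkshape = tuple(numpy.minimum(datashape, (32, 32, 32)))

# driver='core' + backing_store=False: an HDF5 "file" that lives in RAM only.
mem_file = h5py.File('block_0_0_0', mode='w', driver='core',
                     backing_store=False)
dset = mem_file.create_dataset('data',
                               shape=datashape,
                               dtype=numpy.uint8,
                               chunks=chunkshape,
                               compression='lzf')
dset[:] = 1  # data is lzf-compressed chunk by chunk as it is written
assert (dset[:] == 1).all()
mem_file.close()  # discards the block; nothing was ever written to disk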
Example #14
    def execute(self, slot, subindex, roi, result):
        with self._lock:
            # Does this roi happen to fit ENTIRELY within an existing stored block?
            outer_rois = containing_rois(self._block_data.keys(),
                                         (roi.start, roi.stop))
            if len(outer_rois) > 0:
                # Use the first one we found
                block_roi = self._standardize_roi(*outer_rois[0])
                block_relative_roi = numpy.array(
                    (roi.start, roi.stop)) - block_roi[0]
                self.Output.stype.copy_data(
                    result, self._block_data[block_roi][roiToSlice(
                        *block_relative_roi)])
                return

        # Standardize roi for usage as dict key
        block_roi = self._standardize_roi(roi.start, roi.stop)

        # Get lock for this block (create first if necessary)
        with self._lock:
            if block_roi not in self._block_locks:
                self._block_locks[block_roi] = RequestLock()
            block_lock = self._block_locks[block_roi]

        # Handle identical simultaneous requests
        with block_lock:
            try:
                self.Output.stype.copy_data(result,
                                            self._block_data[block_roi])
                return
            except KeyError:  # Not yet stored: Request it now.

                # We attach a special attribute to the array to allow the upstream operator
                #  to optionally tell us not to bother caching the data.
                self.Input(roi.start, roi.stop).writeInto(result).block()

                if self.Input.meta.dontcache:
                    # The upstream operator says not to bother caching the data.
                    # (For example, see OpCacheFixer.)
                    return

                block = result.copy()
                with self._lock:
                    # Store the data.
                    # First double-check that the block wasn't removed from the
                    #   cache while we were requesting it.
                    # (Could have happened via propagateDirty() or eventually the arrayCacheMemoryMgr)
                    if block_roi in self._block_locks:
                        self._block_data[block_roi] = block
            self._last_access_times[block_roi] = time.time()
Example #15
    def __init__(self, *args, **kwargs):
        super(OpPatchCreator, self).__init__(*args, **kwargs)
        self.patches = None
        self.posns = None
        self.lock = RequestLock()

        self.opGrid = opGridCreator.OpGridCreator(graph=Graph())
        self.GridOutput.connect(self.opGrid.Output)

        self.opGrid.GridStartVertical.connect(self.GridStartVertical)
        self.opGrid.GridStartHorizontal.connect(self.GridStartHorizontal)
        self.opGrid.GridWidth.connect(self.GridWidth)
        self.opGrid.GridHeight.connect(self.GridHeight)
        self.opGrid.PatchWidth.connect(self.PatchWidth)
        self.opGrid.PatchHeight.connect(self.PatchHeight)
Example #16
    def execute(self, slot, subindex, roi, result):
        assert slot == self.ConcatenatedOutput
        self.progressSignal(0.0)

        num_dirty_slots = len(self._dirty_slots)
        subtask_progress = {}
        progress_lock = RequestLock()

        def forward_progress_updates(feature_slot, progress):
            with progress_lock:
                subtask_progress[feature_slot] = progress
                total_progress = 0.95 * sum(
                    subtask_progress.values()) / num_dirty_slots
            self.progressSignal(total_progress)

        logger.debug("Updating features for {} dirty images out of {}".format(
            len(self._dirty_slots), len(self.FeatureMatrices)))

        pool = RequestPool()
        subresults = []
        for feature_slot, progress_slot in zip(self.FeatureMatrices,
                                               self.ProgressSignals):
            subresults.append([None])
            req = feature_slot[:]
            req.writeInto(subresults[-1])

            # Only use progress for slots that were dirty.
            # The others are going to be really fast.
            if feature_slot in self._dirty_slots:
                sub_progress_signal = progress_slot.value
                sub_progress_signal.subscribe(
                    partial(forward_progress_updates, feature_slot))
            pool.add(req)
        pool.wait()

        # Reset dirty slots
        self._dirty_slots = set()

        # Since the subresults are returned in 'value' slots,
        #  we have to unpack them from their single-element lists.
        subresult_list = list(itertools.chain(*subresults))

        total_matrix = numpy.concatenate(subresult_list, axis=0)
        self.progressSignal(100.0)
        result[0] = total_matrix
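Note a detail in forward_progress_updates() above: the shared progress dict is updated and summed while holding progress_lock, but self.progressSignal is invoked only after the with-block exits. A small sketch of that "mutate under the lock, call back outside it" pattern (plain threading; names are illustrative):

import threading

class ProgressAggregator(object):
    def __init__(self, num_tasks, callback):
        self._lock = threading.Lock()
        self._progress = {}        # task id -> percent complete
        self._num_tasks = num_tasks
        self._callback = callback

    def update(self, task_id, percent):
        with self._lock:
            self._progress[task_id] = percent
            total = sum(self._progress.values()) / float(self._num_tasks)
        # Deliberately outside the lock: a slow or re-entrant callback
        # can no longer stall (or deadlock) other tasks' updates.
        self._callback(total)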
Example #17
def test_cancellation_behavior():
    """
    If a request is cancelled while it was waiting on a lock,
    it should raise the CancellationException.
    """
    lock = RequestLock()
    lock.acquire()

    def f():
        try:
            with lock:
                assert False
        except Request.CancellationException:
            pass
        else:
            assert False

    finished = [False]
    cancelled = [False]
    failed = [False]

    def handle_finished(result):
        finished[0] = True

    def handle_cancelled():
        cancelled[0] = True

    def handle_failed(*args):
        failed[0] = True

    req = Request(f)
    req.notify_finished(handle_finished)
    req.notify_failed(handle_failed)
    req.notify_cancelled(handle_cancelled)

    req.submit()
    req.cancel()
    time.sleep(0.1)
    lock.release()
    time.sleep(0.1)
    assert not finished[0] and not failed[0] and cancelled[0]
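What the test verifies is behavior a plain threading.Lock cannot provide: a thread blocked in Lock.acquire() cannot be interrupted. Below is a rough analogue of a cancellable acquire using timeout polling; this is only a sketch of the observable behavior, not how RequestLock is actually implemented:

import threading

class CancellationException(Exception):
    pass

def cancellable_acquire(lock, cancelled, poll_interval=0.05):
    # Retry with a short timeout so we can notice cancellation while waiting.
    while not lock.acquire(timeout=poll_interval):
        if cancelled.is_set():
            raise CancellationException()

lock = threading.Lock()
lock.acquire()                 # held elsewhere, as in the test above
cancelled = threading.Event()
cancelled.set()                # cancel while the waiter is still blocked
try:
    cancellable_acquire(lock, cancelled)
    assert False
except CancellationException:
    pass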
Example #18
    def _getCacheFile(self, entire_block_roi):
        """
        Get the cache file for the block whose roi is entire_block_roi.
        If it doesn't exist yet, create it first.
        """
        block_start = tuple(entire_block_roi[0])
        if block_start in self._cacheFiles:
            return self._cacheFiles[block_start]
        with self._lock:
            if block_start not in self._cacheFiles:
                # Create an in-memory hdf5 file with a unique name
                logger.debug("Creating a cache file for block: {}".format(
                    list(block_start)))
                filename = (str(id(self)) + str(id(self._cacheFiles)) +
                            str(block_start))
                mem_file = h5py.File(filename,
                                     driver='core',
                                     backing_store=False,
                                     mode='w')

                # h5py will crash if the chunkshape is larger than the dataset shape.
                datashape = tuple(entire_block_roi[1] - entire_block_roi[0])
                chunkshape = numpy.minimum(numpy.array(datashape),
                                           self._chunkshape)
                chunkshape = tuple(chunkshape)

                # Make a compressed dataset.
                # (lzf should be faster than gzip, with a slightly worse
                #  compression ratio.)
                mem_file.create_dataset(
                    'data',
                    shape=datashape,
                    dtype=self.Output.meta.dtype,
                    chunks=chunkshape,
                    compression='lzf')

                self._blockLocks[block_start] = RequestLock()
                self._cacheFiles[block_start] = mem_file
                self._dirtyBlocks.add(block_start)
            return self._cacheFiles[block_start]
Example #19
    def execute(self, slot, subindex, roi, result):
        with self._lock:
            # Does this roi happen to fit ENTIRELY within an existing stored block?
            outer_rois = containing_rois(self._block_data.keys(),
                                         (roi.start, roi.stop))
            if len(outer_rois) > 0:
                # Use the first one we found
                block_roi = self._standardize_roi(*outer_rois[0])
                block_relative_roi = numpy.array(
                    (roi.start, roi.stop)) - block_roi[0]
                result[:] = self._block_data[block_roi][roiToSlice(
                    *block_relative_roi)]
                return

        # Standardize roi for usage as dict key
        block_roi = self._standardize_roi(roi.start, roi.stop)

        # Get lock for this block (create first if necessary)
        with self._lock:
            if block_roi not in self._block_locks:
                self._block_locks[block_roi] = RequestLock()
            block_lock = self._block_locks[block_roi]

        # Handle identical simultaneous requests
        with block_lock:
            try:
                result[:] = self._block_data[block_roi]
                return
            except KeyError:
                # Not yet stored: Request it now.
                self.Input(roi.start, roi.stop).writeInto(result).block()
                block = result.copy()
                with self._lock:
                    # Store the data.
                    # First double-check that the block wasn't removed from the 
                    #   cache while we were requesting it. 
                    # (Could have happened via propagateDirty() or eventually the arrayCacheMemoryMgr)
                    if block_roi in self._block_locks:
                        self._block_data[block_roi] = block
Example #20
    def __init__(self, *args, **kwargs):
        super(OpCompressedCache, self).__init__(*args, **kwargs)
        self._lock = RequestLock()
        self._init_cache(None)
Example #21
    def __init__(self, *args, **kwargs):
        super(OpUnblockedArrayCache, self).__init__(*args, **kwargs)
        self._lock = RequestLock()
        self._block_data = {}
        self._block_locks = {}
Example #22
    def __init__(self, *args, **kwargs):
        super(OpTiffReader, self).__init__(*args, **kwargs)
        self._filepath = None
        self._tiff_file = None
        self._page_shape = None
        self._tiff_file_lock = RequestLock()
Example #23
    def __init__(self, *args, **kwargs):
        super(OpGridCreator, self).__init__(*args, **kwargs)
        self.gridArray = None
        self.lock = RequestLock()
Example #24
    def __init__(self, *args, **kwargs):
        super(self.__class__, self).__init__(*args, **kwargs)
        self._blockPipelines = {}  # indexed by blockstart
        self._lock = RequestLock()
Example #25
    def execute(self, slot, subindex, roi, result):
        assert slot == self.LabelAndFeatureMatrix
        self.progressSignal(0.0)

        # Technically, this could result in strange progress reporting if execute()
        #  is called by multiple threads in parallel.
        # This could be fixed with some fancier progress state, but
        # (1) We don't expect that to be typical, and
        # (2) progress reporting is merely informational.
        num_dirty_blocks = len(self._dirty_blocks)
        remaining_dirty = [num_dirty_blocks]

        def update_progress(result):
            remaining_dirty[0] -= 1
            percent_complete = 95.0 * (num_dirty_blocks -
                                       remaining_dirty[0]) / num_dirty_blocks
            self.progressSignal(percent_complete)

        # Update all dirty blocks in the cache
        logger.debug("Updating {} dirty blocks".format(num_dirty_blocks))

        # Before updating the blocks, ensure that the necessary block locks exist
        # It's better to do this now instead of inside each request
        #  to avoid contention over self._lock
        with self._lock:
            for block_start in self._dirty_blocks:
                if block_start not in self._block_locks:
                    self._block_locks[block_start] = RequestLock()

        # Update each block in its own request.
        pool = RequestPool()
        reqs = {}
        for block_start in self._dirty_blocks:
            req = Request(partial(self._get_features_for_block, block_start))
            req.notify_finished(update_progress)
            reqs[block_start] = req
            pool.add(req)
        pool.wait()

        # Now store the results we got.
        # It's better to store the blocks here -- rather than within each request -- to
        #  avoid contention over self._lock from within every block's request.
        with self._lock:
            for block_start, req in reqs.items():
                if req.result is None:
                    # 'None' means the block wasn't dirty. No need to update.
                    continue
                labels_and_features_matrix = req.result
                self._dirty_blocks.remove(block_start)

                if labels_and_features_matrix.shape[0] > 0:
                    # Update the block entry with the new matrix.
                    self._blockwise_feature_matrices[
                        block_start] = labels_and_features_matrix
                else:
                    # All labels were removed from the block,
                    # So the new feature matrix is empty.
                    # Just delete its entry from our list.
                    try:
                        del self._blockwise_feature_matrices[block_start]
                    except KeyError:
                        pass

        # Concatenate all the blockwise results
        if self._blockwise_feature_matrices:
            total_feature_matrix = numpy.concatenate(
                list(self._blockwise_feature_matrices.values()), axis=0)
        else:
            # No label points at all.
            # Return an empty label&feature matrix (of the correct shape)
            num_feature_channels = self.FeatureImage.meta.shape[-1]
            total_feature_matrix = numpy.empty(
                shape=(0, 1 + num_feature_channels), dtype=numpy.float32)

        self.progressSignal(100.0)
        logger.debug("After update, there are {} clean blocks".format(
            len(self._blockwise_feature_matrices)))
        result[0] = total_feature_matrix
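One last detail worth isolating from Example #25: when no block holds any label points, concatenation is skipped and a (0, 1 + num_feature_channels) matrix is returned, so downstream code can still split the label column from the feature columns. A sketch (the function name is illustrative):

import numpy

def concatenate_feature_blocks(block_matrices, num_feature_channels):
    if block_matrices:
        return numpy.concatenate(list(block_matrices), axis=0)
    # No label points anywhere: empty, but with the correct column count.
    return numpy.empty(shape=(0, 1 + num_feature_channels),
                       dtype=numpy.float32)

matrix = concatenate_feature_blocks([], num_feature_channels=4)
assert matrix.shape == (0, 5)
labels, features = matrix[:, 0], matrix[:, 1:]  # both still well-formed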