def setupOutputs(self):
    self.Predictions.meta.shape = self.Features.meta.shape
    self.Predictions.meta.dtype = object
    self.Predictions.meta.axistags = None
    self.Predictions.meta.mapping_dtype = numpy.uint8

    self.Probabilities.meta.shape = self.Features.meta.shape
    self.Probabilities.meta.dtype = object
    self.Probabilities.meta.mapping_dtype = numpy.float32
    self.Probabilities.meta.axistags = None

    self.BadObjects.meta.shape = self.Features.meta.shape
    self.BadObjects.meta.dtype = object
    self.BadObjects.meta.mapping_dtype = numpy.uint8
    self.BadObjects.meta.axistags = None

    if self.LabelsCount.ready():
        nlabels = self.LabelsCount[:].wait()
        nlabels = int(nlabels[0])
        self.ProbabilityChannels.resize(nlabels)
        for oslot in self.ProbabilityChannels:
            oslot.meta.shape = self.Features.meta.shape
            oslot.meta.dtype = object
            oslot.meta.axistags = None
            oslot.meta.mapping_dtype = numpy.float32

    self.lock = RequestLock()
    self.prob_cache = dict()
    self.bad_objects = dict()
def test_RequestLock():
    assert Request.global_thread_pool.num_workers > 0, \
        "This test must be used with the real threadpool."

    lockA = RequestLock()
    lockB = RequestLock()

    def log_request_system_status():
        status = (
            "*************************\n"
            + "lockA.pending: {}\n".format(len(lockA._pendingRequests))
            + "lockB.pending: {}\n".format(len(lockB._pendingRequests))
            #+ "suspended Requests: {}\n".format(len(Request.global_suspend_set))
            + "global job queue: {}\n".format(len(Request.global_thread_pool.unassigned_tasks)))
        for worker in Request.global_thread_pool.workers:
            status += "{} queued tasks: {}\n".format(worker.name, len(worker.job_queue))
        status += "*****************************************************"
        logger.debug(status)

    running = [True]

    def periodic_status():
        while running[0]:
            time.sleep(0.5)
            log_request_system_status()

    # Print periodic status while the test runs...
    status_thread = threading.Thread(target=periodic_status)
    status_thread.daemon = True
    status_thread.start()

    try:
        _impl_test_lock(lockA, lockB, Request, 1000)
    except:
        log_request_system_status()
        running[0] = False
        status_thread.join()

        global paused
        paused = False

        Request.reset_thread_pool(Request.global_thread_pool.num_workers)
        if lockA.locked():
            lockA.release()
        if lockB.locked():
            lockB.release()
        raise

    log_request_system_status()
    running[0] = False
    status_thread.join()
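# A minimal sketch of the basic RequestLock API exercised by the test above,
# assuming lazyflow is importable. `shared_state` and `append_value` are
# hypothetical names used only for illustration.
from lazyflow.request import Request, RequestLock

shared_state = []
lock = RequestLock()

def append_value(v):
    # RequestLock supports the context-manager protocol, like threading.Lock,
    # but cooperates with the request scheduler instead of blocking a worker.
    with lock:
        shared_state.append(v)

reqs = [Request(lambda v=v: append_value(v)) for v in range(10)]
for req in reqs:
    req.submit()
for req in reqs:
    req.wait()
assert sorted(shared_state) == list(range(10))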
def __init__(self, *args, **kwargs):
    super(OpFeatureMatrixCache, self).__init__(*args, **kwargs)
    self._lock = RequestLock()
    self.progressSignal = OrderedSignal()
    self._progress_lock = RequestLock()

    self._blockshape = None

    # In these set/dict members, the block id (dict key)
    # is simply the block's start coordinate (as a tuple)
    self._dirty_blocks = set()
    self._blockwise_feature_matrices = {}
    self._block_locks = {}  # One lock per stored block

    self._init_blocks(None, None)
def __init__(self, *args, **kwargs):
    super(OpPatchCreator, self).__init__(*args, **kwargs)
    self.patches = None
    self.posns = None
    self.lock = RequestLock()

    self.opGrid = opGridCreator.OpGridCreator(graph=Graph())
    self.GridOutput.connect(self.opGrid.Output)

    self.opGrid.GridStartVertical.connect(self.GridStartVertical)
    self.opGrid.GridStartHorizontal.connect(self.GridStartHorizontal)
    self.opGrid.GridWidth.connect(self.GridWidth)
    self.opGrid.GridHeight.connect(self.GridHeight)
    self.opGrid.PatchWidth.connect(self.PatchWidth)
    self.opGrid.PatchHeight.connect(self.PatchHeight)
def __init__(self, *args, **kwargs):
    super(OpCompressedCache, self).__init__(*args, **kwargs)
    self._blockshape = None
    self._cacheFiles = {}
    self._dirtyBlocks = set()
    self._lock = RequestLock()
    self._blockLocks = {}
def __init__(self, *args, **kwargs):
    super(OpUnblockedArrayCache, self).__init__(*args, **kwargs)
    self._lock = RequestLock()
    self._resetBlocks()

    # Now that we're initialized, it's safe to register with the memory manager
    self.registerWithMemoryManager()
def __init__(self, *args, **kwargs):
    super(OpUnmanagedCompressedCache, self).__init__(*args, **kwargs)
    self._lock = RequestLock()
    self._init_cache(None)
    self._block_id_counter = itertools.count()  # Used to ensure unique in-memory file names
    self._ignore_ideal_blockshape = False
def predict_probabilities(self, X):
    logger.debug("Predicting with parallel vigra RF")
    X = numpy.asarray(X, dtype=numpy.float32)

    # As each forest completes, aggregate results in a shared array.
    # (Must put in a list so we can update it in this closure.)
    total_predictions = [None]
    prediction_lock = RequestLock()

    def update_predictions(forest, forest_predictions):
        forest_predictions *= forest.treeCount()
        with prediction_lock:
            if total_predictions[0] is None:
                total_predictions[0] = forest_predictions
            else:
                total_predictions[0] += forest_predictions

    # Create a request for each forest
    pool = RequestPool()
    for forest in self._forests:
        req = Request(partial(forest.predictProbabilities, X))
        req.notify_finished(partial(update_predictions, forest))
        pool.add(req)
        del req
    pool.wait()

    total_predictions[0] /= self._num_trees
    return total_predictions[0]
def predict_probabilities(self, X):
    logger.debug("Predicting with parallel vigra RF")
    X = numpy.asarray(X, dtype=numpy.float32)
    assert X.ndim == 2

    if self._feature_names is not None:
        # For some reason, vigra doesn't seem to check this for us...
        assert X.shape[1] == len(self._feature_names), \
            "Feature count doesn't match the training data."

    # As each forest completes, aggregate results in a shared array.
    # (Must put in a list so we can update it in this closure.)
    total_predictions = [None]
    prediction_lock = RequestLock()

    def update_predictions(forest, forest_predictions):
        forest_predictions *= forest.treeCount()
        with prediction_lock:
            if total_predictions[0] is None:
                total_predictions[0] = forest_predictions
            else:
                total_predictions[0] += forest_predictions

    # Create a request for each forest
    pool = RequestPool()
    for forest in self._forests:
        req = Request(partial(forest.predictProbabilities, X))
        req.notify_finished(partial(update_predictions, forest))
        pool.add(req)
        del req
    pool.wait()

    total_predictions[0] /= self._num_trees
    return total_predictions[0]
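# The aggregation idiom from the two predict_probabilities() variants above,
# distilled: each request computes a partial result independently, and a
# RequestLock serializes only the merge step. A sketch under the assumption
# that `compute_part` is any function of one argument; all names here are
# illustrative, not part of lazyflow's API.
from functools import partial
from lazyflow.request import Request, RequestPool, RequestLock

def parallel_sum(parts, compute_part):
    total = [None]              # boxed in a list so the closure can rebind it
    merge_lock = RequestLock()

    def merge(result):
        with merge_lock:
            if total[0] is None:
                total[0] = result
            else:
                total[0] += result

    pool = RequestPool()
    for part in parts:
        req = Request(partial(compute_part, part))
        req.notify_finished(merge)
        pool.add(req)
    pool.wait()
    return total[0]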
def _update_block(self, block_start):
    if block_start not in self._block_locks:
        with self._lock:
            if block_start not in self._block_locks:
                self._block_locks[block_start] = RequestLock()

    with self._block_locks[block_start]:
        if block_start not in self._dirty_blocks:
            # Nothing to do if this block isn't actually dirty
            # (For parallel requests, it's theoretically possible.)
            return

        block_roi = getBlockBounds(self.LabelImage.meta.shape, self._blockshape, block_start)
        # TODO: Shrink the requested roi using the nonzero blocks slot...
        #       ...or just get rid of the nonzero blocks slot...
        labels_and_features_matrix = self._extract_feature_matrix(block_roi)

        with self._lock:
            self._dirty_blocks.remove(block_start)
            if labels_and_features_matrix.shape[0] > 0:
                self._blockwise_feature_matrices[block_start] = labels_and_features_matrix
            else:
                try:
                    del self._blockwise_feature_matrices[block_start]
                except KeyError:
                    pass
def _fetch_and_store_block(self, block_roi, out):
    if out is not None:
        roi_shape = numpy.array(block_roi[1]) - block_roi[0]
        assert (out.shape == roi_shape).all()

    # Get lock for this block (create first if necessary)
    with self._lock:
        if block_roi not in self._block_locks:
            self._block_locks[block_roi] = RequestLock()
        block_lock = self._block_locks[block_roi]

    # Handle identical simultaneous requests for the same block
    # without preventing parallel requests for different blocks.
    with block_lock:
        if block_roi in self._block_data:
            if out is None:
                # Extra [:] here is in case we are decompressing from a chunkedarray
                return self._block_data[block_roi][:]
            else:
                # Extra [:] here is in case we are decompressing from a chunkedarray
                self.Output.stype.copy_data(out, self._block_data[block_roi][:])
                return out

        req = self.Input(*block_roi)
        if out is not None:
            req.writeInto(out)
        block_data = req.wait()
        self._store_block_data(block_roi, block_data)
        return block_data
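# The per-block locking pattern shared by _update_block() and
# _fetch_and_store_block() above, reduced to its essentials: one global lock
# guards the dict of per-block locks, and each block lock serializes work on
# just that block. A sketch; `BlockLockRegistry` and `block_key` are
# illustrative names, not lazyflow classes.
from lazyflow.request import RequestLock

class BlockLockRegistry(object):
    def __init__(self):
        self._lock = RequestLock()
        self._block_locks = {}

    def get_block_lock(self, block_key):
        # Double-checked locking: take the global lock only when the
        # per-block lock might not exist yet.
        if block_key not in self._block_locks:
            with self._lock:
                if block_key not in self._block_locks:
                    self._block_locks[block_key] = RequestLock()
        return self._block_locks[block_key]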
def setInSlot(self, slot, subindex, roi, block_data):
    assert slot == self.Input
    block_roi = (tuple(roi.start), tuple(roi.stop))
    with self._lock:
        if block_roi not in self._block_locks:
            self._block_locks[block_roi] = RequestLock()
        block_lock = self._block_locks[block_roi]
    with block_lock:
        self._store_block_data(block_roi, block_data)
def test_cancellation_behavior():
    """
    If a request is cancelled while it was waiting on a lock,
    it should raise the CancellationException.
    """
    lock = RequestLock()
    lock.acquire()

    def f():
        try:
            with lock:
                assert False
        except Request.CancellationException:
            pass
        else:
            assert False

    finished = [False]
    cancelled = [False]
    failed = [False]

    def handle_finished(result):
        finished[0] = True

    def handle_cancelled():
        cancelled[0] = True

    def handle_failed(*args):
        failed[0] = True

    req = Request(f)
    req.notify_finished(handle_finished)
    req.notify_failed(handle_failed)
    req.notify_cancelled(handle_cancelled)

    req.submit()
    req.cancel()
    time.sleep(0.1)
    lock.release()
    time.sleep(0.1)

    assert not finished[0] and not failed[0] and cancelled[0]
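# Based on the behavior verified above: a request cancelled while waiting on a
# RequestLock sees Request.CancellationException raised from the acquire, so
# any cleanup belongs in a finally block rather than after the `with`
# statement. An illustrative sketch; `resource` is a hypothetical object with
# update()/close() methods.
from lazyflow.request import RequestLock

lock = RequestLock()

def guarded_update(resource):
    try:
        with lock:                # may raise Request.CancellationException
            resource.update()
    finally:
        resource.close()          # runs even if we were cancelled while waiting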
def _getCacheFile(self, entire_block_roi):
    """
    Get the cache file for the block that starts at block_start.
    If it doesn't exist yet, create it first.
    """
    block_start = tuple(entire_block_roi[0])
    if block_start in self._cacheFiles:
        return self._cacheFiles[block_start]
    with self._lock:
        if block_start not in self._cacheFiles:
            # Create an in-memory hdf5 file with a unique name
            # (the counter ensures that even blocks that have been deleted
            # previously get a unique name when they are re-created).
            logger.debug("Creating a cache file for block: {}".format(list(block_start)))
            filename = (str(id(self)) + str(id(self._cacheFiles))
                        + str(block_start) + str(next(self._block_id_counter)))
            mem_file = h5py.File(filename, driver='core', backing_store=False, mode='w')

            # h5py will crash if the chunkshape is larger than the dataset shape.
            datashape = tuple(entire_block_roi[1] - entire_block_roi[0])
            chunkshape = numpy.minimum(numpy.array(datashape), self._chunkshape)
            chunkshape = tuple(chunkshape)

            # Make a compressed dataset
            mem_file.create_dataset('data',
                                    shape=datashape,
                                    dtype=self.Output.meta.dtype,
                                    chunks=chunkshape,
                                    compression='lzf')  # lzf should be faster than gzip,
                                                        # with a slightly worse compression ratio

            # Add mask information if needed.
            if self.Output.meta.has_mask:
                mem_file.create_dataset('mask',
                                        shape=datashape,
                                        dtype=bool,
                                        chunks=chunkshape,
                                        compression='lzf')  # lzf should be faster than gzip,
                                                            # with a slightly worse compression ratio
                mem_file.create_dataset('fill_value',
                                        shape=tuple(),
                                        dtype=self.Output.meta.dtype)

            self._blockLocks[block_start] = RequestLock()
            self._cacheFiles[block_start] = mem_file
            self._dirtyBlocks.add(block_start)
    return self._cacheFiles[block_start]
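# The in-memory HDF5 trick used above, in isolation: h5py's 'core' driver with
# backing_store=False keeps the whole file in RAM, and 'lzf' provides cheap
# chunkwise compression. The filename is only an identifier, so it just has to
# be unique per process (hence the id()/counter scheme above). A sketch
# assuming h5py and numpy are available; `make_compressed_block` is a
# hypothetical helper.
import h5py
import numpy

def make_compressed_block(name, datashape, chunkshape, dtype=numpy.float32):
    mem_file = h5py.File(name, driver='core', backing_store=False, mode='w')
    # Clip the chunkshape so it never exceeds the dataset shape.
    chunks = tuple(numpy.minimum(numpy.array(datashape), chunkshape))
    mem_file.create_dataset('data',
                            shape=datashape,
                            dtype=dtype,
                            chunks=chunks,
                            compression='lzf')
    return mem_file

block = make_compressed_block('block_0_0', (64, 64), (32, 32))
block['data'][:] = numpy.random.rand(64, 64).astype(numpy.float32)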
class OpGridCreator(Operator):
    """Creates list of patches from all filter responses."""
    PatchWidth = InputSlot()           # width of patch in pixel
    PatchHeight = InputSlot()          # height of patch in pixel
    GridStartVertical = InputSlot()    # vertical start of patch grid in pixel
    GridStartHorizontal = InputSlot()  # horizontal start of patch grid in pixel
    GridWidth = InputSlot()            # width of patch grid in pixel
    GridHeight = InputSlot()           # height of patch grid in pixel
    ImageWidth = InputSlot()           # width of raw image
    ImageHeight = InputSlot()          # height of raw image

    Output = OutputSlot()              # number of patches in x/y-direction

    def __init__(self, *args, **kwargs):
        super(OpGridCreator, self).__init__(*args, **kwargs)
        self.gridArray = None
        self.lock = RequestLock()

    def setupOutputs(self):
        self.Output.meta.shape = (self.ImageWidth.value, self.ImageHeight.value)
        # the viewer uses a different coordinate system
        self.Output.meta.axistags = vigra.defaultAxistags('yx')
        self.Output.meta.dtype = numpy.uint8

    def execute(self, slot, subindex, roi, result):
        """create grid"""
        try:
            self.lock.acquire()
            if self.gridArray is None:
                shape = (self.ImageHeight.value, self.ImageWidth.value)
                patch = (self.PatchHeight.value, self.PatchWidth.value)
                offset = (self.GridStartVertical.value, self.GridStartHorizontal.value)
                grid = (self.GridHeight.value, self.GridWidth.value)
                self.gridArray = make_grid(shape, patch, grid, offset)
            return self.gridArray[roi.toSlice()]
        finally:
            self.lock.release()

    def propagateDirty(self, slot, subindex, roi):
        try:
            self.lock.acquire()
            self.gridArray = None
            self.Output.setDirty(slice(None))
        finally:
            self.lock.release()
def execute(self, slot, subindex, roi, result):
    with self._lock:
        # Does this roi happen to fit ENTIRELY within an existing stored block?
        outer_rois = containing_rois(self._block_data.keys(), (roi.start, roi.stop))
        if len(outer_rois) > 0:
            # Use the first one we found
            block_roi = self._standardize_roi(*outer_rois[0])
            block_relative_roi = numpy.array((roi.start, roi.stop)) - block_roi[0]
            self.Output.stype.copy_data(result, self._block_data[block_roi][roiToSlice(*block_relative_roi)])
            return

    # Standardize roi for usage as dict key
    block_roi = self._standardize_roi(roi.start, roi.stop)

    # Get lock for this block (create first if necessary)
    with self._lock:
        if block_roi not in self._block_locks:
            self._block_locks[block_roi] = RequestLock()
        block_lock = self._block_locks[block_roi]

    # Handle identical simultaneous requests
    with block_lock:
        try:
            self.Output.stype.copy_data(result, self._block_data[block_roi])
            return
        except KeyError:
            # Not yet stored: Request it now.
            # We attach a special attribute to the array to allow the upstream operator
            # to optionally tell us not to bother caching the data.
            self.Input(roi.start, roi.stop).writeInto(result).block()

            if self.Input.meta.dontcache:
                # The upstream operator says not to bother caching the data.
                # (For example, see OpCacheFixer.)
                return

            block = result.copy()
            with self._lock:
                # Store the data.
                # First double-check that the block wasn't removed from the
                # cache while we were requesting it.
                # (Could have happened via propagateDirty() or eventually the arrayCacheMemoryMgr)
                if block_roi in self._block_locks:
                    self._block_data[block_roi] = block
                    self._last_access_times[block_roi] = time.time()
def execute(self, slot, subindex, roi, result):
    assert slot == self.ConcatenatedOutput
    self.progressSignal(0.0)

    num_dirty_slots = len(self._dirty_slots)
    subtask_progress = {}
    progress_lock = RequestLock()

    def forward_progress_updates(feature_slot, progress):
        with progress_lock:
            subtask_progress[feature_slot] = progress
            total_progress = 0.95 * sum(subtask_progress.values()) / num_dirty_slots
            self.progressSignal(total_progress)

    logger.debug("Updating features for {} dirty images out of {}"
                 "".format(len(self._dirty_slots), len(self.FeatureMatrices)))

    pool = RequestPool()
    subresults = []
    for feature_slot, progress_slot in zip(self.FeatureMatrices, self.ProgressSignals):
        subresults.append([None])
        req = feature_slot[:]
        req.writeInto(subresults[-1])

        # Only use progress for slots that were dirty.
        # The others are going to be really fast.
        if feature_slot in self._dirty_slots:
            sub_progress_signal = progress_slot.value
            sub_progress_signal.subscribe(partial(forward_progress_updates, feature_slot))
        pool.add(req)
    pool.wait()

    # Reset dirty slots
    self._dirty_slots = set()

    # Since the subresults are returned in 'value' slots,
    # we have to unpack them from their single-element lists.
    subresult_list = list(itertools.chain(*subresults))

    total_matrix = numpy.concatenate(subresult_list, axis=0)
    self.progressSignal(100.0)
    result[0] = total_matrix
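# Distilled from the execute() above: several sub-tasks report progress
# independently, and a RequestLock protects the shared progress table so the
# aggregate value stays consistent. A sketch; `make_progress_aggregator` and
# `report` are hypothetical names, not lazyflow API.
from lazyflow.request import RequestLock

def make_progress_aggregator(num_tasks, report):
    progress_by_task = {}
    lock = RequestLock()

    def on_progress(task_id, percent):
        with lock:
            progress_by_task[task_id] = percent
            total = sum(progress_by_task.values()) / float(num_tasks)
            report(total)

    return on_progress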
def _getCacheFile(self, entire_block_roi):
    """
    Get the cache file for the block that starts at block_start.
    If it doesn't exist yet, create it first.
    """
    block_start = tuple(entire_block_roi[0])
    if block_start in self._cacheFiles:
        return self._cacheFiles[block_start]
    with self._lock:
        if block_start not in self._cacheFiles:
            # Create an in-memory hdf5 file with a unique name
            logger.debug("Creating a cache file for block: {}".format(list(block_start)))
            filename = str(id(self)) + str(id(self._cacheFiles)) + str(block_start)
            mem_file = h5py.File(filename, driver='core', backing_store=False, mode='w')

            # h5py will crash if the chunkshape is larger than the dataset shape.
            datashape = tuple(entire_block_roi[1] - entire_block_roi[0])
            chunkshape = numpy.minimum(numpy.array(datashape), self._chunkshape)
            chunkshape = tuple(chunkshape)

            # Make a compressed dataset
            mem_file.create_dataset('data',
                                    shape=datashape,
                                    dtype=self.Output.meta.dtype,
                                    chunks=chunkshape,
                                    compression='lzf')  # lzf should be faster than gzip,
                                                        # with a slightly worse compression ratio

            self._blockLocks[block_start] = RequestLock()
            self._cacheFiles[block_start] = mem_file
            self._dirtyBlocks.add(block_start)
    return self._cacheFiles[block_start]
def execute(self, slot, subindex, roi, result):
    with self._lock:
        # Does this roi happen to fit ENTIRELY within an existing stored block?
        outer_rois = containing_rois(self._block_data.keys(), (roi.start, roi.stop))
        if len(outer_rois) > 0:
            # Use the first one we found
            block_roi = self._standardize_roi(*outer_rois[0])
            block_relative_roi = numpy.array((roi.start, roi.stop)) - block_roi[0]
            result[:] = self._block_data[block_roi][roiToSlice(*block_relative_roi)]
            return

    # Standardize roi for usage as dict key
    block_roi = self._standardize_roi(roi.start, roi.stop)

    # Get lock for this block (create first if necessary)
    with self._lock:
        if block_roi not in self._block_locks:
            self._block_locks[block_roi] = RequestLock()
        block_lock = self._block_locks[block_roi]

    # Handle identical simultaneous requests
    with block_lock:
        try:
            result[:] = self._block_data[block_roi]
            return
        except KeyError:
            # Not yet stored: Request it now.
            self.Input(roi.start, roi.stop).writeInto(result).block()
            block = result.copy()
            with self._lock:
                # Store the data.
                # First double-check that the block wasn't removed from the
                # cache while we were requesting it.
                # (Could have happened via propagateDirty() or eventually the arrayCacheMemoryMgr)
                if block_roi in self._block_locks:
                    self._block_data[block_roi] = block
def __init__(self, *args, **kwargs):
    super(OpGridCreator, self).__init__(*args, **kwargs)
    self.gridArray = None
    self.lock = RequestLock()
class OpObjectPredict(Operator):
    """Predicts object labels in a single image.

    Performs prediction on all objects in a time slice at once, and
    caches the result.
    """
    # WARNING: right now we predict and cache a whole time slice. We
    # expect this to be fast because there are relatively few objects
    # compared to the number of pixels in pixel classification. If
    # this should be too slow, we should instead cache at the object
    # level, and only predict for objects visible in the roi.

    name = "OpObjectPredict"

    Features = InputSlot(rtype=List, stype=Opaque)
    SelectedFeatures = InputSlot(rtype=List, stype=Opaque)
    Classifier = InputSlot()
    LabelsCount = InputSlot(stype='integer')
    InputProbabilities = InputSlot(stype=Opaque, rtype=List, optional=True)

    Predictions = OutputSlot(stype=Opaque, rtype=List)
    Probabilities = OutputSlot(stype=Opaque, rtype=List)
    CachedProbabilities = OutputSlot(stype=Opaque, rtype=List)
    ProbabilityChannels = OutputSlot(stype=Opaque, rtype=List, level=1)
    BadObjects = OutputSlot(stype=Opaque, rtype=List)

    #SegmentationThreshold = 0.5

    def setupOutputs(self):
        self.Predictions.meta.shape = self.Features.meta.shape
        self.Predictions.meta.dtype = object
        self.Predictions.meta.axistags = None
        self.Predictions.meta.mapping_dtype = numpy.uint8

        self.Probabilities.meta.shape = self.Features.meta.shape
        self.Probabilities.meta.dtype = object
        self.Probabilities.meta.mapping_dtype = numpy.float32
        self.Probabilities.meta.axistags = None

        self.BadObjects.meta.shape = self.Features.meta.shape
        self.BadObjects.meta.dtype = object
        self.BadObjects.meta.mapping_dtype = numpy.uint8
        self.BadObjects.meta.axistags = None

        if self.LabelsCount.ready():
            nlabels = self.LabelsCount[:].wait()
            nlabels = int(nlabels[0])
            self.ProbabilityChannels.resize(nlabels)
            for oslot in self.ProbabilityChannels:
                oslot.meta.shape = self.Features.meta.shape
                oslot.meta.dtype = object
                oslot.meta.axistags = None
                oslot.meta.mapping_dtype = numpy.float32

        self.lock = RequestLock()
        self.prob_cache = dict()
        self.bad_objects = dict()

    def execute(self, slot, subindex, roi, result):
        assert slot in [self.Predictions, self.Probabilities,
                        self.CachedProbabilities, self.ProbabilityChannels,
                        self.BadObjects]

        times = roi._l
        if len(times) == 0:
            # we assume that 0-length requests are requesting everything
            times = range(self.Predictions.meta.shape[0])

        if slot is self.CachedProbabilities:
            return {t: self.prob_cache[t] for t in times if t in self.prob_cache}

        forests = self.inputs["Classifier"][:].wait()
        if forests is None or forests[0] is None:
            # this happens if there was no data to train with
            return dict((t, numpy.array([])) for t in times)

        feats = {}
        prob_predictions = {}

        selected = self.SelectedFeatures([]).wait()

        # FIXME: self.prob_cache is shared, so we need to block.
        # However, this makes prediction single-threaded.
        self.lock.acquire()
        try:
            for t in times:
                if t in self.prob_cache:
                    continue

                tmpfeats = self.Features([t]).wait()
                ftmatrix, _, col_names = make_feature_array(tmpfeats, selected)
                rows, cols = replace_missing(ftmatrix)
                self.bad_objects[t] = numpy.zeros((ftmatrix.shape[0],))
                self.bad_objects[t][rows] = 1
                feats[t] = ftmatrix
                prob_predictions[t] = [0] * len(forests)

            def predict_forest(_t, forest_index):
                # Note: We can't use RandomForest.predictLabels() here because we're training in parallel,
                #       and we have to average the PROBABILITIES from all forests.
                #       Averaging the label predictions from each forest is NOT equivalent.
                #       For details please see wikipedia:
                #       http://en.wikipedia.org/wiki/Electoral_College_%28United_States%29#Irrelevancy_of_national_popular_vote
                #       (^-^)
                prob_predictions[_t][forest_index] = \
                    forests[forest_index].predictProbabilities(feats[_t].astype(numpy.float32))

            # predict the data with all the forests in parallel
            pool = RequestPool()
            for t in times:
                if t in self.prob_cache:
                    continue
                for i, f in enumerate(forests):
                    req = Request(partial(predict_forest, t, i))
                    pool.add(req)

            pool.wait()
            pool.clean()

            for t in times:
                if t not in self.prob_cache:
                    # prob_predictions is a dict-of-lists-of-arrays, indexed as follows:
                    # prob_predictions[t][forest_index][object_index, class_index]

                    # Stack the forests together and average them.
                    stacked_predictions = numpy.array(prob_predictions[t])
                    averaged_predictions = numpy.average(stacked_predictions, axis=0)
                    assert averaged_predictions.shape[0] == len(feats[t])
                    self.prob_cache[t] = averaged_predictions

                    self.prob_cache[t][0] = 0  # Background probability is always zero

            if slot == self.Probabilities:
                return {t: self.prob_cache[t] for t in times}
            elif slot == self.Predictions:
                # FIXME: Support SegmentationThreshold again...
                labels = dict()
                for t in times:
                    prob_sum = numpy.sum(self.prob_cache[t], axis=1)
                    labels[t] = 1 + numpy.argmax(self.prob_cache[t], axis=1)
                    labels[t][0] = 0  # Background gets the zero label
                return labels
            elif slot == self.ProbabilityChannels:
                try:
                    prob_single_channel = {t: self.prob_cache[t][:, subindex[0]]
                                           for t in times}
                except:
                    # no probabilities available for this class; return zeros
                    prob_single_channel = {t: numpy.zeros((self.prob_cache[t].shape[0], 1))
                                           for t in times}
                return prob_single_channel
            elif slot == self.BadObjects:
                return {t: self.bad_objects[t] for t in times}
            else:
                assert False, "Unknown input slot"
        finally:
            self.lock.release()

    def propagateDirty(self, slot, subindex, roi):
        self.prob_cache = {}
        if slot is self.InputProbabilities:
            self.prob_cache = self.InputProbabilities([]).wait()
        self.Predictions.setDirty(())
        self.Probabilities.setDirty(())
        self.ProbabilityChannels.setDirty(())
class OpPatchCreator(Operator):
    """Patchifies an image."""
    PatchWidth = InputSlot()              # width of patch in pixel
    PatchHeight = InputSlot()             # height of patch in pixel
    PatchOverlapVertical = InputSlot()    # vertical overlap between patches in pixels
    PatchOverlapHorizontal = InputSlot()  # horizontal overlap between patches in pixels
    GridStartVertical = InputSlot()       # X - start of patch grid in pixel
    GridStartHorizontal = InputSlot()     # Y - start of patch grid in pixel
    GridWidth = InputSlot()               # width of patch grid in pixel
    GridHeight = InputSlot()              # height of patch grid in pixel

    RawInput = InputSlot()                # raw input image
    FilteredInput = InputSlot()           # filtered input image

    Patches = OutputSlot()                # output patches
    Positions = OutputSlot()              # output positions of patches
    NumPatches = OutputSlot()             # number of patches in x/y-direction
    GridOutput = OutputSlot()

    def __init__(self, *args, **kwargs):
        super(OpPatchCreator, self).__init__(*args, **kwargs)
        self.patches = None
        self.posns = None
        self.lock = RequestLock()

        self.opGrid = opGridCreator.OpGridCreator(graph=Graph())
        self.GridOutput.connect(self.opGrid.Output)

        self.opGrid.GridStartVertical.connect(self.GridStartVertical)
        self.opGrid.GridStartHorizontal.connect(self.GridStartHorizontal)
        self.opGrid.GridWidth.connect(self.GridWidth)
        self.opGrid.GridHeight.connect(self.GridHeight)
        self.opGrid.PatchWidth.connect(self.PatchWidth)
        self.opGrid.PatchHeight.connect(self.PatchHeight)

    def setupOutputs(self):
        skipVertical = self.PatchHeight.value - self.PatchOverlapVertical.value
        skipHorizontal = self.PatchWidth.value - self.PatchOverlapHorizontal.value

        if skipVertical <= 0 or skipHorizontal <= 0:
            return

        # total number of patches in x-direction
        numPatchesVertical = (self.GridHeight.value - self.PatchHeight.value) // skipVertical + 1

        # total number of patches in y-direction
        numPatchesHorizontal = (self.GridWidth.value - self.PatchWidth.value) // skipHorizontal + 1

        # total number of patches on the image
        totNumPatches = numPatchesVertical * numPatchesHorizontal

        # number of pixel per patch
        numPixelsPerPatch = self.PatchWidth.value * self.PatchHeight.value

        # set shape of output data
        self.NumPatches.meta.shape = (2,)
        self.NumPatches.meta.dtype = numpy.uint32
        self.NumPatches.meta.axistags = None

        self.Positions.meta.shape = (totNumPatches, 2)
        self.Positions.meta.dtype = numpy.uint32
        self.Positions.meta.axistags = None

        n_channels = self.FilteredInput.meta.shape[2]
        self.Patches.meta.shape = (totNumPatches, self.PatchWidth.value,
                                   self.PatchHeight.value, n_channels)
        self.Patches.meta.dtype = numpy.float32
        self.Patches.meta.axistags = None

        # set input slots for operator opGridCreator
        self.opGrid.ImageHeight.setValue(self.RawInput.meta.shape[1])
        self.opGrid.ImageWidth.setValue(self.RawInput.meta.shape[0])

    def execute(self, slot, subindex, roi, result):
        """Create patches from filtered input image slot.

        shape of output patches: [number of patches, number of pixels
        per patch, number of filter responses = channels]
        """
        skipVertical = self.PatchHeight.value - self.PatchOverlapVertical.value
        skipHorizontal = self.PatchWidth.value - self.PatchOverlapHorizontal.value

        if slot is self.NumPatches:
            pWidth = self.PatchWidth.value
            pHeight = self.PatchHeight.value
            gWidth = self.GridWidth.value
            gHeight = self.GridHeight.value

            numPatchesVertical = (gHeight - pHeight) // skipVertical + 1
            numPatchesHorizontal = (gWidth - pWidth) // skipHorizontal + 1

            numpatches = numpy.zeros((2,))
            numpatches[:] = (numPatchesVertical, numPatchesHorizontal)
            return numpatches

        try:
            self.lock.acquire()
            if self.patches is None:
                pWidth = self.PatchWidth.value
                pHeight = self.PatchHeight.value
                overlapVertical = self.PatchOverlapVertical.value
                overlapHorizontal = self.PatchOverlapHorizontal.value
                gStartVertical = self.GridStartVertical.value
                gStartHorizontal = self.GridStartHorizontal.value
                gWidth = self.GridWidth.value
                gHeight = self.GridHeight.value

                img = self.FilteredInput[:].wait()
                img.axistags = self.FilteredInput.meta.axistags
                img = numpy.asarray(img.transposeToNumpyOrder())

                self.patches, self.posns = patchify(
                    img,
                    (pHeight, pWidth),
                    (overlapVertical, overlapHorizontal),
                    (gStartVertical, gStartHorizontal),
                    (gHeight, gWidth))

            if slot is self.Patches:
                return self.patches
            elif slot is self.Positions:
                return self.posns
        finally:
            self.lock.release()

    def propagateDirty(self, slot, subindex, roi):
        try:
            self.lock.acquire()
            self.patches = None
            self.posns = None
            roi = slice(None)
            self.Patches.setDirty(roi)
            self.Positions.setDirty(roi)
            self.NumPatches.setDirty(roi)
        finally:
            self.lock.release()
def __init__(self, *args, **kwargs):
    super(OpUnblockedArrayCache, self).__init__(*args, **kwargs)
    self._lock = RequestLock()
    self._block_data = {}
    self._block_locks = {}
def __init__(self, *args, **kwargs):
    super(self.__class__, self).__init__(*args, **kwargs)
    self._blockPipelines = {}  # indexed by blockstart
    self._lock = RequestLock()
def execute(self, slot, subindex, roi, result):
    assert slot == self.LabelAndFeatureMatrix
    self.progressSignal(0.0)

    # Technically, this could result in strange progress reporting if execute()
    # is called by multiple threads in parallel. This could be fixed with some
    # fancier progress state, but
    # (1) we don't expect that to be typical, and
    # (2) progress reporting is merely informational.
    num_dirty_blocks = len(self._dirty_blocks)
    remaining_dirty = [num_dirty_blocks]

    def update_progress(result):
        remaining_dirty[0] -= 1
        percent_complete = 95.0 * (num_dirty_blocks - remaining_dirty[0]) / num_dirty_blocks
        self.progressSignal(percent_complete)

    # Update all dirty blocks in the cache
    logger.debug("Updating {} dirty blocks".format(num_dirty_blocks))

    # Before updating the blocks, ensure that the necessary block locks exist.
    # It's better to do this now instead of inside each request
    # to avoid contention over self._lock
    with self._lock:
        for block_start in self._dirty_blocks:
            if block_start not in self._block_locks:
                self._block_locks[block_start] = RequestLock()

    # Update each block in its own request.
    pool = RequestPool()
    reqs = {}
    for block_start in self._dirty_blocks:
        req = Request(partial(self._get_features_for_block, block_start))
        req.notify_finished(update_progress)
        reqs[block_start] = req
        pool.add(req)
    pool.wait()

    # Now store the results we got.
    # It's better to store the blocks here -- rather than within each request -- to
    # avoid contention over self._lock from within every block's request.
    with self._lock:
        for block_start, req in reqs.items():
            if req.result is None:
                # 'None' means the block wasn't dirty. No need to update.
                continue
            labels_and_features_matrix = req.result
            self._dirty_blocks.remove(block_start)

            if labels_and_features_matrix.shape[0] > 0:
                # Update the block entry with the new matrix.
                self._blockwise_feature_matrices[block_start] = labels_and_features_matrix
            else:
                # All labels were removed from the block,
                # so the new feature matrix is empty.
                # Just delete its entry from our list.
                try:
                    del self._blockwise_feature_matrices[block_start]
                except KeyError:
                    pass

    # Concatenate all the blockwise results
    if self._blockwise_feature_matrices:
        total_feature_matrix = numpy.concatenate(list(self._blockwise_feature_matrices.values()), axis=0)
    else:
        # No label points at all.
        # Return an empty label&feature matrix (of the correct shape)
        num_feature_channels = self.FeatureImage.meta.shape[-1]
        total_feature_matrix = numpy.ndarray(shape=(0, 1 + num_feature_channels),
                                             dtype=numpy.float32)

    self.progressSignal(100.0)
    logger.debug("After update, there are {} clean blocks".format(len(self._blockwise_feature_matrices)))
    result[0] = total_feature_matrix
def __init__(self, *args, **kwargs):
    super(OpCompressedCache, self).__init__(*args, **kwargs)
    self._lock = RequestLock()
    self._init_cache(None)
def __init__(self, *args, **kwargs):
    super(OpTiffReader, self).__init__(*args, **kwargs)
    self._filepath = None
    self._tiff_file = None
    self._page_shape = None
    self._tiff_file_lock = RequestLock()