def test_RequestLock():
    """Run ``_impl_test_lock`` with two RequestLocks on the real thread pool.

    While the scenario runs, a daemon thread logs the request-system state
    (pending requests per lock, unassigned tasks, per-worker queue length)
    at debug level every half second.  If the scenario raises, the state is
    logged once more, the thread pool is reset, both locks are released if
    still held, and the exception is re-raised.
    """
    global paused

    assert Request.global_thread_pool.num_workers > 0, "This test must be used with the real threadpool."

    lockA = RequestLock()
    lockB = RequestLock()

    def log_request_system_status():
        # Assemble the whole report first so it is emitted as one log record.
        pool = Request.global_thread_pool
        report = [
            "*************************\n",
            "lockA.pending: {}\n".format(len(lockA._pendingRequests)),
            "lockB.pending: {}\n".format(len(lockB._pendingRequests)),
            "global job queue: {}\n".format(len(pool.unassigned_tasks)),
        ]
        report.extend(
            "{} queued tasks: {}\n".format(worker.name, len(worker.job_queue))
            for worker in pool.workers
        )
        report.append("*****************************************************")
        logger.debug("".join(report))

    # One-element list so the nested functions can flip the flag.
    keep_polling = [True]

    def periodic_status():
        while keep_polling[0]:
            time.sleep(0.5)
            log_request_system_status()

    # Background reporter; daemonized so it can never keep the process alive.
    monitor = threading.Thread(target=periodic_status)
    monitor.daemon = True
    monitor.start()

    def stop_monitor():
        # Log a final snapshot, then let the reporter loop exit and wait for it.
        log_request_system_status()
        keep_polling[0] = False
        monitor.join()

    try:
        _impl_test_lock(lockA, lockB, Request, 1000)
    except BaseException:
        # Catch everything (same reach as a bare except): clean up, then re-raise.
        stop_monitor()

        paused = False

        Request.reset_thread_pool(Request.global_thread_pool.num_workers)
        for held in (lockA, lockB):
            if held.locked():
                held.release()
        raise
    else:
        stop_monitor()
def test_RequestLock():
    """Run ``_impl_test_lock`` with two RequestLocks on the real thread pool.

    While the scenario runs, a daemon thread logs the request-system state
    (pending requests per lock, unassigned tasks, per-worker queue length)
    at debug level every half second.  If the scenario raises, the state is
    logged once more, the thread pool is reset, both locks are released if
    still held, and the exception is re-raised.
    """
    global paused

    assert Request.global_thread_pool.num_workers > 0, "This test must be used with the real threadpool."

    lockA = RequestLock()
    lockB = RequestLock()

    def log_request_system_status():
        # Assemble the whole report first so it is emitted as one log record.
        pool = Request.global_thread_pool
        report = [
            "*************************\n",
            "lockA.pending: {}\n".format(len(lockA._pendingRequests)),
            "lockB.pending: {}\n".format(len(lockB._pendingRequests)),
            "global job queue: {}\n".format(len(pool.unassigned_tasks)),
        ]
        report.extend(
            "{} queued tasks: {}\n".format(worker.name, len(worker.job_queue))
            for worker in pool.workers
        )
        report.append("*****************************************************")
        logger.debug("".join(report))

    # One-element list so the nested functions can flip the flag.
    keep_polling = [True]

    def periodic_status():
        while keep_polling[0]:
            time.sleep(0.5)
            log_request_system_status()

    # Background reporter; daemonized so it can never keep the process alive.
    monitor = threading.Thread(target=periodic_status)
    monitor.daemon = True
    monitor.start()

    def stop_monitor():
        # Log a final snapshot, then let the reporter loop exit and wait for it.
        log_request_system_status()
        keep_polling[0] = False
        monitor.join()

    try:
        _impl_test_lock(lockA, lockB, Request, 1000)
    except BaseException:
        # Catch everything (same reach as a bare except): clean up, then re-raise.
        stop_monitor()

        paused = False

        Request.reset_thread_pool(Request.global_thread_pool.num_workers)
        for held in (lockA, lockB):
            if held.locked():
                held.release()
        raise
    else:
        stop_monitor()
class OpGridCreator(Operator):
    """Builds a patch-grid array with ``make_grid`` and serves it on ``Output``.

    The array is created lazily on the first ``execute`` call from the image,
    patch and grid geometry inputs, cached in ``self.gridArray``, and dropped
    again in ``propagateDirty``.
    """
    PatchWidth = InputSlot()  # width of patch in pixel
    PatchHeight = InputSlot()  # height of patch in pixel
    GridStartVertical = InputSlot()  # Vertical - start of patch grid in pixel
    GridStartHorizontal = InputSlot()  # Horizontal - start of patch grid in pixel
    GridWidth = InputSlot()  # width of patch grid in pixel
    GridHeight = InputSlot()  # height of patch grid in pixel
    ImageWidth = InputSlot()  # width of raw image
    ImageHeight = InputSlot()  # height of raw image

    Output = OutputSlot()  # grid array returned by make_grid (declared uint8, 'yx')

    def __init__(self, *args, **kwargs):
        super(OpGridCreator, self).__init__(*args, **kwargs)
        # Cached make_grid result; None means it has to be (re)built.
        self.gridArray = None
        # Serializes access to gridArray between execute and propagateDirty.
        self.lock = RequestLock()

    def setupOutputs(self):
        """Declare shape, axistags and dtype of ``Output``."""
        self.Output.meta.shape = (self.ImageWidth.value, self.ImageHeight.value)
        # the viewer uses a different coordinate system
        self.Output.meta.axistags = vigra.defaultAxistags('yx')
        self.Output.meta.dtype = numpy.uint8

    def execute(self, slot, subindex, roi, result):
        """Return the ``roi`` region of the grid, building the grid if needed.

        The lock is taken with ``with`` so that it is released only when it
        was actually acquired; an exception raised while waiting for the lock
        (e.g. a cancelled request) no longer triggers a stray ``release()``.
        """
        with self.lock:
            if self.gridArray is None:
                shape = (self.ImageHeight.value, self.ImageWidth.value)
                patch = (self.PatchHeight.value, self.PatchWidth.value)
                offset = (self.GridStartVertical.value,
                          self.GridStartHorizontal.value)
                grid = (self.GridHeight.value, self.GridWidth.value)
                self.gridArray = make_grid(shape, patch, grid, offset)
            return self.gridArray[roi.toSlice()]

    def propagateDirty(self, slot, subindex, roi):
        """Drop the cached grid and mark the whole ``Output`` dirty."""
        with self.lock:
            self.gridArray = None
            self.Output.setDirty(slice(None))
class OpGridCreator(Operator):
    """Builds a patch-grid array with ``make_grid`` and serves it on ``Output``.

    The array is created lazily on the first ``execute`` call from the image,
    patch and grid geometry inputs, cached in ``self.gridArray``, and dropped
    again in ``propagateDirty``.
    """
    PatchWidth = InputSlot()  # width of patch in pixel
    PatchHeight = InputSlot()  # height of patch in pixel
    GridStartVertical = InputSlot()  # Vertical - start of patch grid in pixel
    GridStartHorizontal = InputSlot()  # Horizontal - start of patch grid in pixel
    GridWidth = InputSlot()  # width of patch grid in pixel
    GridHeight = InputSlot()  # height of patch grid in pixel
    ImageWidth = InputSlot()  # width of raw image
    ImageHeight = InputSlot()  # height of raw image

    Output = OutputSlot()  # grid array returned by make_grid (declared uint8, 'yx')

    def __init__(self, *args, **kwargs):
        super(OpGridCreator, self).__init__(*args, **kwargs)
        # Cached make_grid result; None means it has to be (re)built.
        self.gridArray = None
        # Serializes access to gridArray between execute and propagateDirty.
        self.lock = RequestLock()

    def setupOutputs(self):
        """Declare shape, axistags and dtype of ``Output``."""
        self.Output.meta.shape = (self.ImageWidth.value, self.ImageHeight.value)
        # the viewer uses a different coordinate system
        self.Output.meta.axistags = vigra.defaultAxistags('yx')
        self.Output.meta.dtype = numpy.uint8

    def execute(self, slot, subindex, roi, result):
        """Return the ``roi`` region of the grid, building the grid if needed.

        The lock is taken with ``with`` so that it is released only when it
        was actually acquired; an exception raised while waiting for the lock
        (e.g. a cancelled request) no longer triggers a stray ``release()``.
        """
        with self.lock:
            if self.gridArray is None:
                shape = (self.ImageHeight.value, self.ImageWidth.value)
                patch = (self.PatchHeight.value, self.PatchWidth.value)
                offset = (self.GridStartVertical.value,
                          self.GridStartHorizontal.value)
                grid = (self.GridHeight.value, self.GridWidth.value)
                self.gridArray = make_grid(shape, patch, grid, offset)
            return self.gridArray[roi.toSlice()]

    def propagateDirty(self, slot, subindex, roi):
        """Drop the cached grid and mark the whole ``Output`` dirty."""
        with self.lock:
            self.gridArray = None
            self.Output.setDirty(slice(None))
def test_cancellation_behavior():
    """
    A request that gets cancelled while it is blocked on a RequestLock must
    see Request.CancellationException, and only its "cancelled" callback
    (not "finished" or "failed") may fire.
    """
    lock = RequestLock()
    # Hold the lock ourselves so the request below has to wait for it.
    lock.acquire()

    def wait_for_lock():
        try:
            with lock:
                assert False
        except Request.CancellationException:
            return
        # Reaching this point means no cancellation was raised.
        assert False

    outcome = {"finished": False, "failed": False, "cancelled": False}

    def handle_finished(result):
        outcome["finished"] = True

    def handle_failed(*args):
        outcome["failed"] = True

    def handle_cancelled():
        outcome["cancelled"] = True

    req = Request(wait_for_lock)
    req.notify_finished(handle_finished)
    req.notify_failed(handle_failed)
    req.notify_cancelled(handle_cancelled)

    req.submit()
    req.cancel()
    time.sleep(0.1)
    lock.release()
    time.sleep(0.1)

    assert outcome["cancelled"] and not (outcome["finished"] or outcome["failed"])
class OpPatchCreator(Operator):
    """Patchifies an image.

    Patches and their positions are computed by ``patchify`` from
    ``FilteredInput`` on first request, cached in ``self.patches`` /
    ``self.posns``, and discarded in ``propagateDirty``.  ``GridOutput`` is
    connected to an internal ``OpGridCreator`` that lives in its own ``Graph``.
    """
    PatchWidth = InputSlot()  # width of patch in pixel
    PatchHeight = InputSlot()  # height of patch in pixel
    PatchOverlapVertical = InputSlot()  # vertical overlap between patches in pixels
    PatchOverlapHorizontal = InputSlot()  # horizontal overlap between patches in pixels
    GridStartVertical = InputSlot()  # vertical start of patch grid in pixel
    GridStartHorizontal = InputSlot()  # horizontal start of patch grid in pixel
    GridWidth = InputSlot()  # width of patch grid in pixel
    GridHeight = InputSlot()  # height of patch grid in pixel
    RawInput = InputSlot()  # raw input image
    FilteredInput = InputSlot()  # filtered input image

    Patches = OutputSlot()  # output patches
    Positions = OutputSlot()  # output positions of patches
    NumPatches = OutputSlot()  # number of patches (vertical, horizontal)
    GridOutput = OutputSlot()  # output of the internal OpGridCreator

    def __init__(self, *args, **kwargs):
        super(OpPatchCreator, self).__init__(*args, **kwargs)
        # Cached patchify results; None means they have to be recomputed.
        self.patches = None
        self.posns = None
        # Serializes access to the cache between execute and propagateDirty.
        self.lock = RequestLock()

        self.opGrid = opGridCreator.OpGridCreator(graph=Graph())
        self.GridOutput.connect(self.opGrid.Output)
        self.opGrid.GridStartVertical.connect(self.GridStartVertical)
        self.opGrid.GridStartHorizontal.connect(self.GridStartHorizontal)
        self.opGrid.GridWidth.connect(self.GridWidth)
        self.opGrid.GridHeight.connect(self.GridHeight)
        self.opGrid.PatchWidth.connect(self.PatchWidth)
        self.opGrid.PatchHeight.connect(self.PatchHeight)

    def setupOutputs(self):
        """Declare output metadata and forward the raw image size to opGrid.

        Returns without configuring anything when the overlap is at least as
        large as the patch in either direction (non-positive step).
        """
        skipVertical = self.PatchHeight.value - self.PatchOverlapVertical.value
        skipHorizontal = self.PatchWidth.value - self.PatchOverlapHorizontal.value
        if skipVertical <= 0 or skipHorizontal <= 0:
            return

        # number of patches in vertical / horizontal direction
        numPatchesVertical = (
            self.GridHeight.value - self.PatchHeight.value) // skipVertical + 1
        numPatchesHorizontal = (
            self.GridWidth.value - self.PatchWidth.value) // skipHorizontal + 1
        # total number of patches on the image
        totNumPatches = numPatchesVertical * numPatchesHorizontal

        # set shape of output data
        self.NumPatches.meta.shape = (2,)
        self.NumPatches.meta.dtype = numpy.uint32
        self.NumPatches.meta.axistags = None

        self.Positions.meta.shape = (totNumPatches, 2)
        self.Positions.meta.dtype = numpy.uint32
        self.Positions.meta.axistags = None

        n_channels = self.FilteredInput.meta.shape[2]
        self.Patches.meta.shape = (totNumPatches, self.PatchWidth.value,
                                   self.PatchHeight.value, n_channels)
        self.Patches.meta.dtype = numpy.float32
        self.Patches.meta.axistags = None

        # set input slots for operator opGridCreator
        self.opGrid.ImageHeight.setValue(self.RawInput.meta.shape[1])
        self.opGrid.ImageWidth.setValue(self.RawInput.meta.shape[0])

    def execute(self, slot, subindex, roi, result):
        """Create patches from filtered input image slot.

        ``NumPatches`` is answered directly from the geometry inputs as a
        2-element array (vertical, horizontal).  ``Patches`` and ``Positions``
        return the complete cached ``patchify`` results; ``roi`` is not
        applied.  Any other slot yields ``None``.

        Original note on the patch layout (not verifiable from this class;
        ``setupOutputs`` declares a 4-D shape instead): [number of patches,
        number of pixels per patch, number of filter responses = channels].
        """
        if slot is self.NumPatches:
            skipVertical = self.PatchHeight.value - self.PatchOverlapVertical.value
            skipHorizontal = self.PatchWidth.value - self.PatchOverlapHorizontal.value
            # Floor division, matching the computation in setupOutputs.
            numPatchesVertical = (
                self.GridHeight.value - self.PatchHeight.value) // skipVertical + 1
            numPatchesHorizontal = (
                self.GridWidth.value - self.PatchWidth.value) // skipHorizontal + 1
            numpatches = numpy.zeros((2,))
            numpatches[:] = (numPatchesVertical, numPatchesHorizontal)
            return numpatches

        # ``with`` releases the lock only if it was really acquired, so an
        # exception raised while waiting for it cannot cause a stray release().
        with self.lock:
            if self.patches is None:
                # The input is requested once (it used to be fetched twice).
                img = self.FilteredInput[:].wait()
                img.axistags = self.FilteredInput.meta.axistags
                img = numpy.asarray(img.transposeToNumpyOrder())
                self.patches, self.posns = patchify(
                    img,
                    (self.PatchHeight.value, self.PatchWidth.value),
                    (self.PatchOverlapVertical.value,
                     self.PatchOverlapHorizontal.value),
                    (self.GridStartVertical.value,
                     self.GridStartHorizontal.value),
                    (self.GridHeight.value, self.GridWidth.value))
            if slot is self.Patches:
                return self.patches
            elif slot is self.Positions:
                return self.posns

    def propagateDirty(self, slot, subindex, roi):
        """Drop the cached patches and mark all computed outputs dirty."""
        with self.lock:
            self.patches = None
            self.posns = None
            everything = slice(None)
            self.Patches.setDirty(everything)
            self.Positions.setDirty(everything)
            self.NumPatches.setDirty(everything)
class OpObjectPredict(Operator):
    """Predicts object labels in a single image.

    Performs prediction on all objects in a time slice at once, and
    caches the result.

    """
    # WARNING: right now we predict and cache a whole time slice. We
    # expect this to be fast because there are relatively few objects
    # compared to the number of pixels in pixel classification. If
    # this should be too slow, we should instead cache at the object
    # level, and only predict for objects visible in the roi.

    name = "OpObjectPredict"

    Features = InputSlot(rtype=List, stype=Opaque)
    SelectedFeatures = InputSlot(rtype=List, stype=Opaque)
    Classifier = InputSlot()
    LabelsCount = InputSlot(stype='integer')
    InputProbabilities = InputSlot(stype=Opaque, rtype=List, optional=True)

    Predictions = OutputSlot(stype=Opaque, rtype=List)
    Probabilities = OutputSlot(stype=Opaque, rtype=List)
    CachedProbabilities = OutputSlot(stype=Opaque, rtype=List)
    ProbabilityChannels = OutputSlot(stype=Opaque, rtype=List, level=1)
    BadObjects = OutputSlot(stype=Opaque, rtype=List)

    #SegmentationThreshold = 0.5

    def setupOutputs(self):
        """Declare output metadata and (re)create the lock and both caches."""
        self.Predictions.meta.shape = self.Features.meta.shape
        self.Predictions.meta.dtype = object
        self.Predictions.meta.axistags = None
        self.Predictions.meta.mapping_dtype = numpy.uint8

        self.Probabilities.meta.shape = self.Features.meta.shape
        self.Probabilities.meta.dtype = object
        self.Probabilities.meta.mapping_dtype = numpy.float32
        self.Probabilities.meta.axistags = None

        self.BadObjects.meta.shape = self.Features.meta.shape
        self.BadObjects.meta.dtype = object
        self.BadObjects.meta.mapping_dtype = numpy.uint8
        self.BadObjects.meta.axistags = None

        if self.LabelsCount.ready():
            nlabels = self.LabelsCount[:].wait()
            nlabels = int(nlabels[0])
            # one probability channel sub-slot per label
            self.ProbabilityChannels.resize(nlabels)
            for oslot in self.ProbabilityChannels:
                oslot.meta.shape = self.Features.meta.shape
                oslot.meta.dtype = object
                oslot.meta.axistags = None
                oslot.meta.mapping_dtype = numpy.float32

        # NOTE(review): the lock is replaced on every setupOutputs call, so a
        # request still holding the previous lock is not excluded - confirm
        # that setupOutputs cannot run concurrently with execute.
        self.lock = RequestLock()
        self.prob_cache = dict()  # time -> averaged probabilities
        self.bad_objects = dict()  # time -> 0/1 flags from replace_missing

    def execute(self, slot, subindex, roi, result):
        """Return a dict keyed by time step for the requested output slot.

        An empty time list in ``roi`` is treated as "all time steps".
        ``CachedProbabilities`` only reports what is already cached and never
        predicts.  When there is no classifier, every requested time maps to
        an empty array.  Otherwise missing time steps are predicted first
        (under ``self.lock``) and the answer is derived from the cache.
        """
        assert slot in [self.Predictions,
                        self.Probabilities,
                        self.CachedProbabilities,
                        self.ProbabilityChannels,
                        self.BadObjects]

        times = roi._l
        if len(times) == 0:
            # we assume that 0-length requests are requesting everything
            times = range(self.Predictions.meta.shape[0])

        if slot is self.CachedProbabilities:
            return {t: self.prob_cache[t] for t in times if t in self.prob_cache}

        forests = self.inputs["Classifier"][:].wait()
        if forests is None or forests[0] is None:
            # this happens if there was no data to train with
            return {t: numpy.array([]) for t in times}

        selected = self.SelectedFeatures([]).wait()

        # FIXME: self.prob_cache is shared, so we need to block.
        # However, this makes prediction single-threaded.
        with self.lock:
            self._update_prob_cache(times, forests, selected)

            if slot == self.Probabilities:
                return {t: self.prob_cache[t] for t in times}

            if slot == self.Predictions:
                # FIXME: Support SegmentationThreshold again...
                labels = dict()
                for t in times:
                    labels[t] = 1 + numpy.argmax(self.prob_cache[t], axis=1)
                    labels[t][0] = 0  # Background gets the zero label
                return labels

            if slot == self.ProbabilityChannels:
                try:
                    prob_single_channel = {t: self.prob_cache[t][:, subindex[0]]
                                           for t in times}
                except IndexError:
                    # no probabilities available for this class; return zeros
                    prob_single_channel = {
                        t: numpy.zeros((self.prob_cache[t].shape[0], 1))
                        for t in times}
                return prob_single_channel

            if slot == self.BadObjects:
                return {t: self.bad_objects[t] for t in times}

            assert False, "Unknown input slot"

    def _update_prob_cache(self, times, forests, selected):
        """Predict every uncached time step and store the forest average.

        Must be called with ``self.lock`` held.  Fills ``self.bad_objects``
        and ``self.prob_cache`` for each time in ``times`` that is not cached.
        """
        # Decide once which time steps need work; all three stages use it.
        pending = [t for t in times if t not in self.prob_cache]

        feats = {}
        prob_predictions = {}
        for t in pending:
            tmpfeats = self.Features([t]).wait()
            ftmatrix, _, _ = make_feature_array(tmpfeats, selected)
            # rows reported by replace_missing are flagged as bad objects
            rows, _ = replace_missing(ftmatrix)
            self.bad_objects[t] = numpy.zeros((ftmatrix.shape[0],))
            self.bad_objects[t][rows] = 1
            feats[t] = ftmatrix
            prob_predictions[t] = [0] * len(forests)

        def predict_forest(_t, forest_index):
            # Note: We can't use RandomForest.predictLabels() here because we're training in parallel,
            #        and we have to average the PROBABILITIES from all forests.
            #       Averaging the label predictions from each forest is NOT equivalent.
            #       For details please see wikipedia:
            #       http://en.wikipedia.org/wiki/Electoral_College_%28United_States%29#Irrelevancy_of_national_popular_vote
            #       (^-^)
            prob_predictions[_t][forest_index] = \
                forests[forest_index].predictProbabilities(
                    feats[_t].astype(numpy.float32))

        # predict the data with all the forests in parallel
        pool = RequestPool()
        for t in pending:
            for i, _ in enumerate(forests):
                pool.add(Request(partial(predict_forest, t, i)))
        pool.wait()
        pool.clean()

        for t in pending:
            # prob_predictions is a dict-of-lists-of-arrays, indexed as follows:
            # prob_predictions[t][forest_index][object_index, class_index]

            # Stack the forests together and average them.
            stacked_predictions = numpy.array(prob_predictions[t])
            averaged_predictions = numpy.average(stacked_predictions, axis=0)
            assert averaged_predictions.shape[0] == len(feats[t])
            self.prob_cache[t] = averaged_predictions
            self.prob_cache[t][0] = 0  # Background probability is always zero

    def propagateDirty(self, slot, subindex, roi):
        """Reset the probability cache and mark the prediction outputs dirty.

        When ``InputProbabilities`` is the dirty slot, its content replaces
        the cache instead of leaving it empty.
        """
        # NOTE(review): the cache is swapped without holding self.lock and
        # bad_objects is left untouched - confirm this is intended.
        self.prob_cache = {}
        if slot is self.InputProbabilities:
            self.prob_cache = self.InputProbabilities([]).wait()
        self.Predictions.setDirty(())
        self.Probabilities.setDirty(())
        self.ProbabilityChannels.setDirty(())
class OpPatchCreator(Operator):
    """Patchifies an image.

    Patches and their positions are computed by ``patchify`` from
    ``FilteredInput`` on first request, cached in ``self.patches`` /
    ``self.posns``, and discarded in ``propagateDirty``.  ``GridOutput`` is
    connected to an internal ``OpGridCreator`` that lives in its own ``Graph``.
    """
    PatchWidth = InputSlot()  # width of patch in pixel
    PatchHeight = InputSlot()  # height of patch in pixel
    PatchOverlapVertical = InputSlot()  # vertical overlap between patches in pixels
    PatchOverlapHorizontal = InputSlot()  # horizontal overlap between patches in pixels
    GridStartVertical = InputSlot()  # vertical start of patch grid in pixel
    GridStartHorizontal = InputSlot()  # horizontal start of patch grid in pixel
    GridWidth = InputSlot()  # width of patch grid in pixel
    GridHeight = InputSlot()  # height of patch grid in pixel
    RawInput = InputSlot()  # raw input image
    FilteredInput = InputSlot()  # filtered input image

    Patches = OutputSlot()  # output patches
    Positions = OutputSlot()  # output positions of patches
    NumPatches = OutputSlot()  # number of patches (vertical, horizontal)
    GridOutput = OutputSlot()  # output of the internal OpGridCreator

    def __init__(self, *args, **kwargs):
        super(OpPatchCreator, self).__init__(*args, **kwargs)
        # Cached patchify results; None means they have to be recomputed.
        self.patches = None
        self.posns = None
        # Serializes access to the cache between execute and propagateDirty.
        self.lock = RequestLock()

        self.opGrid = opGridCreator.OpGridCreator(graph=Graph())
        self.GridOutput.connect(self.opGrid.Output)
        self.opGrid.GridStartVertical.connect(self.GridStartVertical)
        self.opGrid.GridStartHorizontal.connect(self.GridStartHorizontal)
        self.opGrid.GridWidth.connect(self.GridWidth)
        self.opGrid.GridHeight.connect(self.GridHeight)
        self.opGrid.PatchWidth.connect(self.PatchWidth)
        self.opGrid.PatchHeight.connect(self.PatchHeight)

    def setupOutputs(self):
        """Declare output metadata and forward the raw image size to opGrid.

        Returns without configuring anything when the overlap is at least as
        large as the patch in either direction (non-positive step).
        """
        skipVertical = self.PatchHeight.value - self.PatchOverlapVertical.value
        skipHorizontal = self.PatchWidth.value - self.PatchOverlapHorizontal.value
        if skipVertical <= 0 or skipHorizontal <= 0:
            return

        # number of patches in vertical / horizontal direction
        numPatchesVertical = (
            self.GridHeight.value - self.PatchHeight.value) // skipVertical + 1
        numPatchesHorizontal = (
            self.GridWidth.value - self.PatchWidth.value) // skipHorizontal + 1
        # total number of patches on the image
        totNumPatches = numPatchesVertical * numPatchesHorizontal

        # set shape of output data
        self.NumPatches.meta.shape = (2,)
        self.NumPatches.meta.dtype = numpy.uint32
        self.NumPatches.meta.axistags = None

        self.Positions.meta.shape = (totNumPatches, 2)
        self.Positions.meta.dtype = numpy.uint32
        self.Positions.meta.axistags = None

        n_channels = self.FilteredInput.meta.shape[2]
        self.Patches.meta.shape = (totNumPatches, self.PatchWidth.value,
                                   self.PatchHeight.value, n_channels)
        self.Patches.meta.dtype = numpy.float32
        self.Patches.meta.axistags = None

        # set input slots for operator opGridCreator
        self.opGrid.ImageHeight.setValue(self.RawInput.meta.shape[1])
        self.opGrid.ImageWidth.setValue(self.RawInput.meta.shape[0])

    def execute(self, slot, subindex, roi, result):
        """Create patches from filtered input image slot.

        ``NumPatches`` is answered directly from the geometry inputs as a
        2-element array (vertical, horizontal).  ``Patches`` and ``Positions``
        return the complete cached ``patchify`` results; ``roi`` is not
        applied.  Any other slot yields ``None``.

        Original note on the patch layout (not verifiable from this class;
        ``setupOutputs`` declares a 4-D shape instead): [number of patches,
        number of pixels per patch, number of filter responses = channels].
        """
        if slot is self.NumPatches:
            skipVertical = self.PatchHeight.value - self.PatchOverlapVertical.value
            skipHorizontal = self.PatchWidth.value - self.PatchOverlapHorizontal.value
            # Floor division, so the counts agree with setupOutputs (the
            # former int(a / b) truncated toward zero under true division).
            numPatchesVertical = (
                self.GridHeight.value - self.PatchHeight.value) // skipVertical + 1
            numPatchesHorizontal = (
                self.GridWidth.value - self.PatchWidth.value) // skipHorizontal + 1
            numpatches = numpy.zeros((2,))
            numpatches[:] = (numPatchesVertical, numPatchesHorizontal)
            return numpatches

        # ``with`` releases the lock only if it was really acquired, so an
        # exception raised while waiting for it cannot cause a stray release().
        with self.lock:
            if self.patches is None:
                # The input is requested once (it used to be fetched twice).
                img = self.FilteredInput[:].wait()
                img.axistags = self.FilteredInput.meta.axistags
                img = numpy.asarray(img.transposeToNumpyOrder())
                self.patches, self.posns = patchify(
                    img,
                    (self.PatchHeight.value, self.PatchWidth.value),
                    (self.PatchOverlapVertical.value,
                     self.PatchOverlapHorizontal.value),
                    (self.GridStartVertical.value,
                     self.GridStartHorizontal.value),
                    (self.GridHeight.value, self.GridWidth.value))
            if slot is self.Patches:
                return self.patches
            elif slot is self.Positions:
                return self.posns

    def propagateDirty(self, slot, subindex, roi):
        """Drop the cached patches and mark all computed outputs dirty."""
        with self.lock:
            self.patches = None
            self.posns = None
            everything = slice(None)
            self.Patches.setDirty(everything)
            self.Positions.setDirty(everything)
            self.NumPatches.setDirty(everything)
class OpObjectPredict(Operator):
    """Predicts object labels in a single image.

    Performs prediction on all objects in a time slice at once, and
    caches the result.

    """
    # WARNING: right now we predict and cache a whole time slice. We
    # expect this to be fast because there are relatively few objects
    # compared to the number of pixels in pixel classification. If
    # this should be too slow, we should instead cache at the object
    # level, and only predict for objects visible in the roi.

    name = "OpObjectPredict"

    Features = InputSlot(rtype=List, stype=Opaque)
    SelectedFeatures = InputSlot(rtype=List, stype=Opaque)
    Classifier = InputSlot()
    LabelsCount = InputSlot(stype='integer')
    InputProbabilities = InputSlot(stype=Opaque, rtype=List, optional=True)

    Predictions = OutputSlot(stype=Opaque, rtype=List)
    Probabilities = OutputSlot(stype=Opaque, rtype=List)
    CachedProbabilities = OutputSlot(stype=Opaque, rtype=List)
    ProbabilityChannels = OutputSlot(stype=Opaque, rtype=List, level=1)
    BadObjects = OutputSlot(stype=Opaque, rtype=List)

    #SegmentationThreshold = 0.5

    def setupOutputs(self):
        """Declare output metadata and (re)create the lock and both caches."""
        self.Predictions.meta.shape = self.Features.meta.shape
        self.Predictions.meta.dtype = object
        self.Predictions.meta.axistags = None
        self.Predictions.meta.mapping_dtype = numpy.uint8

        self.Probabilities.meta.shape = self.Features.meta.shape
        self.Probabilities.meta.dtype = object
        self.Probabilities.meta.mapping_dtype = numpy.float32
        self.Probabilities.meta.axistags = None

        self.BadObjects.meta.shape = self.Features.meta.shape
        self.BadObjects.meta.dtype = object
        self.BadObjects.meta.mapping_dtype = numpy.uint8
        self.BadObjects.meta.axistags = None

        if self.LabelsCount.ready():
            nlabels = self.LabelsCount[:].wait()
            nlabels = int(nlabels[0])
            # one probability channel sub-slot per label
            self.ProbabilityChannels.resize(nlabels)
            for oslot in self.ProbabilityChannels:
                oslot.meta.shape = self.Features.meta.shape
                oslot.meta.dtype = object
                oslot.meta.axistags = None
                oslot.meta.mapping_dtype = numpy.float32

        # NOTE(review): the lock is replaced on every setupOutputs call, so a
        # request still holding the previous lock is not excluded - confirm
        # that setupOutputs cannot run concurrently with execute.
        self.lock = RequestLock()
        self.prob_cache = dict()  # time -> averaged probabilities
        self.bad_objects = dict()  # time -> 0/1 flags from replace_missing

    def execute(self, slot, subindex, roi, result):
        """Return a dict keyed by time step for the requested output slot.

        An empty time list in ``roi`` is treated as "all time steps".
        ``CachedProbabilities`` only reports what is already cached and never
        predicts.  When there is no classifier, every requested time maps to
        an empty array.  Otherwise missing time steps are predicted first
        (under ``self.lock``) and the answer is derived from the cache.
        """
        assert slot in [self.Predictions,
                        self.Probabilities,
                        self.CachedProbabilities,
                        self.ProbabilityChannels,
                        self.BadObjects]

        times = roi._l
        if len(times) == 0:
            # we assume that 0-length requests are requesting everything
            times = range(self.Predictions.meta.shape[0])

        if slot is self.CachedProbabilities:
            return {t: self.prob_cache[t] for t in times if t in self.prob_cache}

        forests = self.inputs["Classifier"][:].wait()
        if forests is None or forests[0] is None:
            # this happens if there was no data to train with
            return {t: numpy.array([]) for t in times}

        selected = self.SelectedFeatures([]).wait()

        # FIXME: self.prob_cache is shared, so we need to block.
        # However, this makes prediction single-threaded.
        with self.lock:
            self._update_prob_cache(times, forests, selected)

            if slot == self.Probabilities:
                return {t: self.prob_cache[t] for t in times}

            if slot == self.Predictions:
                # FIXME: Support SegmentationThreshold again...
                labels = dict()
                for t in times:
                    labels[t] = 1 + numpy.argmax(self.prob_cache[t], axis=1)
                    labels[t][0] = 0  # Background gets the zero label
                return labels

            if slot == self.ProbabilityChannels:
                try:
                    prob_single_channel = {t: self.prob_cache[t][:, subindex[0]]
                                           for t in times}
                except IndexError:
                    # no probabilities available for this class; return zeros
                    prob_single_channel = {
                        t: numpy.zeros((self.prob_cache[t].shape[0], 1))
                        for t in times}
                return prob_single_channel

            if slot == self.BadObjects:
                return {t: self.bad_objects[t] for t in times}

            assert False, "Unknown input slot"

    def _update_prob_cache(self, times, forests, selected):
        """Predict every uncached time step and store the forest average.

        Must be called with ``self.lock`` held.  Fills ``self.bad_objects``
        and ``self.prob_cache`` for each time in ``times`` that is not cached.
        """
        # Decide once which time steps need work; all three stages use it.
        pending = [t for t in times if t not in self.prob_cache]

        feats = {}
        prob_predictions = {}
        for t in pending:
            tmpfeats = self.Features([t]).wait()
            ftmatrix, _, _ = make_feature_array(tmpfeats, selected)
            # rows reported by replace_missing are flagged as bad objects
            rows, _ = replace_missing(ftmatrix)
            self.bad_objects[t] = numpy.zeros((ftmatrix.shape[0],))
            self.bad_objects[t][rows] = 1
            feats[t] = ftmatrix
            prob_predictions[t] = [0] * len(forests)

        def predict_forest(_t, forest_index):
            # Note: We can't use RandomForest.predictLabels() here because we're training in parallel,
            #        and we have to average the PROBABILITIES from all forests.
            #       Averaging the label predictions from each forest is NOT equivalent.
            #       For details please see wikipedia:
            #       http://en.wikipedia.org/wiki/Electoral_College_%28United_States%29#Irrelevancy_of_national_popular_vote
            #       (^-^)
            prob_predictions[_t][forest_index] = \
                forests[forest_index].predictProbabilities(
                    feats[_t].astype(numpy.float32))

        # predict the data with all the forests in parallel
        pool = RequestPool()
        for t in pending:
            for i, _ in enumerate(forests):
                pool.add(Request(partial(predict_forest, t, i)))
        pool.wait()
        pool.clean()

        for t in pending:
            # prob_predictions is a dict-of-lists-of-arrays, indexed as follows:
            # prob_predictions[t][forest_index][object_index, class_index]

            # Stack the forests together and average them.
            stacked_predictions = numpy.array(prob_predictions[t])
            averaged_predictions = numpy.average(stacked_predictions, axis=0)
            assert averaged_predictions.shape[0] == len(feats[t])
            self.prob_cache[t] = averaged_predictions
            self.prob_cache[t][0] = 0  # Background probability is always zero

    def propagateDirty(self, slot, subindex, roi):
        """Reset the probability cache and mark the prediction outputs dirty.

        When ``InputProbabilities`` is the dirty slot, its content replaces
        the cache instead of leaving it empty.
        """
        # NOTE(review): the cache is swapped without holding self.lock and
        # bad_objects is left untouched - confirm this is intended.
        self.prob_cache = {}
        if slot is self.InputProbabilities:
            self.prob_cache = self.InputProbabilities([]).wait()
        self.Predictions.setDirty(())
        self.Probabilities.setDirty(())
        self.ProbabilityChannels.setDirty(())