def execute(self, slot, subindex, roi, result):
    t1 = time.perf_counter()
    key = roi.toSlice()
    nlabels = self.inputs["LabelsCount"].value

    traceLogger.debug("OpPredictRandomForest: Requesting classifier. roi={}".format(roi))
    forests = self.inputs["Classifier"][:].wait()

    if any(forest is None for forest in forests):
        # Training operator may return 'None' if there was no data to train with
        return np.zeros(np.subtract(roi.stop, roi.start), dtype=np.float32)[...]

    traceLogger.debug("OpPredictRandomForest: Got classifier")
    #assert RF.labelCount() == nlabels, "ERROR: OpPredictRandomForest, labelCount differs from true labelCount! %r vs. %r" % (RF.labelCount(), nlabels)

    newKey = key[:-1]
    newKey += (slice(0, self.inputs["Image"].meta.shape[-1], None),)

    res = self.inputs["Image"][newKey].wait()

    shape = res.shape
    prod = np.prod(shape[:-1])
    res.shape = (prod, shape[-1])
    features = res

    predictions = [0] * len(forests)

    t2 = time.perf_counter()

    pool = RequestPool()

    def predict_forest(i):
        predictions[i] = forests[i].predict(np.asarray(features, dtype=np.float32))
        predictions[i] = predictions[i].reshape(result.shape[:-1])

    for i, f in enumerate(forests):
        req = pool.request(partial(predict_forest, i))

    pool.wait()
    pool.clean()

    #predictions[0] = forests[0].predict(np.asarray(features, dtype=np.float32), normalize=False)
    #predictions[0] = predictions[0].reshape(result.shape)
    prediction = np.dstack(predictions)
    result[...] = prediction

    # If our LabelsCount is higher than the number of labels in the training set,
    # then our results aren't really valid. FIXME !!!
    # Duplicate the last label's predictions
    #for c in range(result.shape[-1]):
    #    result[..., c] = prediction[..., min(c + key[-1].start, prediction.shape[-1] - 1)]

    t3 = time.perf_counter()
    logger.debug("Predict took %f seconds, actual RF time was %fs, feature time was %fs" % (t3 - t1, t3 - t2, t2 - t1))
    return result
def execute(self, slot, subindex, roi, result):
    assert slot == self._ReorderedOutput
    pool = RequestPool()

    t_ind = 0
    for t in range(roi.start[0], roi.stop[0]):
        c_ind = 0
        for c in range(roi.start[-1], roi.stop[-1]):
            newroi = roi.copy()
            newroi.start[0] = t
            newroi.stop[0] = t + 1
            newroi.start[-1] = c
            newroi.stop[-1] = c + 1

            req = self._op.Output.get(newroi)
            resView = result[t_ind:t_ind + 1, ..., c_ind:c_ind + 1]
            req.writeInto(resView)

            pool.add(req)

            c_ind += 1
        t_ind += 1

    pool.wait()
    pool.clean()
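# --- Added sketch (not from the original codebase) --------------------------
# The snippets in this collection all follow the same lazyflow fan-out idiom:
# build one Request per independent piece of work, hand the requests to a
# RequestPool, then block on pool.wait() and release resources with
# pool.clean(). Below is a minimal sketch of that idiom; `blocks` and
# `process_block` are hypothetical placeholders, and the import path assumes
# lazyflow's usual `lazyflow.request` module.
from functools import partial

from lazyflow.request import Request, RequestPool


def process_all_blocks(blocks, process_block):
    results = [None] * len(blocks)  # preallocated, one slot per request

    def work(i):
        # each request writes only to its own slot, so no locking is needed
        results[i] = process_block(blocks[i])

    pool = RequestPool()
    for i in range(len(blocks)):
        # partial(...) binds i now, avoiding the late-binding lambda pitfall
        pool.add(Request(partial(work, i)))
    pool.wait()   # block until every request has finished
    pool.clean()  # drop internal references so memory can be reclaimed
    return results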
def _label(self, roi, result):
    result = vigra.taggedView(result, axistags=self.Output.meta.axistags)

    # get the background values
    bg = self.Background[...].wait()
    bg = vigra.taggedView(bg, axistags=self.Background.meta.axistags)
    bg = bg.withAxes(*'ct')
    assert np.all(self.Background.meta.shape[3:] == self.Input.meta.shape[3:]),\
        "Shape of background values incompatible with shape of Input"

    # do labeling in parallel over channels and time slices
    pool = RequestPool()

    start = np.asarray(roi.start, dtype=int)
    stop = np.asarray(roi.stop, dtype=int)
    for ti, t in enumerate(range(roi.start[4], roi.stop[4])):
        start[4], stop[4] = t, t + 1
        for ci, c in enumerate(range(roi.start[3], roi.stop[3])):
            start[3], stop[3] = c, c + 1
            newRoi = SubRegion(self.Output,
                               start=tuple(start), stop=tuple(stop))
            resView = result[..., ci, ti].withAxes(*'xyz')
            req = Request(partial(self._label3d, newRoi, bg[c, t], resView))
            pool.add(req)

    logger.debug("{}: Computing connected components for ROI {} ...".format(
        self.name, roi))
    pool.wait()
    pool.clean()
    logger.debug("{}: Connected components computed.".format(self.name))
def execute(self, slot, subindex, roi, result):
    with self._lock:
        if self.cache is None:
            fullBlockShape = numpy.array([self.blockShape.value for i in self.Input.meta.shape])
            fun = self.inputs["Function"].value
            #data = self.inputs["Input"][:].wait()
            #split up requests into blocks
            shape = self.Input.meta.shape

            numBlocks = numpy.ceil(shape / (1.0 * fullBlockShape)).astype("int")
            blockCache = numpy.ndarray(shape=numpy.prod(numBlocks), dtype=self.Output.meta.dtype)
            pool = RequestPool()
            #blocks holds the different roi keys for each of the blocks
            blocks = itertools.product(*[range(i) for i in numBlocks])
            blockKeys = []
            for b in blocks:
                start = b * fullBlockShape
                stop = b * fullBlockShape + fullBlockShape
                stop = numpy.min(numpy.vstack((stop, shape)), axis=0)
                blockKey = roiToSlice(start, stop)
                blockKeys.append(blockKey)

            def predict_block(i):
                data = self.Input[blockKeys[i]].wait()
                blockCache[i] = fun(data)

            for i, f in enumerate(blockCache):
                req = pool.request(partial(predict_block, i))

            pool.wait()
            pool.clean()

            self.cache = [fun(blockCache)]
        return self.cache
def _label(self, roi, result):
    result = vigra.taggedView(result, axistags=self.Output.meta.axistags)

    # get the background values
    bg = self.Background[...].wait()
    bg = vigra.taggedView(bg, axistags=self.Background.meta.axistags)
    bg = bg.withAxes(*"ct")
    assert np.all(
        self.Background.meta.shape[0] == self.Input.meta.shape[0]
    ), "Shape of background values incompatible with shape of Input"
    assert np.all(
        self.Background.meta.shape[4] == self.Input.meta.shape[4]
    ), "Shape of background values incompatible with shape of Input"

    # do labeling in parallel over channels and time slices
    pool = RequestPool()

    start = np.asarray(roi.start, dtype=int)
    stop = np.asarray(roi.stop, dtype=int)
    for ti, t in enumerate(range(roi.start[0], roi.stop[0])):
        start[0], stop[0] = t, t + 1
        for ci, c in enumerate(range(roi.start[4], roi.stop[4])):
            start[4], stop[4] = c, c + 1
            newRoi = SubRegion(self.Output, start=tuple(start), stop=tuple(stop))
            resView = result[ti, ..., ci].withAxes(*"xyz")
            req = Request(partial(self._label3d, newRoi, bg[c, t], resView))
            pool.add(req)

    logger.debug("{}: Computing connected components for ROI {} ...".format(self.name, roi))
    pool.wait()
    pool.clean()
    logger.debug("{}: Connected components computed.".format(self.name))
def execute(self, slot, subindex, rroi, result):
    key = roiToSlice(rroi.start, rroi.stop)

    cnt = 0
    written = 0
    start, stop = roi.sliceToRoi(key, self.outputs["Output"].meta.shape)
    assert (stop <= self.outputs["Output"].meta.shape).all()
    #axisindex = self.inputs["AxisIndex"].value
    flag = self.inputs["AxisFlag"].value
    axisindex = self.outputs["Output"].meta.axistags.index(flag)
    #ugly-ugly-ugly
    oldkey = list(key)
    oldkey.pop(axisindex)

    #print "STACKER: ", flag, axisindex
    #print "requesting an outslot from stacker:", key, result.shape
    #print "input slots total: ", len(self.inputs['Images'])

    requests = []
    pool = RequestPool()
    for i, inSlot in enumerate(self.inputs['Images']):
        req = None
        inTagKeys = [ax.key for ax in inSlot.meta.axistags]
        if flag in inTagKeys:
            slices = inSlot.meta.shape[axisindex]
            if cnt + slices >= start[axisindex] and start[axisindex] - cnt < slices and start[axisindex] + written < stop[axisindex]:
                begin = 0
                if cnt < start[axisindex]:
                    begin = start[axisindex] - cnt
                end = slices
                if cnt + end > stop[axisindex]:
                    end -= cnt + end - stop[axisindex]
                key_ = copy.copy(oldkey)
                key_.insert(axisindex, slice(begin, end, None))
                reskey = [slice(None, None, None) for x in range(len(result.shape))]
                reskey[axisindex] = slice(written, written + end - begin, None)

                req = inSlot[tuple(key_)].writeInto(result[tuple(reskey)])
                written += end - begin
            cnt += slices
        else:
            if cnt >= start[axisindex] and start[axisindex] + written < stop[axisindex]:
                #print "key: ", key, "reskey: ", reskey, "oldkey: ", oldkey
                #print "result: ", result.shape, "inslot:", inSlot.meta.shape
                reskey = [slice(None, None, None) for s in oldkey]
                reskey.insert(axisindex, written)
                destArea = result[tuple(reskey)]
                req = inSlot[tuple(oldkey)].writeInto(destArea)
                written += 1
            cnt += 1

        if req is not None:
            pool.add(req)

    pool.wait()
    pool.clean()
def execute(self, slot, subindex, roi, result):
    t1 = time.time()
    key = roi.toSlice()
    nlabels = self.inputs["LabelsCount"].value

    traceLogger.debug("OpPredictRandomForest: Requesting classifier. roi={}".format(roi))
    forests = self.inputs["Classifier"][:].wait()

    if forests is None or any(x is None for x in forests):
        # Training operator may return 'None' if there was no data to train with
        return numpy.zeros(numpy.subtract(roi.stop, roi.start), dtype=numpy.float32)[...]

    traceLogger.debug("OpPredictRandomForest: Got classifier")
    #assert RF.labelCount() == nlabels, "ERROR: OpPredictRandomForest, labelCount differs from true labelCount! %r vs. %r" % (RF.labelCount(), nlabels)

    newKey = key[:-1]
    newKey += (slice(0, self.inputs["Image"].meta.shape[-1], None),)

    res = self.inputs["Image"][newKey].wait()

    shape = res.shape
    prod = numpy.prod(shape[:-1])
    res.shape = (prod, shape[-1])
    features = res

    predictions = [0] * len(forests)

    def predict_forest(number):
        predictions[number] = forests[number].predictProbabilities(numpy.asarray(features, dtype=numpy.float32))

    t2 = time.time()

    # predict the data with all the forests in parallel
    pool = RequestPool()
    for i, f in enumerate(forests):
        req = pool.request(partial(predict_forest, i))

    pool.wait()
    pool.clean()

    prediction = numpy.dstack(predictions)
    prediction = numpy.average(prediction, axis=2)
    prediction.shape = shape[:-1] + (forests[0].labelCount(),)
    #prediction = prediction.reshape(*(shape[:-1] + (forests[0].labelCount(),)))

    # If our LabelsCount is higher than the number of labels in the training set,
    # then our results aren't really valid. FIXME !!!
    # Duplicate the last label's predictions
    for c in range(result.shape[-1]):
        result[..., c] = prediction[..., min(c + key[-1].start, prediction.shape[-1] - 1)]

    t3 = time.time()

    self.logger.debug("predict roi=%r took %f seconds, actual RF time was %fs, feature time was %fs" % (key, t3 - t1, t3 - t2, t2 - t1))

    return result
def execute(self, slot, subindex, roi, result):
    featList = []
    labelsList = []

    for i in range(len(self.Labels)):
        feats = self.Features[i]([]).wait()

        # TODO: we should be able to use self.Labels[i].value,
        # but the current implementation of Slot.value() does not
        # do the right thing.
        labels = self.Labels[i]([]).wait()

        featstmp, labelstmp = make_feature_array(feats, labels)
        featList.append(featstmp)
        labelsList.append(labelstmp)

    featMatrix = _concatenate(featList, axis=0)
    labelsMatrix = _concatenate(labelsList, axis=0)
    print("training on matrix:", featMatrix.shape, featMatrix.dtype)

    if len(featMatrix) == 0 or len(labelsMatrix) == 0:
        result[:] = None
        return
    oob = [0] * self.ForestCount.value
    try:
        # Ensure there are no NaNs in the feature matrix
        # TODO: There should probably be a better way to fix this...
        featMatrix = numpy.asarray(featMatrix, dtype=numpy.float32)
        nanFeatMatrix = numpy.isnan(featMatrix)
        if nanFeatMatrix.any():
            warnings.warn("Feature matrix has NaN values! Replacing with 0.0...")
            featMatrix[numpy.where(nanFeatMatrix)] = 0.0

        # train and store forests in parallel
        pool = RequestPool()
        for i in range(self.ForestCount.value):
            def train_and_store(number):
                result[number] = vigra.learning.RandomForest(self._tree_count)
                oob[number] = result[number].learnRF(featMatrix, numpy.asarray(labelsMatrix, dtype=numpy.uint32))
                print("intermediate oob:", oob[number])
            req = Request(partial(train_and_store, i))
            pool.add(req)

        pool.wait()
        pool.clean()
    except:
        print("couldn't learn classifier")
        raise
    oob_total = numpy.mean(oob)
    print("training finished, out of bag error:", oob_total)
    return result
def predict(cls, X, method="classic"): """ predict if the histograms in X correspond to missing regions do this for subsets of X in parallel """ if cls._manager is None: cls._manager = SVMManager() assert len( X.shape ) == 2, "Prediction data must have shape (nSamples, nHistogramBins)." nBins = X.shape[1] if method == "classic": svm = PseudoSVC() else: try: svm = cls._manager.get(nBins) except SVMManager.NotTrainedError: # fail gracefully if not trained => responsibility of user! svm = PseudoSVC() y = np.zeros((len(X), )) * np.nan pool = RequestPool() chunkSize = 1000 # FIXME magic number?? nChunks = len(X) // chunkSize + (1 if len(X) % chunkSize > 0 else 0) s = [ slice(k * chunkSize, min((k + 1) * chunkSize, len(X))) for k in range(nChunks) ] def partFun(i): y[s[i]] = svm.predict(X[s[i]]) for i in range(nChunks): req = Request(partial(partFun, i)) pool.add(req) pool.wait() pool.clean() # not neccessary # assert not np.any(np.isnan(y)) return np.asarray(y)
def predict(cls, X, method='classic'):
    """
    predict if the histograms in X correspond to missing regions
    do this for subsets of X in parallel
    """

    if cls._manager is None:
        cls._manager = SVMManager()

    # svm input has to be (nSamples, nFeatures)
    # -> for us: nSamples = len(X), nFeatures = number of histogram bins
    X_reshaped = np.zeros((len(X), len(X[0])))
    for i in range(len(X)):
        X_reshaped[i, :] = X[i]

    n_bins = len(X[0])

    if method == 'classic' or not have_sklearn:
        logger.warning("no real svm used! -> PseudoSVC")
        svm = PseudoSVC()
    else:
        # load samples for histograms of labeled regions
        try:
            svm = cls._manager.get(n_bins)
        except SVMManager.NotTrainedError:
            # fail gracefully if not trained => responsibility of user!
            svm = PseudoSVC()

    y = np.zeros((len(X),)) * np.nan

    pool = RequestPool()

    # chunk up all samples from X into chunks that will be predicted in parallel
    chunk_size = 1000  # FIXME magic number??
    n_chunks = len(X) // chunk_size + (1 if len(X) % chunk_size > 0 else 0)

    s = [slice(k * chunk_size, min((k + 1) * chunk_size, len(X)))
         for k in range(n_chunks)]

    def partFun(i):
        y[s[i]] = svm.predict(X_reshaped[s[i], :])

    for i in range(n_chunks):
        req = Request(partial(partFun, i))
        pool.add(req)

    pool.wait()
    pool.clean()

    return np.asarray(y)
def execute(self, slot, subindex, roi, result):
    with self._lock:
        if self.cache is None:
            shape = self.Input.meta.shape
            # self.blockshape has None in the last dimension to indicate that it should not be
            # handled block-wise. None is replaced with the image shape in the respective axis.
            fullBlockShape = []
            for u, v in zip(self.blockShape.value, shape):
                if u is not None:
                    fullBlockShape.append(u)
                else:
                    fullBlockShape.append(v)
            fullBlockShape = numpy.array(fullBlockShape, dtype=numpy.float64)

            # data = self.inputs["Input"][:].wait()
            # split up requests into blocks
            numBlocks = numpy.ceil(shape / fullBlockShape).astype("int")

            blockCache = numpy.ndarray(shape=numpy.prod(numBlocks), dtype=self.Output.meta.dtype)
            pool = RequestPool()
            # blocks holds the different roi keys for each of the blocks
            blocks = itertools.product(*[list(range(i)) for i in numBlocks])
            blockKeys = []
            for b in blocks:
                start = b * fullBlockShape
                stop = b * fullBlockShape + fullBlockShape
                stop = numpy.min(numpy.vstack((stop, shape)), axis=0)
                blockKey = roiToSlice(start, stop)
                blockKeys.append(blockKey)

            fun = self.inputs["Function"].value

            def predict_block(i):
                data = self.Input[blockKeys[i]].wait()
                blockCache[i] = fun(data)

            for i, f in enumerate(blockCache):
                req = pool.request(partial(predict_block, i))

            pool.wait()
            pool.clean()

            self.cache = [fun(blockCache)]
        return self.cache
def predict(cls, X, method='classic'):
    '''
    predict if the histograms in X correspond to missing regions
    do this for subsets of X in parallel
    '''

    if cls._manager is None:
        cls._manager = SVMManager()

    assert len(X.shape) == 2, \
        "Prediction data must have shape (nSamples, nHistogramBins)."

    nBins = X.shape[1]

    if method == 'classic' or not havesklearn:
        svm = PseudoSVC()
    else:
        try:
            svm = cls._manager.get(nBins)
        except SVMManager.NotTrainedError:
            # fail gracefully if not trained => responsibility of user!
            svm = PseudoSVC()

    y = np.zeros((len(X),)) * np.nan

    pool = RequestPool()

    chunkSize = 1000  # FIXME magic number??
    nChunks = len(X) // chunkSize + (1 if len(X) % chunkSize > 0 else 0)

    s = [slice(k * chunkSize, min((k + 1) * chunkSize, len(X)))
         for k in range(nChunks)]

    def partFun(i):
        y[s[i]] = svm.predict(X[s[i]])

    for i in range(nChunks):
        req = Request(partial(partFun, i))
        pool.add(req)

    pool.wait()
    pool.clean()

    # not necessary
    # assert not np.any(np.isnan(y))
    return np.asarray(y)
def execute(self, slot, subindex, rroi, result): assert slot == self.Features or slot == self.Output if slot == self.Features: key = roiToSlice(rroi.start, rroi.stop) index = subindex[0] key = list(key) channelIndex = self.Input.meta.axistags.index('c') # Translate channel slice to the correct location for the output slot. key[channelIndex] = slice(self.featureOutputChannels[index][0] + key[channelIndex].start, self.featureOutputChannels[index][0] + key[channelIndex].stop) rroi = SubRegion(self.Output, pslice=key) # Get output slot region for this channel return self.execute(self.Output, (), rroi, result) elif slot == self.outputs["Output"]: key = rroi.toSlice() logger.debug("OpPixelFeaturesPresmoothed: request %s" % (rroi.pprint(),)) cnt = 0 written = 0 assert (rroi.stop<=self.outputs["Output"].meta.shape).all() flag = 'c' channelAxis=self.inputs["Input"].meta.axistags.index('c') axisindex = channelAxis oldkey = list(key) oldkey.pop(axisindex) inShape = self.inputs["Input"].meta.shape hasChannelAxis = (self.Input.meta.axistags.axisTypeCount(vigra.AxisType.Channels) > 0) #if (self.Input.meta.axistags.axisTypeCount(vigra.AxisType.Channels) == 0): # noChannels = True inAxistags = self.inputs["Input"].meta.axistags shape = self.outputs["Output"].meta.shape axistags = self.outputs["Output"].meta.axistags result = result.view(vigra.VigraArray) result.axistags = copy.copy(axistags) hasTimeAxis = self.inputs["Input"].meta.axistags.axisTypeCount(vigra.AxisType.Time) timeAxis=self.inputs["Input"].meta.axistags.index('t') subkey = popFlagsFromTheKey(key,axistags,'c') subshape=popFlagsFromTheKey(shape,axistags,'c') at2 = copy.copy(axistags) at2.dropChannelAxis() subshape=popFlagsFromTheKey(subshape,at2,'t') subkey = popFlagsFromTheKey(subkey,at2,'t') oldstart, oldstop = roi.sliceToRoi(key, shape) start, stop = roi.sliceToRoi(subkey,subkey) maxSigma = max(0.7,self.maxSigma) #we use 0.7 as an approximation of not doing any smoothing #smoothing was already applied previously # The region of the smoothed image we need to give to the feature filter (in terms of INPUT coordinates) # 0.7, because the features receive a pre-smoothed array and don't need much of a neighborhood vigOpSourceStart, vigOpSourceStop = roi.enlargeRoiForHalo(start, stop, subshape, 0.7, self.WINDOW_SIZE) # The region of the input that we need to give to the smoothing operator (in terms of INPUT coordinates) newStart, newStop = roi.enlargeRoiForHalo(vigOpSourceStart, vigOpSourceStop, subshape, maxSigma, self.WINDOW_SIZE) newStartSmoother = roi.TinyVector(start - vigOpSourceStart) newStopSmoother = roi.TinyVector(stop - vigOpSourceStart) roiSmoother = roi.roiToSlice(newStartSmoother, newStopSmoother) # Translate coordinates (now in terms of smoothed image coordinates) vigOpSourceStart = roi.TinyVector(vigOpSourceStart - newStart) vigOpSourceStop = roi.TinyVector(vigOpSourceStop - newStart) readKey = roi.roiToSlice(newStart, newStop) writeNewStart = start - newStart writeNewStop = writeNewStart + stop - start treadKey=list(readKey) if hasTimeAxis: if timeAxis < channelAxis: treadKey.insert(timeAxis, key[timeAxis]) else: treadKey.insert(timeAxis-1, key[timeAxis]) if self.inputs["Input"].meta.axistags.axisTypeCount(vigra.AxisType.Channels) == 0: treadKey = popFlagsFromTheKey(treadKey,axistags,'c') else: treadKey.insert(channelAxis, slice(None,None,None)) treadKey=tuple(treadKey) req = self.inputs["Input"][treadKey] sourceArray = req.wait() req.clean() #req.result = None req.destination = None if sourceArray.dtype != numpy.float32: 
sourceArrayF = sourceArray.astype(numpy.float32) try: sourceArray.resize((1,), refcheck = False) except: pass del sourceArray sourceArray = sourceArrayF #if (self.Input.meta.axistags.axisTypeCount(vigra.AxisType.Channels) == 0): #add a channel dimension to make the code afterwards more uniform # sourceArray = sourceArray.view(numpy.ndarray) # sourceArray = sourceArray.reshape(sourceArray.shape+(1,)) sourceArrayV = sourceArray.view(vigra.VigraArray) sourceArrayV.axistags = copy.copy(inAxistags) dimCol = len(self.scales) dimRow = self.matrix.shape[0] sourceArraysForSigmas = [None]*dimCol #connect individual operators try: for j in range(dimCol): hasScale = False for i in range(dimRow): if self.matrix[i,j]: hasScale = True if not hasScale: continue destSigma = 1.0 if self.scales[j] > destSigma: tempSigma = math.sqrt(self.scales[j]**2 - destSigma**2) else: destSigma = 0.0 tempSigma = self.scales[j] vigOpSourceShape = list(vigOpSourceStop - vigOpSourceStart) if hasTimeAxis: if timeAxis < channelAxis: vigOpSourceShape.insert(timeAxis, ( oldstop - oldstart)[timeAxis]) else: vigOpSourceShape.insert(timeAxis-1, ( oldstop - oldstart)[timeAxis]) vigOpSourceShape.insert(channelAxis, inShape[channelAxis]) sourceArraysForSigmas[j] = numpy.ndarray(tuple(vigOpSourceShape),numpy.float32) for i,vsa in enumerate(sourceArrayV.timeIter()): droi = (tuple(vigOpSourceStart._asint()), tuple(vigOpSourceStop._asint())) tmp_key = getAllExceptAxis(len(sourceArraysForSigmas[j].shape),timeAxis, i) sourceArraysForSigmas[j][tmp_key] = self._computeGaussianSmoothing(vsa, tempSigma, droi) else: droi = (tuple(vigOpSourceStart._asint()), tuple(vigOpSourceStop._asint())) sourceArraysForSigmas[j] = self._computeGaussianSmoothing(sourceArrayV, tempSigma, droi) except RuntimeError as e: if e.message.find('kernel longer than line') > -1: message = "Feature computation error:\nYour image is too small to apply a filter with sigma=%.1f. Please select features with smaller sigmas." % self.scales[j] raise RuntimeError(message) else: raise e del sourceArrayV try: sourceArray.resize((1,), refcheck = False) except ValueError: # Sometimes this fails, but that's okay. logger.debug("Failed to free array memory.") del sourceArray closures = [] #connect individual operators for i in range(dimRow): for j in range(dimCol): val=self.matrix[i,j] if val: vop= self.featureOps[i][j] oslot = vop.outputs["Output"] req = None #inTagKeys = [ax.key for ax in oslot.meta.axistags] #print inTagKeys, flag if hasChannelAxis: slices = oslot.meta.shape[axisindex] if cnt + slices >= rroi.start[axisindex] and rroi.start[axisindex]-cnt<slices and rroi.start[axisindex]+written<rroi.stop[axisindex]: begin = 0 if cnt < rroi.start[axisindex]: begin = rroi.start[axisindex] - cnt end = slices if cnt + end > rroi.stop[axisindex]: end -= cnt + end - rroi.stop[axisindex] key_ = copy.copy(oldkey) key_.insert(axisindex, slice(begin, end, None)) reskey = [slice(None, None, None) for x in range(len(result.shape))] reskey[axisindex] = slice(written, written+end-begin, None) destArea = result[tuple(reskey)] #readjust the roi for the new source array roiSmootherList = list(roiSmoother) roiSmootherList.insert(axisindex, slice(begin, end, None)) if hasTimeAxis: # The time slice in the ROI doesn't matter: # The sourceArrayParameter below overrides the input data to be used. 
roiSmootherList.insert(timeAxis, 0) roiSmootherRegion = SubRegion(oslot, pslice=roiSmootherList) closure = partial(oslot.operator.execute, oslot, (), roiSmootherRegion, destArea, sourceArray = sourceArraysForSigmas[j]) closures.append(closure) written += end - begin cnt += slices else: if cnt>=rroi.start[axisindex] and rroi.start[axisindex] + written < rroi.stop[axisindex]: reskey = [slice(None, None, None) for x in range(len(result.shape))] slices = oslot.meta.shape[axisindex] reskey[axisindex]=slice(written, written+slices, None) #print "key: ", key, "reskey: ", reskey, "oldkey: ", oldkey, "resshape:", result.shape #print "roiSmoother:", roiSmoother destArea = result[tuple(reskey)] #print "destination area:", destArea.shape logger.debug(oldkey, destArea.shape, sourceArraysForSigmas[j].shape) oldroi = SubRegion(oslot, pslice=oldkey) #print "passing roi:", oldroi closure = partial(oslot.operator.execute, oslot, (), oldroi, destArea, sourceArray = sourceArraysForSigmas[j]) closures.append(closure) written += 1 cnt += 1 pool = RequestPool() for c in closures: r = pool.request(c) pool.wait() pool.clean() for i in range(len(sourceArraysForSigmas)): if sourceArraysForSigmas[i] is not None: try: sourceArraysForSigmas[i].resize((1,)) except: sourceArraysForSigmas[i] = None
def execute(self, slot, subindex, roi, result): progress = 0 numImages = len(self.Images) self.progressSignal(progress) featMatrix=[] labelsMatrix=[] tagList = [] #result[0] = self._svr for i,labels in enumerate(self.inputs["ForegroundLabels"]): if labels.meta.shape is not None: opGaussian = OpGaussianSmoothing(parent = self, graph = self.graph) opGaussian.Sigma.setValue(self.Sigma.value) opGaussian.Input.connect(self.ForegroundLabels[i]) blocks = self.inputs["nonzeroLabelBlocks"][i][0].wait() reqlistlabels = [] reqlistbg = [] reqlistfeat = [] progress += 10 / numImages self.progressSignal(progress) for b in blocks[0]: request = opGaussian.Output[b] #request = labels[b] featurekey = list(b) featurekey[-1] = slice(None, None, None) request2 = self.Images[i][featurekey] request3 = self.inputs["BackgroundLabels"][i][b] reqlistlabels.append(request) reqlistfeat.append(request2) reqlistbg.append(request3) traceLogger.debug("Requests prepared") numLabelBlocks = len(reqlistlabels) progress_outer = [progress] if numLabelBlocks > 0: progressInc = (80 - 10)/(numLabelBlocks * numImages) def progressNotify(req): progress_outer[0] += progressInc/2 self.progressSignal(progress_outer[0]) for ir, req in enumerate(reqlistfeat): req.notify_finished(progressNotify) req.submit() for ir, req in enumerate(reqlistlabels): req.notify_finished(progressNotify) req.submit() for ir, req in enumerate(reqlistbg): req.notify_finished(progressNotify) req.submit() traceLogger.debug("Requests fired") #Fixme: Maybe later request only part of the region? #image=self.inputs["Images"][i][:].wait() for ir, req in enumerate(reqlistlabels): labblock = req.wait() image = reqlistfeat[ir].wait() labbgblock = reqlistbg[ir].wait() labblock = labblock.reshape((image.shape[:-1])) image = image.reshape((-1, image.shape[-1])) labbgindices = np.where(labbgblock == 2) labbgindices = np.ravel_multi_index(labbgindices, labbgblock.shape) newDot, mapping, tags = \ self._svr.prepareDataRefactored(labblock, labbgindices) #self._svr.prepareData(labblock, smooth = True) labels = newDot[mapping] features = image[mapping] featMatrix.append(features) labelsMatrix.append(labels) tagList.append(tags) progress = progress_outer[0] traceLogger.debug("Requests processed") self.progressSignal(80 / numImages) if len(featMatrix) == 0 or len(labelsMatrix) == 0: result[:] = None else: posTags = [tag[0] for tag in tagList] negTags = [tag[1] for tag in tagList] numPosTags = np.sum(posTags) numTags = np.sum(posTags) + np.sum(negTags) fullFeatMatrix = np.ndarray((numTags, self.Images[0].meta.shape[-1]), dtype = np.float64) fullLabelsMatrix = np.ndarray((numTags), dtype = np.float64) fullFeatMatrix[:] = np.NAN fullLabelsMatrix[:] = np.NAN currPosCount = 0 currNegCount = numPosTags for i, posCount in enumerate(posTags): fullFeatMatrix[currPosCount:currPosCount + posTags[i],:] = featMatrix[i][:posCount,:] fullLabelsMatrix[currPosCount:currPosCount + posTags[i]] = labelsMatrix[i][:posCount] fullFeatMatrix[currNegCount:currNegCount + negTags[i],:] = featMatrix[i][posCount:,:] fullLabelsMatrix[currNegCount:currNegCount + negTags[i]] = labelsMatrix[i][posCount:] currPosCount += posTags[i] currNegCount += negTags[i] assert(not np.isnan(np.sum(fullFeatMatrix))) fullTags = [np.sum(posTags), np.sum(negTags)] #pool = RequestPool() maxima = np.max(fullFeatMatrix, axis=0) minima = np.min(fullFeatMatrix, axis=0) normalizationFactors = (minima,maxima) boxConstraintList = [] boxConstraints = None if self.BoxConstraintRois.ready() and self.BoxConstraintValues.ready(): for i, slot in 
enumerate(zip(self.BoxConstraintRois,self.BoxConstraintValues)): for constr, val in zip(slot[0].value, slot[1].value): boxConstraintList.append((i, constr, val)) if len(boxConstraintList) > 0: boxConstraints = self.constructBoxConstraints(boxConstraintList) params = self._svr.get_params() try: pool = RequestPool() def train_and_store(i): result[i] = SVR(minmax = normalizationFactors, **params) result[i].fitPrepared(fullFeatMatrix, fullLabelsMatrix, tags = fullTags, boxConstraints = boxConstraints, numRegressors = self.numRegressors, trainAll = False) for i in range(self.numRegressors): req = pool.request(partial(train_and_store, i)) pool.wait() pool.clean() except: logger.error("ERROR: could not learn regressor") logger.error("fullFeatMatrix shape = {}, dtype = {}".format(fullFeatMatrix.shape, fullFeatMatrix.dtype) ) logger.error("fullLabelsMatrix shape = {}, dtype = {}".format(fullLabelsMatrix.shape, fullLabelsMatrix.dtype) ) raise finally: self.progressSignal(100) return result
def _execute_graphcut(self, roi, result): for i in (0, 4): assert roi.stop[i] - roi.start[i] == 1,\ "Invalid roi for graph-cut: {}".format(str(roi)) t = roi.start[0] c = roi.start[4] margin = self.Margin.value beta = self.Beta.value MAXBOXSIZE = 10000000 # FIXME justification?? ## request the bounding box coordinates ## # the trailing index brackets give us the dictionary (instead of an # array of size 1) feats = self.BoundingBoxes.get(roi).wait() mins = feats["Coord<Minimum>"] maxs = feats["Coord<Maximum>"] nobj = mins.shape[0] # these are indices, so they should have an index datatype mins = mins.astype(np.uint32) maxs = maxs.astype(np.uint32) ## request the prediction image ## pred = self.Prediction.get(roi).wait() pred = vigra.taggedView(pred, axistags=self.Prediction.meta.axistags) pred = pred.withAxes(*'xyz') ## request the connected components image ## cc = self.LabelImage.get(roi).wait() cc = vigra.taggedView(cc, axistags=self.LabelImage.meta.axistags) cc = cc.withAxes(*'xyz') # provide xyz view for the output (just need 8bit for segmentation resultXYZ = vigra.taggedView(np.zeros(cc.shape, dtype=np.uint8), axistags='xyz') def processSingleObject(i): logger.debug("processing object {}".format(i)) # maxs are inclusive, so we need to add 1 xmin = max(mins[i][0] - margin[0], 0) ymin = max(mins[i][1] - margin[1], 0) zmin = max(mins[i][2] - margin[2], 0) xmax = min(maxs[i][0] + margin[0] + 1, cc.shape[0]) ymax = min(maxs[i][1] + margin[1] + 1, cc.shape[1]) zmax = min(maxs[i][2] + margin[2] + 1, cc.shape[2]) ccbox = cc[xmin:xmax, ymin:ymax, zmin:zmax] resbox = resultXYZ[xmin:xmax, ymin:ymax, zmin:zmax] nVoxels = ccbox.size if nVoxels > MAXBOXSIZE: #problem too large to run graph cut, assign to seed logger.warn("Object {} too large for graph cut.".format(i)) resbox[ccbox == i] = 1 return probbox = pred[xmin:xmax, ymin:ymax, zmin:zmax] gcsegm = segmentGC(probbox, beta) gcsegm = vigra.taggedView(gcsegm, axistags='xyz') ccsegm = vigra.analysis.labelVolumeWithBackground( gcsegm.astype(np.uint8)) # Extended bboxes of different objects might overlap. # To avoid conflicting segmentations, we find all connected # components in the results and only take the one, which # overlaps with the object "core" or "seed", defined by the # pre-thresholding seed = ccbox == i filtered = seed * ccsegm passed = vigra.analysis.unique(filtered.astype(np.uint32)) assert len(passed.shape) == 1 if passed.size > 2: logger.warn("ambiguous label assignment for region {}".format( (xmin, xmax, ymin, ymax, zmin, zmax))) resbox[ccbox == i] = 1 elif passed.size <= 1: logger.warn("box {} segmented out with beta {}".format( i, beta)) else: # assign to the overlap region label = passed[1] # 0 is background resbox[ccsegm == label] = 1 pool = RequestPool() #FIXME make sure that the parallel computations fit into memory for i in range(1, nobj): req = Request(functools.partial(processSingleObject, i)) pool.add(req) logger.info("Processing {} objects ...".format(nobj - 1)) pool.wait() pool.clean() logger.info("object loop done") # prepare result resView = vigra.taggedView(result, axistags=self.Output.meta.axistags) resView = resView.withAxes(*'xyz') # some labels could have been removed => relabel vigra.analysis.labelVolumeWithBackground(resultXYZ, out=resView)
def execute(self, slot, subindex, roi, result): assert slot in [self.Predictions, self.Probabilities, self.CachedProbabilities, self.ProbabilityChannels, self.BadObjects] times = roi._l if len(times) == 0: # we assume that 0-length requests are requesting everything times = range(self.Predictions.meta.shape[0]) if slot is self.CachedProbabilities: return {t: self.prob_cache[t] for t in times if t in self.prob_cache} forests=self.inputs["Classifier"][:].wait() if forests is None or forests[0] is None: # this happens if there was no data to train with return dict((t, numpy.array([])) for t in times) feats = {} prob_predictions = {} selected = self.SelectedFeatures([]).wait() # FIXME: self.prob_cache is shared, so we need to block. # However, this makes prediction single-threaded. self.lock.acquire() try: for t in times: if t in self.prob_cache: continue tmpfeats = self.Features([t]).wait() ftmatrix, _, col_names = make_feature_array(tmpfeats, selected) rows, cols = replace_missing(ftmatrix) self.bad_objects[t] = numpy.zeros((ftmatrix.shape[0],)) self.bad_objects[t][rows] = 1 feats[t] = ftmatrix prob_predictions[t] = [0] * len(forests) def predict_forest(_t, forest_index): # Note: We can't use RandomForest.predictLabels() here because we're training in parallel, # and we have to average the PROBABILITIES from all forests. # Averaging the label predictions from each forest is NOT equivalent. # For details please see wikipedia: # http://en.wikipedia.org/wiki/Electoral_College_%28United_States%29#Irrelevancy_of_national_popular_vote # (^-^) prob_predictions[_t][forest_index] = forests[forest_index].predictProbabilities(feats[_t].astype(numpy.float32)) # predict the data with all the forests in parallel pool = RequestPool() for t in times: if t in self.prob_cache: continue for i, f in enumerate(forests): req = Request( partial(predict_forest, t, i) ) pool.add(req) pool.wait() pool.clean() for t in times: if t not in self.prob_cache: # prob_predictions is a dict-of-lists-of-arrays, indexed as follows: # prob_predictions[t][forest_index][object_index, class_index] # Stack the forests together and average them. stacked_predictions = numpy.array( prob_predictions[t] ) averaged_predictions = numpy.average( stacked_predictions, axis=0 ) assert averaged_predictions.shape[0] == len(feats[t]) self.prob_cache[t] = averaged_predictions self.prob_cache[t][0] = 0 # Background probability is always zero if slot == self.Probabilities: return { t : self.prob_cache[t] for t in times } elif slot == self.Predictions: # FIXME: Support SegmentationThreshold again... labels = dict() for t in times: prob_sum = numpy.sum(self.prob_cache[t], axis=1) labels[t] = 1 + numpy.argmax(self.prob_cache[t], axis=1) labels[t][0] = 0 # Background gets the zero label return labels elif slot == self.ProbabilityChannels: try: prob_single_channel = {t: self.prob_cache[t][:, subindex[0]] for t in times} except: # no probabilities available for this class; return zeros prob_single_channel = {t: numpy.zeros((self.prob_cache[t].shape[0], 1)) for t in times} return prob_single_channel elif slot == self.BadObjects: return { t : self.bad_objects[t] for t in times } else: assert False, "Unknown input slot" finally: self.lock.release()
def execute(self, slot, subindex, roi, result):
    featList = []
    all_col_names = []
    labelsList = []

    # will be available at slot self.Warnings
    all_bad_objects = defaultdict(lambda: defaultdict(list))
    all_bad_feats = set()

    selected = self.SelectedFeatures([]).wait()
    if len(selected) == 0:
        # no features - no predictions
        self.Classifier.setValue(None)
        return

    for i in range(len(self.Labels)):
        # this loop is by image, not time!

        # TODO: we should be able to use self.Labels[i].value,
        # but the current implementation of Slot.value() does not
        # do the right thing.
        labels_image = self.Labels[i]([]).wait()
        labels_image_filtered = {}
        nztimes = []
        for timestep, labels_time in labels_image.items():
            nz = numpy.nonzero(labels_time)
            if len(nz[0]) == 0:
                continue
            else:
                nztimes.append(timestep)
                labels_image_filtered[timestep] = labels_time

        if len(nztimes) == 0:
            continue
        # compute the features if there are nonzero labels in this image
        # and only for the time steps, which have labels
        feats = self.Features[i](nztimes).wait()

        featstmp, row_names, col_names, labelstmp = make_feature_array(feats, selected, labels_image_filtered)
        if labelstmp.size == 0 or featstmp.size == 0:
            continue

        rows, cols = replace_missing(featstmp)

        featList.append(featstmp)
        all_col_names.append(tuple(col_names))
        labelsList.append(labelstmp)

        for idx in rows:
            t, obj = row_names[idx]
            all_bad_objects[i][t].append(obj)

        for c in cols:
            all_bad_feats.add(col_names[c])

    if len(labelsList) == 0:
        # no labels, return here
        self.Classifier.setValue(None)
        return

    self._warnBadObjects(all_bad_objects, all_bad_feats)

    if len(set(all_col_names)) != 1:
        raise Exception('different time slices did not have the same features.')

    featMatrix = _concatenate(featList, axis=0)
    labelsMatrix = _concatenate(labelsList, axis=0)

    logger.info("training on matrix of shape {}".format(featMatrix.shape))

    if featMatrix.size == 0 or labelsMatrix.size == 0:
        result[:] = None
        return
    oob = [0] * self.ForestCount.value
    try:
        # train and store forests in parallel
        pool = RequestPool()
        for i in range(self.ForestCount.value):
            def train_and_store(number):
                result[number] = vigra.learning.RandomForest(self._tree_count)
                oob[number] = result[number].learnRF(featMatrix.astype(numpy.float32),
                                                     numpy.asarray(labelsMatrix, dtype=numpy.uint32))
            req = Request(partial(train_and_store, i))
            pool.add(req)
        pool.wait()
        pool.clean()
    except:
        logger.warning("couldn't learn classifier")
        raise
    oob_total = numpy.mean(oob)
    logger.info("training finished, out of bag error: {}".format(oob_total))
    return result
def execute(self, slot, subindex, roi, result): progress = 0 self.progressSignal(progress) numImages = len(self.Images) featMatrix=[] labelsMatrix=[] tagsMatrix = [] result[0] = SVR(self.UnderMult.value, self.OverMult.value, limitDensity = True, **self.SelectedOption.value) for i,labels in enumerate(self.inputs["Labels"]): if labels.meta.shape is not None: #labels=labels[:].wait() blocks = self.inputs["nonzeroLabelBlocks"][i][0].wait() progress += 10/numImages self.progressSignal(progress) reqlistlabels = [] reqlistfeat = [] traceLogger.debug("Sending requests for {} non-zero blocks (labels and data)".format( len(blocks[0])) ) for b in blocks[0]: request = labels[b] featurekey = list(b) featurekey[-1] = slice(None, None, None) request2 = self.inputs["Images"][i][featurekey] reqlistlabels.append(request) reqlistfeat.append(request2) traceLogger.debug("Requests prepared") numLabelBlocks = len(reqlistlabels) progress_outer = [progress] # Store in list for closure access if numLabelBlocks > 0: progressInc = (80-10)/numLabelBlocks/numImages def progressNotify(req): # Note: If we wanted perfect progress reporting, we could use lock here # to protect the progress from being incremented simultaneously. # But that would slow things down and imperfect reporting is okay for our purposes. progress_outer[0] += progressInc/2 self.progressSignal(progress_outer[0]) for ir, req in enumerate(reqlistfeat): image = req.notify_finished(progressNotify) for ir, req in enumerate(reqlistlabels): labblock = req.notify_finished(progressNotify) traceLogger.debug("Requests fired") for ir, req in enumerate(reqlistlabels): traceLogger.debug("Waiting for a label block...") labblock = req.wait() traceLogger.debug("Waiting for an image block...") image = reqlistfeat[ir].wait() newImg, newDot, mapping, tags = \ result[0].prepareData(image, labblock, sigma = self.Sigma.value, smooth = True, normalize = False) features = newImg[mapping] labbla = newDot[mapping] #indexes=np.nonzero(labblock[...,0].view(np.ndarray)) #features=image[indexes] #labbla=labblock[indexes] featMatrix.append(features) labelsMatrix.append(labbla) tagsMatrix.append(tags) progress = progress_outer[0] traceLogger.debug("Requests processed") self.progressSignal(80/numImages) if len(featMatrix) == 0 or len(labelsMatrix) == 0: # If there was no actual data for the random forest to train with, we return None result[:] = None else: featMatrix=np.concatenate(featMatrix,axis=0) labelsMatrix=np.concatenate(labelsMatrix,axis=0) tagsMatrix=np.concatenate(tagsMatrix,axis=0) try: logger.debug("Learning with Vigra...") pool = RequestPool() #result[0].fitPrepared(featMatrix, labelsMatrix, tagsMatrix, self.Epsilon.value) req = pool.request(partial(result[0].fitPrepared, featMatrix, labelsMatrix, tagsMatrix, self.Epsilon.value)) pool.wait() pool.clean() logger.debug("Vigra finished") except: logger.error( "ERROR: could not learn classifier" ) logger.error( "featMatrix shape={}, max={}, dtype={}".format(featMatrix.shape, featMatrix.max(), featMatrix.dtype) ) logger.error( "labelsMatrix shape={}, max={}, dtype={}".format(labelsMatrix.shape, labelsMatrix.max(), labelsMatrix.dtype ) ) raise finally: self.progressSignal(100) return result
def extractHistograms(volume, labels, patchSize=64, haloSize=0, nBins=30, intRange=(0, 255), appendPositions=False):
    '''
    extracts histograms from a 3d volume
     - labels are: 0 ignore, 1 positive, 2 negative
     - histogram extraction is attempted to be done in parallel
     - patches that intersect with the volume border are discarded
     - volume and labels must be 3d, and in order 'zyx' (if not VigraArrays)
     - returns: np.ndarray, shape: (nSamples, nBins+1), last column is the label
    '''

    # progress reporter class, histogram extraction can take quite a long time
    class ProgressReporter(object):

        lock = None

        def __init__(self, nThreads):
            self.lock = ThreadLock()
            self.nThreads = nThreads
            self.status = np.zeros((nThreads,))

        def report(self, index):
            self.lock.acquire()
            self.status[index] = 1
            logger.debug("Finished threads: %d/%d." % (self.status.sum(), len(self.status)))
            self.lock.release()

    # sanity checks
    assert len(volume.shape) == 3, "Volume must be 3d data"
    assert volume.shape == labels.shape, \
        "Volume and labels must have the same shape"

    try:
        volumeZYX = volume.withAxes(*'zyx')
        labelsZYX = labels.withAxes(*'zyx')
    except AttributeError:
        # can't blame me
        volumeZYX = volume
        labelsZYX = labels

    # compute actual patch size
    patchSize = patchSize + 2 * haloSize

    # fill list of patch centers (VigraArray does not support bitwise_or)
    ind_z, ind_y, ind_x = np.where(
        (labelsZYX == 1).view(np.ndarray) | (labelsZYX == 2).view(np.ndarray))
    index = np.arange(len(ind_z))

    # prepare chunking of histogram centers
    chunkSize = 10000  # FIXME magic number??
    nChunks = len(index) // chunkSize + (1 if len(index) % chunkSize > 0 else 0)
    sliceList = [slice(k * chunkSize, min((k + 1) * chunkSize, len(index)))
                 for k in range(nChunks)]
    histoList = [None] * nChunks

    # prepare subroutine for parallel extraction
    reporter = ProgressReporter(nChunks)

    #BEGIN subroutine
    def _extractHistogramsSub(itemList):

        xs = ind_x[itemList]
        ys = ind_y[itemList]
        zs = ind_z[itemList]

        ymin = ys - patchSize // 2
        ymax = ymin + patchSize
        xmin = xs - patchSize // 2
        xmax = xmin + patchSize

        validPatchIndices = np.where(
            np.all((ymin >= 0, xmin >= 0,
                    xmax <= volumeZYX.shape[2],
                    ymax <= volumeZYX.shape[1]), axis=0))[0]

        if appendPositions:
            out = np.zeros((len(validPatchIndices), nBins + 4))
        else:
            out = np.zeros((len(validPatchIndices), nBins + 1))

        for k, patchInd in enumerate(validPatchIndices):
            x = xs[patchInd]
            y = ys[patchInd]
            z = zs[patchInd]

            vol = volumeZYX[z, ymin[patchInd]:ymax[patchInd], xmin[patchInd]:xmax[patchInd]]
            (out[k, :nBins], _) = np.histogram(vol, bins=nBins, range=intRange, density=True)
            out[k, nBins] = 1 if labelsZYX[z, y, x] == 1 else 0
            if appendPositions:
                out[k, nBins + 1:] = [z, y, x]

        return out

    def partFun(i):
        itemList = index[sliceList[i]]
        histos = _extractHistogramsSub(itemList)
        histoList[i] = histos

        reporter.report(i)
    #END subroutine

    # pool the extraction requests
    pool = RequestPool()
    for i in range(nChunks):
        req = Request(partial(partFun, i))
        pool.add(req)
    pool.wait()
    pool.clean()

    return np.vstack(histoList)
def _felzenszwalbTraining(self, negative, positive): ''' we want to train on a 'hard' subset of the training data, see FELZENSZWALB ET AL.: OBJECT DETECTION WITH DISCRIMINATIVELY TRAINED PART-BASED MODELS (4.4), PAMI 32/9 ''' #TODO sanity checks n = (self.PatchSize.value + self.HaloSize.value)**2 method = self.DetectionMethod.value # set options for Felzenszwalb training firstSamples = self._felzenOpts["firstSamples"] maxRemovePerStep = self._felzenOpts["maxRemovePerStep"] maxAddPerStep = self._felzenOpts["maxAddPerStep"] maxSamples = self._felzenOpts["maxSamples"] nTrainingSteps = self._felzenOpts["nTrainingSteps"] # initial choice of training samples (initNegative, choiceNegative, _, _) = \ _chooseRandomSubset(negative, min(firstSamples, len(negative))) (initPositive, choicePositive, _, _) = \ _chooseRandomSubset(positive, min(firstSamples, len(positive))) # setup for parallel training samples = [negative, positive] choice = [choiceNegative, choicePositive] S_t = [initNegative, initPositive] finished = [False, False] ### BEGIN SUBROUTINE ### def felzenstep(x, cache, ind): case = ("positive" if ind > 0 else "negative") + " set" pred = self.predict(x, method=method) hard = np.where(pred != ind)[0] easy = np.setdiff1d(range(len(x)), hard) logger.debug(" {}: currently {} hard and {} easy samples".format( case, len(hard), len(easy))) # shrink the cache easyInCache = np.intersect1d(easy, cache) if len(easy) > 0 else [] if len(easyInCache) > 0: (removeFromCache, _, _, _) = _chooseRandomSubset( easyInCache, min(len(easyInCache), maxRemovePerStep)) cache = np.setdiff1d(cache, removeFromCache) logger.debug(" {}: shrunk the cache by {} elements".format( case, len(removeFromCache))) # grow the cache temp = len(cache) addToCache = _chooseRandomSubset( hard, min(len(hard), maxAddPerStep))[0] cache = np.union1d(cache, addToCache) addedHard = len(cache)-temp logger.debug(" {}: grown the cache by {} elements".format( case, addedHard)) if len(cache) > maxSamples: logger.debug( " {}: Cache to big, removing elements.".format(case)) cache = _chooseRandomSubset(cache, maxSamples)[0] # apply the cache C = x[cache] return (C, cache, addedHard == 0) ### END SUBROUTINE ### ### BEGIN PARALLELIZATION FUNCTION ### def partFun(i): (C, newChoice, newFinished) = felzenstep(samples[i], choice[i], i) S_t[i] = C choice[i] = newChoice finished[i] = newFinished ### END PARALLELIZATION FUNCTION ### for k in range(nTrainingSteps): logger.debug( "Felzenszwalb Training " + "(step {}/{}): {} hard negative samples, {}".format( k+1, nTrainingSteps, len(S_t[0]), len(S_t[1])) + "hard positive samples.") self.fit(S_t[0], S_t[1], method=method) pool = RequestPool() for i in range(len(S_t)): req = Request(partial(partFun, i)) pool.add(req) pool.wait() pool.clean() if np.all(finished): #already have all hard examples in training set break self.fit(S_t[0], S_t[1], method=method) logger.debug(" Finished Felzenszwalb Training.")
def execute(self, slot, subindex, slot_roi, target): assert slot == self.Features or slot == self.Output if slot == self.Features: feature_slice = roiToSlice(slot_roi.start, slot_roi.stop) index = subindex[0] feature_slice = list(feature_slice) # Translate channel slice of this feature to the channel slice of the output slot. output_channel_offset = self.featureOutputChannels[index][0] feature_slice[1] = slice( output_channel_offset + feature_slice[1].start, output_channel_offset + feature_slice[1].stop) slot_roi = SubRegion(self.Output, pslice=feature_slice) # Get output slot region for this channel return self.execute(self.Output, (), slot_roi, target) elif slot == self.Output: # Correlation of variable 'families' representing reference frames: # ______________________________ # | input/output frame | input/output shape given by slots # | _________________________ | # | | smooth frame | | pre-smoothing op needs halo around filter roi # | | ____________________ | | # | | |filter frame | | | filter needs halo around target roi # | | | _______________ | | | # | | | | target frame | | | | target is given by output_roi # note: The 'full_' variable prefix refers to the full 5D shape (tczyx), without 'full_' variables mostly # refer to the 3D space subregion (zyx) full_output_slice = slot_roi.toSlice() logger.debug( f"OpPixelFeaturesPresmoothed: request {slot_roi.pprint()}") assert (slot_roi.stop <= self.Output.meta.shape).all() full_output_shape = self.Output.meta.shape full_output_start, full_output_stop = sliceToRoi( full_output_slice, full_output_shape) assert len(full_output_shape) == 5 if all(self.ComputeIn2d.value ): # todo: check for this particular slice axes2enlarge = (0, 1, 1) else: axes2enlarge = (1, 1, 1) output_shape = full_output_shape[2:] output_start = full_output_start[2:] output_stop = full_output_stop[2:] axistags = self.Output.meta.axistags target = target.view(vigra.VigraArray) target.axistags = copy.copy(axistags) # filter roi in input frame # sigma = 0.7, because the features receive a pre-smoothed array and don't need much of a neighborhood input_filter_start, input_filter_stop = roi.enlargeRoiForHalo( output_start, output_stop, output_shape, 0.7, self.WINDOW_SIZE, enlarge_axes=axes2enlarge) # smooth roi in input frame input_smooth_start, input_smooth_stop = roi.enlargeRoiForHalo( input_filter_start, input_filter_stop, output_shape, self.max_sigma, self.WINDOW_SIZE, enlarge_axes=axes2enlarge, ) # target roi in filter frame filter_target_start = roi.TinyVector(output_start - input_filter_start) filter_target_stop = roi.TinyVector(output_stop - input_filter_start) # filter roi in smooth frame smooth_filter_start = roi.TinyVector(input_filter_start - input_smooth_start) smooth_filter_stop = roi.TinyVector(input_filter_stop - input_smooth_start) filter_target_slice = roi.roiToSlice(filter_target_start, filter_target_stop) input_smooth_slice = roi.roiToSlice(input_smooth_start, input_smooth_stop) # pre-smooth for all requested time slices and all channels full_input_smooth_slice = (full_output_slice[0], slice(None), *input_smooth_slice) req = self.Input[full_input_smooth_slice] source = req.wait() req.clean() req.destination = None if source.dtype != numpy.float32: sourceF = source.astype(numpy.float32) try: source.resize((1, ), refcheck=False) except Exception: pass del source source = sourceF sourceV = source.view(vigra.VigraArray) sourceV.axistags = copy.copy(self.Input.meta.axistags) dimCol = len(self.scales) dimRow = self.matrix.shape[0] presmoothed_source = [None] * 
dimCol source_smooth_shape = tuple(smooth_filter_stop - smooth_filter_start) full_source_smooth_shape = ( full_output_stop[0] - full_output_start[0], self.Input.meta.shape[1], ) + source_smooth_shape try: for j in range(dimCol): for i in range(dimRow): if self.matrix[i, j]: # There is at least one filter op with this scale break else: # There is no filter op at this scale continue if self.scales[j] > 1.0: tempSigma = math.sqrt(self.scales[j]**2 - 1.0) else: tempSigma = self.scales[j] presmoothed_source[j] = numpy.ndarray( full_source_smooth_shape, numpy.float32) droi = ( (0, *tuple(smooth_filter_start._asint())), (sourceV.shape[1], *tuple(smooth_filter_stop._asint())), ) for i, vsa in enumerate(sourceV.timeIter()): presmoothed_source[j][ i, ...] = self._computeGaussianSmoothing( vsa, tempSigma, droi, in2d=self.ComputeIn2d.value[j]) except RuntimeError as e: if "kernel longer than line" in str(e): raise RuntimeError( "Feature computation error:\nYour image is too small to apply a filter with " f"sigma={self.scales[j]:.1f}. Please select features with smaller sigmas." ) else: raise e del sourceV try: source.resize((1, ), refcheck=False) except ValueError: # Sometimes this fails, but that's okay. logger.debug("Failed to free array memory.") del source cnt = 0 written = 0 closures = [] # connect individual operators for i in range(dimRow): for j in range(dimCol): if self.matrix[i, j]: oslot = self.featureOps[i][j].Output req = None slices = oslot.meta.shape[1] if (cnt + slices >= slot_roi.start[1] and slot_roi.start[1] - cnt < slices and slot_roi.start[1] + written < slot_roi.stop[1]): begin = 0 if cnt < slot_roi.start[1]: begin = slot_roi.start[1] - cnt end = slices if cnt + end > slot_roi.stop[1]: end = slot_roi.stop[1] - cnt # feature slice in output frame feature_slice = (slice(None), slice( written, written + end - begin)) + (slice(None), ) * 3 subtarget = target[feature_slice] # readjust the roi for the new source array full_filter_target_slice = [ full_output_slice[0], slice(begin, end), *filter_target_slice ] filter_target_roi = SubRegion( oslot, pslice=full_filter_target_slice) closure = partial( oslot.operator.execute, oslot, (), filter_target_roi, subtarget, sourceArray=presmoothed_source[j], ) closures.append(closure) written += end - begin cnt += slices pool = RequestPool() for c in closures: pool.request(c) pool.wait() pool.clean() for i in range(len(presmoothed_source)): if presmoothed_source[i] is not None: try: presmoothed_source[i].resize((1, )) except Exception: presmoothed_source[i] = None
def execute(self, slot, subindex, roi, result):
    featMatrix = []
    labelsMatrix = []
    for i in range(len(self.Labels)):
        feats = self.Features[i]([]).wait()

        # TODO: we should be able to use self.Labels[i].value,
        # but the current implementation of Slot.value() does not
        # do the right thing.
        labels = self.Labels[i]([]).wait()

        for t in sorted(feats.keys()):
            featsMatrix_tmp = []
            labelsMatrix_tmp = []
            lab = labels[t].squeeze()
            index = numpy.nonzero(lab)
            labelsMatrix_tmp.append(lab[index])

            # check that all requested features are present
            for featname in config.selected_features:
                for channel in feats[t]:
                    if featname not in channel:
                        print("Feature", featname, "has not been computed in the previous step")
                        print("We only have the following features now:", list(channel.keys()))
                        result[:] = None
                        return
                    else:
                        value = channel[featname]
                        ft = numpy.asarray(value.squeeze())
                        featsMatrix_tmp.append(ft[index])

            featMatrix.append(_concatenate(featsMatrix_tmp, axis=1))
            labelsMatrix.append(_concatenate(labelsMatrix_tmp, axis=1))

    featMatrix = _concatenate(featMatrix, axis=0)
    labelsMatrix = _concatenate(labelsMatrix, axis=0)
    print("training on matrix:", featMatrix.shape)

    if len(featMatrix) == 0 or len(labelsMatrix) == 0:
        result[:] = None
        return
    oob = [0] * self.ForestCount.value
    try:
        # Ensure there are no NaNs in the feature matrix
        # TODO: There should probably be a better way to fix this...
        featMatrix = numpy.asarray(featMatrix, dtype=numpy.float32)
        nanFeatMatrix = numpy.isnan(featMatrix)
        if nanFeatMatrix.any():
            warnings.warn("Feature matrix has NaN values! Replacing with 0.0...")
            featMatrix[numpy.where(nanFeatMatrix)] = 0.0

        # train and store forests in parallel
        pool = RequestPool()
        for i in range(self.ForestCount.value):
            def train_and_store(number):
                result[number] = vigra.learning.RandomForest(self._tree_count)
                oob[number] = result[number].learnRF(featMatrix, numpy.asarray(labelsMatrix, dtype=numpy.uint32))
                print("intermediate oob:", oob[number])
            req = Request(partial(train_and_store, i))
            pool.add(req)
        pool.wait()
        pool.clean()
    except Exception:
        print("couldn't learn classifier")
        raise
    oob_total = numpy.mean(oob)
    print("training finished, out of bag error:", oob_total)
    return result
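# For reference, a small self-contained example of the indexing pattern used above: labeled voxels
# are located with numpy.nonzero, and the same index tuple pulls the matching feature vectors.
# The array names and shapes are made up for illustration.
import numpy

labels = numpy.zeros((4, 4), dtype=numpy.uint32)
labels[1, 2] = 1
labels[3, 0] = 2
features = numpy.random.rand(4, 4, 3).astype(numpy.float32)  # 3 feature channels

index = numpy.nonzero(labels)    # coordinates of labeled voxels
label_vector = labels[index]     # shape (n_labeled,)
feature_rows = features[index]   # shape (n_labeled, 3)
print(label_vector.shape, feature_rows.shape)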
def _executeOutput(self, slot, subindex, roi, result):
    t = time.time()
    key = roi.toSlice()
    shape = self.Output.meta.shape
    start, stop = sliceToRoi(key, shape)

    self._lock.acquire()
    ch = self._cacheHits
    ch += 1
    self._cacheHits = ch
    self._running += 1
    if self._cache is None:
        self._allocateCache()
    cacheView = self._cache[:]  # prevent freeing of cache while this function is running

    blockStart = (1.0 * start / self._blockShape).floor()
    blockStop = (1.0 * stop / self._blockShape).ceil()
    blockKey = roiToSlice(blockStart, blockStop)
    blockSet = self._blockState[blockKey]

    # this is a little optimization to shortcut
    # many lines of python code when all data is
    # already in the cache:
    if numpy.logical_or(blockSet == OpArrayCache.CLEAN, blockSet == OpArrayCache.FIXED_DIRTY).all():
        result[:] = self._cache[roiToSlice(start, stop)]
        self._running -= 1
        self._updatePriority()
        cacheView = None
        self._lock.release()
        return

    inProcessQueries = numpy.unique(
        numpy.extract(blockSet == OpArrayCache.IN_PROCESS, self._blockQuery[blockKey]))

    cond = (blockSet == OpArrayCache.DIRTY)
    tileWeights = fastWhere(cond, 1, 128**3, numpy.uint32)
    trueDirtyIndices = numpy.nonzero(cond)

    tileArray = drtile.test_DRTILE(tileWeights, 128**3).swapaxes(0, 1)

    dirtyRois = []
    half = tileArray.shape[0] // 2
    dirtyPool = RequestPool()

    for i in range(tileArray.shape[1]):
        drStart3 = tileArray[:half, i]
        drStop3 = tileArray[half:, i]
        drStart2 = drStart3 + blockStart
        drStop2 = drStop3 + blockStart
        drStart = drStart2 * self._blockShape
        drStop = drStop2 * self._blockShape

        shape = self.Output.meta.shape
        drStop = numpy.minimum(drStop, shape)
        drStart = numpy.minimum(drStart, shape)

        key3 = roiToSlice(drStart3, drStop3)
        key2 = roiToSlice(drStart2, drStop2)
        key = roiToSlice(drStart, drStop)

        if not self._fixed:
            dirtyRois.append([drStart, drStop])

            req = self.inputs["Input"][key].writeInto(self._cache[key])
            req.uncancellable = True  # FIXME
            dirtyPool.add(req)

            self._blockQuery[key2] = weakref.ref(req)

            # sanity check:
            if (self._blockState[key2] != OpArrayCache.DIRTY).any():
                logger.warning("original condition" + str(cond))
                logger.warning("original tilearray {} {}".format(tileArray, tileArray.shape))
                logger.warning("original tileWeights {} {}".format(tileWeights, tileWeights.shape))
                logger.warning("sub condition {}".format(self._blockState[key2] == OpArrayCache.DIRTY))
                logger.warning("START={}, STOP={}".format(drStart2, drStop2))
                import h5py
                with h5py.File("test.h5", "w") as f:
                    f.create_dataset("data", data=tileWeights)
                logger.warning("%r \n %r \n %r\n %r\n %r \n%r" % (
                    key2, blockKey, self._blockState[key2],
                    self._blockState[blockKey][trueDirtyIndices],
                    self._blockState[blockKey], tileWeights))
                assert False
            self._blockState[key2] = OpArrayCache.IN_PROCESS

    # indicate the in-process state, by setting array to 0 (i.e. IN_PROCESS)
    if not self._fixed:
        blockSet[:] = fastWhere(cond, OpArrayCache.IN_PROCESS, blockSet, numpy.uint8)
    else:
        # Someone asked for some dirty blocks while we were fixed.
        # Mark these blocks to be signaled as dirty when we become unfixed
        blockSet[:] = fastWhere(cond, OpArrayCache.FIXED_DIRTY, blockSet, numpy.uint8)
        self._has_fixed_dirty_blocks = True
    self._lock.release()

    temp = itertools.count(0)

    # wait for all requests to finish
    dirtyPool.wait()
    if len(dirtyPool) > 0:
        # Signal that something was updated.
        # Note that we don't need to do this for the 'in process' queries (below)
        # because they are already in the dirtyPool in some other thread
        self.Output._sig_value_changed()
    dirtyPool.clean()

    # indicate the finished in-process state (i.e. CLEAN)
    if not self._fixed and next(temp) == 0:
        with self._lock:
            blockSet[:] = fastWhere(cond, OpArrayCache.CLEAN, blockSet, numpy.uint8)
            self._blockQuery[blockKey] = fastWhere(cond, None, self._blockQuery[blockKey], object)

    inProcessPool = RequestPool()
    # wait for all in-process queries
    for req in inProcessQueries:
        req = req()  # get original req object from weakref
        if req is not None:
            inProcessPool.add(req)
    inProcessPool.wait()
    inProcessPool.clean()

    # finally, store results in result area
    self._lock.acquire()
    if self._cache is not None:
        result[:] = self._cache[roiToSlice(start, stop)]
    else:
        self.inputs["Input"][roiToSlice(start, stop)].writeInto(result).wait()
    self._running -= 1
    self._updatePriority()
    cacheView = None
    self._lock.release()
    self.logger.debug("read %s took %f sec." % (roi.pprint(), time.time() - t))
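# A standalone sketch of the block bookkeeping above: a pixel ROI is mapped to block coordinates by
# flooring the start and ceiling the stop against the block shape. This mirrors the intent of the
# (1.0 * start / self._blockShape).floor()/.ceil() lines; it is not OpArrayCache itself, and the
# helper name below is made up.
import numpy


def pixel_roi_to_block_roi(start, stop, block_shape):
    """Illustrative: which cache blocks does the pixel ROI [start, stop) touch?"""
    start = numpy.asarray(start)
    stop = numpy.asarray(stop)
    block_shape = numpy.asarray(block_shape)
    block_start = start // block_shape     # floor division
    block_stop = -(-stop // block_shape)   # ceiling via negated floor division
    return block_start, block_stop


# a request for pixels [10:70, 250:260) with 64x64 blocks touches blocks [0:2, 3:5)
print(pixel_roi_to_block_roi((10, 250), (70, 260), (64, 64)))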
def execute(self, slot, subindex, roi, result):
    featList = []
    all_col_names = []
    labelsList = []

    # will be available at slot self.Warnings
    all_bad_objects = defaultdict(lambda: defaultdict(list))
    all_bad_feats = set()

    selected = self.SelectedFeatures([]).wait()
    if len(selected) == 0:
        # no features - no predictions
        self.Classifier.setValue(None)
        return

    for i in range(len(self.Labels)):
        # FIXME: we should only compute the features if there are nonzero labels in this image
        feats = self.Features[i]([]).wait()

        # TODO: we should be able to use self.Labels[i].value,
        # but the current implementation of Slot.value() does not
        # do the right thing.
        labels = self.Labels[i]([]).wait()

        featstmp, row_names, col_names, labelstmp = make_feature_array(feats, selected, labels)
        if labelstmp.size == 0 or featstmp.size == 0:
            continue

        rows, cols = replace_missing(featstmp)

        featList.append(featstmp)
        all_col_names.append(tuple(col_names))
        labelsList.append(labelstmp)

        for idx in rows:
            t, obj = row_names[idx]
            all_bad_objects[i][t].append(obj)

        for c in cols:
            all_bad_feats.add(col_names[c])

    if len(labelsList) == 0:
        # no labels, return here
        self.Classifier.setValue(None)
        return

    self._warnBadObjects(all_bad_objects, all_bad_feats)

    if not len(set(all_col_names)) == 1:
        raise Exception('different time slices did not have the same features.')

    featMatrix = _concatenate(featList, axis=0)
    labelsMatrix = _concatenate(labelsList, axis=0)

    logger.info("training on matrix of shape {}".format(featMatrix.shape))

    if featMatrix.size == 0 or labelsMatrix.size == 0:
        result[:] = None
        return
    oob = [0] * self.ForestCount.value
    try:
        # train and store forests in parallel
        pool = RequestPool()
        for i in range(self.ForestCount.value):
            def train_and_store(number):
                result[number] = vigra.learning.RandomForest(self._tree_count)
                oob[number] = result[number].learnRF(
                    featMatrix.astype(numpy.float32),
                    numpy.asarray(labelsMatrix, dtype=numpy.uint32))
            req = Request(partial(train_and_store, i))
            pool.add(req)
        pool.wait()
        pool.clean()
    except Exception:
        logger.warning("couldn't learn classifier")
        raise
    oob_total = numpy.mean(oob)
    logger.info("training finished, out of bag error: {}".format(oob_total))
    return result
def _executeOutput(self, slot, subindex, roi, result):
    t = time.time()
    key = roi.toSlice()
    shape = self.Output.meta.shape
    start, stop = sliceToRoi(key, shape)

    with self._lock:
        ch = self._cacheHits
        ch += 1
        self._cacheHits = ch
        self._running += 1
        if self._cache is None:
            self._allocateCache()
        cacheView = self._cache[:]  # prevent freeing of cache while this function is running

        blockStart = (1.0 * start / self._blockShape).floor()
        blockStop = (1.0 * stop / self._blockShape).ceil()
        blockKey = roiToSlice(blockStart, blockStop)
        blockSet = self._blockState[blockKey]

        # this is a little optimization to shortcut
        # many lines of python code when all data is
        # already in the cache:
        if numpy.logical_or(blockSet == OpArrayCache.CLEAN, blockSet == OpArrayCache.FIXED_DIRTY).all():
            result[:] = self._cache[roiToSlice(start, stop)]
            self._running -= 1
            self._updatePriority()
            cacheView = None
            return

        inProcessQueries = numpy.unique(
            numpy.extract(blockSet == OpArrayCache.IN_PROCESS, self._blockQuery[blockKey]))

        cond = (blockSet == OpArrayCache.DIRTY)
        tileWeights = fastWhere(cond, 1, 128**3, numpy.uint32)
        trueDirtyIndices = numpy.nonzero(cond)

        tileArray = drtile.test_DRTILE(tileWeights, 128**3).swapaxes(0, 1)

        dirtyRois = []
        half = tileArray.shape[0] // 2
        dirtyPool = RequestPool()

        for i in range(tileArray.shape[1]):
            drStart3 = tileArray[:half, i]
            drStop3 = tileArray[half:, i]
            drStart2 = drStart3 + blockStart
            drStop2 = drStop3 + blockStart
            drStart = drStart2 * self._blockShape
            drStop = drStop2 * self._blockShape

            shape = self.Output.meta.shape
            drStop = numpy.minimum(drStop, shape)
            drStart = numpy.minimum(drStart, shape)

            key3 = roiToSlice(drStart3, drStop3)
            key2 = roiToSlice(drStart2, drStop2)
            key = roiToSlice(drStart, drStop)

            if not self._fixed:
                dirtyRois.append([drStart, drStop])

                req = self.inputs["Input"][key].writeInto(self._cache[key])
                req.uncancellable = True  # FIXME
                dirtyPool.add(req)

                self._blockQuery[key2] = weakref.ref(req)

                # sanity check:
                if (self._blockState[key2] != OpArrayCache.DIRTY).any():
                    logger.warning("original condition" + str(cond))
                    logger.warning("original tilearray {} {}".format(tileArray, tileArray.shape))
                    logger.warning("original tileWeights {} {}".format(tileWeights, tileWeights.shape))
                    logger.warning("sub condition {}".format(self._blockState[key2] == OpArrayCache.DIRTY))
                    logger.warning("START={}, STOP={}".format(drStart2, drStop2))
                    import h5py
                    with h5py.File("test.h5", "w") as f:
                        f.create_dataset("data", data=tileWeights)
                    logger.warning("%r \n %r \n %r\n %r\n %r \n%r" % (
                        key2, blockKey, self._blockState[key2],
                        self._blockState[blockKey][trueDirtyIndices],
                        self._blockState[blockKey], tileWeights))
                    assert False
                self._blockState[key2] = OpArrayCache.IN_PROCESS

        # indicate the in-process state, by setting array to 0 (i.e. IN_PROCESS)
        if not self._fixed:
            blockSet[:] = fastWhere(cond, OpArrayCache.IN_PROCESS, blockSet, numpy.uint8)
        else:
            # Someone asked for some dirty blocks while we were fixed.
            # Mark these blocks to be signaled as dirty when we become unfixed
            blockSet[:] = fastWhere(cond, OpArrayCache.FIXED_DIRTY, blockSet, numpy.uint8)
            self._has_fixed_dirty_blocks = True

    temp = itertools.count(0)

    # wait for all requests to finish
    dirtyPool.wait()
    if len(dirtyPool) > 0:
        # Signal that something was updated.
        # Note that we don't need to do this for the 'in process' queries (below)
        # because they are already in the dirtyPool in some other thread
        self.Output._sig_value_changed()
    dirtyPool.clean()

    # indicate the finished in-process state (i.e. CLEAN)
    if not self._fixed and next(temp) == 0:
        with self._lock:
            blockSet[:] = fastWhere(cond, OpArrayCache.CLEAN, blockSet, numpy.uint8)
            self._blockQuery[blockKey] = fastWhere(cond, None, self._blockQuery[blockKey], object)

    inProcessPool = RequestPool()
    # wait for all in-process queries
    for req in inProcessQueries:
        req = req()  # get original req object from weakref
        if req is not None:
            inProcessPool.add(req)
    inProcessPool.wait()
    inProcessPool.clean()

    # finally, store results in result area
    with self._lock:
        if self._cache is not None:
            result[:] = self._cache[roiToSlice(start, stop)]
        else:
            self.inputs["Input"][roiToSlice(start, stop)].writeInto(result).wait()
        self._running -= 1
        self._updatePriority()
        cacheView = None
    self.logger.debug("read %s took %f sec." % (roi.pprint(), time.time() - t))
def execute(self, slot, subindex, roi, result):
    progress = 0
    self.progressSignal(progress)
    numImages = len(self.Images)

    key = roi.toSlice()
    featMatrix = []
    labelsMatrix = []
    for i, labels in enumerate(self.inputs["Labels"]):
        if labels.meta.shape is not None:
            #labels=labels[:].wait()
            blocks = self.inputs["nonzeroLabelBlocks"][i][0].wait()

            progress += 10 / numImages
            self.progressSignal(progress)

            reqlistlabels = []
            reqlistfeat = []
            traceLogger.debug("Sending requests for {} non-zero blocks (labels and data)".format(len(blocks[0])))
            for b in blocks[0]:
                request = labels[b]
                featurekey = list(b)
                featurekey[-1] = slice(None, None, None)
                request2 = self.inputs["Images"][i][featurekey]

                reqlistlabels.append(request)
                reqlistfeat.append(request2)

            traceLogger.debug("Requests prepared")

            numLabelBlocks = len(reqlistlabels)
            progress_outer = [progress]  # Store in list for closure access
            if numLabelBlocks > 0:
                progressInc = (80 - 10) / numLabelBlocks / numImages

            def progressNotify(req):
                # Note: If we wanted perfect progress reporting, we could use a lock here
                # to protect the progress from being incremented simultaneously.
                # But that would slow things down and imperfect reporting is okay for our purposes.
                progress_outer[0] += progressInc / 2
                self.progressSignal(progress_outer[0])

            for req in reqlistfeat:
                req.notify_finished(progressNotify)

            for req in reqlistlabels:
                req.notify_finished(progressNotify)

            traceLogger.debug("Requests fired")

            for ir, req in enumerate(reqlistlabels):
                traceLogger.debug("Waiting for a label block...")
                labblock = req.wait()

                traceLogger.debug("Waiting for an image block...")
                image = reqlistfeat[ir].wait()

                indexes = numpy.nonzero(labblock[..., 0].view(numpy.ndarray))
                features = image[indexes]
                labbla = labblock[indexes]

                featMatrix.append(features)
                labelsMatrix.append(labbla)

            progress = progress_outer[0]
            traceLogger.debug("Requests processed")

    self.progressSignal(80 / numImages)

    if len(featMatrix) == 0 or len(labelsMatrix) == 0:
        # If there was no actual data for the random forest to train with, we return None
        result[:] = None
    else:
        featMatrix = numpy.concatenate(featMatrix, axis=0)
        labelsMatrix = numpy.concatenate(labelsMatrix, axis=0)

        maxLabel = self.inputs["MaxLabel"].value
        labelList = list(range(1, maxLabel + 1)) if maxLabel > 0 else list()

        try:
            logger.debug("Learning with Vigra...")
            # train and store self._forest_count forests in parallel
            pool = RequestPool()
            for i in range(self._forest_count):
                def train_and_store(number):
                    result[number] = vigra.learning.RandomForest(self._tree_count, labels=labelList)
                    result[number].learnRF(
                        numpy.asarray(featMatrix, dtype=numpy.float32),
                        numpy.asarray(labelsMatrix, dtype=numpy.uint32))
                req = pool.request(partial(train_and_store, i))
            pool.wait()
            pool.clean()
            logger.debug("Vigra finished")
        except Exception:
            logger.error("ERROR: could not learn classifier")
            logger.error("featMatrix shape={}, max={}, dtype={}".format(
                featMatrix.shape, featMatrix.max(), featMatrix.dtype))
            logger.error("labelsMatrix shape={}, max={}, dtype={}".format(
                labelsMatrix.shape, labelsMatrix.max(), labelsMatrix.dtype))
            raise
        finally:
            self.progressSignal(100)

    return result
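# A reduced sketch of the parallel-training pattern shared by the training operators above.
# It assumes lazyflow's RequestPool behaves as used in this file (lazyflow.request.RequestPool)
# and that vigra.learning.RandomForest/learnRF are available; the array shapes, tree count, and
# forest count are made-up example values, not values from the real operators.
import numpy
import vigra
from functools import partial
from lazyflow.request import RequestPool

features = numpy.random.rand(100, 5).astype(numpy.float32)
labels = numpy.random.randint(1, 3, size=(100, 1)).astype(numpy.uint32)

forest_count = 4
forests = [None] * forest_count
oob = [0.0] * forest_count


def train_one(number):
    # each request trains an independent forest and records its out-of-bag error
    forests[number] = vigra.learning.RandomForest(25)
    oob[number] = forests[number].learnRF(features, labels)


pool = RequestPool()
for i in range(forest_count):
    pool.request(partial(train_one, i))
pool.wait()
pool.clean()
print("mean out-of-bag error:", numpy.mean(oob))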
def execute(self, slot, subindex, roi, result):
    assert slot == self.Predictions or slot == self.Probabilities or slot == self.ProbabilityChannels
    times = roi._l
    if len(times) == 0:
        # we assume that 0-length requests are requesting everything
        times = range(self.Predictions.meta.shape[0])

    forests = self.inputs["Classifier"][:].wait()
    if forests is None or forests[0] is None:
        # this happens if there was no data to train with
        return dict((t, numpy.array([])) for t in times)

    feats = {}
    prob_predictions = {}
    for t in times:
        if t in self.prob_cache:
            continue

        tmpfeats = self.Features([t]).wait()
        feats[t] = make_feature_array(tmpfeats)
        prob_predictions[t] = [0] * len(forests)

    def predict_forest(_t, forest_index):
        # Note: We can't use RandomForest.predictLabels() here because we're training in parallel,
        # and we have to average the PROBABILITIES from all forests.
        # Averaging the label predictions from each forest is NOT equivalent.
        # For details please see wikipedia:
        # http://en.wikipedia.org/wiki/Electoral_College_%28United_States%29#Irrelevancy_of_national_popular_vote
        # (^-^)
        prob_predictions[_t][forest_index] = forests[forest_index].predictProbabilities(feats[_t])

    # predict the data with all the forests in parallel
    pool = RequestPool()
    for t in times:
        if t in self.prob_cache:
            continue
        for i, f in enumerate(forests):
            req = Request(partial(predict_forest, t, i))
            pool.add(req)

    pool.wait()
    pool.clean()

    for t in times:
        if t not in self.prob_cache:
            # prob_predictions is a dict-of-lists-of-arrays, indexed as follows:
            # prob_predictions[t][forest_index][object_index, class_index]

            # Stack the forests together and average them.
            stacked_predictions = numpy.array(prob_predictions[t])
            averaged_predictions = numpy.average(stacked_predictions, axis=0)
            assert averaged_predictions.shape[0] == len(feats[t])
            self.prob_cache[t] = averaged_predictions

            self.prob_cache[t][0] = 0  # Background probability is always zero

    if slot == self.Probabilities:
        return {t: self.prob_cache[t] for t in times}
    elif slot == self.Predictions:
        # FIXME: Support SegmentationThreshold again...
        labels = {t: 1 + numpy.argmax(self.prob_cache[t], axis=1) for t in times}
        for t in times:
            labels[t][0] = 0  # Background gets the zero label
        return labels
    elif slot == self.ProbabilityChannels:
        prob_single_channel = {t: self.prob_cache[t][:, subindex[0]] for t in times}
        return prob_single_channel
    else:
        assert False, "Unknown input slot"
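# The averaging step above in isolation: per-forest probability arrays of shape
# (n_objects, n_classes) are stacked along a new forest axis and averaged over it.
# Plain numpy; the numbers below are made up.
import numpy

# two forests, three objects, two classes
per_forest = [
    numpy.array([[0.9, 0.1], [0.4, 0.6], [0.2, 0.8]]),
    numpy.array([[0.7, 0.3], [0.6, 0.4], [0.4, 0.6]]),
]
stacked = numpy.array(per_forest)           # shape (n_forests, n_objects, n_classes)
averaged = numpy.average(stacked, axis=0)   # shape (n_objects, n_classes)
predicted_labels = 1 + numpy.argmax(averaged, axis=1)  # labels start at 1; 0 is background
print(averaged)
print(predicted_labels)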
def _execute_graphcut(self, roi, result):
    for i in (0, 4):
        assert roi.stop[i] - roi.start[i] == 1,\
            "Invalid roi for graph-cut: {}".format(str(roi))
    t = roi.start[0]
    c = roi.start[4]

    margin = self.Margin.value
    beta = self.Beta.value
    MAXBOXSIZE = 10000000  # FIXME justification??

    ## request the bounding box coordinates ##
    # the trailing index brackets give us the dictionary (instead of an
    # array of size 1)
    feats = self.BoundingBoxes.get(roi).wait()
    mins = feats["Coord<Minimum>"]
    maxs = feats["Coord<Maximum>"]
    nobj = mins.shape[0]
    # these are indices, so they should have an index datatype
    mins = mins.astype(np.uint32)
    maxs = maxs.astype(np.uint32)

    ## request the prediction image ##
    pred = self.Prediction.get(roi).wait()
    pred = vigra.taggedView(pred, axistags=self.Prediction.meta.axistags)
    pred = pred.withAxes(*'xyz')

    ## request the connected components image ##
    cc = self.LabelImage.get(roi).wait()
    cc = vigra.taggedView(cc, axistags=self.LabelImage.meta.axistags)
    cc = cc.withAxes(*'xyz')

    # provide xyz view for the output (just need 8 bit for segmentation)
    resultXYZ = vigra.taggedView(np.zeros(cc.shape, dtype=np.uint8),
                                 axistags='xyz')

    def processSingleObject(i):
        logger.debug("processing object {}".format(i))
        # maxs are inclusive, so we need to add 1
        xmin = max(mins[i][0] - margin[0], 0)
        ymin = max(mins[i][1] - margin[1], 0)
        zmin = max(mins[i][2] - margin[2], 0)
        xmax = min(maxs[i][0] + margin[0] + 1, cc.shape[0])
        ymax = min(maxs[i][1] + margin[1] + 1, cc.shape[1])
        zmax = min(maxs[i][2] + margin[2] + 1, cc.shape[2])
        ccbox = cc[xmin:xmax, ymin:ymax, zmin:zmax]
        resbox = resultXYZ[xmin:xmax, ymin:ymax, zmin:zmax]

        nVoxels = ccbox.size
        if nVoxels > MAXBOXSIZE:
            # problem too large to run graph cut, assign to seed
            logger.warning("Object {} too large for graph cut.".format(i))
            resbox[ccbox == i] = 1
            return

        probbox = pred[xmin:xmax, ymin:ymax, zmin:zmax]
        gcsegm = segmentGC(probbox, beta)
        gcsegm = vigra.taggedView(gcsegm, axistags='xyz')
        ccsegm = vigra.analysis.labelVolumeWithBackground(
            gcsegm.astype(np.uint8))

        # Extended bboxes of different objects might overlap.
        # To avoid conflicting segmentations, we find all connected
        # components in the results and only take the one which
        # overlaps with the object "core" or "seed", defined by the
        # pre-thresholding
        seed = ccbox == i
        filtered = seed * ccsegm
        passed = vigra.analysis.unique(filtered.astype(np.uint32))
        assert len(passed.shape) == 1

        if passed.size > 2:
            logger.warning("ambiguous label assignment for region {}".format(
                (xmin, xmax, ymin, ymax, zmin, zmax)))
            resbox[ccbox == i] = 1
        elif passed.size <= 1:
            logger.warning("box {} segmented out with beta {}".format(i, beta))
        else:
            # assign to the overlap region
            label = passed[1]  # 0 is background
            resbox[ccsegm == label] = 1

    pool = RequestPool()
    # FIXME make sure that the parallel computations fit into memory
    for i in range(1, nobj):
        req = Request(functools.partial(processSingleObject, i))
        pool.add(req)

    logger.info("Processing {} objects ...".format(nobj - 1))

    pool.wait()
    pool.clean()

    logger.info("object loop done")

    # prepare result
    resView = vigra.taggedView(result, axistags=self.Output.meta.axistags)
    resView = resView.withAxes(*'xyz')

    # some labels could have been removed => relabel
    vigra.analysis.labelVolumeWithBackground(resultXYZ, out=resView)
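# The per-object bounding-box handling above, as a tiny standalone helper: grow the (inclusive)
# min/max coordinates by a margin, add 1 to make the stop exclusive, and clamp to the volume shape.
# The function and argument names are illustrative, not part of the operator.
import numpy as np


def expand_and_clip_bbox(obj_min, obj_max, margin, shape):
    """Illustrative: inclusive per-object bbox -> clipped [start, stop) slices."""
    obj_min = np.asarray(obj_min, dtype=np.int64)
    obj_max = np.asarray(obj_max, dtype=np.int64)
    start = np.maximum(obj_min - margin, 0)
    stop = np.minimum(obj_max + margin + 1, shape)  # +1: max coordinate is inclusive
    return tuple(slice(a, b) for a, b in zip(start, stop))


# an object spanning [5..10] x [7..9] x [0..3] with a (2, 2, 2) margin in a 32^3 volume
print(expand_and_clip_bbox((5, 7, 0), (10, 9, 3), np.array([2, 2, 2]), (32, 32, 32)))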