def execute(self, slot, subindex, roi, result):
    assert self._h5N5File is not None

    # Read the desired data directly from the hdf5File
    key = roi.toSlice()
    h5N5File = self._h5N5File
    internalPath = self.InternalPath.value

    timer = None
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug(f"Reading HDF5/N5 block: [{roi.start}, {roi.stop}]")
        timer = Timer()
        timer.unpause()

    if result.flags.c_contiguous:
        # read_direct avoids an intermediate copy, but requires a
        # C-contiguous destination buffer.
        h5N5File[internalPath].read_direct(result[...], key)
    else:
        result[...] = h5N5File[internalPath][key]

    if timer:
        timer.pause()
        logger.debug(f"Completed HDF5 read in {timer.seconds()} seconds: [{roi.start}, {roi.stop}]")
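# A minimal sketch (file path and dataset name are hypothetical) of why the
# contiguity check above matters: h5py's Dataset.read_direct() writes into a
# preallocated destination buffer, skipping the temporary array that plain
# slicing creates, but the destination should be C-contiguous.
import h5py
import numpy

with h5py.File("/tmp/example.h5", "w") as f:
    f.create_dataset("data", data=numpy.arange(100, dtype=numpy.uint8).reshape(10, 10))

with h5py.File("/tmp/example.h5", "r") as f:
    dest = numpy.empty((5, 10), dtype=numpy.uint8)
    assert dest.flags.c_contiguous
    f["data"].read_direct(dest, numpy.s_[0:5, :])  # no intermediate copy

    # A strided (non-contiguous) destination has to go through plain
    # slicing instead, as in the else-branch above:
    strided = numpy.empty((10, 10), dtype=numpy.uint8)[:, ::2]
    strided[...] = f["data"][0:10, 0:5]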
def execute(self, slot, subindex, roi, result):
    assert all(roi.stop <= self.Input.meta.shape), \
        "Requested roi {} is too large for this input image of shape {}.".format(roi, self.Input.meta.shape)

    # Determine how much input data we'll need, and where the result will be
    # relative to that input roi.
    # inputRoi is a 5d roi; computeRoi depends on the number of singletons
    # in shape, but is at most 3d.
    inputRoi, computeRoi = self._getInputComputeRois(roi)

    # Obtain the input data
    with Timer() as resultTimer:
        data = self.Input(*inputRoi).wait()
    logger.debug("Obtaining input data took {} seconds for roi {}".format(resultTimer.seconds(), inputRoi))

    data = vigra.taggedView(data, axistags='tzyxc')  # input is in tzyxc order
    tIndex = 0
    cIndex = 4

    # Must be float32
    if data.dtype != numpy.float32:
        data = data.astype(numpy.float32)

    # We need to remove singleton spatial axes, otherwise we get
    # 'kernel longer than line' errors
    ts = self.Input.meta.getTaggedShape()
    tags = [k for k in 'zyx' if ts[k] > 1]
    sigma = [self._sigmas[k] for k in tags]

    # Check if we need to smooth
    if any(x < 0.1 for x in sigma):
        # Just pipe the input through
        result[...] = data
        return

    for i, t in enumerate(range(roi.start[tIndex], roi.stop[tIndex])):
        for j, c in enumerate(range(roi.start[cIndex], roi.stop[cIndex])):
            # Prepare the result as an argument
            resview = vigra.taggedView(result[i, ..., j], axistags='zyx')
            dataview = data[i, ..., j]

            # TODO make this general, not just for z axis
            resview = resview.withAxes(*tags)
            dataview = dataview.withAxes(*tags)

            # Smooth the input data
            vigra.filters.gaussianSmoothing(dataview, sigma, window_size=2.0, roi=computeRoi, out=resview)
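# A minimal sketch (hypothetical shapes) of the singleton-axis handling above:
# vigra's separable Gaussian raises "kernel longer than line" when a kernel is
# applied along an axis of length 1, so singleton axes are dropped with
# withAxes() before filtering.
import numpy
import vigra

vol = vigra.taggedView(numpy.random.rand(1, 64, 64).astype(numpy.float32), axistags='zyx')
tags = [k for k, s in zip('zyx', vol.shape) if s > 1]   # -> ['y', 'x']
flat = vol.withAxes(*tags)                              # singleton z removed
smoothed = vigra.filters.gaussianSmoothing(flat, [2.0, 2.0], window_size=2.0)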
def execute(self, slot, subindex, roi, result):
    edge_probabilities = self.EdgeProbabilities.value
    rag = self.Rag.value
    beta = self.Beta.value
    solver_name = self.SolverName.value

    with Timer() as timer:
        agglomerated_labels = self.agglomerate_with_multicut(rag, edge_probabilities, beta, solver_name)
    logger.info("'{}' Multicut took {} seconds".format(solver_name, timer.seconds()))

    result[:] = agglomerated_labels[..., None]
def _train_forests_with_feature_importance(forests, X, y, feature_names, export_path=None):
    """
    Train all RFs (in parallel) and compute feature importances while doing so.

    The importances table will be logged as INFO, and also exported to a file
    if export_path is given.

    Returns: oobs and importances
    """
    oobs = [None] * len(forests)
    importances = [None] * len(forests)

    def store_training_results(i, training_results):
        oob, importance_results = training_results
        oobs[i] = oob
        importances[i] = importance_results

    with Timer() as train_timer:
        pool = RequestPool()
        for i, forest in enumerate(forests):
            req = Request(partial(forest.learnRFWithFeatureSelection, X, y))
            # save the training results
            req.notify_finished(partial(store_training_results, i))
            pool.add(req)
        pool.wait()
    logger.info("Training took {} seconds".format(train_timer.seconds()))

    # Forests may have different numbers of trees,
    # so take a weighted average of their importances
    tree_counts = [f.treeCount() for f in forests]
    weights = numpy.array(tree_counts).astype(float)
    weights /= weights.sum()

    named_importances = collections.OrderedDict(
        zip(feature_names, numpy.average(importances, weights=weights, axis=0)))

    importance_table = generate_importance_table(named_importances, sort="overall", export_path=export_path)
    logger.info("Feature importance measurements during training: \n{}".format(importance_table))
    return oobs, named_importances
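# A small worked example (made-up numbers) of the weighted average above:
# forests with more trees contribute proportionally more to the combined
# per-feature importances.
import numpy

importances = [numpy.array([0.6, 0.4]),   # forest with 30 trees
               numpy.array([0.2, 0.8])]   # forest with 10 trees
weights = numpy.array([30.0, 10.0])
weights /= weights.sum()                  # -> [0.75, 0.25]
combined = numpy.average(importances, weights=weights, axis=0)
# combined == [0.5, 0.5]: 0.75*0.6 + 0.25*0.2 = 0.5, and 0.75*0.4 + 0.25*0.8 = 0.5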
def execute(self, slot, subindex, roi, result):
    assert all(roi.stop <= self.Input.meta.shape), \
        "Requested roi {} is too large for this input image of shape {}.".format(roi, self.Input.meta.shape)

    # Determine how much input data we'll need, and where the result will be relative to that input roi
    inputRoi, computeRoi = self._getInputComputeRois(roi)

    # Obtain the input data
    with Timer() as resultTimer:
        data = self.Input(*inputRoi).wait()
    logger.debug("Obtaining input data took {} seconds for roi {}".format(resultTimer.seconds(), inputRoi))

    xIndex = self.Input.meta.axistags.index('x')
    yIndex = self.Input.meta.axistags.index('y')
    zIndex = self.Input.meta.axistags.index('z') if self.Input.meta.axistags.index('z') < len(self.Input.meta.shape) else None
    cIndex = self.Input.meta.axistags.index('c') if self.Input.meta.axistags.index('c') < len(self.Input.meta.shape) else None

    # Must be float32
    if data.dtype != numpy.float32:
        data = data.astype(numpy.float32)

    axiskeys = self.Input.meta.getAxisKeys()
    spatialkeys = [k for k in axiskeys if k in 'xyz']

    # We need to remove a singleton z axis, otherwise we get
    # 'kernel longer than line' errors.
    # Note: zIndex can legitimately be 0, so test against None rather
    # than relying on truthiness.
    reskey = [slice(None, None, None)] * len(self.Input.meta.shape)
    reskey[cIndex] = 0
    if zIndex is not None and self.Input.meta.shape[zIndex] == 1:
        removedZ = True
        data = data.reshape((data.shape[xIndex], data.shape[yIndex]))
        reskey[zIndex] = 0
        spatialkeys = [k for k in axiskeys if k in 'xy']
    else:
        removedZ = False

    # Materialize sigma as a list: it is iterated by the any() check below
    # and again by gaussianSmoothing, so a one-shot map() iterator won't do.
    sigma = [self._sigmas[k] for k in spatialkeys]

    # Check if we need to smooth
    if any(x < 0.1 for x in sigma):
        if removedZ:
            resultXY = vigra.taggedView(result, axistags="".join(axiskeys))
            resultXY = resultXY.withAxes(*'xy')
            resultXY[:] = data
        else:
            result[:] = data
        return result

    # Smooth the input data
    smoothed = vigra.filters.gaussianSmoothing(
        data, sigma, window_size=2.0, roi=computeRoi,
        out=result[tuple(reskey)])  # FIXME: Assumes channel is last axis

    expectedShape = tuple(TinyVector(computeRoi[1]) - TinyVector(computeRoi[0]))
    assert tuple(smoothed.shape) == expectedShape, \
        "Smoothed data shape {} didn't match expected shape {}".format(smoothed.shape, expectedShape)
    return result
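# A minimal sketch of the Python 3 pitfall fixed above: map() and filter()
# return single-pass iterators, so consuming one in the any() check would
# leave an empty sigma sequence for the subsequent smoothing call.
sigmas = {'x': 1.0, 'y': 1.0}
sigma_iter = map(sigmas.get, 'xy')
any(s < 0.1 for s in sigma_iter)   # consumes the iterator
assert list(sigma_iter) == []      # nothing left for the smoothing call

sigma_list = [sigmas[k] for k in 'xy']
any(s < 0.1 for s in sigma_list)   # a list can be iterated again
assert sigma_list == [1.0, 1.0]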
def export_from_tiled_volume(tiles_description_json_path, roi, output_hdf5_path, output_dataset_name):
    """
    Export a cutout volume from a TiledVolume into an hdf5 dataset.

    Args:
        tiles_description_json_path: path to the TiledVolume's json description file.
        roi: The (start, stop) corners of the cutout region to export.
             (Must be tuple-of-tuples.)
        output_hdf5_path: The HDF5 file to export to.
        output_dataset_name: The name of the HDF5 dataset to write.
                             Will be deleted first if necessary.
    """
    if not os.path.exists(tiles_description_json_path):
        raise Exception("Description file does not exist: " + tiles_description_json_path)

    start, stop = numpy.array(roi)
    shape = tuple(stop - start)

    tiled_volume = TiledVolume(tiles_description_json_path)

    with Timer() as timer:
        result_array = numpy.ndarray(shape, tiled_volume.description.dtype)
        logger.info("Reading cutout volume of shape: {}".format(shape))
        tiled_volume.read((start, stop), result_out=result_array)

        logger.info("Writing data to: {}/{}".format(output_hdf5_path, output_dataset_name))
        with h5py.File(output_hdf5_path, 'a') as output_h5_file:
            if output_dataset_name in output_h5_file:
                del output_h5_file[output_dataset_name]

            dset = output_h5_file.create_dataset(output_dataset_name, shape, result_array.dtype,
                                                 chunks=True, data=result_array)
            try:
                import vigra
            except ImportError:
                pass
            else:
                # Attach axistags to the exported dataset, so ilastik
                # automatically interprets the volume correctly.
                output_axes = tiled_volume.description.output_axes
                dset.attrs['axistags'] = vigra.defaultAxistags(output_axes).toJSON()

    logger.info("Exported {:.1e} pixels in {:.1f} seconds.".format(numpy.prod(shape), timer.seconds()))
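# A hypothetical usage sketch (paths, roi, and dataset name are made up):
# export a 100x100x100 cutout starting at the volume origin into an HDF5
# dataset named "cutout".
export_from_tiled_volume(
    tiles_description_json_path="/path/to/description.json",
    roi=((0, 0, 0), (100, 100, 100)),   # (start, stop), tuple-of-tuples
    output_hdf5_path="/tmp/cutout.h5",
    output_dataset_name="cutout",
)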
def _train_forests(forests, X, y):
    """
    Train all RFs (in parallel), and return the oobs.
    """
    oobs = [None] * len(forests)

    def store_oob_results(i, oob):
        oobs[i] = oob

    with Timer() as train_timer:
        pool = RequestPool()
        for i, forest in enumerate(forests):
            req = Request(partial(forest.learnRF, X, y))
            # save the oob results
            req.notify_finished(partial(store_oob_results, i))
            pool.add(req)
        pool.wait()
    logger.info("Training took {} seconds".format(train_timer.seconds()))

    return oobs
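# A rough stdlib analogue of the Request/RequestPool fan-out above (a sketch,
# not lazyflow's API): submit one training job per forest and store each
# result into its slot as it completes.
from concurrent.futures import ThreadPoolExecutor, as_completed

def train_all(forests, X, y):
    oobs = [None] * len(forests)
    with ThreadPoolExecutor() as pool:
        futures = {pool.submit(f.learnRF, X, y): i for i, f in enumerate(forests)}
        for fut in as_completed(futures):
            oobs[futures[fut]] = fut.result()   # notify_finished analogue
    return oobs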
def create_and_train(self, X, y, feature_names=None):
    # Distribute trees as evenly as possible
    tree_counts = numpy.array([self._num_trees // self._num_forests] * self._num_forests)
    tree_counts[:self._num_trees % self._num_forests] += 1
    assert tree_counts.sum() == self._num_trees
    # Materialize to a list of ints and drop empty forests.
    # (In Python 3, map() returns an iterator, which does not support the
    # slice assignment the old code relied on.)
    tree_counts = [int(tree_count) for tree_count in tree_counts if tree_count != 0]

    logger.debug("Training parallel vigra RF")

    # Save for future reference
    known_labels = numpy.unique(y)

    X = numpy.asarray(X, numpy.float32)
    y = numpy.asarray(y, numpy.uint32)
    if y.ndim == 1:
        y = y[:, numpy.newaxis]

    assert X.ndim == 2
    assert len(X) == len(y)

    # Create N forests
    forests = []
    for tree_count in tree_counts:
        forests.append(vigra.learning.RandomForest(tree_count, **self._kwargs))

    # Train them all in parallel
    oobs = [None] * len(forests)
    pool = RequestPool()
    for i, forest in enumerate(forests):
        req = Request(partial(forest.learnRF, X, y))
        # save the oobs
        req.notify_finished(partial(oobs.__setitem__, i))
        pool.add(req)

    with Timer() as timer:
        pool.wait()

    logger.info("Training completed in {} seconds. Average OOB: {}".format(
        timer.seconds(), numpy.average(oobs)))
    return ParallelVigraRfLazyflowClassifier(forests, oobs, known_labels, feature_names)
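# A worked example of the tree distribution above (made-up numbers):
# 10 trees over 4 forests gives a base count of 2 each, and the first
# 10 % 4 == 2 forests each get one extra tree.
import numpy

num_trees, num_forests = 10, 4
tree_counts = numpy.array([num_trees // num_forests] * num_forests)  # [2, 2, 2, 2]
tree_counts[:num_trees % num_forests] += 1                           # [3, 3, 2, 2]
assert tree_counts.sum() == num_trees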
def execute(self, slot, subindex, roi, result):
    rag = self.Rag.value
    beta = self.Beta.value
    solver_name = self.SolverName.value
    edge_probabilities = self.EdgeProbabilities.value
    if edge_probabilities is None:
        # No probabilities cached yet. Merge everything
        result[0] = np.zeros((rag.max_sp + 1,), dtype=np.uint32)
        return

    with Timer() as timer:
        node_labeling = self.agglomerate_with_multicut(rag, edge_probabilities, beta, solver_name)
    logger.info("'{}' Multicut took {} seconds".format(solver_name, timer.seconds()))

    # FIXME: Is it okay to produce 0-based supervoxels?
    # node_labeling[:] += 1  # RAG labels are 0-based, but we want 1-based
    result[0] = node_labeling
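# A minimal sketch (made-up arrays, not the operator's API) of what a node
# labeling like the one produced above represents: entry i is the
# agglomerated segment for superpixel i, so applying it to a superpixel
# image is a single fancy-indexing lookup.
import numpy as np

superpixels = np.array([[0, 0, 1],
                        [2, 2, 1]], dtype=np.uint32)
node_labeling = np.array([0, 1, 1], dtype=np.uint32)  # merge superpixels 1 and 2
segmentation = node_labeling[superpixels]
# [[0, 0, 1],
#  [1, 1, 1]]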
def execute(self, slot, subindex, roi, result):
    assert slot == self.Output, "Unknown output slot: {}".format(slot.name)

    if self.SelectedLabel.value == 0:
        # Special case: Label zero selects nothing.
        result[:] = 0
        return

    with Timer() as timer:
        # Can't use writeInto() here because dtypes don't match.
        inputLabels = self.Input(roi.start, roi.stop).wait()

        # Use two in-place bitwise operations instead of numpy.where
        # This avoids the temporary variable created by (inputLabels == x)
        # result[:] = numpy.where( inputLabels == self.SelectedLabel.value, 1, 0 )
        # After the xor, pixels matching the selected label are exactly 0;
        # logical_not then maps them to 1 and everything else to 0.
        numpy.bitwise_xor(inputLabels, self.SelectedLabel.value, out=inputLabels)
        numpy.logical_not(inputLabels, out=inputLabels)
        result[:] = inputLabels  # Copy from uint32 to uint8

    logger.debug("OpSelectLabel took {} seconds for roi {}".format(timer.seconds(), roi))
    return result
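# A small sketch (made-up data) checking the in-place trick above against
# numpy.where: xor zeroes out the matching labels, and logical_not turns
# those zeros into ones.
import numpy

labels = numpy.array([3, 5, 3, 7], dtype=numpy.uint32)
selected = 3

expected = numpy.where(labels == selected, 1, 0)

scratch = labels.copy()
numpy.bitwise_xor(scratch, selected, out=scratch)   # matching entries -> 0
numpy.logical_not(scratch, out=scratch)             # 0 -> 1, nonzero -> 0
assert (scratch == expected).all()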
def execute(self, slot, subindex, roi, result):
    classifier = self.Classifier.value

    # Training operator may return 'None' if there was no data to train with
    skip_prediction = classifier is None

    # Shortcut: If the mask is totally zero, skip this request entirely
    if not skip_prediction and self.PredictionMask.ready():
        mask_roi = numpy.array((roi.start, roi.stop))
        mask_roi[:, -1:] = [[0], [1]]
        start, stop = list(map(tuple, mask_roi))
        mask = self.PredictionMask(start, stop).wait()
        skip_prediction = not numpy.any(mask)
        del mask

    if skip_prediction:
        result[:] = 0.0
        return result

    assert issubclass(type(classifier), LazyflowVectorwiseClassifierABC), \
        "Classifier is of type {}, which does not satisfy the LazyflowVectorwiseClassifierABC interface.".format(
            type(classifier))

    key = roi.toSlice()
    newKey = key[:-1]
    newKey += (slice(0, self.Image.meta.shape[-1], None),)

    with Timer() as features_timer:
        input_data = self.Image[newKey].wait()

    input_data = numpy.asarray(input_data, numpy.float32)

    shape = input_data.shape
    prod = numpy.prod(shape[:-1])
    features = input_data.reshape((prod, shape[-1]))
    # Use the precomputed per-supervoxel features rather than the raw
    # per-pixel feature matrix computed above.
    features = self.SupervoxelFeatures.value

    with Timer() as prediction_timer:
        probabilities = classifier.predict_probabilities(features)

    # Project the per-supervoxel probabilities back onto the pixel grid.
    probabilities = slic_to_mask(self.SupervoxelSegmentation.value, probabilities).reshape(
        -1, probabilities.shape[-1])

    logger.debug("Features took {} seconds, Prediction took {} seconds for roi: {} : {}".format(
        features_timer.seconds(), prediction_timer.seconds(), roi.start, roi.stop))

    assert probabilities.shape[1] <= self.PMaps.meta.shape[-1], (
        "Error: Somehow the classifier has more label classes than expected:"
        " Got {} classes, expected {} classes".format(
            probabilities.shape[1], self.PMaps.meta.shape[-1]))

    # We're expecting a channel for each label class.
    # If we didn't provide at least one sample for each label,
    # we may get back fewer channels.
    if probabilities.shape[1] < self.PMaps.meta.shape[-1]:
        # Copy to an array of the correct shape
        # This is slow, but it's an unusual case
        assert probabilities.shape[-1] == len(classifier.known_classes)
        full_probabilities = numpy.zeros(probabilities.shape[:-1] + (self.PMaps.meta.shape[-1],),
                                         dtype=numpy.float32)
        for i, label in enumerate(classifier.known_classes):
            full_probabilities[:, label - 1] = probabilities[:, i]
        probabilities = full_probabilities

    # Reshape to image
    probabilities.shape = shape[:-1] + (self.PMaps.meta.shape[-1],)

    # Copy only the prediction channels the client requested.
    result[...] = probabilities[..., roi.start[-1]:roi.stop[-1]]
    return result
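# A small sketch (made-up numbers) of the missing-channel fix-up above: if
# the classifier only saw labels 1 and 3 out of 3 classes, its output has 2
# columns, which are scattered into a full 3-column array by known_classes.
import numpy

known_classes = [1, 3]                     # labels the classifier was trained on
probabilities = numpy.array([[0.9, 0.1],   # columns follow known_classes order
                             [0.2, 0.8]], dtype=numpy.float32)

num_classes = 3
full = numpy.zeros(probabilities.shape[:-1] + (num_classes,), dtype=numpy.float32)
for i, label in enumerate(known_classes):
    full[:, label - 1] = probabilities[:, i]
# full[:, 1] (label 2) stays all-zero, since label 2 had no training samples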
def predict_probabilities_pixelwise(self, X, roi, axistags=None):
    logger.debug("predicting PIXELWISE vigra RF")

    # This classifier doesn't benefit from any context around the input, (does it?)
    # so just strip it off and only use the given roi.
    assert len(roi[0]) == len(roi[1]) == X.ndim - 1
    X = X[roi_to_slice(*roi)]

    FRAME_SPAN = 10       # Number of frames to wait until the mask is recalculated
    DILATION_RADIUS = 50  # In pixels
    BACKGROUND_LABEL = 1

    # Allocate memory for probability volume and mask
    # (note: plain bool, since numpy.bool was removed in NumPy 1.24)
    prob_vol = numpy.zeros((X.shape[:-1] + (len(self._known_labels),)), dtype=numpy.float32)
    mask = numpy.ones(bigintprod(X.shape[1:-1]), dtype=bool)

    frm_cnt = 0
    for X_t in X:
        if frm_cnt % FRAME_SPAN == 0:
            mask = numpy.ones(bigintprod(X.shape[1:-1]), dtype=bool)

        prob_mat = numpy.zeros((bigintprod(X.shape[1:-1]), len(self._known_labels)), dtype=numpy.float32)

        # Reshape the image into a 2D feature matrix
        mat_shape = (bigintprod(X_t.shape[:-1]), X_t.shape[-1])
        feature_mat = numpy.reshape(X_t, mat_shape)

        # Mask the feature matrix
        feature_mat_masked = feature_mat[mask == 1, :]

        # Run classifier
        prob_mat_masked = self._vigra_rf.predictProbabilities(feature_mat_masked.view(numpy.ndarray))

        prob_mat[mask == 1, :] = prob_mat_masked
        prob_mat[mask == 0, 0] = 1.0  # Fill background

        prob_img = prob_mat.reshape((1,) + X_t.shape[:-1] + (prob_mat.shape[-1],))

        # Recalculate the mask every FRAME_SPAN frames
        if frm_cnt % FRAME_SPAN == 0:
            predicted_labels = numpy.argmax(prob_img[0], axis=-1) + 1
            prob_slice = (predicted_labels != BACKGROUND_LABEL)

            kernel = numpy.ones((DILATION_RADIUS * 2 + 1), dtype=bool)
            with Timer() as morpho_timer:
                # Two 1-D passes are equivalent to one square structuring
                # element of side 2*DILATION_RADIUS + 1, but cheaper.
                prob_slice_dilated = scipy.ndimage.binary_dilation(prob_slice, kernel[None, :])
                prob_slice_dilated = scipy.ndimage.binary_dilation(prob_slice_dilated, kernel[:, None])
            logger.debug("[PROF] Morphology took {} ".format(morpho_timer.seconds()))

            mask = prob_slice_dilated.reshape(bigintprod(prob_slice_dilated.shape))

        # Drop prob_img's leading singleton axis so shapes match.
        prob_vol[frm_cnt, :, :, :] = prob_img[0]
        frm_cnt = frm_cnt + 1

    # Reshape into an image.
    # Choose the prediction image shape carefully:
    #
    # Most classifiers omit a channel entirely if there are no labels given for a particular class,
    # so the number of prediction channels we got is the same as the number of known_classes.
    # But if the classifier attempts to "help us out" by including channels for "missing" labels,
    # then we want to just return the whole thing.
    num_probability_channels = max(len(self.known_classes), prob_vol.shape[-1])

    prediction_shape = X.shape[:-1] + (num_probability_channels,)
    return numpy.reshape(prob_vol, prediction_shape)
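# A minimal sketch (toy radius) of the separable dilation used above:
# dilating with a (2r+1) row kernel and then a (2r+1) column kernel gives
# the same result as a single square structuring element, at lower cost.
import numpy
import scipy.ndimage

img = numpy.zeros((9, 9), dtype=bool)
img[4, 4] = True
r = 2
kernel = numpy.ones(2 * r + 1, dtype=bool)

two_pass = scipy.ndimage.binary_dilation(img, kernel[None, :])
two_pass = scipy.ndimage.binary_dilation(two_pass, kernel[:, None])

square = scipy.ndimage.binary_dilation(img, numpy.ones((2 * r + 1, 2 * r + 1), dtype=bool))
assert (two_pass == square).all()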
data = prediction_file['volume/predictions'][:]  # [0:50,0:50,0:50,:]

# Scale and convert to uint8, then add axistags and drange
data = (data * 255).astype(numpy.uint8)
data = vigra.taggedView(data, 'xyzc')
data.drange = (0, 255)

graph = Graph()
op = OpVigraWatershedViewer(graph=graph)
op.InputImage.setValue(data)
op.InputChannelIndexes.setValue([0])
op.WatershedPadding.setValue(0)
op.FreezeCache.setValue(False)
op.CacheBlockShape.setValue((520, 520))
op.OverrideLabels.setValue({})
op.SeedThresholdValue.setValue(0)
op.MinSeedSize.setValue(5)

assert op.WatershedLabels.ready()

print("Computing watershed...")
with Timer() as timer:
    watershed_labels = op.opWatershed.Output[:].wait()
print("Computing watershed took {} seconds".format(timer.seconds()))

print("Saving watershed...")
with h5py.File('/tmp/watershed_output.h5', 'w') as output_file:
    output_file.create_dataset('watershed_labels', data=watershed_labels)

print("DONE.")
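# The Timer used throughout these snippets comes from lazyflow.utility; a
# minimal compatible sketch (the interface is assumed from its usage here:
# a context manager plus seconds(), with pause()/unpause() for manual use)
# might look like this.
import time

class Timer(object):
    def __init__(self):
        self._elapsed = 0.0
        self._started = None

    def unpause(self):
        self._started = time.time()

    def pause(self):
        self._elapsed += time.time() - self._started
        self._started = None

    def seconds(self):
        if self._started is not None:
            return self._elapsed + (time.time() - self._started)
        return self._elapsed

    def __enter__(self):
        self.unpause()
        return self

    def __exit__(self, *exc_info):
        self.pause()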