import multiprocessing

import numpy as np
import nifty.graph.rag as nrag


def compute_edge_labels(rag, gt, ignore_label=None, n_threads=None):
    """ Compute edge labels by mapping ground-truth segmentation to graph nodes.

    Arguments:
        rag [RegionAdjacencyGraph] - region adjacency graph
        gt [np.ndarray] - ground-truth segmentation
        ignore_label [int or np.ndarray] - label id(s) in ground-truth
            to ignore in learning (default: None)
        n_threads [int] - number of threads (default: None)
    """
    n_threads = multiprocessing.cpu_count() if n_threads is None else n_threads

    # map the ground-truth to the rag nodes; an edge is labeled 'cut' (1)
    # if its two nodes map to different ground-truth ids
    node_labels = nrag.gridRagAccumulateLabels(rag, gt, n_threads)
    uv_ids = rag.uvIds()
    edge_labels = (node_labels[uv_ids[:, 0]] != node_labels[uv_ids[:, 1]]).astype('uint8')

    if ignore_label is not None:
        # mask out all edges that touch a node mapped to an ignore label
        mapped_uv_ids = node_labels[uv_ids]
        edge_mask = np.isin(mapped_uv_ids, ignore_label)
        edge_mask = edge_mask.sum(axis=1) == 0
        assert len(edge_labels) == len(edge_mask)
        return edge_labels, edge_mask

    return edge_labels
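# A minimal usage sketch for compute_edge_labels (not from the source): build a
# rag from a watershed oversegmentation and derive edge labels with gt id 0
# treated as ignore label. The HDF5 paths and keys are placeholders.
import vigra
import nifty.graph.rag as nrag

ws = vigra.readHDF5('/path/to/watershed.h5', 'data').astype('uint64')
gt = vigra.readHDF5('/path/to/groundtruth.h5', 'data').astype('uint64')

rag = nrag.gridRag(ws, numberOfLabels=int(ws.max()) + 1)
# only edges with edge_mask == True should be used for learning
edge_labels, edge_mask = compute_edge_labels(rag, gt, ignore_label=0)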
import os
import pickle

import vigra
import nifty.graph.rag as nrag
from sklearn.ensemble import RandomForestClassifier


def learn_rf():
    import cremi_tools.segmentation as cseg
    raw_path = '/home/papec/Work/neurodata_hdd/fib25/traintest/raw_train_normalized.h5'
    pmap_path = '/home/papec/Work/neurodata_hdd/fib25/traintest/probabilities_train.h5'
    assert os.path.exists(pmap_path), pmap_path
    ws_path = '/home/papec/Work/neurodata_hdd/fib25/traintest/overseg_train.h5'
    assert os.path.exists(ws_path), ws_path

    # load raw data, pmap and watersheds
    raw = vigra.readHDF5(raw_path, 'data').astype('float32')
    pmap = vigra.readHDF5(pmap_path, 'data')
    ws = vigra.readHDF5(ws_path, 'data').astype('uint64')
    assert ws.shape == pmap.shape

    # compute the region adjacency graph and extract edge features
    rag = nrag.gridRag(ws, numberOfLabels=int(ws.max() + 1))
    feature_extractor = cseg.FeatureExtractor(True)
    features = feature_extractor(rag, pmap, ws, raw)

    # map the ground-truth to the rag nodes and compute edge labels
    gt_path = '/home/papec/Work/neurodata_hdd/fib25/traintest/gt_train.h5'
    gt = vigra.readHDF5(gt_path, 'data')
    node_labels = nrag.gridRagAccumulateLabels(rag, gt)
    uv_ids = rag.uvIds()
    labels = node_labels[uv_ids[:, 0]] != node_labels[uv_ids[:, 1]]
    assert len(labels) == len(features), "%i, %i" % (len(labels), len(features))

    # fit the random forest and serialize it
    print("learning rf from features", features.shape)
    rf = RandomForestClassifier(n_jobs=40, n_estimators=500)
    rf.fit(features, labels)
    with open('./rf.pkl', 'wb') as f:
        pickle.dump(rf, f)
def _compute_edge_gt(self, gt, rag, uv_ids, has_defects, inp, out):
    # map the ground-truth to the rag nodes and compute edge labels
    node_gt = nrag.gridRagAccumulateLabels(rag, gt.get())
    u_gt = node_gt[uv_ids[:, 0]]
    v_gt = node_gt[uv_ids[:, 1]]
    edge_gt = (u_gt != v_gt).astype('uint8')
    assert (np.unique(edge_gt) == np.array([0, 1])).all(), str(np.unique(edge_gt))
    assert edge_gt.shape[0] == uv_ids.shape[0]

    # write the edge gts for all the different edge types
    edge_transition = rag.totalNumberOfInSliceEdges
    out.write(edge_gt, 'edge_gt')
    out.write(edge_gt[:edge_transition], 'edge_gt_xy')
    if has_defects:
        mod_adjacency = inp["modified_adjacency"]
        skip_transition = rag.numberOfEdges - mod_adjacency.read("delete_edges").shape[0]
        out.write(edge_gt[edge_transition:skip_transition], 'edge_gt_z')
        out.write(edge_gt[skip_transition:], 'edge_gt_skip')
    else:
        out.write(edge_gt[edge_transition:], 'edge_gt_z')
    return u_gt, v_gt
import numpy as np
import nifty.graph.rag as nrag


def edge_labels(rag, gt):
    # map the ground-truth to the rag nodes; an edge is labeled 'cut' (1)
    # if its two nodes map to different ground-truth ids
    uv_ids = rag.uvIds()
    node_labels = nrag.gridRagAccumulateLabels(rag, gt)
    edge_labels = (node_labels[uv_ids[:, 0]] != node_labels[uv_ids[:, 1]]).astype('uint8')
    # edges are only valid for learning if neither node maps to the ignore label 0
    edge_mask = (node_labels[uv_ids] != 0).all(axis=1)
    print(np.sum(edge_mask), "edges of", len(uv_ids), "are valid")
    return edge_labels, edge_mask
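# The indexing idiom above is shared by all snippets in this file; a tiny
# self-contained numpy example (toy values, no nifty required) makes it explicit:
import numpy as np

node_labels = np.array([7, 7, 8, 0])         # gt id per rag node (0 = ignore)
uv_ids = np.array([[0, 1], [1, 2], [2, 3]])  # edges as pairs of node ids

labels = (node_labels[uv_ids[:, 0]] != node_labels[uv_ids[:, 1]]).astype('uint8')
mask = (node_labels[uv_ids] != 0).all(axis=1)

print(labels)  # [0 1 1] - the first edge joins two nodes of gt object 7
print(mask)    # [ True  True False] - the last edge touches the ignore label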
def run(self):
    inp = self.input()
    rag = inp['rag'].read()
    defect_gt = inp['defect_gt']
    defect_gt.open()
    node_labels = nrag.gridRagAccumulateLabels(rag, defect_gt.get())
    assert (np.unique(node_labels) == np.array([0, 1])).all(), str(np.unique(node_labels))
    self.output().write(node_labels)
def _compute_and_check_expected(self, ws, inp, res, exclude=None):
    self.assertFalse((res == 0).all())
    rag = nrag.gridRag(ws, numberOfLabels=int(ws.max() + 1))
    expected = nrag.gridRagAccumulateLabels(rag, inp)
    if exclude is not None:
        res = res[exclude]
        expected = expected[exclude]
    self.assertEqual(res.shape, expected.shape)
    self.assertTrue(np.allclose(res, expected))
import numpy as np
import z5py
import nifty.graph.rag as nrag


def get_labels(path, n_threads=20):
    print("Loading watershed")
    ws = z5py.File(path)['segmentations/watershed'][:]

    print("Computing Rag")
    rag = nrag.gridRag(ws, numberOfLabels=int(ws.max()) + 1, numberOfThreads=n_threads)
    uvs = rag.uvIds()
    # edges are only valid if neither of their nodes is the ignore node 0
    valid_edges = (uvs != 0).all(axis=1)

    print("Loading groundtruth")
    gt = z5py.File(path)['segmentations/groundtruth'][:]

    print("Accumulating labels")
    node_labels = nrag.gridRagAccumulateLabels(rag, gt)
    labels = (node_labels[uvs[:, 0]] != node_labels[uvs[:, 1]]).astype('uint8')
    assert labels.shape == valid_edges.shape, "%s, %s" % (str(labels.shape), str(valid_edges.shape))
    return labels, valid_edges
import numpy as np
import z5py
import nifty.graph.rag as nrag


def extract_features_and_labels(sample):
    # affinity offsets matching the prediction channels
    offsets = [[-1, 0, 0], [0, -1, 0], [0, 0, -1],
               [-2, 0, 0], [0, -3, 0], [0, 0, -3],
               [-3, 0, 0], [0, -9, 0], [0, 0, -9],
               [-4, 0, 0], [0, -27, 0], [0, 0, -27]]
    path = '/home/papec/mnt/papec/Work/neurodata_hdd/cremi_warped/sample%s_train.n5' % sample
    f = z5py.File(path)

    # load the watershed oversegmentation and compute the rag
    ws = f['segmentations/watershed'][:]
    rag = nrag.gridRag(ws, numberOfLabels=int(ws.max()) + 1)

    # load affinities (inverted to boundary probabilities) and compute the
    # lifted neighborhood as well as local and lifted edge features
    affs = 1. - f['predictions/full_affs'][:]
    lifted_uvs, local_features, lifted_features = nrag.computeFeaturesAndNhFromAffinities(rag, affs, offsets)

    # map the ground-truth to the rag nodes
    gt = f['segmentations/groundtruth'][:]
    node_labels = nrag.gridRagAccumulateLabels(rag, gt)

    # labels and valid-edge mask for the local edges
    uv_ids = rag.uvIds()
    local_valid_edges = (node_labels[uv_ids] != 0).all(axis=1)
    local_labels = (node_labels[uv_ids[:, 0]] != node_labels[uv_ids[:, 1]]).astype('uint8')
    assert len(local_features) == len(local_labels), "%i, %i" % (len(local_features), len(local_labels))

    # labels and valid-edge mask for the lifted edges
    lifted_valid_edges = (node_labels[lifted_uvs] != 0).all(axis=1)
    lifted_labels = (node_labels[lifted_uvs[:, 0]] != node_labels[lifted_uvs[:, 1]]).astype('uint8')
    assert len(lifted_features) == len(lifted_labels), "%i, %i" % (len(lifted_features), len(lifted_labels))

    print("Number of valid local edges", np.sum(local_valid_edges), local_valid_edges.size)
    print("Number of valid lifted edges", np.sum(lifted_valid_edges), lifted_valid_edges.size)

    # restrict features and labels to the valid edges
    local_labels = local_labels[local_valid_edges]
    local_features = local_features[local_valid_edges]
    assert len(local_features) == len(local_labels), "%i, %i" % (len(local_features), len(local_labels))
    lifted_labels = lifted_labels[lifted_valid_edges]
    lifted_features = lifted_features[lifted_valid_edges]
    assert len(lifted_features) == len(lifted_labels), "%i, %i" % (len(lifted_features), len(lifted_labels))

    return local_labels, local_features, lifted_labels, lifted_features
import os

import vigra
import nifty.graph.rag as nrag
from cremi import Volume
from cremi.evaluation import NeuronIds


def gt_projection(block_id):
    # load the watershed and relabel it consecutively
    ws_path = '/home/papec/Work/neurodata_hdd/fib25/watersheds/watershed_block%i.h5' % block_id
    ws = vigra.readHDF5(ws_path, 'data')
    ws = vigra.analysis.labelVolume(ws.astype('uint32'))
    gt = vigra.readHDF5('/home/papec/Work/neurodata_hdd/fib25/gt/gt_block%i.h5' % block_id, 'data')

    # project the ground-truth onto the watershed via the rag
    rag = nrag.gridRag(ws, numberOfLabels=int(ws.max()) + 1)
    labeling = nrag.gridRagAccumulateLabels(rag, gt)
    projected = Volume(nrag.projectScalarNodeDataToPixels(rag, labeling))

    # evaluate the projection against the ground-truth
    metrics = NeuronIds(Volume(gt))
    vi_s, vi_m = metrics.voi(projected)
    are = metrics.adapted_rand(projected)
    print(vi_s)
    print(vi_m)
    print(are)
    print()

    # overwrite the watershed file with the relabeled version
    os.remove(ws_path)
    vigra.writeHDF5(ws, ws_path, 'data', compression='gzip')
import numpy as np
import z5py
import nifty.graph.rag as nrag
# 'feat' below refers to a repository-local feature module (import not shown in the source)


def extract_feats_and_labels(path, aff_key, ws_key, gt_key, mask_key,
                             n_threads=40, learn_2_rfs=True, with_glia=False):
    f = z5py.File(path)

    # load the watershed segmentation and compute rag
    ds_seg = f[ws_key]
    ds_seg.n_threads = n_threads
    seg = ds_seg[:]
    n_labels = int(seg.max()) + 1
    rag = nrag.gridRag(seg, numberOfLabels=n_labels, numberOfThreads=n_threads)
    uv_ids = rag.uvIds()

    # load affinities and invert them to boundary probabilities
    ds_affs = f[aff_key]
    ds_affs.n_threads = n_threads
    affs = ds_affs[:3]
    if affs.dtype == np.dtype('uint8'):
        affs = affs.astype('float32') / 255.
    affs = 1. - affs

    # TODO enable splitting xy and z features
    # get the edge features
    features, _, z_edges = feat.edge_features(rag, seg, n_labels, uv_ids, affs, n_threads=n_threads)

    # glia features
    if with_glia:
        print("Computing glia features")
        n_chans = ds_affs.shape[0]
        glia_slice = slice(n_chans - 1, n_chans)
        glia = ds_affs[glia_slice]
        if glia.dtype == np.dtype('uint8'):
            glia = glia.astype('float32') / 255.
        features = np.concatenate([features, feat.region_features(seg, uv_ids, glia)], axis=1)

    # load mask and groundtruth; set masked-out voxels to the ignore label 0
    ds_mask = f[mask_key]
    ds_mask.n_threads = n_threads
    mask = ds_mask[:]
    ds_gt = f[gt_key]
    ds_gt.n_threads = n_threads
    gt = ds_gt[:]
    gt[np.logical_not(mask)] = 0

    # compute the edge labels and valid edges
    node_labels = nrag.gridRagAccumulateLabels(rag, gt)
    labels = (node_labels[uv_ids[:, 0]] != node_labels[uv_ids[:, 1]]).astype('uint8')
    valid_edges = (node_labels[uv_ids] != 0).all(axis=1)
    print(np.sum(valid_edges), "edges of", len(uv_ids), "are valid")
    assert features.shape[0] == labels.shape[0]

    # just for temporary inspection, deactivate!
    import vigra
    vigra.writeHDF5(features, './feats_tmp.h5', 'data', chunks=True)
    vigra.writeHDF5(labels, './labs_tmp.h5', 'data', chunks=True)

    if learn_2_rfs:
        # split features and labels into in-plane (xy) and between-plane (z) edges
        features = features[valid_edges]
        labels = labels[valid_edges]
        z_edges = z_edges[valid_edges]
        return (features[np.logical_not(z_edges)], features[z_edges],
                labels[np.logical_not(z_edges)], labels[z_edges])
    else:
        return features[valid_edges], labels[valid_edges]
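# A minimal sketch of consuming the learn_2_rfs=True return value (not from the
# source): fit one random forest per edge type, xy vs. z. The path and dataset
# keys are placeholders.
from sklearn.ensemble import RandomForestClassifier

feats_xy, feats_z, labels_xy, labels_z = extract_feats_and_labels(
    '/path/to/data.n5', 'predictions/affs', 'segmentations/watershed',
    'segmentations/groundtruth', 'masks/dataset_mask')

rf_xy = RandomForestClassifier(n_estimators=200, n_jobs=-1)
rf_xy.fit(feats_xy, labels_xy)
rf_z = RandomForestClassifier(n_estimators=200, n_jobs=-1)
rf_z.fit(feats_z, labels_z)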
def active_edges(self, seg):
    nodes = nrag.gridRagAccumulateLabels(self.rag, seg)
    return nodes[self.uv_ids[:, 0]] != nodes[self.uv_ids[:, 1]]
def compute_edge_groundtruth(rag, gt_path, gt_key):
    # map the ground-truth to the rag nodes and mark edges that connect
    # nodes with different ground-truth ids
    gt = read_hdf5(gt_path, gt_key)
    node_gt = nrag.gridRagAccumulateLabels(rag, gt)
    uv_ids = rag.uvIds()
    edge_gt = node_gt[uv_ids[:, 0]] != node_gt[uv_ids[:, 1]]
    return edge_gt
import numpy as np
import z5py
import nifty
import nifty.graph.rag as nrag
# 'feat' below refers to a repository-local feature module (import not shown in the source)


def extract_feats_and_labels(path, aff_key, ws_key, gt_key, mask_key, lifted_nh,
                             offsets=[[-1, 0, 0], [0, -1, 0], [0, 0, -1]],
                             n_threads=40):
    f = z5py.File(path)

    # load the watershed segmentation and compute rag
    ds_seg = f[ws_key]
    ds_seg.n_threads = n_threads
    seg = ds_seg[:]
    print(seg.shape)
    n_labels = int(seg.max()) + 1
    rag = nrag.gridRag(seg, numberOfLabels=n_labels, numberOfThreads=n_threads)

    # load affinities and glia channel
    ds_affs = f[aff_key]
    ds_affs.n_threads = n_threads
    aff_slice = slice(0, len(offsets))
    affs = ds_affs[aff_slice]
    if affs.dtype == np.dtype('uint8'):
        affs = affs.astype('float32') / 255.
    affs = 1. - affs
    n_chans = ds_affs.shape[0]
    glia_slice = slice(n_chans - 1, n_chans)
    glia = ds_affs[glia_slice]
    if glia.dtype == np.dtype('uint8'):
        glia = glia.astype('float32') / 255.

    # compute local probs from affinities ('Standart' is the library's spelling)
    print("Computing local probabilities")
    probs = nrag.accumulateAffinityStandartFeatures(rag, affs, offsets, numberOfThreads=n_threads)[:, 0]
    probs = np.nan_to_num(probs)

    # remove zero-label (== ignore label) from the graph, because it short-circuits
    # lifted edges
    uv_ids = rag.uvIds()
    valid_edges = (uv_ids != 0).all(axis=1)
    uv_ids = uv_ids[valid_edges]
    probs = probs[valid_edges]

    # compute the lifted graph and lifted features
    print("Computing lifted objective")
    lifted_uv_ids = feat.make_filtered_lifted_nh(rag, n_labels, uv_ids, lifted_nh)
    graph = nifty.graph.undirectedGraph(n_labels)
    graph.insertEdges(uv_ids)

    # TODO parallelize some of these
    print("Computing lifted features")
    features = np.concatenate([
        # feat.ucm_features(n_labels, lifted_objective, probs),
        feat.clustering_features(graph, probs, lifted_uv_ids),
        feat.ucm_features(n_labels, uv_ids, lifted_uv_ids, probs),
        feat.region_features(seg, lifted_uv_ids, glia)
    ], axis=1)

    # load mask and groundtruth; set masked-out voxels to the ignore label 0
    ds_mask = f[mask_key]
    ds_mask.n_threads = n_threads
    mask = ds_mask[:]
    ds_gt = f[gt_key]
    ds_gt.n_threads = n_threads
    gt = ds_gt[:]
    gt[np.logical_not(mask)] = 0

    # compute the edge labels and valid edges for the lifted edges
    node_labels = nrag.gridRagAccumulateLabels(rag, gt)
    labels = (node_labels[lifted_uv_ids[:, 0]] != node_labels[lifted_uv_ids[:, 1]]).astype('uint8')
    valid_edges = (node_labels[lifted_uv_ids] != 0).all(axis=1)
    print(np.sum(valid_edges), "edges of", len(lifted_uv_ids), "are valid")
    assert features.shape[0] == labels.shape[0]

    # just for temporary inspection, deactivate!
    import vigra
    vigra.writeHDF5(features, './feats_tmp.h5', 'data', chunks=True)
    vigra.writeHDF5(labels, './labs_tmp.h5', 'data', chunks=True)

    return features[valid_edges], labels[valid_edges]
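# A common follow-up (a sketch, not from the source): turn predicted lifted-edge
# probabilities into signed multicut costs via the standard log-odds weighting.
# Here 'p' stands in for e.g. RandomForestClassifier.predict_proba(features)[:, 1];
# random values are used only to keep the example runnable.
import numpy as np

p = np.random.rand(1000)           # placeholder edge probabilities in [0, 1]
p_min = 1e-3
p = np.clip(p, p_min, 1. - p_min)  # avoid infinite costs at exactly 0 or 1
costs = np.log((1. - p) / p)       # p -> 0: attractive (> 0), p -> 1: repulsive (< 0)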