def test_no_dam_agglomeration():
    """Agglomerate fixture 3 with boundary mean and compare labels exactly.

    The RAG is built with normalized probabilities and agglomerated up to a
    threshold of 0.75; the resulting segmentation must equal the stored
    result array element for element.
    """
    case = 3
    rag = agglo.Rag(wss[case], probs[case], agglo.boundary_mean,
                    normalize_probabilities=True)
    rag.agglomerate(0.75)
    segmentation = rag.get_segmentation()
    assert_array_equal(segmentation, results[case],
                       'No dam agglomeration failed.')
def test_snemi():
    """Check the SNEMI3D feature vector computed for the edge (1, 2)."""
    manager = default.snemi3d()
    rag = agglo.Rag(ws, prob, feature_manager=manager, use_slow=True)
    # contact are edge features, so they are inserted just before the 8
    # difference features in the base paper_em vector.
    leading, differences = ans12[:-8], ans12[-8:]
    expected = np.concatenate((leading, contact, differences))
    assert_allclose(manager(rag, 1, 2), expected, atol=0.01)
def test_generate_examples_1_channel():
    """Compare one flat and one active learning epoch with stored results.

    learn_flat does not guarantee the *order* in which edges are learned, so
    the flat epoch is checked as a *set* of edges. The learned (active) epoch
    *should* produce a fixed order, so its merges are checked with array
    equality.

    Uses 1 channel probabilities.
    """
    rag = agglo.Rag(ws_train, pr_train, feature_manager=fc)
    _, epochs = rag.learn_agglomerate(gt_train, fc, classifier='naive bayes')

    # The reference pickle differs between Python major versions.
    if PYTHON_VERSION == 3:
        reference = 'example-data/train-naive-bayes-merges1-py3.pck'
    else:
        reference = 'example-data/train-naive-bayes-merges1-py2.pck'
    flat_expected, active_expected = load_pickle(
        os.path.join(rundir, reference))

    wanted_edges = {tuple(edge) for edge in flat_expected}
    learned_edges = {tuple(edge) for edge in epochs[0][3]}
    learned_merges = epochs[1][3]
    assert learned_edges == wanted_edges

    # concordant is the maximum edges concordant in the Python 2.7 version.
    # The remaining edges diverge because of apparent differences
    # between Linux and OSX floating point handling.
    if PYTHON_VERSION == 2:
        concordant = slice(None, 171)
    else:
        concordant = slice(None)
    assert_array_equal(learned_merges[concordant],
                       active_expected[concordant])

    # Refit a naive Bayes model on the flat epoch and compare its parameters
    # with the stored model.
    features_flat = epochs[0][0]
    labels_flat = epochs[0][1][:, 0]
    fitted = GaussianNB().fit(features_flat, labels_flat)
    stored = joblib.load(
        os.path.join(rundir, 'example-data/naive-bayes-1.joblib'))
    assert_allclose(fitted.theta_, stored.theta_, atol=1e-10)
    assert_allclose(fitted.sigma_, stored.sigma_, atol=1e-4)
    assert_allclose(fitted.class_prior_, stored.class_prior_, atol=1e-7)
def test_generate_examples_4_channel():
    """Compare one flat and one active learning epoch with stored results.

    learn_flat does not guarantee the *order* in which edges are learned, so
    the flat epoch is checked as a *set* of edges. The learned (active) epoch
    *should* produce a fixed order, so its merges are checked with array
    equality.

    Uses 4 channel probabilities.
    """
    rag = agglo.Rag(ws_train, p4_train, feature_manager=fc)
    _, epochs = rag.learn_agglomerate(gt_train, fc, classifier='naive bayes')

    # The reference pickle differs between Python major versions.
    if PYTHON_VERSION == 3:
        reference = 'example-data/train-naive-bayes-merges4-py3.pck'
    else:
        reference = 'example-data/train-naive-bayes-merges4-py2.pck'
    flat_expected, active_expected = load_pickle(
        os.path.join(rundir, reference))

    wanted_edges = {tuple(edge) for edge in flat_expected}
    learned_edges = {tuple(edge) for edge in epochs[0][3]}
    learned_merges = epochs[1][3]
    assert learned_edges == wanted_edges
    assert_array_equal(learned_merges, active_expected)

    # Refit a naive Bayes model on the flat epoch and compare its parameters
    # with the stored model.
    features_flat = epochs[0][0]
    labels_flat = epochs[0][1][:, 0]
    fitted = GaussianNB().fit(features_flat, labels_flat)
    stored = joblib.load(
        os.path.join(rundir, 'example-data/naive-bayes-4.joblib'))
    assert_allclose(fitted.theta_, stored.theta_, atol=1e-10)
    assert_allclose(fitted.sigma_, stored.sigma_, atol=1e-4)
    assert_allclose(fitted.class_prior_, stored.class_prior_, atol=1e-7)
def test_split_vi():
    """split_vi agrees whether ground truth is passed in or stored first."""
    rows = [[1, 0, 2],
            [1, 0, 2],
            [1, 0, 2]]
    # A fresh array is built for every call so no array object is shared
    # between the graph, the explicit argument and the stored ground truth.
    rag = agglo.Rag(np.array(rows))
    vi_explicit = rag.split_vi(np.array(rows))
    rag.set_ground_truth(np.array(rows))
    vi_stored = rag.split_vi()
    assert np.all(vi_explicit == vi_stored)
def test_float_watershed():
    """Ensure float arrays passed as watersheds don't crash everything."""
    boundary = np.array([[1., 0.],
                         [0., 1.]])
    float_labels = np.array([[1, 2],
                             [3, 4]], np.float32)
    rag = agglo.Rag(float_labels, boundary, connectivity=2)
    # (node pair, expected boundary mean), checked in this order.
    cases = (((1, 2), 0.5),
             ((1, 4), 1.0))
    for (u, v), wanted in cases:
        assert_equal(agglo.boundary_mean(rag, u, v), wanted)
def test_generate_gala_examples_fast_updateedges(dummy_data_fast):
    """As `test_generate_lash_examples`, but using strict learning.

    The graph from the fixture is replaced by one built with
    ``update_unchanged_edges=True``.
    """
    frag, gt, _, fman = dummy_data_fast
    rag = agglo.Rag(frag, feature_manager=fman, update_unchanged_edges=True)
    np.random.seed(99)
    summary, allepochs = rag.learn_agglomerate(
        gt, fman,
        learning_mode='strict',
        classifier='logistic regression',
    )
    feat, target, weights, edges = summary
    ffeat, ftarget, fweights, fedges = allepochs[0]  # flat

    agglo_model = LR().fit(feat, target[:, 0])
    flat_model = LR().fit(ffeat, ftarget[:, 0])

    def pred(v):
        # Probability of the positive class under the agglomerative model.
        return agglo_model.predict_proba([v])[0, 1]

    def fpred(v):
        # Probability of the positive class under the flat model.
        return flat_model.predict_proba([v])[0, 1]

    assert len(allepochs[1][0]) > 15  # number of merges is more than LASH

    # approx. same learning results at (0., 0.) and (1., 0.)
    origin, unit_x, unit_y = [0, 0], [1, 0], [0, 1]
    assert_allclose(fpred(origin), 0.2, atol=0.2)
    assert_allclose(pred(origin), 0.2, atol=0.2)
    assert_allclose(fpred(unit_x), 0.65, atol=0.15)
    assert_allclose(pred(unit_x), 0.65, atol=0.15)

    # difference between agglomerative and flat learning in point (0., 1.);
    # greater separation than with LASH
    assert_allclose(fpred(unit_y), 0.2, atol=0.15)
    assert_allclose(pred(unit_y), 0.7, atol=0.15)
def test_2_connectivity():
    """Boundary means on a 2x2 volume built with connectivity 2 (slow path).

    Edges are passed to ``boundary_mean`` as a list of node pairs and the
    result is compared with a list of expected means.
    """
    boundary = np.array([[1., 0.],
                         [0., 1.]])
    labels = np.array([[1, 2],
                       [3, 4]], np.uint32)
    rag = agglo.Rag(labels, boundary, connectivity=2, use_slow=True)
    # (edge list, expected means), checked in this order.
    cases = (
        ([[1, 2]], [0.5]),
        ([[1, 4]], [1.0]),
        ([[1, 2], [1, 4]], [0.5, 1.0]),
    )
    for edge_list, wanted in cases:
        assert_equal(agglo.boundary_mean(rag, edge_list), wanted)
def test_training_1channel():
    """Learned 1-channel training set matches the stored features and labels."""
    rag = agglo.Rag(ws_train, pr_train, feature_manager=fc)
    # NOTE(review): this builds a RandomState and discards it, so it does not
    # seed NumPy's global generator. Kept as-is to preserve behaviour; confirm
    # whether `np.random.seed(0)` was intended.
    np.random.RandomState(0)
    summary = rag.learn_agglomerate(gt_train, fc)[0]
    X, y, w, merges = summary
    X_expected, y_expected = load_training_data('example-data/train-set.npz')
    assert_allclose(X, X_expected, atol=1e-6)
    assert_allclose(y, y_expected, atol=1e-6)
def _train_model(self, model_file=None):
    """Build the feature manager and obtain the merge-priority model.

    A composite feature manager (moments, histogram, graph, contact) is
    stored on ``self.fm``. If ``model_file`` names an existing file the
    classifier is loaded from it; otherwise a random forest is trained from
    the stacks at ``self.gt``, ``self.mem`` and ``self.sp`` and, when
    ``model_file`` is given, saved there. The resulting policy is stored on
    ``self.model``.
    """
    print("Creating GALA feature manager...")
    moments = features.moments.Manager()
    # Recommended numbers in the repo
    histogram = features.histogram.Manager(25, 0, 1, [0.1, 0.5, 0.9])
    graph = features.graph.Manager()
    contact = features.contact.Manager()
    self.fm = features.base.Composite(
        children=[moments, histogram, graph, contact])

    have_saved_model = model_file is not None and os.path.isfile(model_file)
    if have_saved_model:
        print('Loading model from path ...')
        rf = classify.load_classifier(model_file)
    else:
        gt, pr, sv = [imio.read_h5_stack(stack_path)
                      for stack_path in (self.gt, self.mem, self.sp)]

        print("Creating training RAG...")
        g_train = agglo.Rag(sv, pr, feature_manager=self.fm)

        print("Learning agglomeration...")
        summary = g_train.learn_agglomerate(gt, self.fm,
                                            learning_mode='permissive',
                                            min_num_epochs=self.min_ep)[0]
        X, y, w, merges = summary
        # Only the first label column is used to fit the classifier.
        rf = classify.DefaultRandomForest().fit(X, y[:, 0])

        # Save if path requested
        if model_file is not None:
            classify.save_classifier(rf, model_file)

    self.model = agglo.classifier_probability(self.fm, rf)
def _extract_specific(self, sp, mem, path):
    """Generate the merge tree for one superpixel / membrane image pair.

    Both images are written to temporary HDF5 stacks under ``self.tmp``,
    read back with ``imio.read_h5_stack``, and agglomerated with
    ``self.model`` as the merge priority function. The merge history is
    written with ``dump_history(history, path)``.

    Parameters
    ----------
    sp : array
        Superpixel view of the image.
    mem : array
        Membrane view of the image.
    path : str
        Destination passed to ``dump_history``; its base name (without
        extension) also names the temporary files.

    Returns
    -------
    str
        ``path``, unchanged.
    """
    def tmp_h5_path(suffix):
        # Location of a temporary stack inside the working directory.
        return os.path.join(self.tmp, suffix + '.h5')

    def write_h5(img, tmp_file):
        # The mode is explicit: h5py >= 3 opens read-only by default, which
        # cannot create the file. 'w' also truncates any stale leftover.
        with h5py.File(tmp_file, 'w') as h5:
            h5.create_dataset('stack', data=np.expand_dims(img, 0),
                              compression='gzip')

    # Map images into H5 sets
    iden = os.path.splitext(os.path.basename(path))[0]
    sp_path = tmp_h5_path(iden + '_superpixels.h5')
    mem_path = tmp_h5_path(iden + '_membranes.h5')

    try:
        write_h5(sp, sp_path)
        write_h5(mem, mem_path)
        sv, pr = map(imio.read_h5_stack, [sp_path, mem_path])

        # Create test rag given pair of images
        g_test = agglo.Rag(sv, pr, feature_manager=self.fm,
                           merge_priority_function=self.model)

        # We use a threshold value > 1.0 (maximum) to retrieve the complete
        # merge tree
        history, _, _ = g_test.agglomerate(5.0, save_history=True)

        # Gala internally relabels the regions. Use inverse mapping to get to
        # original labels. If mapping equals original, no need to relabel
        # (relabelling in that case raises errors).
        if not g_test.is_sequential:
            mapping = g_test.inverse_watershed_map
            history = relabel_history(history, mapping)
        dump_history(history, path)
    finally:
        # Clean auxiliary files even when a step above failed.
        for tmp_file in (sp_path, mem_path):
            if os.path.exists(tmp_file):
                os.remove(tmp_file)

    return path
def test_traverse():
    """Check traversing / non-traversing bodies on a three-column volume."""
    columns = np.array([[0, 1, 2],
                        [0, 1, 2],
                        [0, 1, 2]])
    rag = agglo.Rag(columns)
    assert rag.traversing_bodies() == [1]
    assert rag.non_traversing_bodies() == [0, 2]
def test_no_dam_agglomeration():
    """Agglomerate fixture 3 with boundary mean; VI to the reference is 0.

    The RAG is built with normalized probabilities and agglomerated up to a
    threshold of 0.75. The result is compared through ``ev.vi`` rather than
    by exact label equality.
    """
    case = 3
    rag = agglo.Rag(wss[case], probs[case], agglo.boundary_mean,
                    normalize_probabilities=True)
    rag.agglomerate(0.75)
    distance = ev.vi(rag.get_segmentation(), results[case])
    assert_allclose(distance, 0.0, err_msg='No dam agglomeration failed.')
def test_learned_agglo_4channel():
    """Segment the 4-channel test volume with the stored classifier.

    The segmentation obtained at threshold 0.5 must equal the stored
    reference stack exactly.
    """
    classifier = classify.load_classifier('example-data/rf4.joblib')
    policy = agglo.classifier_probability(fc, classifier)
    rag = agglo.Rag(ws_test, p4_test, policy, feature_manager=fc)
    rag.agglomerate(0.5)
    obtained = rag.get_segmentation()
    reference = imio.read_h5_stack('example-data/test-seg4.lzf.h5')
    assert_array_equal(obtained, reference)
def test_ladder_agglomeration():
    """Ladder agglomeration (3) then threshold 0.51; VI to the reference is 0."""
    case = 2
    rag = agglo.Rag(wss[case], probs[case], agglo.boundary_mean,
                    normalize_probabilities=True)
    rag.agglomerate_ladder(3)
    rag.agglomerate(0.51)
    distance = ev.vi(rag.get_segmentation(), results[case])
    assert_allclose(distance, 0.0, err_msg='Ladder agglomeration failed.')
def test_learned_agglo_1channel():
    """Segment the 1-channel test volume with the stored classifier.

    The segmentation obtained at threshold 0.5 must equal the stored
    reference stack exactly.
    """
    classifier = classify.load_classifier('example-data/rf1.joblib')
    policy = agglo.classifier_probability(fc, classifier)
    rag = agglo.Rag(ws_test, pr_test, policy, feature_manager=fc)
    rag.agglomerate(0.5)
    obtained = rag.get_segmentation()
    reference = imio.read_h5_stack('example-data/test-seg1.lzf.h5')
    assert_array_equal(obtained, reference)
def test_training_4channel():
    """Learned 4-channel training set matches the stored features and labels."""
    rag = agglo.Rag(ws_train, p4_train, feature_manager=fc)
    # NOTE(review): this builds a RandomState and discards it, so it does not
    # seed NumPy's global generator. Kept as-is to preserve behaviour; confirm
    # whether `np.random.seed(0)` was intended.
    np.random.RandomState(0)
    summary = rag.learn_agglomerate(gt_train, fc)[0]
    X4, y4, w4, merges4 = summary
    X4_expected, y4_expected = load_training_data(
        'example-data/train-set4.npz')
    assert_allclose(X4, X4_expected, atol=1e-6)
    assert_allclose(y4, y4_expected, atol=1e-6)
def test_ladder_agglomeration():
    """Ladder agglomeration (2) then threshold 0.5; labels must match exactly."""
    case = 2
    rag = agglo.Rag(wss[case], probs[case], agglo.boundary_mean,
                    normalize_probabilities=True)
    rag.agglomerate_ladder(2)
    rag.agglomerate(0.5)
    segmentation = rag.get_segmentation()
    assert_array_equal(segmentation, results[case],
                       'Ladder agglomeration failed.')
def testAggoBuild(self):
    """Node counts after building, agglomerating and removing inclusions."""
    from gala import agglo

    watershed, boundary, _ = self.gen_watershed()
    rag = agglo.Rag(watershed, boundary, nozeros=True)
    self.assertEqual(rag.number_of_nodes(), 3630)

    rag.agglomerate(0.5)
    self.assertEqual(rag.number_of_nodes(), 61)

    # Removing inclusions is expected to leave the node count unchanged here.
    rag.remove_inclusions()
    self.assertEqual(rag.number_of_nodes(), 61)
def test_convex_hull():
    """Convex hull features for edge (1, 2) of a small two-label volume."""
    labels = np.array([[1, 2, 2],
                       [1, 1, 2],
                       [1, 2, 2]], dtype=np.uint8)
    manager = features.convex_hull.Manager()
    rag = agglo.Rag(labels, feature_manager=manager)
    wanted = np.array([
        0.5, 0.125, 0.5, 0.1, 1., 0.167, 0.025,
        0.069, 0.44, 0.056, 1.25, 1.5, 1.2, 0.667,
    ])
    computed = manager(rag, 1, 2)
    assert_allclose(computed, wanted, atol=0.01, rtol=1.)
def run_matched(f, fn, c=1, edges=None, merges=None):
    """Compare feature arrays from manager `f` with those pickled in `fn`.

    Parameters
    ----------
    f : feature manager
        Passed to ``agglo.Rag`` and to ``list_of_feature_arrays``.
    fn : str
        Path of the pickle file holding the expected arrays.
    c : int, optional
        Selects ``probs1`` when equal to 1, otherwise ``probs2``.
    edges : list of tuple, optional
        Edges to query. Defaults to ``[(1, 2), (6, 3), (7, 4)]``.
    merges : list of tuple, optional
        Merges to apply. Defaults to ``[(1, 2), (6, 3)]``.

    Raises
    ------
    AssertionError
        If the computed and stored results differ.
    """
    # Fresh lists per call: list defaults in the signature would be shared
    # between calls and could be corrupted by any in-place modification.
    if edges is None:
        edges = [(1, 2), (6, 3), (7, 4)]
    if merges is None:
        merges = [(1, 2), (6, 3)]

    p = probs1 if c == 1 else probs2
    g = agglo.Rag(wss1, p, feature_manager=f, use_slow=True)
    o = list_of_feature_arrays(g, f, edges, merges)
    # NOTE: pickle must only be used on trusted fixture files.
    with open(fn, 'rb') as fin:
        r = pck.load(fin, encoding='bytes')
    assert_equal_lists_or_arrays(o, r)
def run_matched(f, fn, c=1, edges=None, merges=None):
    """Compare feature arrays from manager `f` with those pickled in `fn`.

    Parameters
    ----------
    f : feature manager
        Passed to ``agglo.Rag`` and to ``list_of_feature_arrays``.
    fn : str
        Path of the pickle file holding the expected arrays.
    c : int, optional
        Selects ``probs1`` when equal to 1, otherwise ``probs2``.
    edges : list of tuple, optional
        Edges to query. Defaults to ``[(1, 2), (1, 3), (1, 4)]``.
    merges : list of tuple, optional
        Merges to apply. Defaults to ``[(1, 2), (1, 3)]``.

    Raises
    ------
    AssertionError
        If the computed and stored results differ.
    """
    # Fresh lists per call: list defaults in the signature would be shared
    # between calls and could be corrupted by any in-place modification.
    if edges is None:
        edges = [(1, 2), (1, 3), (1, 4)]
    if merges is None:
        merges = [(1, 2), (1, 3)]

    p = probs1 if c == 1 else probs2
    g = agglo.Rag(wss1, p, feature_manager=f)
    o = list_of_feature_arrays(g, f, edges, merges)
    # Pickle data is binary, so the file is opened in 'rb'; the context
    # manager guarantees the handle is closed.
    # NOTE: pickle must only be used on trusted fixture files.
    with open(fn, 'rb') as fin:
        r = pck.load(fin)
    assert_equal_lists_or_arrays(o, r)
def test_mask():
    """Build a RAG on fixture 1 with a boolean mask and check nodes / edges."""
    case = 1
    keep = np.array([[1, 1, 1, 1, 1],
                     [1, 0, 1, 1, 1],
                     [0, 0, 0, 1, 1],
                     [0, 0, 0, 0, 1]], dtype=bool)
    rag = agglo.Rag(wss[case], probs[case], mask=keep)
    assert 3 not in rag
    assert (1, 2) in rag.edges()
    # This edge is accepted in either orientation.
    assert (1, 5) in rag.edges() or (5, 1) in rag.edges()
    assert (2, 4) in rag.edges()
def dummy_data():
    """Return ``(frag, gt, g)``: a small fragment volume, truth and RAG.

    The 4x4 fragment and ground-truth label arrays are upsampled by a factor
    of 2 with nearest-neighbour interpolation (``order=0``) to match the 8x8
    probability map. The RAG uses ``agglo.boundary_mean`` as merge priority.
    """
    frag_small = np.arange(1, 17, dtype=int).reshape((4, 4))
    gt_small = np.array([[1, 1, 2, 2],
                         [1, 1, 2, 2],
                         [3, 3, 3, 3],
                         [3, 3, 3, 3]], dtype=int)

    # The probability map is made of three distinct row patterns.
    top_rows = [0, 1, 0, 9, 7, 0, 2, 0]
    middle_rows = [9, 8, 7, 9, 7, 8, 9, 9]
    bottom_rows = [0, 0, 0, 0, 0, 0, 0, 0]
    pr = 0.1 * np.array([top_rows] * 3 + [middle_rows] * 2 + [bottom_rows] * 3)

    frag = ndi.zoom(frag_small, 2, order=0)
    gt = ndi.zoom(gt_small, 2, order=0)
    g = agglo.Rag(frag, pr, merge_priority_function=agglo.boundary_mean)
    return frag, gt, g
def test_segment_with_gala_classifer(dummy_data_fast):
    """Segment with policies learned from agglomerative and flat examples.

    The policy fitted on the summary training set must reproduce the ground
    truth (VI of 0); the policy fitted on the flat epoch is expected to give
    a VI of exactly 1.5.
    """
    frag, gt, g, fman = dummy_data_fast
    np.random.seed(5)
    summary, allepochs = g.learn_agglomerate(
        gt, fman,
        learning_mode='strict',
        classifier='logistic regression',
        min_num_epochs=5,
    )
    feat, target, weights, edges = summary
    ffeat, ftarget, fweights, fedges = allepochs[0]  # flat

    agglo_model = LR().fit(feat, target[:, 0])
    gala_policy = agglo.classifier_probability(fman, agglo_model)
    flat_model = LR().fit(ffeat, ftarget[:, 0])
    flat_policy = agglo.classifier_probability(fman, flat_model)

    gtest = agglo.Rag(frag, feature_manager=fman,
                      merge_priority_function=gala_policy)
    gtest.agglomerate(0.5)
    assert ev.vi(gtest.get_segmentation(), gt) == 0

    # The flat graph is not agglomerated first; the threshold is handed to
    # get_segmentation directly.
    gtest_flat = agglo.Rag(frag, feature_manager=fman,
                           merge_priority_function=flat_policy)
    assert ev.vi(gtest_flat.get_segmentation(0.5), gt) == 1.5
def test_mito():
    """Two-stage agglomeration with frozen (mitochondria) nodes on fixture 5.

    First agglomerate to 0.15 with ``no_mito_merge(boundary_mean)``, then
    switch the priority function to ``mito_merge()``, rebuild the queue and
    agglomerate to 1.0. The result must have zero VI to the reference.
    """
    case = 5

    def frozen(g, i):
        "hardcoded frozen nodes representing mitochondria"
        return i in [3, 4]

    first_policy = agglo.no_mito_merge(agglo.boundary_mean)
    rag = agglo.Rag(wss[case], probs[case], first_policy,
                    normalize_probabilities=True, isfrozennode=frozen)
    rag.agglomerate(0.15)

    rag.merge_priority_function = agglo.mito_merge()
    rag.rebuild_merge_queue()
    rag.agglomerate(1.0)

    distance = ev.vi(rag.get_segmentation(), results[case])
    assert_allclose(distance, 0.0, err_msg='Mito merge failed')
def bench_suite():
    """Time and measure the memory of each stage of a train/segment run.

    Returns
    -------
    times, memory : OrderedDict
        Stage name to the first element of the corresponding ``timer``
        result, and stage name to ``asizeof`` measurements, in the order the
        stages were run.
    """
    times, memory = OrderedDict(), OrderedDict()
    wstr, prtr, gttr = trdata()

    # Stage 1: build the training RAG.
    with timer() as t_build_rag:
        g = agglo.Rag(wstr, prtr)
    times['build RAG'] = t_build_rag[0]
    memory['base RAG'] = asizeof(g)

    # Stage 2: attach the feature manager, which builds the feature caches.
    with timer() as t_features:
        g.set_feature_manager(em)
    times['build feature caches'] = t_features[0]
    memory['feature caches'] = asizeof(g) - memory['base RAG']

    # Stage 3: flat learning (result unused, only timed).
    with timer() as t_flat:
        _ignore = g.learn_flat(gttr, em)
    times['learn flat'] = t_flat[0]

    # Stage 4: agglomerative learning.
    with timer() as t_gala:
        training, allepochs = g.learn_agglomerate(gttr, em,
                                                  min_num_epochs=5)
        X, y, w, e = training
        y = y[:, 0]  # ignore rand-sign and vi-sign schemes
    memory['training data'] = asizeof((X, y, w, e))
    times['learn agglo'] = t_gala[0]

    # Stage 5: fit the classifier.
    with timer() as t_train_classifier:
        cl = classify.DefaultRandomForest()
        cl.fit(X, y)
    times['classifier training'] = t_train_classifier[0]
    memory['classifier training'] = asizeof(cl)

    # Stage 6: segment the test volume with the learned policy.
    policy = agglo.classifier_probability(em, cl)
    wsts, prts, gtts = tsdata()
    gtest = agglo.Rag(wsts, prts, merge_priority_function=policy,
                      feature_manager=em)
    with timer() as t_segment:
        gtest.agglomerate(np.inf)
    times['segment test volume'] = t_segment[0]
    memory['segment test volume'] = asizeof(gtest)

    return times, memory
def test_segment_with_classifier_4_channel():
    """Segment the 4-channel test volume with a stored random forest.

    The classifier file depends on the Python major version: a joblib file
    loaded through ``classify.load_classifier`` on Python 2, otherwise an
    archive unpacked with ``tar_extract`` and loaded with ``joblib``. The
    segmentation at threshold 0.5 must have zero VI to the stored result.
    """
    if PYTHON_VERSION == 2:
        rf = classify.load_classifier(
            os.path.join(rundir, 'example-data/rf-4.joblib'))
    else:
        archive = os.path.join(rundir, 'example-data/rf4-py3.joblib')
        with tar_extract(archive) as extracted:
            rf = joblib.load(extracted)

    policy = agglo.classifier_probability(fc, rf)
    rag = agglo.Rag(ws_test, p4_test, policy, feature_manager=fc)
    rag.agglomerate(0.5)
    obtained = rag.get_segmentation()
    reference = imio.read_h5_stack(
        os.path.join(rundir, 'example-data/test-seg-4.lzf.h5'))
    assert_allclose(ev.vi(obtained, reference), 0.0)
def testAggloRFBuild(self):
    """Node counts when agglomerating with the stored random forest policy.

    The classifier and its feature description are read from the
    ``testdata`` directory next to the installed ``gala`` package.
    """
    from gala import agglo
    from gala import features
    from gala import classify

    package_dir = os.path.dirname(sys.modules["gala"].__file__)
    self.datadir = os.path.abspath(package_dir) + "/testdata/"

    classifier = classify.load_classifier(
        self.datadir + "agglomclassifier.rf.h5")
    # The feature manager is rebuilt from the JSON description stored with
    # the classifier.
    description = json.loads(str(classifier.feature_description))
    manager = features.io.create_fm(description)
    policy = agglo.classifier_probability(manager, classifier)

    watershed, _, prediction = self.gen_watershed()
    rag = agglo.Rag(watershed, prediction, policy,
                    feature_manager=manager, nozeros=True)
    self.assertEqual(rag.number_of_nodes(), 3630)

    rag.agglomerate(0.1)
    self.assertEqual(rag.number_of_nodes(), 88)

    rag.remove_inclusions()
    self.assertEqual(rag.number_of_nodes(), 86)
def train(args):
    """Train an agglomeration classifier and pickle it to ``args.outfile``.

    Reads the ground-truth, probability and watershed stacks named by
    ``args.gt_file``, ``args.prob_file`` and ``args.ws_file``, learns a
    training set on a RAG using composite moment + histogram features, fits
    the default random forest, and pickles the list ``[fc, rf]`` (feature
    manager, classifier).

    Parameters
    ----------
    args : object
        Must provide the attributes ``gt_file``, ``prob_file``, ``ws_file``
        and ``outfile``.
    """
    gt_train, pr_train, ws_train = map(
        imio.read_h5_stack,
        [args.gt_file, args.prob_file, args.ws_file])

    fm = features.moments.Manager()
    fh = features.histogram.Manager()
    fc = features.base.Composite(children=[fm, fh])

    g_train = agglo.Rag(ws_train, pr_train, feature_manager=fc)
    (X, y, w, merges) = g_train.learn_agglomerate(gt_train, fc)[0]
    y = y[:, 0]  # gala has 3 truth labeling schemes, pick the first one
    rf = classify.DefaultRandomForest().fit(X, y)

    # Only the feature manager and classifier are saved; no merge policy is
    # built or stored here.
    # The context manager closes the file even if pickling fails.
    with open(args.outfile, mode='wb') as binary_file:
        pickle.dump([fc, rf], binary_file)