def bench_suite():
    """Benchmark the full GALA pipeline on the module's training/test data.

    Measures wall-clock time (via the ``timer`` context manager) and memory
    (via ``asizeof``) for each stage: RAG construction, feature-cache
    building, flat learning, agglomerative learning, classifier training,
    and test-volume segmentation.

    Returns
    -------
    (times, memory) : tuple of OrderedDict
        Stage name -> elapsed seconds, and stage name -> bytes, in
        pipeline order.
    """
    times = OrderedDict()
    memory = OrderedDict()
    # trdata() supplies (watershed, probability, ground-truth) training
    # volumes -- presumably arrays; defined elsewhere in this module.
    wstr, prtr, gttr = trdata()
    with timer() as t_build_rag:
        g = agglo.Rag(wstr, prtr)
    times['build RAG'] = t_build_rag[0]
    memory['base RAG'] = asizeof(g)
    with timer() as t_features:
        g.set_feature_manager(em)
    times['build feature caches'] = t_features[0]
    # Report only the cache overhead on top of the bare RAG.
    memory['feature caches'] = asizeof(g) - memory['base RAG']
    with timer() as t_flat:
        _ignore = g.learn_flat(gttr, em)
    times['learn flat'] = t_flat[0]
    with timer() as t_gala:
        (X, y, w, e), allepochs = g.learn_agglomerate(gttr, em,
                                                      min_num_epochs=5)
    y = y[:, 0]  # ignore rand-sign and vi-sign schemes
    memory['training data'] = asizeof((X, y, w, e))
    times['learn agglo'] = t_gala[0]
    with timer() as t_train_classifier:
        cl = classify.DefaultRandomForest()
        cl.fit(X, y)
    times['classifier training'] = t_train_classifier[0]
    memory['classifier training'] = asizeof(cl)
    # Compose feature map + trained classifier into a merge-priority policy.
    policy = agglo.classifier_probability(em, cl)
    wsts, prts, gtts = tsdata()
    gtest = agglo.Rag(wsts, prts, merge_priority_function=policy,
                      feature_manager=em)
    with timer() as t_segment:
        # np.inf threshold: agglomerate until no merges remain.
        gtest.agglomerate(np.inf)
    times['segment test volume'] = t_segment[0]
    memory['segment test volume'] = asizeof(gtest)
    return times, memory
def _train_model(self, model_file=None):
    """Build the feature manager and obtain a trained merge classifier.

    If *model_file* exists, the classifier is loaded from it; otherwise a
    random forest is trained from ``self.gt`` / ``self.mem`` / ``self.sp``
    (HDF5 stacks) and, if *model_file* was given, saved there. Sets
    ``self.fm`` (feature manager) and ``self.model`` (merge-priority
    policy) as side effects.

    Parameters
    ----------
    model_file : str or None
        Path to a saved classifier to load from / save to. ``None`` means
        always train and never save.
    """
    print("Creating GALA feature manager...")
    fm = features.moments.Manager()
    # Recommended numbers in the repo
    fh = features.histogram.Manager(25, 0, 1, [0.1, 0.5, 0.9])
    fg = features.graph.Manager()
    fc = features.contact.Manager()
    self.fm = features.base.Composite(children=[fm, fh, fg, fc])
    if model_file is not None and os.path.isfile(model_file):
        print('Loading model from path ...')
        rf = classify.load_classifier(model_file)
    else:
        # self.gt / self.mem / self.sp are presumably HDF5 paths set by
        # the constructor -- confirm against the enclosing class.
        gt, pr, sv = (map(imio.read_h5_stack,
                          [self.gt, self.mem, self.sp]))
        print("Creating training RAG...")
        g_train = agglo.Rag(sv, pr, feature_manager=self.fm)
        print("Learning agglomeration...")
        (X, y, w, merges) = g_train.learn_agglomerate(
            gt, self.fm, learning_mode='permissive',
            min_num_epochs=self.min_ep)[0]
        # gala emits several truth-labeling schemes; keep the first.
        y = y[:, 0]
        rf = classify.DefaultRandomForest().fit(X, y)
        # Save if path requested
        if model_file is not None:
            classify.save_classifier(rf, model_file)
    self.model = agglo.classifier_probability(self.fm, rf)
def test_learned_agglo_4channel():
    """Agglomerating the 4-channel test volume with the saved forest must
    reproduce the reference segmentation exactly."""
    classifier = classify.load_classifier('example-data/rf4.joblib')
    policy = agglo.classifier_probability(fc, classifier)
    rag = agglo.Rag(ws_test, p4_test, policy, feature_manager=fc)
    rag.agglomerate(0.5)
    expected = imio.read_h5_stack('example-data/test-seg4.lzf.h5')
    assert_array_equal(rag.get_segmentation(), expected)
def test_learned_agglo_1channel():
    """Agglomerating the 1-channel test volume with the saved forest must
    reproduce the reference segmentation exactly."""
    classifier = classify.load_classifier('example-data/rf1.joblib')
    policy = agglo.classifier_probability(fc, classifier)
    rag = agglo.Rag(ws_test, pr_test, policy, feature_manager=fc)
    rag.agglomerate(0.5)
    expected = imio.read_h5_stack('example-data/test-seg1.lzf.h5')
    assert_array_equal(rag.get_segmentation(), expected)
def test_segment_with_classifier_4_channel():
    """Segmenting with the py3 4-channel forest must match the reference
    segmentation (zero variation of information)."""
    archive = os.path.join(rundir, 'example-data/rf4-py3.joblib')
    with tar_extract(archive) as extracted:
        forest = joblib.load(extracted)
    policy = agglo.classifier_probability(fc, forest)
    rag = agglo.Rag(ws_test, p4_test, policy, feature_manager=fc)
    rag.agglomerate(0.5)
    expected = imio.read_h5_stack(
        os.path.join(rundir, 'example-data/test-seg-4.lzf.h5'))
    assert_allclose(ev.vi(rag.get_segmentation(), expected), 0.0)
def test_segment_with_gala_classifer(dummy_data):
    """GALA-trained logistic regression segments the dummy volume perfectly
    (VI == 0); the flat-epoch classifier is measurably worse (VI == 1.5)."""
    frag, gt, g, fman = dummy_data
    np.random.seed(5)
    summary, allepochs = g.learn_agglomerate(
        gt, fman, learning_mode='strict',
        classifier='logistic regression')
    feat, target, weights, edges = summary
    # Epoch 0 is the flat-learning epoch.
    ffeat, ftarget, fweights, fedges = allepochs[0]

    gala_clf = LR().fit(feat, target[:, 0])
    gala_policy = agglo.classifier_probability(fman, gala_clf)
    flat_clf = LR().fit(ffeat, ftarget[:, 0])
    flat_policy = agglo.classifier_probability(fman, flat_clf)

    rag = agglo.Rag(frag, feature_manager=fman,
                    merge_priority_function=gala_policy)
    rag.agglomerate(0.5)
    assert ev.vi(rag.get_segmentation(), gt) == 0

    rag_flat = agglo.Rag(frag, feature_manager=fman,
                         merge_priority_function=flat_policy)
    assert ev.vi(rag_flat.get_segmentation(0.5), gt) == 1.5
def test_segment_with_gala_classifer(dummy_data_fast):
    """GALA-trained logistic regression segments the fast dummy volume
    perfectly (VI == 0); the flat-epoch classifier is worse (VI == 1.5)."""
    frag, gt, g, fman = dummy_data_fast
    np.random.seed(5)
    summary, allepochs = g.learn_agglomerate(
        gt, fman, learning_mode='strict',
        classifier='logistic regression', min_num_epochs=5)
    feat, target, weights, edges = summary
    # Epoch 0 is the flat-learning epoch.
    ffeat, ftarget, fweights, fedges = allepochs[0]

    gala_clf = LR().fit(feat, target[:, 0])
    gala_policy = agglo.classifier_probability(fman, gala_clf)
    flat_clf = LR().fit(ffeat, ftarget[:, 0])
    flat_policy = agglo.classifier_probability(fman, flat_clf)

    rag = agglo.Rag(frag, feature_manager=fman,
                    merge_priority_function=gala_policy)
    rag.agglomerate(0.5)
    assert ev.vi(rag.get_segmentation(), gt) == 0

    rag_flat = agglo.Rag(frag, feature_manager=fman,
                         merge_priority_function=flat_policy)
    assert ev.vi(rag_flat.get_segmentation(0.5), gt) == 1.5
def test_segment_with_classifer_1_channel():
    """Segmenting with the 1-channel forest must match the reference
    segmentation (zero variation of information)."""
    if PYTHON_VERSION == 2:
        forest = classify.load_classifier(
            os.path.join(rundir, "example-data/rf-1.joblib"))
    else:
        archive = os.path.join(rundir, "example-data/rf1-py3.joblib")
        with tar_extract(archive) as extracted:
            forest = joblib.load(extracted)
    policy = agglo.classifier_probability(fc, forest)
    rag = agglo.Rag(ws_test, pr_test, policy, feature_manager=fc)
    rag.agglomerate(0.5)
    result = rag.get_segmentation()
    # imio.write_h5_stack(result, 'example-data/test-seg-1.lzf.h5')
    expected = imio.read_h5_stack(
        os.path.join(rundir, "example-data/test-seg-1.lzf.h5"))
    assert_allclose(ev.vi(result, expected), 0.0)
def test_segment_with_classifier_4_channel():
    """Segmenting with the 4-channel forest must match the reference
    segmentation (zero variation of information)."""
    if PYTHON_VERSION == 2:
        forest = classify.load_classifier(
            os.path.join(rundir, 'example-data/rf-4.joblib'))
    else:
        archive = os.path.join(rundir, 'example-data/rf4-py3.joblib')
        with tar_extract(archive) as extracted:
            forest = joblib.load(extracted)
    policy = agglo.classifier_probability(fc, forest)
    rag = agglo.Rag(ws_test, p4_test, policy, feature_manager=fc)
    rag.agglomerate(0.5)
    expected = imio.read_h5_stack(
        os.path.join(rundir, 'example-data/test-seg-4.lzf.h5'))
    assert_allclose(ev.vi(rag.get_segmentation(), expected), 0.0)
def testAggloRFBuild(self):
    """Rebuild a RAG with a stored RF classifier and check node counts
    after agglomeration and inclusion removal."""
    from gala import agglo
    from gala import features
    from gala import classify
    pkg_dir = os.path.dirname(sys.modules["gala"].__file__)
    self.datadir = os.path.abspath(pkg_dir) + "/testdata/"
    clf = classify.load_classifier(self.datadir + "agglomclassifier.rf.h5")
    # The classifier stores a JSON description of its feature manager.
    fm_info = json.loads(str(clf.feature_description))
    fm = features.io.create_fm(fm_info)
    mpf = agglo.classifier_probability(fm, clf)
    watershed, dummy, prediction = self.gen_watershed()
    stack = agglo.Rag(watershed, prediction, mpf,
                      feature_manager=fm, nozeros=True)
    self.assertEqual(stack.number_of_nodes(), 3630)
    stack.agglomerate(0.1)
    self.assertEqual(stack.number_of_nodes(), 88)
    stack.remove_inclusions()
    self.assertEqual(stack.number_of_nodes(), 86)
def train(index):
    """Train a merge-priority policy for training volume *index*.

    Training data is cached on disk: if ``training-data-<index>.h5``
    exists it is reused; otherwise it is learned from the watershed,
    probability, and ground-truth stacks for that index and saved.

    Parameters
    ----------
    index : int
        Index of the training volume (used to build all file names).

    Returns
    -------
    policy : callable
        Merge-priority function composing the shared feature manager
        ``fman`` with a freshly trained random forest.
    """
    out_fn = 'training-data-%i.h5' % index
    if os.path.exists(out_fn):
        # Reuse previously learned training data.
        data, labels = classify.load_training_data_from_disk(
            out_fn, names=['data', 'labels'])
    else:
        ws_tr = imio.read_image_stack('watershed-%i.lzf.h5' % index)
        # Probabilities are stored as uint8; rescale to [0, 1].
        pr_tr = imio.read_image_stack('probabilities-%i.lzf.h5' % index) / 255
        gt_tr = imio.read_image_stack('ground-truth-%i.lzf.h5' % index)
        g = agglo.Rag(ws_tr, pr_tr, feature_manager=fman)
        data, labels = g.learn_agglomerate(gt_tr, fman,
                                           min_num_epochs=4)[0][:2]
        # Fix: reuse out_fn instead of re-deriving the same filename,
        # so cache-read and cache-write paths cannot drift apart.
        classify.save_training_data_to_disk([data, labels],
                                            fn=out_fn,
                                            names=['data', 'labels'])
    print('total training data:', data.shape)
    print('size in MB:', data.size * data.itemsize / 1e6)
    rf = classify.DefaultRandomForest()
    # gala emits several truth-labeling schemes; train on the first.
    rf.fit(data, labels[:, 0])
    policy = agglo.classifier_probability(fman, rf)
    return policy
def testAggloRFBuild(self):
    """Build a RAG from a stored RF classifier and verify the node counts
    before agglomeration, after thresholding at 0.1, and after removing
    inclusions."""
    from gala import agglo
    from gala import features
    from gala import classify
    self.datadir = os.path.abspath(
        os.path.dirname(sys.modules["gala"].__file__)) + "/testdata/"
    clf = classify.load_classifier(self.datadir + "agglomclassifier.rf.h5")
    # Recreate the feature manager from the JSON stored on the classifier.
    fm = features.io.create_fm(json.loads(str(clf.feature_description)))
    priority = agglo.classifier_probability(fm, clf)
    watershed, dummy, prediction = self.gen_watershed()
    stack = agglo.Rag(watershed, prediction, priority,
                      feature_manager=fm, nozeros=True)
    self.assertEqual(stack.number_of_nodes(), 3630)
    stack.agglomerate(0.1)
    self.assertEqual(stack.number_of_nodes(), 88)
    stack.remove_inclusions()
    self.assertEqual(stack.number_of_nodes(), 86)
def train(args):
    """Train a GALA random-forest merge classifier and pickle it to disk.

    Reads ground-truth, probability, and watershed HDF5 stacks from
    *args*, learns agglomeration training data, fits a random forest,
    and writes ``[feature_manager, classifier]`` to ``args.outfile``
    (unpickle the pair to rebuild the merge-priority policy).

    Parameters
    ----------
    args : argparse.Namespace
        Must provide ``gt_file``, ``prob_file``, ``ws_file``, ``outfile``.
    """
    gt_train, pr_train, ws_train = (map(imio.read_h5_stack,
                                        [args.gt_file, args.prob_file,
                                         args.ws_file]))
    fm = features.moments.Manager()
    fh = features.histogram.Manager()
    fc = features.base.Composite(children=[fm, fh])
    g_train = agglo.Rag(ws_train, pr_train, feature_manager=fc)
    (X, y, w, merges) = g_train.learn_agglomerate(gt_train, fc)[0]
    y = y[:, 0]  # gala has 3 truth labeling schemes, pick the first one
    rf = classify.DefaultRandomForest().fit(X, y)
    # Persist feature manager + classifier together; consumers rebuild the
    # policy with agglo.classifier_probability(fc, rf) after unpickling.
    # Fixes: 'with' replaces manual open/close (file was leaked on error),
    # and the useless `lp_dump = pickle.dump(...)` (always None) and the
    # unused `learned_policy` local are removed.
    with open(args.outfile, mode='wb') as binary_file:
        pickle.dump([fc, rf], binary_file)
# create a feature manager fm = features.moments.Manager() fh = features.histogram.Manager() fc = features.base.Composite(children=[fm, fh]) # create Region Adjacency Graph (RAG) and obtain a training dataset g_train = agglo.Rag(ws_train, pr_train, feature_manager=fc) (X, y, w, merges) = g_train.learn_agglomerate(gt_train, fc)[0] y = y[:, 0] # gala has 3 truth labeling schemes, pick the first one ???? print((X.shape, y.shape)) # standard scikit-learn input format # train a classifier, scikit-learn syntax rf = classify.DefaultRandomForest().fit(X, y) # a policy is the composition of a feature map and a classifier # policy = merge priority function learned_policy = agglo.classifier_probability(fc, rf) # get the test data and make a RAG with the trained policy pr_test, ws_test = (map(imio.read_h5_stack, [h5File_test_probMap, h5File_test_ws])) g_test = agglo.Rag(ws_test, pr_test, learned_policy, feature_manager=fc) g_test.agglomerate(0.5) # best expected segmentation obtained with a threshold of 0.5 seg_test1 = g_test.get_segmentation() # convert hdf into png and save np_data = np.array(seg_test1) sizeZ,sizeY,sizeX = np_data.shape for i in range(0,sizeZ): im1 = np_data[i,:,:] im = Image.fromarray(im1.astype('uint8')) imFileName = str(i).zfill(3) + ".png"
# create a feature manager fm = features.moments.Manager() fh = features.histogram.Manager() fc = features.base.Composite(children=[fm, fh]) # create graph and obtain a training dataset g_train = agglo.Rag(ws_train, pr_train, feature_manager=fc) (X, y, w, merges) = g_train.learn_agglomerate(gt_train, fc)[0] y = y[:, 0] # gala has 3 truth labeling schemes, pick the first one print((X.shape, y.shape)) # standard scikit-learn input format # train a classifier, scikit-learn syntax rf = classify.DefaultRandomForest().fit(X, y) # a policy is the composition of a feature map and a classifier learned_policy = agglo.classifier_probability(fc, rf) # get the test data and make a RAG with the trained policy pr_test, ws_test = (map(imio.read_h5_stack, ['test-p1.lzf.h5', 'test-ws.lzf.h5'])) g_test = agglo.Rag(ws_test, pr_test, learned_policy, feature_manager=fc) g_test.agglomerate(0.5) # best expected segmentation seg_test1 = g_test.get_segmentation() # the same approach works with a multi-channel probability map p4_train = imio.read_h5_stack('train-p4.lzf.h5') # note: the feature manager works transparently with multiple channels! g_train4 = agglo.Rag(ws_train, p4_train, feature_manager=fc) (X4, y4, w4, merges4) = g_train4.learn_agglomerate(gt_train, fc)[0] y4 = y4[:, 0] print((X4.shape, y4.shape))
def deploy(args):
    """Segment a probability map tile-by-tile with a pickled GALA model.

    Loads a membrane probability map, preprocesses each z-slice with
    morphological operators, runs seeded watershed + learned agglomeration
    on each tile, and saves the assembled label volume (transposed back to
    X,Y,Z) to ``args.outfile`` with ``np.save``.

    Parameters
    ----------
    args : argparse.Namespace
        Must provide ``prob_file``, ``train_file`` (pickled ``[fc, rf]``),
        ``seeds_cc_threshold``, ``agg_threshold``, ``outfile``.
    """
    print("Deploying through driver")
    # --- load the membrane probability map ---
    if args.prob_file.endswith('.hdf5'):
        mem = imio.read_image_stack(args.prob_file, single_channel=False)
    else:
        mem = np.load(args.prob_file)  # X,Y,Z
        mem = np.transpose(np.squeeze(mem), (2, 0, 1))  # gala wants z,x,y?
    pr_test = np.zeros_like(mem)
    for z in range(0, mem.shape[0]):
        # NOTE(review): the dilation result is immediately overwritten by
        # the erosion of the *original* slice, so the dilation has no
        # effect. Possibly erosion(pr_test[z, :, :], disk(4)) was intended
        # -- confirm before changing; behavior preserved here.
        pr_test[z, :, :] = dilation(mem[z, :, :], disk(10))
        pr_test[z, :, :] = erosion(mem[z, :, :], disk(4))
    seg_out = np.zeros(pr_test.shape)
    pr_dim = pr_test.shape
    xsize = pr_dim[1]
    ysize = pr_dim[2]
    zsize = pr_dim[0]
    print(pr_dim)
    print(pr_dim[0])
    # Fix: np.int was removed in NumPy 1.24; the builtin int is equivalent.
    print(int(pr_dim[0] / zsize))
    # Fix: load the trained feature manager and classifier ONCE, with a
    # context manager, instead of re-reading the pickle inside the
    # innermost tile loop as the original did.
    with open(args.train_file, mode='rb') as binary_file:
        print(binary_file)
        fc, rf = pickle.load(binary_file)
    learned_policy = agglo.classifier_probability(fc, rf)
    print("Starting loop")
    # Tile sizes equal the full volume dims, so each range is currently
    # exactly one iteration; the loops allow smaller tiles later.
    for iz in range(0, int(pr_dim[0] / zsize)):
        for ix in range(0, int(pr_dim[1] / xsize)):
            for iy in range(0, int(pr_dim[2] / ysize)):
                p0 = pr_test[iz * zsize:(iz + 1) * zsize,
                             ix * xsize:(ix + 1) * xsize,
                             iy * ysize:(iy + 1) * ysize]
                p0 = np.around(p0, decimals=2)
                print(p0)
                print('watershed')
                # Seeds are the zero-probability connected components,
                # with small ones removed.
                seeds = label(p0 == 0)[0]
                seeds = morpho.remove_small_connected_components(
                    seeds, args.seeds_cc_threshold)
                ws_test = skmorph.watershed(p0, seeds)
                g_test = agglo.Rag(ws_test, p0, learned_policy,
                                   feature_manager=fc)
                g_test.agglomerate(args.agg_threshold)
                # Label map with the same shape as the tile.
                seg_test1 = g_test.get_segmentation()
                seg_out[iz * zsize:(iz + 1) * zsize,
                        ix * xsize:(ix + 1) * xsize,
                        iy * ysize:(iy + 1) * ysize] = seg_test1
    seg_out = np.transpose(seg_out, (1, 2, 0))  # back to X,Y,Z
    with open(args.outfile, 'wb') as f:
        np.save(f, seg_out)
    return
def policy():
    """Return a merge-priority policy pairing the module feature manager
    ``em`` with a default random forest.

    NOTE(review): the forest is constructed but not fitted here --
    presumably fitting happens elsewhere before use; confirm with callers.
    """
    forest = classify.DefaultRandomForest()
    return agglo.classifier_probability(em, forest)