def main(setName=['00.00', '00.01','01.00','01.01','02.00','02.01','03.00','04.00','04.01'],
         base='caesar', _k=5, _percentile=99, _max_iter=50, _overlap=0.1,
         _chunk_size=32, _padding=25, _merge=0.1):
    '''
    Entry point for the NMF approach.

    Wraps the NMF pipeline from Thunder Extraction: for every name in
    ``setName`` the images are loaded, converted with
    ``ThunderNMF.grayScale``, median filtered, factorised with NMF and the
    merged regions are collected.  All results are finally written to
    ``submission.json`` in the current working directory.

    The JSON export code follows:
    https://gist.github.com/freeman-lab/330183fdb0ea7f4103deddc9fae18113

    ``_k``, ``_percentile``, ``_max_iter`` and ``_overlap`` are forwarded to
    the ``NMF`` constructor; ``_chunk_size`` and ``_padding`` are used for
    both axes of ``fit``; ``_merge`` is passed to ``merge``.
    '''
    # The default list for ``setName`` is only iterated, never mutated.
    collected = []
    for name in setName:
        # NOTE(review): the complete ``setName`` list (not ``name``) is passed
        # to the loader on every pass -- confirm this is what
        # ``ThunderNMF.load`` expects.
        frames = ThunderNMF.load(setName, base)
        frames = ThunderNMF.grayScale(frames)
        print ('The shape of each training image after preprocessing is {}'.format(frames[1].shape))

        print ('Applying median filter for {}.test'.format(name))
        frames = ThunderNMF.medianFilter(frames)

        print ('Applying NMF for {}.test.....'.format(name))
        factorizer = NMF(k=_k, percentile=_percentile, max_iter=_max_iter, overlap=_overlap)
        fitted = factorizer.fit(frames,
                                chunk_size=(_chunk_size, _chunk_size),
                                padding=(_padding, _padding))

        print ('Merge regions for {}.test....'.format(name))
        combined = fitted.merge(_merge)

        rois = [{'coordinates': region.coordinates.tolist()} for region in combined.regions]
        collected.append({'dataset': '{}.test'.format(name), 'regions': rois})
        # Progress message for this dataset.
        print ('Completed processing results for {}.test'.format(name))

    with open('{}.json'.format('submission'), 'w') as f:
        f.write(json.dumps(collected))
    print ('Done!')
def test_nmf_many_padding(eng):
    """
    Fit a default NMF on five noisy synthetic sources with padded chunks.

    Uses (50, 100) chunks with (25, 25) padding and expects ten regions;
    the counts of model-to-truth center distances below 10 must match
    [2, 1, 2, 4, 1].
    """
    data, series, truth = make_gaussian(n=5, noise=0.5, seed=42, engine=eng, withparams=True)
    fitted = NMF().fit(data, chunk_size=(50, 100), padding=(25, 25))
    distances = cdist(fitted.regions.center, truth.regions.center)
    assert fitted.regions.count == 10
    close_enough = distances < 10
    assert allclose(sum(close_enough), [2, 1, 2, 4, 1])
def get_output(self):
    """
    Run NMF on every dataset in ``self.datasets`` and write the regions
    found to ``submission.json`` in the current working directory.

    Reads ``self.datasets``, ``self.test``, ``self.k``, ``self.percentile``,
    ``self.max_iter`` and ``self.merge_ratio``.
    """
    collected = []
    for name in self.datasets:
        print('Loading dataset: %s ' %name)
        images_dir = os.path.join('D:/Spring2019/DataSciencePracticum/p3',
                                  'neurofinder.' + name,
                                  'images')

        # In test mode only the first image of the dataset is used.
        first_only = self.test == True
        stack = td.images.fromtif(images_dir, ext='tiff')
        if first_only:
            stack = stack.first()

        factorizer = NMF(k=self.k, percentile=self.percentile,
                         max_iter=self.max_iter, overlap=0.1)
        fitted = factorizer.fit(stack, chunk_size=(100,100), padding=(25,25))
        combined = fitted.merge(self.merge_ratio)

        # One entry per dataset, in the layout the submission file uses.
        rois = [{'coordinates': region.coordinates.tolist()} for region in combined.regions]
        collected.append({'dataset': name, 'regions': rois})

    with open('submission.json', 'w') as f:
        f.write(json.dumps(collected))
def test_merging(eng):
    """
    Check that merging reduces the number of regions.

    Twenty noisy synthetic sources are fitted in padded (50, 100) chunks;
    the raw fit must yield more than 20 regions, merging with overlap 0.5
    at most 20, and merging with overlap 0.1 fewer than 18.
    ``eng`` is accepted but not used by this test.
    """
    data, series, truth = make_gaussian(n=20, seed=42, noise=0.5, withparams=True)
    extractor = NMF(k=5, percentile=95, max_iter=50, overlap=0.1)
    fitted = extractor.fit(data, chunk_size=(50,100), padding=(15,15))
    assert fitted.regions.count > 20
    merged_half = fitted.merge(overlap=0.5)
    assert merged_half.regions.count <= 20
    merged_tenth = fitted.merge(overlap=0.1)
    assert merged_tenth.regions.count < 18
def main(arg):
    """
    Run NMF on every ``neuro*`` folder below ``arg.test`` and write all
    regions found to ``submission.json`` inside ``arg.outputDir``.

    Parameters
    ----------
    arg : object
        Must expose ``test`` (directory holding the dataset folders) and
        ``outputDir`` (directory receiving ``submission.json``).  Both may
        be given with or without a trailing path separator.
    """
    submission = []
    # Sorted so the order of entries in the submission is deterministic;
    # os.listdir returns names in arbitrary order.
    for dataset in sorted(os.listdir(arg.test)):
        if not dataset.startswith('neuro'):
            continue
        print('processing dataset:' + dataset)
        print('loading')
        # os.path.join keeps the path valid when arg.test lacks a trailing
        # slash (plain concatenation glued the folder name onto it).
        images_dir = os.path.join(arg.test, dataset, 'images')
        data = td.images.fromtif(images_dir, ext='tiff')
        print('analyzing')
        algorithm = NMF(k=10, percentile=99, max_iter=30, overlap=0.2)
        model = algorithm.fit(data, chunk_size=(100, 100), padding=(25, 25))
        merged = model.merge(0.2)
        print('found %g regions' % merged.regions.count)
        regions = [{'coordinates': region.coordinates.tolist()}
                   for region in merged.regions]
        submission.append({'dataset': dataset, 'regions': regions})

    print('writing results')
    with open(os.path.join(arg.outputDir, 'submission.json'), 'w') as f:
        f.write(json.dumps(submission))
def main(config_data):
    """
    Denoise, motion-correct and factorise one image stack, then append its
    regions of interest to the submission and write it out as JSON.

    ``config_data`` is indexed by string keys; the values are converted with
    ``int``/``float`` where numbers are needed.  Keys read: ``input_path``,
    ``npartitions``, ``sigma``, ``k``, ``percentile``, ``min_size``,
    ``max_iter_nmf``, ``overlap_nmf``, ``chunk_size_1``, ``chunk_size_2``,
    ``padding_1``, ``padding_2``, ``overlap_merge``, ``max_iter_merge``,
    ``k_nearest``, ``dataset`` and ``output``.

    The function appends to a ``submission`` list and uses ``sc`` as the
    engine; neither is defined here, so both must exist in the enclosing
    scope.
    """
    def _denoise(frame):
        # sigma is looked up in the config each time a frame is filtered.
        return gaussian_filter(frame, sigma=float(config_data["sigma"]), order=0)

    stack = td.images.fromtif(path=config_data["input_path"],
                              engine=sc,
                              npartitions=int(config_data["npartitions"]))

    # ---- Noise reduction with a Gaussian filter -------------------------
    stack = stack.map(_denoise)

    # ---- Motion correction: register every frame to the mean image ------
    reference = stack.mean().toarray()
    registration = CrossCorr().fit(stack, reference)
    shifts = registration.transformations  # read but not used further here
    registered = registration.transform(stack)

    # ---- Local non-negative matrix factorisation -------------------------
    extractor = NMF(k=int(config_data["k"]),
                    percentile=int(config_data["percentile"]),
                    min_size=int(config_data["min_size"]),
                    max_iter=int(config_data["max_iter_nmf"]),
                    overlap=float(config_data["overlap_nmf"]))
    chunk_shape = (int(config_data["chunk_size_1"]), int(config_data["chunk_size_2"]))
    pad_shape = (int(config_data["padding_1"]), int(config_data["padding_2"]))
    fitted = extractor.fit(registered, chunk_size=chunk_shape, padding=pad_shape)

    # ---- Merge the extracted spatial regions into ROIs --------------------
    merged = fitted.merge(overlap=float(config_data["overlap_merge"]),
                          max_iter=int(config_data["max_iter_merge"]),
                          k_nearest=int(config_data["k_nearest"]))
    print('Total no of regions found %g' % merged.regions.count)

    # ---- Dump the ROI coordinates to a JSON file --------------------------
    rois = [{'coordinates': region.coordinates.tolist()} for region in merged.regions]
    submission.append({'dataset': config_data["dataset"], 'regions': rois})
    with open(config_data["output"] + '.json', 'w') as f:
        f.write(json.dumps(submission))
def test_nmf_one(eng):
    """
    A single noisy synthetic source fitted in one (100, 200) chunk must
    yield exactly one region whose center matches the true center
    (``allclose`` called with 0.1 as its third argument).
    """
    data, series, truth = make_gaussian(n=1, noise=0.5, seed=42, engine=eng, withparams=True)
    fitted = NMF().fit(data, chunk_size=(100, 200))
    found = fitted.regions
    assert found.count == 1
    assert allclose(found.center, truth.regions.center, 0.1)
def compute_nmf(datasets, var_data_path, var_num_components, var_percentile,
                var_max_iter, var_overlap, var_chunk_size):
    """
    Run NMF on each dataset and save the regions found.

    This code is a modified version of the baseline code at
    https://gist.github.com/freeman-lab/330183fdb0ea7f4103deddc9fae18113/

    The results of all datasets are written to ``submission.json`` inside
    ``var_data_path``.

    Parameters
    ----------
    datasets : List
        List of dataset folder names (e.g. ``neurofinder.00.00.test``).
    var_data_path : String
        Path to the data folder, with or without a trailing ``/``.  An
        empty string means the current working directory.
    var_num_components : Int
        The number of components to estimate per block.
    var_percentile : Int
        The value for thresholding.
    var_max_iter : Int
        The maximum number of algorithm iterations.
    var_overlap : number
        The value for determining whether to merge; used both for the
        ``NMF`` constructor and for ``merge``.
    var_chunk_size : Int
        The chunk size, used for both chunk dimensions.
    """
    # Check for emptiness first: indexing the last character of '' raises.
    if var_data_path and not var_data_path.endswith('/'):
        var_data_path = var_data_path + '/'

    prefix = 'neurofinder.'
    submission = []
    for dataset in datasets:
        path = var_data_path + dataset
        data = td.images.fromtif(path + '/images', ext='tiff')
        print('done')
        algorithm = NMF(k=var_num_components, percentile=var_percentile,
                        max_iter=var_max_iter, overlap=var_overlap)
        model = algorithm.fit(data, chunk_size=(var_chunk_size, var_chunk_size))
        print('done')
        merged = model.merge(var_overlap)
        print('done')
        print('found %g regions' % merged.regions.count)
        regions = [{'coordinates': region.coordinates.tolist()}
                   for region in merged.regions]
        # Drop the 'neurofinder.' prefix only when it is actually present,
        # so names given without it are not truncated.
        if dataset.startswith(prefix):
            name = dataset[len(prefix):]
        else:
            name = dataset
        submission.append({'dataset': name, 'regions': regions})

    print('writing results')
    with open(var_data_path + 'submission.json', 'w') as f:
        f.write(json.dumps(submission))
def test_nmf_many_chunked(eng):
    """
    Five noisy synthetic sources fitted in (50, 100) chunks without padding
    must yield five regions; the counts of model-to-truth center distances
    below 10 must be [1, 1, 1, 1, 1].
    """
    data, series, truth = make_gaussian(n=5, noise=0.5, seed=42, engine=eng, withparams=True)
    fitted = NMF().fit(data, chunk_size=(50, 100))
    assert fitted.regions.count == 5
    distances = cdist(fitted.regions.center, truth.regions.center)
    assert allclose(sum(distances < 10), [1, 1, 1, 1, 1])
def model_thunder(img_res):
    """
    Fit NMF on ``img_res``, merge the regions and write them to
    ``submission4.json`` in the current working directory.

    The ``dataset`` field of the single result is taken from ``onlyfiles``,
    which is not defined in this function and must exist in the enclosing
    scope.
    """
    print("the model_creation")
    extractor = NMF(k=5, percentile=99, max_iter=10, overlap=0.1)
    fitted = extractor.fit(img_res, chunk_size=(50,50), padding=(25,25))
    merged = fitted.merge(0.1)
    print('found %g regions' % merged.regions.count)

    rois = [{'coordinates': region.coordinates.tolist()} for region in merged.regions]
    payload = [{'dataset': onlyfiles, 'regions': rois}]

    print('writing results')
    with open('submission4.json', 'w') as f:
        f.write(json.dumps(payload))
def learn_data(self):
    """
    Run NMF on every dataset in ``self.data`` and append one result per
    dataset to ``self.submission``.

    Each dataset name ``d`` is resolved to ``self.folder + d`` and its
    images are read from the ``images`` sub-folder.  The result dict holds
    the dataset name under ``'dataset'`` and the merged region coordinates
    under ``'regions'``.
    """
    for d in self.data:
        path = self.folder + d
        # Parenthesised single-argument print works on Python 2 and 3.
        print("Analyzing " + path)
        image_data = td.images.fromtif(path + '/images', ext='tiff')
        algorithm = NMF(k=10, percentile=99, max_iter=50, overlap=0.1)
        model = algorithm.fit(image_data, chunk_size=(50, 50), padding=(25, 25))
        merged = model.merge(0.1)
        regions = [{'coordinates': region.coordinates.tolist()}
                   for region in merged.regions]
        # Label the result with the current dataset, not the whole list of
        # dataset names.
        result = {'dataset': d, 'regions': regions}
        self.submission.append(result)
def applyNMF(args):
    """
    Fit NMF on the global ``imageList`` and store the merged region
    coordinates in the global ``regions``.

    ``chunkSize`` and ``padding`` are read from module globals.  From
    ``args`` the function reads ``nmf_k``, ``nmf_maxIter``,
    ``nmf_percentile``, ``nmf_overlap``, ``nmf_minSize``,
    ``fit_mergeIter`` and ``merge_kNearest``.
    """
    global imageList, chunkSize, padding, regions

    print("Applying NMF")
    extractor = NMF(k=args.nmf_k,
                    max_iter=args.nmf_maxIter,
                    percentile=args.nmf_percentile,
                    overlap=args.nmf_overlap,
                    min_size=args.nmf_minSize)
    fitted = extractor.fit(imageList, chunk_size=chunkSize, padding=padding)
    combined = fitted.merge(overlap=args.nmf_overlap,
                            max_iter=args.fit_mergeIter,
                            k_nearest=args.merge_kNearest)
    regions = [{'coordinates': roi.coordinates.tolist()} for roi in combined.regions]
def main(datasets, base_dir, output_dir="output/nmf", gaussian_blur=0, verbose=False,
         n_components=5, max_iter=20, threshold=99, overlap=0.1, chunk_size=(32, 32),
         padding=(20, 20), merge_iter=5):
    """
    Performs neuron segmentation using the NMF implementation provided by
    thunder-extraction.

    The results are handed to ``postprocess.write_results`` together with
    ``output_dir`` and the file name ``nmf-output.json``.

    :param datasets: list of datasets (by name) to generate results for
    :param base_dir: directory that contains the datasets
    :param output_dir: directory where the output file should be written
    :param gaussian_blur: when greater than 0, the images are replaced by
        blurred summaries of 30 chunks of the original images
    :param verbose: print progress messages (and the score when ground
        truth is available)
    :param n_components: number of components to estimate per block
    :param max_iter: maximum number of NMF iterations
    :param threshold: value for thresholding (higher means more thresholding)
    :param overlap: value for determining whether to merge (higher means
        fewer merges)
    :param chunk_size: process images in chunks of this size
    :param padding: add this much padding to each chunk
    :param merge_iter: number of iterations to perform when merging regions
    :return: list of Result objects, one per dataset
    """
    results = []
    for dataset_name in datasets:
        if verbose:
            print("Processing dataset %s" % dataset_name)
        dataset = preprocess.load(dataset_name, base_dir)
        if verbose:
            print("Dataset loaded.")

        if gaussian_blur > 0:
            pieces = np.array_split(dataset.images, 30)
            summaries = np.array(list(map(preprocess.compute_summary, pieces)))
            # NOTE(review): read as "blur the summaries and use them as the
            # images"; confirm against the original formatting of this branch.
            dataset.images = preprocess.gaussian_blur(summaries, gaussian_blur)

        extractor = NMF(k=n_components, max_iter=max_iter, percentile=threshold,
                        overlap=overlap, min_size=20)
        fitted = extractor.fit(dataset.images, chunk_size=chunk_size, padding=padding)
        combined = fitted.merge(overlap=overlap, max_iter=merge_iter, k_nearest=20)

        rois = [{'coordinates': region.coordinates.tolist()} for region in combined.regions]
        result = Result(name=dataset_name, regions=rois)
        results.append(result)

        if verbose:
            print("Done with dataset %s" % dataset_name)
        if dataset.has_ground_truth() and verbose:
            f_score = result.f_score(dataset.true_regions)
            print("Combined score for dataset %s: %0.4f" % (dataset_name, f_score))

    if verbose:
        print("Writing results to %s" % output_dir)
    postprocess.write_results(results, output_dir, name="nmf-output.json")
    return results
def NMF_experiments(k_value=5, max_size_value='full', min_size_value=20,
                    percentile_value=99, max_iterations=50, overlap_value=0.1):
    """
    Run NMF with the given settings on the nine test datasets and write the
    regions found to ``submission.json`` in the current working directory.

    The arguments map onto the ``NMF`` constructor as follows:

    k_value           -> k           number of components to estimate per block
    max_size_value    -> max_size    maximum size of each region
    min_size_value    -> min_size    minimum size for each region
    max_iterations    -> max_iter    maximum number of algorithm iterations
    percentile_value  -> percentile  value for thresholding (higher means
                                     more thresholding)
    overlap_value     -> overlap     value for determining whether to merge
                                     (higher means fewer merges)

    The merge step itself is always called with 0.1.
    """
    datasets = [
        '00.00.test', '00.01.test', '01.00.test',
        '01.01.test', '02.00.test', '02.01.test',
        '03.00.test', '04.00.test', '04.01.test',
    ]

    collected = []
    for name in datasets:
        print('processing dataset: %s' % name)
        print('loading')
        folder = '../data/neurofinder.' + name
        stack = td.images.fromtif(folder + '/images', ext='tiff')

        print('analyzing')
        extractor = NMF(k=k_value, max_size=max_size_value, min_size=min_size_value,
                        percentile=percentile_value, max_iter=max_iterations,
                        overlap=overlap_value)
        fitted = extractor.fit(stack, chunk_size=(50, 50), padding=(25, 25))
        combined = fitted.merge(0.1)
        print('found %g regions' % combined.regions.count)

        rois = [{'coordinates': region.coordinates.tolist()} for region in combined.regions]
        collected.append({'dataset': name, 'regions': rois})

    print('writing results')
    with open('submission.json', 'w') as f:
        f.write(json.dumps(collected))
def fit_NMF(data, n_comps=3, iters=50, percentile=95, chunk_size=(60, 60), overlap=0.1):
    """
    Fit NMF to a dataset and return the merged regions.

    Parameters
    ----------
    data : numpy matrix
        the video to which the NMF is fit
    n_comps : int
        number of components to estimate per block
    iters : int
        max number of iterations; used for the NMF fit and also passed as
        ``max_iter`` to the merge step
    percentile : int
        the value for thresholding
    chunk_size : tuple
        width and height of chunk, two values
    overlap : float
        value determining whether to merge; used for the NMF constructor
        and the merge step

    Returns
    -------
    regions : list
        one ``{'coordinates': [...]}`` dict per merged region
    """
    extractor = NMF(k=n_comps, max_iter=iters, percentile=percentile, overlap=overlap)
    # Chunks are always padded by 20 on both axes.
    fitted = extractor.fit(data, chunk_size=chunk_size, padding=(20, 20))
    combined = fitted.merge(overlap=overlap, max_iter=iters, k_nearest=20)
    return [{'coordinates': roi.coordinates.tolist()} for roi in combined.regions]
def nmf(args):
    """
    Run NMF on every sub-folder of ``args.dataset`` and write one JSON file
    per folder into ``args.output``.

    For each folder the images are read from its ``images`` sub-folder,
    median and Gaussian filtered, factorised with NMF and merged.  The file
    is named ``<dataset>.json``, where ``<dataset>`` is the part of the
    folder path between ``neurofinder.`` and the next ``/`` (or the folder
    name itself when the path does not contain ``neurofinder.``).

    Parameters
    ----------
    args : object
        Must expose ``dataset`` (directory holding the dataset folders) and
        ``output`` (directory for the JSON files, created if missing).
        Both may be given with or without a trailing ``/``.
    """
    marker = 'neurofinder.'

    root = args.dataset
    # endswith is safe on an empty string, unlike indexing the last char.
    if root and not root.endswith('/'):
        root += '/'

    # The output directory does not depend on the dataset, so it is
    # prepared once up front.
    output = args.output
    if output and not os.path.exists(output):
        os.makedirs(output)
    if output and not output.endswith('/'):
        output += '/'

    for d in sorted(glob(root + '*/')):
        print("Working on folder", d)

        # Read images from the dataset
        V = td.images.fromtif(d + 'images/', ext='tiff')

        # Denoising and smoothing filters.  The filter methods return a new
        # images object rather than modifying V in place, so the result has
        # to be kept for the filters to have any effect.
        V = V.median_filter()
        V = V.gaussian_filter()

        # Applying NMF on data
        algorithm = NMF(k=10, max_iter=30, percentile=99)
        model = algorithm.fit(V, chunk_size=(50, 50), padding=(25, 25))
        merged = model.merge(overlap=0.1, max_iter=2, k_nearest=5)

        # Extracting ROI
        roi = [{'coordinates': r.coordinates.tolist()} for r in merged.regions]

        # Dataset name: text after 'neurofinder.' up to the next '/'.
        start = d.find(marker)
        if start == -1:
            name = os.path.basename(d.rstrip('/'))
        else:
            name = d[start + len(marker):].split('/', 1)[0]

        # Writing to json file
        with open(output + name + '.json', 'w') as f:
            f.write(json.dumps({'dataset': name, 'regions': roi}))
def model_implementaiton(datasets, path):
    """
    Run NMF on each dataset folder below ``path`` and write all regions
    found to ``submission.json`` in the current working directory.

    ``path`` is concatenated directly with each dataset name, so it has to
    end with a path separator.
    """
    collected = []
    for name in datasets:
        print(path)
        print('processing dataset: %s' % name)
        print('loading')
        folder = path + name
        print(folder)
        stack = td.images.fromtif(folder + '/images', ext='tiff')

        print('analyzing')
        # max_iter is 10 here; a previous variant of this call used 50.
        extractor = NMF(k=5, percentile=99, max_iter=10, overlap=0.1)
        fitted = extractor.fit(stack, chunk_size=(50, 50), padding=(25, 25))
        combined = fitted.merge(0.1)
        print('found %g regions' % combined.regions.count)

        rois = [{'coordinates': region.coordinates.tolist()} for region in combined.regions]
        collected.append({'dataset': name, 'regions': rois})

    print('writing results')
    with open('submission.json', 'w') as f:
        f.write(json.dumps(collected))
def main(arg):
    """
    Run NMF with per-dataset parameters on every ``neuro*`` folder below
    ``arg.test`` and write all regions found to ``submission.json`` inside
    ``arg.outputDir``.

    The parameters are chosen by the dataset code contained in the folder
    name (``00.00`` ... ``04.01``).  A folder whose name contains none of
    the known codes is processed with fallback parameters and a notice is
    printed, instead of failing with an unbound name or silently reusing
    the parameters of the previously processed dataset.

    Parameters
    ----------
    arg : object
        Must expose ``test`` (directory holding the dataset folders) and
        ``outputDir`` (directory receiving ``submission.json``).  Both may
        be given with or without a trailing path separator.
    """
    # (k, percentile, max_iter, chunk_size) per dataset code, checked in
    # this order.
    tuned = (
        ('00.00', (10, 95, 60, 50)),
        ('00.01', (10, 95, 60, 50)),
        ('01.00', (5, 95, 70, 50)),
        ('01.01', (10, 95, 40, 50)),
        ('02.00', (5, 99, 30, 50)),
        ('02.01', (5, 99, 30, 50)),
        ('03.00', (10, 95, 70, 45)),
        ('04.00', (5, 97, 60, 50)),
        ('04.01', (5, 95, 60, 50)),
    )
    # Most frequent value of each setting in the table above.
    fallback = (5, 95, 60, 50)

    submission = []
    # Sorted so the order of entries in the submission is deterministic.
    for dataset in sorted(os.listdir(arg.test)):
        if not dataset.startswith('neuro'):
            continue

        params = next((p for code, p in tuned if code in dataset), None)
        if params is None:
            print('no tuned parameters for ' + dataset + ', using fallback')
            params = fallback
        k_param, percentile_param, max_iter_param, chunk_size_param = params

        print('processing dataset:' + dataset)
        print('loading')
        # os.path.join keeps the path valid when arg.test lacks a trailing
        # slash.
        data = td.images.fromtif(os.path.join(arg.test, dataset, 'images'), ext='tiff')
        print('analyzing')
        algorithm = NMF(k=k_param, percentile=percentile_param,
                        max_iter=max_iter_param, overlap=0.1)
        model = algorithm.fit(data,
                              chunk_size=(chunk_size_param, chunk_size_param),
                              padding=(25, 25))
        merged = model.merge(0.1)
        print('found %g regions' % merged.regions.count)
        regions = [{'coordinates': region.coordinates.tolist()}
                   for region in merged.regions]
        submission.append({'dataset': dataset, 'regions': regions})

    print('writing results')
    with open(os.path.join(arg.outputDir, 'submission.json'), 'w') as f:
        f.write(json.dumps(submission))
# Accumulates one result dict per dataset directory.
output_array = []

# Visit the directories directly below the data root.
# NOTE(review): the ``break`` is read as belonging to the os.walk loop, so
# only the first level of the tree is processed -- confirm the nesting.
for root, dirs, files in os.walk(store_data_path.value):
    for dir in dirs:
        # Load the TIFF images of this dataset.
        images = td.images.fromtif(store_data_path.value + '/' + dir + '/images', ext='tiff')

        # Run NMF; percentile and overlap are left at the library defaults.
        # TODO: Set the parameters from command-line
        nmf_algo = NMF(k=10, max_iter=50)
        # Chunk size and padding were chosen after some experiments.
        nmf_model = nmf_algo.fit(images, chunk_size=(64, 64), padding=(8, 8))
        nmf_merge = nmf_model.merge()

        # Collect the region coordinates as plain lists so they can be
        # serialised to JSON.
        nmf_regions = [
            {'coordinates': region.coordinates.tolist()}
            for region in nmf_merge.regions
        ]
        output = {'dataset': dir, 'regions': nmf_regions}
        output_array.append(output)
    break

# Write the output file next to the data.
with open(store_data_path.value + '/output.json', 'w') as output_file:
    output_file.write(json.dumps(output_array))
def test_nmf_one(eng):
    """
    A single noisy synthetic source fitted with ``block_size=(100, 200)``
    must yield exactly one region whose center matches the true center
    (``allclose`` called with 0.1 as its third argument).
    """
    data, series, truth = make_gaussian(n=1, noise=0.5, seed=42, engine=eng, withparams=True)
    fitted = NMF().fit(data, block_size=(100,200))
    found = fitted.regions
    assert found.count == 1
    assert allclose(found.center, truth.regions.center, 0.1)
def test_nmf_many(eng):
    """
    Five noisy synthetic sources fitted with ``block_size=(100, 200)`` must
    yield five regions; the counts of model-to-truth center distances below
    10 must be [1, 1, 1, 1, 1].
    """
    data, series, truth = make_gaussian(n=5, noise=0.5, seed=42, engine=eng, withparams=True)
    fitted = NMF().fit(data, block_size=(100,200))
    assert fitted.regions.count == 5
    distances = cdist(fitted.regions.center, truth.regions.center)
    assert allclose(sum(distances < 10), [1, 1, 1, 1,1])
ext="tiff") print("DATA READ!") # # Creating NMF model # In[15]: #create the model and play with various values of k,percentile to get efficient results algorithm = NMF(k=5, max_iter=30, percentile=95, overlap=0.1) # # Fitting models for each dataset # In[16]: #fit our data in the model model = algorithm.fit(data, chunk_size=(50, 50)) # In[17]: #fixing overlapping pixels merged = model.merge(overlap=0.1) # In[18]: #saving cordinates value in a list and passing it to jsonString coordinates = [{'coordinates': x.coordinates.tolist()} for x in merged.regions] # In[19]: jsonString = {'dataset': "03.00.test", 'regions': coordinates}
# Load the image stack of every dataset folder listed in ``onlyfiles``.
# ``data``, ``onlyfiles``, ``path`` and ``sc`` come from earlier in the script.
for i in range(len(onlyfiles)):
    data.append(
        td.images.fromtif(path=path + onlyfiles[i] + '/images',
                          engine=sc,
                          ext="tiff"))
# NOTE(review): read as running once after the loading loop -- confirm.
print("DATA READ!")

# One NMF configuration shared by all datasets.
algorithm = NMF(k=10, max_iter=20, percentile=95, overlap=0.1)

# Fit every dataset first, then merge every fitted model.
model = [algorithm.fit(stack, chunk_size=(50, 50)) for stack in data]
merged = [fitted.merge(overlap=0.1) for fitted in model]

# Build the result dict of each dataset; the 'neurofinder.' prefix is
# removed from the folder name.
for i in range(len(merged)):
    coordinates = [{'coordinates': x.coordinates.tolist()} for x in merged[i].regions]
    jsonString = {
        'dataset': onlyfiles[i].replace("neurofinder.", ""),
        'regions': coordinates
    }