def main(setName=['00.00', '00.01','01.00','01.01','02.00','02.01','03.00','04.00','04.01'],
         base='caesar', _k=5, _percentile=99, _max_iter=50, _overlap=0.1,
         _chunk_size=32, _padding=25, _merge=0.1):
    """
    Run the NMF pipeline for every entry of ``setName`` and save the regions.

    Thin wrapper around the NMF pipeline of Thunder Extraction. The code that
    puts the data into a JSON file follows
    https://gist.github.com/freeman-lab/330183fdb0ea7f4103deddc9fae18113

    Args:
        setName: iterable of dataset identifiers; each is reported as
            ``'<id>.test'`` in the output.
        base: forwarded to ``ThunderNMF.load``.
        _k, _percentile, _max_iter, _overlap: forwarded to ``NMF(...)``.
        _chunk_size: used for both entries of the ``chunk_size`` tuple.
        _padding: used for both entries of the ``padding`` tuple.
        _merge: argument handed to ``model.merge``.

    The collected results are written to ``submission.json`` in the current
    working directory.
    """
    chunk_shape = (_chunk_size, _chunk_size)
    pad_shape = (_padding, _padding)

    submission = []
    for data in setName:
        # NOTE(review): the whole ``setName`` list is handed to the loader on
        # every pass instead of the current ``data`` entry -- confirm that
        # ``ThunderNMF.load`` really expects the full list.
        images = ThunderNMF.grayScale(ThunderNMF.load(setName, base))
        print ('The shape of each training image after preprocessing is {}'.format(images[1].shape))

        print ('Applying median filter for {}.test'.format(data))
        images = ThunderNMF.medianFilter(images)

        print ('Applying NMF for {}.test.....'.format(data))
        factorizer = NMF(k=_k, percentile=_percentile, max_iter=_max_iter, overlap=_overlap)
        fitted = factorizer.fit(images, chunk_size=chunk_shape, padding=pad_shape)

        print ('Merge regions for {}.test....'.format(data))
        merged = fitted.merge(_merge)

        found = []
        for region in merged.regions:
            found.append({'coordinates': region.coordinates.tolist()})
        submission.append({'dataset': '{}.test'.format(data), 'regions': found})

        # progress message for this dataset
        print ('Completed processing results for {}.test'.format(data))

    with open('{}.json'.format('submission'), 'w') as f:
        f.write(json.dumps(submission))
    print ('Done!')
def test_nmf_many_padding(eng):
    """Fit NMF with padded chunks on five synthetic sources and check the
    region count and how many regions fall near each true centre."""
    data, series, truth = make_gaussian(
        n=5, noise=0.5, seed=42, engine=eng, withparams=True)

    model = NMF().fit(data, chunk_size=(50, 100), padding=(25, 25))

    # pairwise distances: estimated centres (rows) vs. true centres (columns)
    distances = cdist(model.regions.center, truth.regions.center)
    assert model.regions.count == 10

    close_enough = distances < 10
    assert allclose(sum(close_enough), [2, 1, 2, 4, 1])
def get_output(self):
    """
    Fit NMF on every dataset in ``self.datasets`` and write the merged
    regions to ``submission.json`` in the current working directory.

    Reads ``self.test``, ``self.k``, ``self.percentile``, ``self.max_iter``
    and ``self.merge_ratio``.
    """
    data_root = 'D:/Spring2019/DataSciencePracticum/p3'
    submission = []

    for dataset in self.datasets:
        print('Loading dataset: %s ' %dataset)
        path = os.path.join(data_root, 'neurofinder.' + dataset, 'images')

        # In test mode only the first image of the dataset is used.
        first_only = self.test == True
        data = td.images.fromtif(path, ext='tiff')
        if first_only:
            data = data.first()

        # Fit on the loaded data, then merge regions.
        nmf = NMF(k=self.k, percentile=self.percentile,
                  max_iter=self.max_iter, overlap=0.1)
        fitted = nmf.fit(data, chunk_size=(100,100), padding=(25,25))
        merged = fitted.merge(self.merge_ratio)

        # One entry per dataset, in the layout expected by the submission file.
        found = []
        for region in merged.regions:
            found.append({'coordinates': region.coordinates.tolist()})
        submission.append({'dataset': dataset, 'regions': found})

    with open('submission.json', 'w') as f:
        f.write(json.dumps(submission))
def test_merging(eng):
    """Check that merging reduces the number of regions found on 20
    synthetic sources, and reduces it further at the lower overlap value."""
    data, series, truth = make_gaussian(
        n=20, seed=42, noise=0.5, withparams=True)

    model = NMF(k=5, percentile=95, max_iter=50, overlap=0.1).fit(
        data, chunk_size=(50,100), padding=(15,15))

    assert model.regions.count > 20

    loosely_merged = model.merge(overlap=0.5)
    assert loosely_merged.regions.count <= 20

    tightly_merged = model.merge(overlap=0.1)
    assert tightly_merged.regions.count < 18
def main(arg):
    """
    Run NMF on every ``neuro*`` entry of ``arg.test`` and write the merged
    regions to ``arg.outputDir + 'submission.json'``.

    Both ``arg.test`` and ``arg.outputDir`` are used as plain string
    prefixes, so they need to end with a path separator.
    """
    submission = []
    for dataset in os.listdir(arg.test):
        # Only entries whose name starts with 'neuro' are datasets.
        if not dataset.startswith('neuro'):
            continue

        print('processing dataset:' + dataset)
        print('loading')
        data = td.images.fromtif(arg.test + dataset + '/images', ext='tiff')

        print('analyzing')
        factorizer = NMF(k=10, percentile=99, max_iter=30, overlap=0.2)
        fitted = factorizer.fit(data, chunk_size=(100, 100), padding=(25, 25))
        merged = fitted.merge(0.2)
        print('found %g regions' % merged.regions.count)

        found = []
        for region in merged.regions:
            found.append({'coordinates': region.coordinates.tolist()})
        submission.append({'dataset': dataset, 'regions': found})

    print('writing results')
    with open(arg.outputDir + 'submission.json', 'w') as f:
        f.write(json.dumps(submission))
def main(config_data):
    """
    Denoise, register and factorize an image stack, then dump the regions.

    All settings are read from ``config_data`` (a mapping) and converted with
    ``int`` / ``float`` where they are used. The names ``sc`` and
    ``submission`` are not created here; they must already exist in the
    enclosing scope.
    """
    data = td.images.fromtif(
        path=config_data["input_path"],
        engine=sc,
        npartitions=int(config_data["npartitions"]))

    # --- Noise reduction: Gaussian filter applied to every image ---
    data = data.map(
        lambda img: gaussian_filter(img, sigma=float(config_data["sigma"]), order=0))

    # --- Motion correction: register every image against the mean image ---
    reference = data.mean().toarray()
    registration = CrossCorr().fit(data, reference)
    shifts = registration.transformations
    registered = registration.transform(data)

    # --- Local non-negative matrix factorization on the registered stack ---
    factorizer = NMF(
        k=int(config_data["k"]),
        percentile=int(config_data["percentile"]),
        min_size=int(config_data["min_size"]),
        max_iter=int(config_data["max_iter_nmf"]),
        overlap=float(config_data["overlap_nmf"]))
    chunk_shape = (int(config_data["chunk_size_1"]), int(config_data["chunk_size_2"]))
    pad_shape = (int(config_data["padding_1"]), int(config_data["padding_2"]))
    fitted = factorizer.fit(registered, chunk_size=chunk_shape, padding=pad_shape)

    # --- Merge the spatial regions produced by NMF ---
    merged = fitted.merge(
        overlap=float(config_data["overlap_merge"]),
        max_iter=int(config_data["max_iter_merge"]),
        k_nearest=int(config_data["k_nearest"]))
    print('Total no of regions found %g' % merged.regions.count)

    # --- Dump the region coordinates to a JSON file ---
    found = []
    for region in merged.regions:
        found.append({'coordinates': region.coordinates.tolist()})
    submission.append({'dataset': config_data["dataset"], 'regions': found})

    with open(config_data["output"] + '.json', 'w') as f:
        f.write(json.dumps(submission))
def test_nmf_one(eng):
    """Fit NMF on a single synthetic source and compare the one recovered
    region's centre with the true centre."""
    data, series, truth = make_gaussian(
        n=1, noise=0.5, seed=42, engine=eng, withparams=True)

    model = NMF().fit(data, chunk_size=(100, 200))

    assert model.regions.count == 1
    expected_center = truth.regions.center
    assert allclose(model.regions.center, expected_center, 0.1)
def compute_nmf(datasets, var_data_path, var_num_components, var_percentile,
                var_max_iter, var_overlap, var_chunk_size):
    """
    Run NMF on each dataset and save all results to one JSON file.

    Modified version of the baseline code at
    https://gist.github.com/freeman-lab/330183fdb0ea7f4103deddc9fae18113/

    Parameters
    ----------
    datasets : list
        Dataset directory names found under ``var_data_path``.
    var_data_path : str
        Path to the data folder; a trailing '/' is added when missing.
        ``submission.json`` is written into this folder.
    var_num_components
        Passed to ``NMF`` as ``k``.
    var_percentile
        Passed to ``NMF`` as ``percentile``.
    var_max_iter
        Passed to ``NMF`` as ``max_iter``.
    var_overlap
        Passed to ``NMF`` as ``overlap`` and also used as the merge argument.
    var_chunk_size
        Used for both entries of the ``chunk_size`` tuple.
    """
    if var_data_path[-1] != '/':
        var_data_path = var_data_path + '/'
    chunk_shape = (var_chunk_size, var_chunk_size)

    submission = []
    for dataset in datasets:
        image_dir = var_data_path + dataset + '/images'
        data = td.images.fromtif(image_dir, ext='tiff')
        print('done')

        factorizer = NMF(k=var_num_components, percentile=var_percentile,
                         max_iter=var_max_iter, overlap=var_overlap)
        fitted = factorizer.fit(data, chunk_size=chunk_shape)
        print('done')

        merged = fitted.merge(var_overlap)
        print('done')
        print('found %g regions' % merged.regions.count)

        found = []
        for region in merged.regions:
            found.append({'coordinates': region.coordinates.tolist()})
        # The first 12 characters of the name are dropped
        # (the length of the 'neurofinder.' prefix).
        submission.append({'dataset': dataset[12:], 'regions': found})

    print('writing results')
    with open(var_data_path + 'submission.json', 'w') as f:
        f.write(json.dumps(submission))
def test_nmf_many_chunked(eng):
    """Fit NMF in chunks on five synthetic sources and check that exactly
    one recovered region lies near each true centre."""
    data, series, truth = make_gaussian(
        n=5, noise=0.5, seed=42, engine=eng, withparams=True)

    model = NMF().fit(data, chunk_size=(50, 100))

    assert model.regions.count == 5
    distances = cdist(model.regions.center, truth.regions.center)
    assert allclose(sum(distances < 10), [1, 1, 1, 1, 1])
def model_thunder(img_res):
    """
    Fit NMF on ``img_res``, merge the regions and write them to
    ``submission4.json``.

    The dataset label is taken from ``onlyfiles``, which is not defined in
    this function and must exist in the enclosing scope.
    """
    print("the model_creation")

    factorizer = NMF(k=5, percentile=99, max_iter=10, overlap=0.1)
    fitted = factorizer.fit(img_res, chunk_size=(50,50), padding=(25,25))
    merged = fitted.merge(0.1)
    print('found %g regions' % merged.regions.count)

    found = []
    for region in merged.regions:
        found.append({'coordinates': region.coordinates.tolist()})
    submission = [{'dataset': onlyfiles, 'regions': found}]

    print('writing results')
    with open('submission4.json', 'w') as f:
        f.write(json.dumps(submission))
def learn_data(self):
    """
    Fit NMF on every dataset in ``self.data`` and collect the merged regions.

    For each name ``d`` in ``self.data`` the images are read from
    ``self.folder + d + '/images'``, factorized and merged, and one
    ``{'dataset': ..., 'regions': ...}`` entry is appended to
    ``self.submission``.
    """
    for d in self.data:
        path = self.folder + d
        # Parenthesised single-argument form works on Python 2 and Python 3.
        print("Analyzing " + path)

        image_data = td.images.fromtif(path + '/images', ext='tiff')
        algorithm = NMF(k=10, percentile=99, max_iter=50, overlap=0.1)
        model = algorithm.fit(image_data, chunk_size=(50, 50), padding=(25, 25))
        merged = model.merge(0.1)

        regions = [{'coordinates': region.coordinates.tolist()}
                   for region in merged.regions]
        # Label the entry with the dataset being processed, not with the
        # whole list of datasets.
        result = {'dataset': d, 'regions': regions}
        self.submission.append(result)
def applyNMF(args):
    """
    Fit NMF on the global ``imageList`` and store the merged regions in the
    global ``regions``.

    ``chunkSize`` and ``padding`` are read from module globals; the NMF and
    merge settings come from the ``args`` attributes used below.
    """
    global imageList, chunkSize, padding, regions

    print("Applying NMF")
    factorizer = NMF(
        k=args.nmf_k,
        max_iter=args.nmf_maxIter,
        percentile=args.nmf_percentile,
        overlap=args.nmf_overlap,
        min_size=args.nmf_minSize)
    fitted = factorizer.fit(imageList, chunk_size=chunkSize, padding=padding)
    merged = fitted.merge(
        overlap=args.nmf_overlap,
        max_iter=args.fit_mergeIter,
        k_nearest=args.merge_kNearest)

    found = []
    for region in merged.regions:
        found.append({'coordinates': region.coordinates.tolist()})
    regions = found
def main(datasets, base_dir, output_dir="output/nmf", gaussian_blur=0, verbose=False,
         n_components=5, max_iter=20, threshold=99, overlap=0.1, chunk_size=(32, 32),
         padding=(20, 20), merge_iter=5):
    """
    Performs neuron segmentation using the NMF implementation provided by thunder-extraction

    Results are handed to ``postprocess.write_results`` together with
    ``output_dir`` and the name "nmf-output.json".

    :param datasets: list of datasets (by name) to generate results for
    :param base_dir: directory that contains the datasets
    :param output_dir: directory where output file should be written
    :param gaussian_blur: the blur branch is entered only when this is > 0 (see the note in the body)
    :param verbose: print progress messages (and the score when ground truth is available)
    :param n_components: number of components to estimate per block (passed to NMF as ``k``)
    :param max_iter: passed to NMF as ``max_iter``
    :param threshold: value for thresholding (higher means more thresholding)
    :param overlap: value for determining whether to merge (higher means fewer merges)
    :param chunk_size: process images in chunks of this size
    :param padding: add this much padding to each chunk
    :param merge_iter: number of iterations to perform when merging regions
    :return: list of Result objects representing the result on each dataset
    """
    results = []
    for dataset_name in datasets:
        if verbose:
            print("Processing dataset %s" % dataset_name)
        dataset = preprocess.load(dataset_name, base_dir)
        if verbose:
            print("Dataset loaded.")
        if gaussian_blur > 0:
            # The images are split into 30 groups and each group is summarised.
            chunks = np.array_split(dataset.images, 30)
            summaries = np.array(list(map(preprocess.compute_summary, chunks)))
            # NOTE(review): in the text this block was taken from, line
            # boundaries are lost, so it is unclear whether the assignment
            # below is disabled in full or only up to "summaries =".
            # It is kept disabled here, in which case the summaries are
            # computed but never used -- confirm against the original file.
            # summaries = dataset.images = preprocess.gaussian_blur(summaries, gaussian_blur)
        # Fit NMF on the dataset images, then merge regions.
        model = NMF(k=n_components, max_iter=max_iter, percentile=threshold, overlap=overlap, min_size=20)
        model = model.fit(dataset.images, chunk_size=chunk_size, padding=padding)
        merged = model.merge(overlap=overlap, max_iter=merge_iter, k_nearest=20)
        regions = [{'coordinates': region.coordinates.tolist()} for region in merged.regions]
        result = Result(name=dataset_name, regions=regions)
        results.append(result)
        if verbose:
            print("Done with dataset %s" % dataset_name)
        # The score is only computed when ground truth exists and verbose is on.
        if dataset.has_ground_truth() and verbose:
            f_score = result.f_score(dataset.true_regions)
            print("Combined score for dataset %s: %0.4f" % (dataset_name, f_score))
    if verbose:
        print("Writing results to %s" % output_dir)
    postprocess.write_results(results, output_dir, name="nmf-output.json")
    return results
def NMF_experiments(k_value=5, max_size_value='full', min_size_value=20,
                    percentile_value=99, max_iterations=50, overlap_value=0.1):
    """
    Run NMF with the given settings on the nine test datasets and write the
    merged regions to ``submission.json``.

    Each argument is forwarded to the ``NMF`` keyword noted below; the
    descriptions are those given for the algorithm's parameters:

    k_value          -> k           number of components to estimate per block
    max_size_value   -> max_size    maximum size of each region
    min_size_value   -> min_size    minimum size for each region
    max_iterations   -> max_iter    maximum number of algorithm iterations
    percentile_value -> percentile  value for thresholding (higher means more thresholding)
    overlap_value    -> overlap     value for determining whether to merge (higher means fewer merges)

    The merge step itself is always called with 0.1.
    """
    datasets = [
        '00.00.test', '00.01.test', '01.00.test', '01.01.test', '02.00.test',
        '02.01.test', '03.00.test', '04.00.test', '04.01.test'
    ]

    submission = []
    for dataset in datasets:
        print('processing dataset: %s' % dataset)

        print('loading')
        path = '../data/neurofinder.' + dataset
        data = td.images.fromtif(path + '/images', ext='tiff')

        print('analyzing')
        factorizer = NMF(
            k=k_value,
            max_size=max_size_value,
            min_size=min_size_value,
            percentile=percentile_value,
            max_iter=max_iterations,
            overlap=overlap_value)
        fitted = factorizer.fit(data, chunk_size=(50, 50), padding=(25, 25))
        merged = fitted.merge(0.1)
        print('found %g regions' % merged.regions.count)

        found = []
        for region in merged.regions:
            found.append({'coordinates': region.coordinates.tolist()})
        submission.append({'dataset': dataset, 'regions': found})

    print('writing results')
    with open('submission.json', 'w') as f:
        f.write(json.dumps(submission))
def fit_NMF(data, n_comps=3, iters=50, percentile=95, chunk_size=(60, 60), overlap=0.1):
    """
    Fit NMF to a dataset and return the merged regions.

    Parameters
    ----------
    data :
        the video to which the NMF is fit
    n_comps : int
        number of components to estimate per block
    iters : int
        max number of iterations; used for the fit and for the merge step
    percentile : int
        the value for thresholding
    chunk_size : tuple
        width and height of chunk, two values
    overlap : float
        value determining whether to merge; used for the fit and the merge

    Returns
    -------
    regions : list
        one ``{'coordinates': [...]}`` dict per merged region
    """
    factorizer = NMF(k=n_comps, max_iter=iters, percentile=percentile, overlap=overlap)
    fitted = factorizer.fit(data, chunk_size=chunk_size, padding=(20, 20))
    merged = fitted.merge(overlap=overlap, max_iter=iters, k_nearest=20)

    regions = []
    for region in merged.regions:
        regions.append({'coordinates': region.coordinates.tolist()})
    return regions
def nmf(args):
    """
    Run NMF on every sub-folder of ``args.dataset`` and write one JSON file
    per folder into ``args.output``.

    For each folder the images are read from ``<folder>/images/``, filtered,
    factorized and merged. The result is written to
    ``<args.output>/<name>.json``, where ``<name>`` is the part of the folder
    path that follows ``'neurofinder.'``.

    Parameters
    ----------
    args :
        object with the string attributes ``dataset`` (folder that contains
        the dataset folders) and ``output`` (folder for the JSON files,
        created when missing).
    """
    dataset_root = args.dataset
    if not dataset_root.endswith('/'):
        dataset_root += '/'

    output = args.output

    for d in sorted(glob(dataset_root + '*/')):
        print("Working on folder", d)

        # Read images from the dataset
        V = td.images.fromtif(d + 'images/', ext='tiff')

        # Denoising and smoothing filters. The filter methods return the
        # filtered images instead of modifying V in place, so the results
        # have to be kept for the filters to have any effect.
        V = V.median_filter()
        V = V.gaussian_filter()

        # Applying NMF on data
        algorithm = NMF(k=10, max_iter=30, percentile=99)
        model = algorithm.fit(V, chunk_size=(50, 50), padding=(25, 25))
        merged = model.merge(overlap=0.1, max_iter=2, k_nearest=5)

        # Extracting ROI
        roi = [{'coordinates': r.coordinates.tolist()} for r in merged.regions]

        # Dataset name: the text between 'neurofinder.' (12 characters) and
        # the next '/' in the folder path.
        start = d.find('neurofinder.')
        stop = start + d[start:].find('/')
        name = d[start + 12:stop]
        payload = {'dataset': name, 'regions': roi}

        # Writing to JSON file; the context manager closes the file even if
        # serialisation fails.
        if not os.path.exists(output):
            os.makedirs(output)
        with open(os.path.join(output, name + '.json'), 'w') as f:
            f.write(json.dumps(payload))
def model_implementaiton(datasets, path):
    """
    Run NMF on each dataset under ``path`` and write all merged regions to
    ``submission.json`` in the current working directory.

    ``path`` is used as a plain string prefix of each dataset name.
    """
    submission = []
    for dataset in datasets:
        print(path)
        print('processing dataset: %s' % dataset)

        print('loading')
        temp_path = path + dataset
        print(temp_path)
        data = td.images.fromtif(temp_path + '/images', ext='tiff')

        print('analyzing')
        # An earlier configuration used max_iter=50 with otherwise identical settings.
        factorizer = NMF(k=5, percentile=99, max_iter=10, overlap=0.1)
        fitted = factorizer.fit(data, chunk_size=(50, 50), padding=(25, 25))
        merged = fitted.merge(0.1)
        print('found %g regions' % merged.regions.count)

        found = []
        for region in merged.regions:
            found.append({'coordinates': region.coordinates.tolist()})
        submission.append({'dataset': dataset, 'regions': found})

    print('writing results')
    with open('submission.json', 'w') as f:
        f.write(json.dumps(submission))
def extract_command(input, output, diameter, method, url, overwrite):
    """
    Extract regions from an image folder and save them as
    ``regions-<method>.json`` inside the output folder.

    Parameters
    ----------
    input : str
        Folder with the images; searched for ``*.tif``, then ``*.tiff``,
        then ``*.bin`` files.
    output : str or None
        Output folder; defaults to ``<input>_extracted`` when None. Created
        when it does not exist.
    diameter :
        Used by the 'CC' method for the algorithm and for shape filtering.
    method : str
        Either 'CC' or 'NMF'.
    url :
        Passed to ``setup_spark``.
    overwrite : bool
        When false, an existing result file stops the command.

    Every failure is reported through ``error`` and the function returns
    without saving anything.
    """
    input = abspath(input)
    output = input + '_extracted' if output is None else abspath(output)
    destination = join(output, 'regions-' + method + '.json')

    if isfile(destination) and not overwrite:
        error('file already exists and overwrite is false')
        return
    if not isdir(output):
        mkdir(output)

    engine = setup_spark(url)
    status('reading data from %s' % input)
    if glob(join(input, '*.tif')):
        data = fromtif(input, engine=engine)
    elif glob(join(input, '*.tiff')):
        data = fromtif(input, ext='tiff', engine=engine)
    elif glob(join(input, '*.bin')):
        data = frombinary(input, engine=engine)
    else:
        error('no tif or binary files found in %s' % input)
        return

    status('extracting')
    if method == 'CC':
        # NOTE(review): the keyword is spelled ``theshold`` -- confirm it
        # matches the parameter name in CC's signature.
        algorithm = CC(diameter=diameter, clip_limit=0.04, theshold=0.2,
                       sigma_blur=1, boundary=(1, 1))
        unmerged = algorithm.fit(data)
        model = unmerged.merge(0.1)
        model = filter_shape(model, min_diameter=0.7 * diameter,
                             max_diameter=1.3 * diameter, min_eccentricity=0.2)
    elif method == 'NMF':
        algorithm = NMF(k=10, percentile=99, max_iter=50, overlap=0.1)
        unmerged = algorithm.fit(data, chunk_size=(50, 50), padding=(25, 25))
        model = unmerged.merge(overlap=0.20, max_iter=3, k_nearest=10)
    else:
        error('extraction method %s not recognized' % method)
        # No model was built, so stop here instead of falling through to
        # the save below, like the other error paths do.
        return

    model.save(destination)
    success('extraction complete')
# Tuned NMF settings per dataset id: (k, percentile, max_iter, chunk_size).
# Entries are checked in order and the first id contained in the dataset
# name wins.
_NMF_PARAMS_BY_DATASET = (
    ('00.00', (10, 95, 60, 50)),
    ('00.01', (10, 95, 60, 50)),
    ('01.00', (5, 95, 70, 50)),
    ('01.01', (10, 95, 40, 50)),
    ('02.00', (5, 99, 30, 50)),
    ('02.01', (5, 99, 30, 50)),
    ('03.00', (10, 95, 70, 45)),
    ('04.00', (5, 97, 60, 50)),
    ('04.01', (5, 95, 60, 50)),
)


def _nmf_params_for(dataset):
    """Return the (k, percentile, max_iter, chunk_size) tuple for a dataset name, or None."""
    for dataset_id, params in _NMF_PARAMS_BY_DATASET:
        if dataset_id in dataset:
            return params
    return None


def main(arg):
    """
    Run NMF with per-dataset settings on every ``neuro*`` entry of
    ``arg.test`` and write the merged regions to
    ``arg.outputDir + 'submission.json'``.

    Both ``arg.test`` and ``arg.outputDir`` are used as plain string
    prefixes, so they need to end with a path separator.

    A ``neuro*`` entry whose name contains none of the known dataset ids is
    reported and skipped. Previously such an entry either failed with a
    NameError or silently reused the settings of the dataset processed
    before it.
    """
    datasets = os.listdir(arg.test)
    submission = []
    for dataset in datasets:
        if not dataset.startswith('neuro'):
            continue

        params = _nmf_params_for(dataset)
        if params is None:
            print('skipping dataset without tuned parameters:' + dataset)
            continue
        k_param, percentile_param, max_iter_param, chunk_size_param = params

        print('processing dataset:' + dataset)
        print('loading')
        data = td.images.fromtif(arg.test + dataset + '/images', ext='tiff')

        print('analyzing')
        algorithm = NMF(k=k_param, percentile=percentile_param,
                        max_iter=max_iter_param, overlap=0.1)
        model = algorithm.fit(data,
                              chunk_size=(chunk_size_param, chunk_size_param),
                              padding=(25, 25))
        merged = model.merge(0.1)
        print('found %g regions' % merged.regions.count)

        regions = [{'coordinates': region.coordinates.tolist()}
                   for region in merged.regions]
        submission.append({'dataset': dataset, 'regions': regions})

    print('writing results')
    with open(arg.outputDir + 'submission.json', 'w') as f:
        f.write(json.dumps(submission))
from extraction.utils import make_gaussian
from extraction import NMF
import matplotlib.pyplot as plt
from showit import image

# Generate noisy synthetic data.
data = make_gaussian(noise=0.5)

# Fit a model using a single (100, 200) chunk size.
model = NMF().fit(data, chunk_size=(100, 200))

# Show the estimated sources, outlined on top of the halved mean image.
background = data.mean().toarray() / 2
overlay = model.regions.mask(
    (100, 200), fill=None, stroke='deeppink', base=background)
image(overlay)
plt.show()
"""NMF for the neuron images. NMF(non-negative factorization) is the factorization method for a matrix to find features from it. For the neuron images, it would identify the possible neurons that can spark in the given scenario. The implementation uses the thunder API, which can do NMF parallel with Spark. The code is more or less similar to the given neuron example link in the references except updated parameters and use of spark. Example: How to run:: $ python nmf.py -a<Download Path> -b<File Names for the files to download(comma separated) (e.g neurofinder.00.00.test,neurofinder.00.01.test)> -c<Storage Path in local machine> -d<Output Path in local machine> -e<Spark parameters settings (comma separated) (e.g. spark.driver.memory=6G,spark.executor.memory=6G)> Todo: Remove parameter hardcodings for NMF parameters. References: NMF Wiki: https://en.wikipedia.org/wiki/Non-negative_matrix_factorization Thunder extraction API : https://github.com/thunder-project/thunder-extraction Neuron Result comparison : https://github.com/codeneuro/neurofinder NMF example for neuron images in thunder: https://gist.github.com/freeman-lab/330183fdb0ea7f4103deddc9fae18113 """
def test_nmf_one(eng):
    """Fit NMF (using the ``block_size`` keyword) on a single synthetic
    source and compare the recovered centre with the true centre."""
    data, series, truth = make_gaussian(
        n=1, noise=0.5, seed=42, engine=eng, withparams=True)

    model = NMF().fit(data, block_size=(100,200))

    assert model.regions.count == 1
    expected_center = truth.regions.center
    assert allclose(model.regions.center, expected_center, 0.1)
def test_nmf_many(eng):
    """Fit NMF (using the ``block_size`` keyword) on five synthetic sources
    and check that exactly one recovered region lies near each true centre."""
    data, series, truth = make_gaussian(
        n=5, noise=0.5, seed=42, engine=eng, withparams=True)

    model = NMF().fit(data, block_size=(100,200))

    assert model.regions.count == 5
    distances = cdist(model.regions.center, truth.regions.center)
    assert allclose(sum(distances < 10), [1, 1, 1, 1, 1])
path = "/home/vyom/UGA/DSP/Project3/data/test/" onlyfiles = [f for f in listdir(path)] # # Creating list of thunder image Vectors of all dataset data = list() for i in range(len(onlyfiles)): data.append( td.images.fromtif(path=path + onlyfiles[i] + '/images', engine=sc, ext="tiff")) print("DATA READ!") # # Creating NMF model algorithm = NMF(k=10, max_iter=20, percentile=95, overlap=0.1) # # Fitting models for each dataset model = list() for i in range(len(data)): model.append(algorithm.fit(data[i], chunk_size=(50, 50))) merged = list() for i in range(len(model)): merged.append(model[i].merge(overlap=0.1)) # Saving region coordinates as model: for i in range(len(merged)): coordinates = [{
# In[4]:

# The data path is hard-coded for now; when this runs as a Python file it
# could be taken as an input argument instead.
data = td.images.fromtif(
    path='/home/hiten/Desktop/neurofinder.03.00/images',
    ext="tiff",
    engine=sc)
print("DATA READ!")

# # Creating NMF model

# In[15]:

# Create the model; try different values of k and percentile to improve results.
algorithm = NMF(k=5, percentile=95, max_iter=30, overlap=0.1)

# # Fitting models for each dataset

# In[16]:

# Fit the model to the data.
model = algorithm.fit(data, chunk_size=(50, 50))

# In[17]:

# Merge regions that overlap.
merged = model.merge(overlap=0.1)

# In[18]:
from pyspark import SparkContext
from extraction.utils import make_gaussian
from extraction import NMF

# Generate synthetic data on a Spark engine.
sc = SparkContext()
data = make_gaussian(engine=sc)

# Fit a model.
model = NMF().fit(data)

# Extract sources by transforming the data.
sources = model.transform(data)

# Parenthesised single-argument form is valid on Python 2 and Python 3;
# the bare print statement is a syntax error on Python 3.
print(model.regions.count)