Example 1
def main(setName=['00.00', '00.01','01.00','01.01','02.00','02.01','03.00','04.00','04.01'],
            base='caesar', _k=5, _percentile=99, _max_iter=50, _overlap=0.1, _chunk_size=32,
            _padding=25, _merge=0.1):
    '''
    Main method for NMF approach. This is a wrapper built upon the original pipeline of NMF in Thunder Extraction.
    The code for putting data into json files is from:
    https://gist.github.com/freeman-lab/330183fdb0ea7f4103deddc9fae18113
    '''
    submission = []
    for data in setName:
        images = ThunderNMF.load(data, base)
        images = ThunderNMF.grayScale(images)
        print('The shape of each training image after preprocessing is {}'.format(images[1].shape))
        print('Applying median filter for {}.test'.format(data))
        images = ThunderNMF.medianFilter(images)
        print('Applying NMF for {}.test...'.format(data))
        algorithm = NMF(k=_k, percentile=_percentile, max_iter=_max_iter, overlap=_overlap)
        model = algorithm.fit(images, chunk_size=(_chunk_size,_chunk_size), padding=(_padding,_padding))
        print('Merging regions for {}.test...'.format(data))
        merged = model.merge(_merge)
        regions = [{'coordinates': region.coordinates.tolist()} for region in merged.regions]
        result = {'dataset': '{}.test'.format(data), 'regions': regions}
        submission.append(result)

        # show a message for processing
        print('Completed processing results for {}.test'.format(data))

    with open('submission.json', 'w') as f:
        f.write(json.dumps(submission))
    print('Done!')
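Example 1 calls a `ThunderNMF` helper that is defined elsewhere in that project. A minimal hypothetical stand-in, inferred purely from the call sites above; the path layout and filter choices are assumptions:

import json

import thunder as td
from extraction import NMF


class ThunderNMF:
    """Hypothetical stand-in for the project's preprocessing helpers,
    inferred from how Example 1 calls them."""

    @staticmethod
    def load(name, base):
        # e.g. read the tiff stack for dataset `name` under folder `base`
        return td.images.fromtif('{}/neurofinder.{}.test/images'.format(base, name), ext='tiff')

    @staticmethod
    def grayScale(images):
        # normalize intensities; identity here as a placeholder
        return images

    @staticmethod
    def medianFilter(images):
        return images.median_filter(size=2)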
Example 2

def test_nmf_many_padding(eng):
    data, series, truth = make_gaussian(n=5, noise=0.5, seed=42, engine=eng, withparams=True)
    algorithm = NMF()
    model = algorithm.fit(data, chunk_size=(50, 100), padding=(25, 25))
    distances = cdist(model.regions.center, truth.regions.center)
    assert model.regions.count == 10
    assert allclose(sum(distances < 10), [2, 1, 2, 4, 1])
Example 3
    def get_output(self):
        """
        Writes output of NMF model into JSON file with the name
        `submission.json`
        """
        submission = []
        for dataset in self.datasets:
            print('Loading dataset: %s' % dataset)
            dataset_path = 'neurofinder.' + dataset
            path = os.path.join('D:/Spring2019/DataSciencePracticum/p3', dataset_path, 'images')
            
            # Getting the images data from path
            # Returns only first image of dataset if test is True
            if self.test:
                data = td.images.fromtif(path, ext='tiff').first()
            else:
                data = td.images.fromtif(path, ext='tiff')

            nmf = NMF(k=self.k, percentile=self.percentile, max_iter=self.max_iter, 
                overlap=0.1)
            
            # Fitting on the given dataset
            model = nmf.fit(data, chunk_size=(100,100), padding=(25,25))
            merged = model.merge(self.merge_ratio)
            
            # Storing found regions in the required format
            regions = [{'coordinates': region.coordinates.tolist()} for region in merged.regions]
            result = {'dataset': dataset, 'regions': regions}
            submission.append(result)
        # Writing the results to submission.json
        with open('submission.json', 'w') as f:
            f.write(json.dumps(submission))
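`get_output` is a method, so it assumes instance state set up elsewhere. A minimal hypothetical constructor, with attribute names inferred from the method body:

import os
import json

import thunder as td
from extraction import NMF


class NMFSegmenter:
    """Hypothetical owner class for get_output; only the attributes
    the method actually reads are set here."""

    def __init__(self, datasets, k=5, percentile=99, max_iter=50,
                 merge_ratio=0.1, test=False):
        self.datasets = datasets      # e.g. ['00.00.test', '00.01.test']
        self.k = k                    # components to estimate per block
        self.percentile = percentile  # thresholding percentile
        self.max_iter = max_iter      # maximum NMF iterations
        self.merge_ratio = merge_ratio
        self.test = test              # if True, fit only the first image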
Example 4

def test_merging(eng):
    data, series, truth = make_gaussian(n=20, seed=42, noise=0.5, engine=eng, withparams=True)
    algorithm = NMF(k=5, percentile=95, max_iter=50, overlap=0.1)
    model = algorithm.fit(data, chunk_size=(50,100), padding=(15,15))
    assert model.regions.count > 20
    assert model.merge(overlap=0.5).regions.count <= 20
    assert model.merge(overlap=0.1).regions.count < 18
Example 5
def main(arg):
    datasets = os.listdir(arg.test)
    submission = []
    for dataset in datasets:
        if dataset.startswith('neuro'):
            print('processing dataset: ' + dataset)
            print('loading')
            data = td.images.fromtif(arg.test + dataset + '/images',
                                     ext='tiff')
            print('analyzing')
            algorithm = NMF(k=10, percentile=99, max_iter=30, overlap=0.2)
            model = algorithm.fit(data,
                                  chunk_size=(100, 100),
                                  padding=(25, 25))
            merged = model.merge(0.2)
            print('found %g regions' % merged.regions.count)
            regions = [{
                'coordinates': region.coordinates.tolist()
            } for region in merged.regions]
            result = {'dataset': dataset, 'regions': regions}
            submission.append(result)

    print('writing results')
    with open(arg.outputDir + 'submission.json', 'w') as f:
        f.write(json.dumps(submission))
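`main` reads `arg.test` and `arg.outputDir` from a parsed command line (Example 19 below does the same). A plausible argparse setup, with argument names inferred from the body and defaults that are pure placeholders:

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='NMF neuron segmentation')
    # both paths are concatenated directly, so they need trailing slashes
    parser.add_argument('--test', default='data/',
                        help='directory containing the neurofinder.* datasets')
    parser.add_argument('--outputDir', default='output/',
                        help='directory that will receive submission.json')
    main(parser.parse_args())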
Example 6
def main(config_data):

    data = td.images.fromtif(path=config_data["input_path"],
                             engine=sc,
                             npartitions=int(config_data["npartitions"]))
    ############################################################
    # Reduce noise in each image with a Gaussian filter
    ############################################################
    data = data.map(lambda x: gaussian_filter(
        x, sigma=float(config_data["sigma"]), order=0))

    ####################################################################################################
    # Motion correction via image registration; this step aligns the images to a mean reference
    ####################################################################################################
    reference = data.mean().toarray()
    algorithmMC = CrossCorr()
    model = algorithmMC.fit(data, reference)
    shifts = model.transformations
    registered = model.transform(data)

    ####################################################################################
    # Code for Local Non-negative Matrix Factorization for Image Extraction
    ####################################################################################

    algorithm = NMF(k=int(config_data["k"]),
                    percentile=int(config_data["percentile"]),
                    min_size=int(config_data["min_size"]),
                    max_iter=int(config_data["max_iter_nmf"]),
                    overlap=float(config_data["overlap_nmf"]))

    model = algorithm.fit(registered,
                          chunk_size=(int(config_data["chunk_size_1"]),
                                      int(config_data["chunk_size_2"])),
                          padding=(int(config_data["padding_1"]),
                                   int(config_data["padding_2"])))

    ####################################################################################
    # Merge the overlapping spatial regions extracted by NMF into final ROIs
    ####################################################################################
    merged = model.merge(overlap=float(config_data["overlap_merge"]),
                         max_iter=int(config_data["max_iter_merge"]),
                         k_nearest=int(config_data["k_nearest"]))

    print('Total number of regions found: %g' % merged.regions.count)

    #####################################################################
    # Dump the identified ROI coordinates to a JSON file
    #####################################################################
    regions = [{
        'coordinates': region.coordinates.tolist()
    } for region in merged.regions]

    result = {'dataset': config_data["dataset"], 'regions': regions}
    submission = [result]  # single-dataset run; initialize the submission list here
    with open(config_data["output"] + '.json', 'w') as f:
        f.write(json.dumps(submission))
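The pipeline is driven entirely by `config_data`, and it also assumes a live SparkContext `sc`, `from scipy.ndimage import gaussian_filter`, `from registration import CrossCorr`, and `from extraction import NMF`. A hypothetical config covering every key the code reads; the values are illustrative, not tuned:

config_data = {
    'input_path': 'data/neurofinder.00.00/images',
    'npartitions': '8',      # everything is cast with int()/float(), so strings work
    'sigma': '1.0',
    'k': '5',
    'percentile': '99',
    'min_size': '20',
    'max_iter_nmf': '50',
    'overlap_nmf': '0.1',
    'chunk_size_1': '100',
    'chunk_size_2': '100',
    'padding_1': '25',
    'padding_2': '25',
    'overlap_merge': '0.1',
    'max_iter_merge': '2',
    'k_nearest': '5',
    'dataset': '00.00.test',
    'output': 'submission',  # the code appends '.json'
}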
Example 7
def test_nmf_one(eng):
    data, series, truth = make_gaussian(n=1,
                                        noise=0.5,
                                        seed=42,
                                        engine=eng,
                                        withparams=True)
    algorithm = NMF()
    model = algorithm.fit(data, chunk_size=(100, 200))
    assert model.regions.count == 1
    assert allclose(model.regions.center, truth.regions.center, 0.1)
Example 8
def compute_nmf(datasets, var_data_path, var_num_components, var_percentile,
                var_max_iter, var_overlap, var_chunk_size):
    """
    This code is a modified version of the baseline code given below.
    @ https://gist.github.com/freeman-lab/330183fdb0ea7f4103deddc9fae18113/
    This function performs nmf and saves the results.

    Parameters
    ----------
    datasets : List
        List of dataset names.
    var_data_path: String
        Path to data folder
    var_num_components: Int
        The number of components to estimate per block.
    var_percentile: Int
        The value for thresholding.
    var_max_iter : Int
        The maximum number of algorithm iterations.
    var_overlap: Float
        The value for determining whether to merge.
    var_chunk_size: Int
        The chunk size.
    """

    if var_data_path[-1] != '/':
        var_data_path = var_data_path + '/'

    submission = []

    for dataset in datasets:
        path = var_data_path + dataset
        data = td.images.fromtif(path + '/images', ext='tiff')
        print('done')
        algorithm = NMF(k=var_num_components,
                        percentile=var_percentile,
                        max_iter=var_max_iter,
                        overlap=var_overlap)
        model = algorithm.fit(data,
                              chunk_size=(var_chunk_size, var_chunk_size))
        print('done')
        merged = model.merge(var_overlap)
        print('done')
        print('found %g regions' % merged.regions.count)
        regions = [{
            'coordinates': region.coordinates.tolist()
        } for region in merged.regions]
        # Slice the name at index 12 to strip the 'neurofinder.' prefix
        result = {'dataset': dataset[12:], 'regions': regions}
        submission.append(result)

    print('writing results')

    with open(var_data_path + 'submission.json', 'w') as f:
        f.write(json.dumps(submission))
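A hedged invocation sketch; note that each entry in `datasets` must carry the `neurofinder.` prefix, since the code strips the first 12 characters when naming the result:

compute_nmf(datasets=['neurofinder.00.00.test', 'neurofinder.00.01.test'],
            var_data_path='data',
            var_num_components=5,
            var_percentile=99,
            var_max_iter=50,
            var_overlap=0.1,
            var_chunk_size=32)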
Example 9
def test_nmf_many_padding(eng):
    data, series, truth = make_gaussian(n=5,
                                        noise=0.5,
                                        seed=42,
                                        engine=eng,
                                        withparams=True)
    algorithm = NMF()
    model = algorithm.fit(data, chunk_size=(50, 100), padding=(25, 25))
    distances = cdist(model.regions.center, truth.regions.center)
    assert model.regions.count == 10
    assert allclose(sum(distances < 10), [2, 1, 2, 4, 1])
Example 10
def test_nmf_many_chunked(eng):
    data, series, truth = make_gaussian(n=5,
                                        noise=0.5,
                                        seed=42,
                                        engine=eng,
                                        withparams=True)
    algorithm = NMF()
    model = algorithm.fit(data, chunk_size=(50, 100))
    assert model.regions.count == 5
    assert allclose(
        sum(cdist(model.regions.center, truth.regions.center) < 10),
        [1, 1, 1, 1, 1])
Example 11

def model_thunder(img_res):
    print("the model_creation")
    submission=[]
    algorithm = NMF(k=5, percentile=99, max_iter=10, overlap=0.1)
    model = algorithm.fit(img_res, chunk_size=(50,50), padding=(25,25))
    merged = model.merge(0.1)
    print('found %g regions' % merged.regions.count)
    regions = [{'coordinates': region.coordinates.tolist()} for region in merged.regions]
    # 'onlyfiles' is assumed to be a module-level list of dataset names
    result = {'dataset': onlyfiles, 'regions': regions}
    submission.append(result)
    print('writing results')
    with open('submission4.json', 'w') as f:
        f.write(json.dumps(submission))
    return
Example 12
def learn_data(self):
    for d in self.data:
        path = self.folder + d
        print("Analyzing " + path)
        image_data = td.images.fromtif(path + '/images', ext='tiff')
        algorithm = NMF(k=10, percentile=99, max_iter=50, overlap=0.1)
        model = algorithm.fit(image_data,
                              chunk_size=(50, 50),
                              padding=(25, 25))
        merged = model.merge(0.1)
        regions = [{
            'coordinates': region.coordinates.tolist()
        } for region in merged.regions]
        # record the current dataset name, not the whole list
        result = {'dataset': d, 'regions': regions}
        self.submission.append(result)
Example 13
def applyNMF(args):
    print("Applying NMF")
    global imageList
    global chunkSize
    global padding
    global regions
    
    model = NMF(k=args.nmf_k, max_iter=args.nmf_maxIter, 
                percentile=args.nmf_percentile, overlap=args.nmf_overlap, 
                min_size=args.nmf_minSize)
    model = model.fit(imageList, chunk_size=chunkSize, 
                      padding=padding)
    merged = model.merge(overlap=args.nmf_overlap, max_iter=args.fit_mergeIter, 
                         k_nearest=args.merge_kNearest)
    regions = [{'coordinates': region.coordinates.tolist()} 
        for region in merged.regions]
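`applyNMF` pulls everything from module-level globals plus an argparse namespace. A hypothetical setup, with all names inferred from the body:

import argparse

import thunder as td
from extraction import NMF

# module-level state read (and written) by applyNMF
imageList = td.images.fromtif('data/neurofinder.00.00/images', ext='tiff')
chunkSize = (50, 50)
padding = (25, 25)
regions = None

parser = argparse.ArgumentParser()
parser.add_argument('--nmf_k', type=int, default=5)
parser.add_argument('--nmf_maxIter', type=int, default=50)
parser.add_argument('--nmf_percentile', type=int, default=99)
parser.add_argument('--nmf_overlap', type=float, default=0.1)
parser.add_argument('--nmf_minSize', type=int, default=20)
parser.add_argument('--fit_mergeIter', type=int, default=5)
parser.add_argument('--merge_kNearest', type=int, default=20)
applyNMF(parser.parse_args())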
Example 14
def main(datasets, base_dir, output_dir="output/nmf", gaussian_blur=0, verbose=False,
         n_components=5, max_iter=20, threshold=99, overlap=0.1, chunk_size=(32, 32), padding=(20, 20), merge_iter=5):
    """
    Performs neuron segmentation using the NMF implementation provided by thunder-extraction.
    Results will be written to <output_dir>/nmf-output.json

    :param datasets: list of datasets (by name) to generate results for
    :param base_dir: directory that contains the datasets
    :param output_dir: directory where output file should be written
    :param n_components: number of components to estimate per block
    :param threshold: value for thresholding (higher means more thresholding)
    :param overlap: value for determining whether to merge (higher means fewer merges)
    :param chunk_size: process images in chunks of this size
    :param padding: add this much padding to each chunk
    :param merge_iter: number of iterations to perform when merging regions
    :return: list of Result objects representing the result on each dataset
    """
    results = []
    for dataset_name in datasets:
        if verbose: print("Processing dataset %s" % dataset_name)
        dataset = preprocess.load(dataset_name, base_dir)
        if verbose: print("Dataset loaded.")

        if gaussian_blur > 0:
            chunks = np.array_split(dataset.images, 30)
            summaries = np.array(list(map(preprocess.compute_summary, chunks)))
            dataset.images = preprocess.gaussian_blur(summaries, gaussian_blur)

        model = NMF(k=n_components, max_iter=max_iter, percentile=threshold, overlap=overlap, min_size=20)
        model = model.fit(dataset.images, chunk_size=chunk_size, padding=padding)
        merged = model.merge(overlap=overlap, max_iter=merge_iter, k_nearest=20)
        regions = [{'coordinates': region.coordinates.tolist()} for region in merged.regions]

        result = Result(name=dataset_name, regions=regions)
        results.append(result)

        if verbose: print("Done with dataset %s" % dataset_name)

        if dataset.has_ground_truth() and verbose:
            f_score = result.f_score(dataset.true_regions)
            print("Combined score for dataset %s: %0.4f" % (dataset_name, f_score))

    if verbose: print("Writing results to %s" % output_dir)
    postprocess.write_results(results, output_dir, name="nmf-output.json")
    return results
Example 15

def NMF_experiments(k_value=5,
                    max_size_value='full',
                    min_size_value=20,
                    percentile_value=99,
                    max_iterations=50,
                    overlap_value=0.1):
    """
	The algorithm takes the following parameters.
	k number of components to estimate per block
	max_size maximum size of each region
	min_size minimum size for each region
	max_iter maximum number of algorithm iterations
	percentile value for thresholding (higher means more thresholding)
	overlap value for determining whether to merge (higher means fewer merges)
	"""
    datasets = [
        '00.00.test', '00.01.test', '01.00.test', '01.01.test', '02.00.test',
        '02.01.test', '03.00.test', '04.00.test', '04.01.test'
    ]

    submission = []

    for dataset in datasets:
        print('processing dataset: %s' % dataset)
        print('loading')
        path = '../data/neurofinder.' + dataset
        data = td.images.fromtif(path + '/images', ext='tiff')
        print('analyzing')
        algorithm = NMF(k=k_value,
                        max_size=max_size_value,
                        min_size=min_size_value,
                        percentile=percentile_value,
                        max_iter=max_iterations,
                        overlap=overlap_value)
        model = algorithm.fit(data, chunk_size=(50, 50), padding=(25, 25))
        merged = model.merge(0.1)
        print('found %g regions' % merged.regions.count)
        regions = [{
            'coordinates': region.coordinates.tolist()
        } for region in merged.regions]
        result = {'dataset': dataset, 'regions': regions}
        submission.append(result)
    print('writing results')
    with open('submission.json', 'w') as f:
        f.write(json.dumps(submission))
Example 16
def fit_NMF(data,
            n_comps=3,
            iters=50,
            percentile=95,
            chunk_size=(60, 60),
            overlap=0.1):
    """
    fits nmf to dataset

    Parameters
    ----------
    data : numpy matrix
        the video to which the NMF is fit
    n_comps : int
        number of components to estimate per block
    iters : int
        max number of algorithm iterations
    percentile : int
        the value for thresholding
    chunk_size : tuple
        width and height of chunk, two values
    overlap : float
        value determining whether to merge

    Returns
    -------
    regions : list
        a list of regions extracted by NMF
    """

    model = NMF(k=n_comps,
                max_iter=iters,
                percentile=percentile,
                overlap=overlap)
    model = model.fit(data, chunk_size=chunk_size, padding=(20, 20))
    merged = model.merge(overlap=overlap, max_iter=iters, k_nearest=20)
    regions = [{
        'coordinates': region.coordinates.tolist()
    } for region in merged.regions]

    return regions
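A hedged usage sketch for `fit_NMF`; the path and the submission format mirror the other examples here and are assumptions:

import json

import thunder as td
from extraction import NMF

data = td.images.fromtif('data/neurofinder.00.00/images', ext='tiff')
regions = fit_NMF(data, n_comps=3, iters=50, percentile=95,
                  chunk_size=(60, 60), overlap=0.1)
with open('submission.json', 'w') as f:
    f.write(json.dumps([{'dataset': '00.00.test', 'regions': regions}]))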
Example 17

def nmf(args):

    dataset = args.dataset
    if not dataset.endswith('/'):
        dataset += '/'
    dirs = sorted(glob(dataset + '*/'))

    for d in dirs:

        print("Working on folder", d)

        # Read images from the dataset
        path = d + 'images/'
        V = td.images.fromtif(path, ext='tiff')

        # Denoising and smoothing filters; these return new images objects,
        # so the results must be reassigned
        V = V.median_filter()
        V = V.gaussian_filter()

        # Applying NMF on the data
        algorithm = NMF(k=10, max_iter=30, percentile=99)
        model = algorithm.fit(V, chunk_size=(50, 50), padding=(25, 25))
        merged = model.merge(overlap=0.1, max_iter=2, k_nearest=5)

        # Extracting ROIs
        roi = [{'coordinates': r.coordinates.tolist()} for r in merged.regions]

        # Extract the dataset name between 'neurofinder.' and the next '/'
        start = d.find('neurofinder.') + 12
        dataset_name = d[start:start + d[start:].find('/')]
        json_string = {'dataset': dataset_name, 'regions': roi}

        # Writing to a JSON file
        output = args.output
        if not os.path.exists(output):
            os.makedirs(output)
        if not output.endswith('/'):
            output += '/'
        with open(output + dataset_name + '.json', 'w') as f:
            f.write(json.dumps(json_string))
Example 18
def model_implementation(datasets, path):
    submission = []
    for dataset in datasets:
        print(path)
        print('processing dataset: %s' % dataset)
        print('loading')
        temp_path = path + dataset
        print(temp_path)
        data = td.images.fromtif(temp_path + '/images', ext='tiff')
        print('analyzing')
        # baseline parameters were k=5, percentile=99, max_iter=50, overlap=0.1
        algorithm = NMF(k=5, percentile=99, max_iter=10, overlap=0.1)
        model = algorithm.fit(data, chunk_size=(50, 50), padding=(25, 25))
        merged = model.merge(0.1)
        print('found %g regions' % merged.regions.count)
        regions = [{
            'coordinates': region.coordinates.tolist()
        } for region in merged.regions]
        result = {'dataset': dataset, 'regions': regions}
        submission.append(result)
    print('writing results')
    with open('submission.json', 'w') as f:
        f.write(json.dumps(submission))
Example 19
def main(arg):
    datasets = os.listdir(arg.test)
    submission = []
    for dataset in datasets:
        if dataset.startswith('neuro'):
            # Per-dataset NMF hyperparameters: k, percentile, max_iter, chunk size
            param_table = {
                '00.00': (10, 95, 60, 50),
                '00.01': (10, 95, 60, 50),
                '01.00': (5, 95, 70, 50),
                '01.01': (10, 95, 40, 50),
                '02.00': (5, 99, 30, 50),
                '02.01': (5, 99, 30, 50),
                '03.00': (10, 95, 70, 45),
                '04.00': (5, 97, 60, 50),
                '04.01': (5, 95, 60, 50),
            }
            key = next(k for k in param_table if k in dataset)
            k_param, percentile_param, max_iter_param, chunk_size_param = param_table[key]
            print('processing dataset: ' + dataset)
            print('loading')
            data = td.images.fromtif(arg.test + dataset + '/images',
                                     ext='tiff')
            print('analyzing')
            algorithm = NMF(k=k_param,
                            percentile=percentile_param,
                            max_iter=max_iter_param,
                            overlap=0.1)
            model = algorithm.fit(data,
                                  chunk_size=(chunk_size_param,
                                              chunk_size_param),
                                  padding=(25, 25))
            merged = model.merge(0.1)
            print('found %g regions' % merged.regions.count)
            regions = [{
                'coordinates': region.coordinates.tolist()
            } for region in merged.regions]
            result = {'dataset': dataset, 'regions': regions}
            submission.append(result)

    print('writing results')
    with open(arg.outputDir + 'submission.json', 'w') as f:
        f.write(json.dumps(submission))
Example 20

#Declare the output array.
output_array = []
#Get the list of directories inside the data path.
for root, dirs, files in os.walk(store_data_path.value):
    for dir in dirs:
        #Load data from the source path and convert into an RDD for parallelization.
        images = td.images.fromtif(store_data_path.value + '/' + dir +
                                   '/images',
                                   ext='tiff')

        #Run the NMF over the algorithms.
        #TODO: Set the parameters from command-line
        nmf_algo = NMF(k=10,
                       max_iter=50)  #Use default percentile 95 and overlap.
        nmf_model = nmf_algo.fit(images, chunk_size=(64, 64),
                                 padding=(8, 8))  #Set after some experiments.
        nmf_merge = nmf_model.merge()

        #Collect the output into the final array; the merged regions are a
        #local array, not an RDD, so sequential operations are fine here.
        nmf_regions = [{
            'coordinates': region.coordinates.tolist()
        } for region in nmf_merge.regions]
        output = {'dataset': dir, 'regions': nmf_regions}
        output_array.append(output)
    break  #Only process the top-level directories.

#Writing the output file.
with open(store_data_path.value + '/output.json', 'w') as output_file:
    output_file.write(json.dumps(output_array))
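`store_data_path` is dereferenced with `.value`, which suggests a Spark broadcast variable. A hypothetical setup for it and the SparkContext the snippet assumes:

from pyspark import SparkContext

sc = SparkContext(appName='nmf-neurofinder')
# hypothetical: broadcast the data root so executors can read it as well
store_data_path = sc.broadcast('data')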
Example 21
def test_nmf_one(eng):
    data, series, truth = make_gaussian(n=1, noise=0.5, seed=42, engine=eng, withparams=True)
    algorithm = NMF()
    model = algorithm.fit(data, chunk_size=(100, 200))
    assert model.regions.count == 1
    assert allclose(model.regions.center, truth.regions.center, 0.1)
Example 22
def test_nmf_many(eng):
    data, series, truth = make_gaussian(n=5, noise=0.5, seed=42, engine=eng, withparams=True)
    algorithm = NMF()
    model = algorithm.fit(data, chunk_size=(100, 200))
    assert model.regions.count == 5
    assert allclose(sum(cdist(model.regions.center, truth.regions.center) < 10), [1, 1, 1, 1, 1])
Example 23
                         ext="tiff")
print("DATA READ!")

# # Creating NMF model

# Create the model; tune k and percentile to get more efficient results
algorithm = NMF(k=5, max_iter=30, percentile=95, overlap=0.1)

# # Fitting models for each dataset

# Fit the model to the data
model = algorithm.fit(data, chunk_size=(50, 50))

# Merge overlapping regions
merged = model.merge(overlap=0.1)

# Save the region coordinates in a list and pass it to jsonString
coordinates = [{'coordinates': x.coordinates.tolist()} for x in merged.regions]

jsonString = {'dataset': "03.00.test", 'regions': coordinates}
Example 24
# 'onlyfiles' lists the dataset directory names and is assumed to be defined earlier
data = []
for i in range(len(onlyfiles)):
    data.append(
        td.images.fromtif(path=path + onlyfiles[i] + '/images',
                          engine=sc,
                          ext="tiff"))
print("DATA READ!")

# # Creating NMF model

algorithm = NMF(k=10, max_iter=20, percentile=95, overlap=0.1)

# # Fitting models for each dataset

model = list()
for i in range(len(data)):
    model.append(algorithm.fit(data[i], chunk_size=(50, 50)))

merged = list()
for i in range(len(model)):
    merged.append(model[i].merge(overlap=0.1))

# Saving region coordinates for each merged model:

for i in range(len(merged)):
    coordinates = [{
        'coordinates': x.coordinates.tolist()
    } for x in merged[i].regions]
    jsonString = {
        'dataset': onlyfiles[i].replace("neurofinder.", ""),
        'regions': coordinates
    }
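Neither Example 23 nor Example 24 actually writes its `jsonString` to disk. A minimal hedged completion, where the one-file-per-dataset naming is an assumption:

import json

def write_submission(json_string, filename='submission.json'):
    """Dump a {'dataset': ..., 'regions': ...} record to a JSON file."""
    with open(filename, 'w') as f:
        f.write(json.dumps(json_string))

# e.g. inside Example 24's final loop:
#     write_submission(jsonString, jsonString['dataset'] + '.json')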