def test_sima(self):
    """ (BlockMethod) with SIMA strategy """
    # NOTE: this test was brittle and failed non-deterministically with any
    # more than one source
    import sima.segment

    # construct the SIMA strategy
    simaStrategy = sima.segment.STICA(components=1)
    simaStrategy.append(sima.segment.SparseROIsFromMasks(min_size=20))
    simaStrategy.append(sima.segment.SmoothROIBoundaries())
    simaStrategy.append(sima.segment.MergeOverlapping(threshold=0.5))

    tsc = ThunderContext(self.sc)
    data = tsc.makeExample('sources', dims=(60, 60), centers=[[20, 15]], noise=0.5, seed=42)

    # create and fit the thunder extraction strategy
    strategy = SourceExtraction('sima', simaStrategy=simaStrategy)
    model = strategy.fit(data, size=(30, 30))

    assert(model.count == 1)
    # check that the one center is recovered
    ep = 1.5
    assert(model[0].distance([20, 15]) < ep)
def test_local_max(self):
    """ (FeatureMethod) localmax with defaults """
    tsc = ThunderContext(self.sc)
    data = tsc.makeExample('sources', dims=[60, 60], centers=[[10, 10], [40, 40]], noise=0.0, seed=42)
    model = SourceExtraction('localmax').fit(data)

    # order is irrelevant, but one of these must be true
    cond1 = (model[0].distance([10, 10]) == 0) and (model[1].distance([40, 40]) == 0)
    cond2 = (model[0].distance([40, 40]) == 0) and (model[1].distance([10, 10]) == 0)
    assert(cond1 or cond2)
def test_nmf(self):
    """ (BlockMethod) nmf with defaults """
    tsc = ThunderContext(self.sc)
    data = tsc.makeExample('sources', dims=(60, 60), centers=[[20, 20], [40, 40]], noise=0.1, seed=42)
    model = SourceExtraction('nmf', componentsPerBlock=1).fit(data, size=(30, 30))

    # order is irrelevant, but one of these must be true
    ep = 0.50
    cond1 = (model[0].distance([20, 20]) < ep) and (model[1].distance([40, 40]) < ep)
    cond2 = (model[0].distance([40, 40]) < ep) and (model[1].distance([20, 20]) < ep)
    assert(cond1 or cond2)
def test_sima(self):
    """ (BlockMethod) with SIMA strategy """
    import sima.segment

    # construct the SIMA strategy
    simaStrategy = sima.segment.STICA(components=2)
    simaStrategy.append(sima.segment.SparseROIsFromMasks(min_size=20))
    simaStrategy.append(sima.segment.SmoothROIBoundaries())
    simaStrategy.append(sima.segment.MergeOverlapping(threshold=0.5))

    tsc = ThunderContext(self.sc)
    data = tsc.makeExample('sources', dims=(60, 60), centers=[[20, 15], [40, 45]], noise=0.1, seed=42)

    # create and fit the thunder extraction strategy
    strategy = SourceExtraction('sima', simaStrategy=simaStrategy)
    model = strategy.fit(data, size=(30, 30))

    # order is irrelevant, but one of these must be true
    ep = 1.5
    cond1 = (model[0].distance([20, 15]) < ep) and (model[1].distance([40, 45]) < ep)
    cond2 = (model[1].distance([20, 15]) < ep) and (model[0].distance([40, 45]) < ep)
    assert(cond1 or cond2)
if use_existing_parameters == 1:
    with open(Exp_Folder + filename_save_prefix_forICA + '_save_ICA_variables') as f:
        ICA_components_ind, num_ICA_colors_ind, color_map_ind,\
            ICA_components_eachexp, num_ICA_colors_eachexp, color_map_eachexp,\
            ICA_components_allexp, num_ICA_colors_allexp, color_map_allexp, colors_ica = pickle.load(f)

# Go into the main function that does ICA for individual trials
from ica_thunder_analysis import run_analysis_individualexps
from ica_thunder_analysis import run_analysis_eachexp
from ica_thunder_analysis import run_analysis_allexp

from thunder import ThunderContext

print 'Starting Thunder Now. Check console for details'
tsc = ThunderContext.start(appName="thunderICA")

if files_to_do_ICA[0] == 1:
    run_analysis_individualexps(Exp_Folder, filename_save_prefix_forICA, filename_save_prefix_for_textfile,
                                ICA_components_ind, PCA_components_ind, num_ICA_colors_ind, color_map_ind,
                                tsc, redo_ICA, num_fish_used, stimulus_pulse, stimulus_on_time,
                                stimulus_off_time, color_mat, time_baseline, colors_ica)
if files_to_do_ICA[1] == 1:
    run_analysis_eachexp(Exp_Folder, filename_save_prefix_forICA, filename_save_prefix_for_textfile,
                         ICA_components_eachexp, PCA_components_eachexp, num_ICA_colors_eachexp, color_map_eachexp,
                         tsc, redo_ICA, num_fish_used, stimulus_pulse, stimulus_on_time,
                         stimulus_off_time, color_mat, time_baseline, colors_ica)
if files_to_do_ICA[2] == 1:
    run_analysis_allexp(Exp_Folder, filename_save_prefix_forICA, filename_save_prefix_for_textfile,
                        ICA_components_allexp, PCA_components_allexp, num_ICA_colors_allexp, color_map_allexp,
                        tsc, redo_ICA, num_fish_used, stimulus_pulse, stimulus_on_time,
                        stimulus_off_time, color_mat, time_baseline, colors_ica)

############# Save all input parameters
with open(Exp_Folder + filename_save_prefix_forICA + '_save_ICA_variables', 'w') as f:
color_mat = ['#00FFFF', '#0000A0', '#800080', '#FF00FF', '#800000', '#A52A2A']

if use_existing_parameters == 1:
    with open(Exp_Folder + filename_save_prefix + '_save_kmeans_variables') as f:
        kmeans_clusters_ind, kmeans_clusters_eachodor, kmeans_clusters_allodor,\
            time_baseline, ignore_clusters = pickle.load(f)

# Go into the main function that does kmeans for individual trials
from kmeans_thunder_analysis import run_analysis_individualodors
from kmeans_thunder_analysis import run_analysis_eachodor
from kmeans_thunder_analysis import run_analysis_allodor

from thunder import ThunderContext

print 'Starting Thunder Now. Check console for details'
tsc = ThunderContext.start(appName="thunderkmeans")

if files_to_do_kmeans[0] == 1:
    run_analysis_individualodors(Exp_Folder, filename_save_prefix, filename_save_prefix_forkmeanswithPCA,
                                 kmeans_clusters_ind, stimulus_on_time, stimulus_off_time,
                                 tsc, redo_kmeans, time_baseline, redo_kmeans_colormap, ignore_clusters)
if files_to_do_kmeans[1] == 1:
    run_analysis_eachodor(Exp_Folder, filename_save_prefix, filename_save_prefix_forkmeanswithPCA,
                          kmeans_clusters_eachodor, stimulus_on_time, stimulus_off_time,
                          tsc, redo_kmeans, time_baseline, redo_kmeans_colormap, ignore_clusters)
if files_to_do_kmeans[2] == 1:
    run_analysis_allodor(Exp_Folder, filename_save_prefix, filename_save_prefix_forkmeanswithPCA,
                         kmeans_clusters_allodor, stimulus_on_time, stimulus_off_time,
                         tsc, redo_kmeans, time_baseline, redo_kmeans_colormap, ignore_clusters)

############# Save all input parameters
with open(Exp_Folder + filename_save_prefix + '_save_kmeans_variables', 'w') as f:
""" import optparse from thunder import ThunderContext, RegressionModel if __name__ == "__main__": parser = optparse.OptionParser( description="fit a regression model", usage="%prog datafile modelfile outputdir [options]") parser.add_option("--regressmode", choices=("mean", "linear", "bilinear"), default="linear", help="form of regression") opts, args = parser.parse_args() try: datafile = args[0] modelfile = args[1] outputdir = args[2] except IndexError: parser.print_usage() raise Exception("too few arguments") tsc = ThunderContext.start(appName="regress") data = tsc.loadText(datafile) result = RegressionModel.load(modelfile, opts.regressmode).fit(data) outputdir += "-regress" tsc.export(result.select('stats'), outputdir, "stats", "matlab") tsc.export(result.select('betas'), outputdir, "betas", "matlab")
class TestContextLoading(PySparkTestCaseWithOutputDir):
    def setUp(self):
        super(TestContextLoading, self).setUp()
        self.tsc = ThunderContext(self.sc)

    @staticmethod
    def _findTestResourcesDir(resourcesdirname="resources"):
        testdirpath = os.path.dirname(os.path.realpath(__file__))
        testresourcesdirpath = os.path.join(testdirpath, resourcesdirname)
        if not os.path.isdir(testresourcesdirpath):
            raise IOError("Test resources directory "+testresourcesdirpath+" not found")
        return testresourcesdirpath

    def __run_loadStacksAsSeries(self, shuffle):
        rangeary = np.arange(64*128, dtype=np.dtype('int16'))
        filepath = os.path.join(self.outputdir, "rangeary.stack")
        rangeary.tofile(filepath)
        expectedary = rangeary.reshape((128, 64), order='F')

        range_series = self.tsc.loadImagesAsSeries(filepath, dims=(128, 64), shuffle=shuffle)
        range_series_ary = range_series.pack()

        assert_equals((128, 64), range_series.dims.count)
        assert_equals((128, 64), range_series_ary.shape)
        assert_true(np.array_equal(expectedary, range_series_ary))

    def test_loadStacksAsSeriesNoShuffle(self):
        self.__run_loadStacksAsSeries(False)

    def test_loadStacksAsSeriesWithShuffle(self):
        self.__run_loadStacksAsSeries(True)

    def __run_load3dStackAsSeries(self, shuffle):
        rangeary = np.arange(32*64*4, dtype=np.dtype('int16'))
        filepath = os.path.join(self.outputdir, "rangeary.stack")
        rangeary.tofile(filepath)
        expectedary = rangeary.reshape((32, 64, 4), order='F')

        range_series_noshuffle = self.tsc.loadImagesAsSeries(filepath, dims=(32, 64, 4), shuffle=shuffle)
        range_series_noshuffle_ary = range_series_noshuffle.pack()

        assert_equals((32, 64, 4), range_series_noshuffle.dims.count)
        assert_equals((32, 64, 4), range_series_noshuffle_ary.shape)
        assert_true(np.array_equal(expectedary, range_series_noshuffle_ary))

    def test_load3dStackAsSeriesNoShuffle(self):
        self.__run_load3dStackAsSeries(False)

    def test_load3dStackAsSeriesWithShuffle(self):
        self.__run_load3dStackAsSeries(True)

    def __run_loadMultipleStacksAsSeries(self, shuffle):
        rangeary = np.arange(64*128, dtype=np.dtype('int16'))
        filepath = os.path.join(self.outputdir, "rangeary01.stack")
        rangeary.tofile(filepath)
        expectedary = rangeary.reshape((128, 64), order='F')
        rangeary2 = np.arange(64*128, 2*64*128, dtype=np.dtype('int16'))
        filepath = os.path.join(self.outputdir, "rangeary02.stack")
        rangeary2.tofile(filepath)
        expectedary2 = rangeary2.reshape((128, 64), order='F')

        range_series = self.tsc.loadImagesAsSeries(self.outputdir, dims=(128, 64), shuffle=shuffle)
        range_series_ary = range_series.pack()
        range_series_ary_xpose = range_series.pack(transpose=True)

        assert_equals((128, 64), range_series.dims.count)
        assert_equals((2, 128, 64), range_series_ary.shape)
        assert_equals((2, 64, 128), range_series_ary_xpose.shape)
        assert_true(np.array_equal(expectedary, range_series_ary[0]))
        assert_true(np.array_equal(expectedary2, range_series_ary[1]))
        assert_true(np.array_equal(expectedary.T, range_series_ary_xpose[0]))
        assert_true(np.array_equal(expectedary2.T, range_series_ary_xpose[1]))

    def test_loadMultipleStacksAsSeriesNoShuffle(self):
        self.__run_loadMultipleStacksAsSeries(False)

    def test_loadMultipleStacksAsSeriesWithShuffle(self):
        self.__run_loadMultipleStacksAsSeries(True)

    def __run_loadTifAsSeries(self, shuffle):
        tmpary = np.arange(60*120, dtype=np.dtype('uint16'))
        rangeary = np.mod(tmpary, 255).astype('uint8').reshape((60, 120))
        pilimg = Image.fromarray(rangeary)
        filepath = os.path.join(self.outputdir, "rangetif01.tif")
        pilimg.save(filepath)
        del pilimg, tmpary

        range_series = self.tsc.loadImagesAsSeries(self.outputdir, inputformat="tif-stack", shuffle=shuffle)
        range_series_ary = range_series.pack()

        assert_equals((60, 120, 1), range_series.dims.count)
        assert_equals((60, 120), range_series_ary.shape)
        assert_true(np.array_equal(rangeary, range_series_ary))

    @unittest.skipIf(not _have_image, "PIL/pillow not installed or not functional")
    def test_loadTifAsSeriesNoShuffle(self):
        self.__run_loadTifAsSeries(False)

    @unittest.skipIf(not _have_image, "PIL/pillow not installed or not functional")
    def test_loadTifAsSeriesWithShuffle(self):
        self.__run_loadTifAsSeries(True)

    def __run_loadTestTifAsSeries(self, shuffle):
        testresourcesdir = TestContextLoading._findTestResourcesDir()
        imagepath = os.path.join(testresourcesdir, "multilayer_tif", "dotdotdot_lzw.tif")
        testimg_pil = Image.open(imagepath)
        testimg_arys = list()
        testimg_arys.append(pil_to_array(testimg_pil))  # original shape 70, 75
        testimg_pil.seek(1)
        testimg_arys.append(pil_to_array(testimg_pil))
        testimg_pil.seek(2)
        testimg_arys.append(pil_to_array(testimg_pil))

        range_series_noshuffle = self.tsc.loadImagesAsSeries(imagepath, inputformat="tif-stack", shuffle=shuffle)
        range_series_noshuffle_ary = range_series_noshuffle.pack()
        range_series_noshuffle_ary_xpose = range_series_noshuffle.pack(transpose=True)

        assert_equals((70, 75, 3), range_series_noshuffle.dims.count)
        assert_equals((70, 75, 3), range_series_noshuffle_ary.shape)
        assert_equals((3, 75, 70), range_series_noshuffle_ary_xpose.shape)
        assert_true(np.array_equal(testimg_arys[0], range_series_noshuffle_ary[:, :, 0]))
        assert_true(np.array_equal(testimg_arys[1], range_series_noshuffle_ary[:, :, 1]))
        assert_true(np.array_equal(testimg_arys[2], range_series_noshuffle_ary[:, :, 2]))
        assert_true(np.array_equal(testimg_arys[0].T, range_series_noshuffle_ary_xpose[0]))
        assert_true(np.array_equal(testimg_arys[1].T, range_series_noshuffle_ary_xpose[1]))
        assert_true(np.array_equal(testimg_arys[2].T, range_series_noshuffle_ary_xpose[2]))

    @unittest.skipIf(not _have_image, "PIL/pillow not installed or not functional")
    def test_loadTestTifAsSeriesNoShuffle(self):
        self.__run_loadTestTifAsSeries(False)

    @unittest.skipIf(not _have_image, "PIL/pillow not installed or not functional")
    def test_loadTestTifAsSeriesWithShuffle(self):
        self.__run_loadTestTifAsSeries(True)

    def __run_loadMultipleTifsAsSeries(self, shuffle):
        tmpary = np.arange(60*120, dtype=np.dtype('uint16'))
        rangeary = np.mod(tmpary, 255).astype('uint8').reshape((60, 120))
        pilimg = Image.fromarray(rangeary)
        filepath = os.path.join(self.outputdir, "rangetif01.tif")
        pilimg.save(filepath)
        tmpary = np.arange(60*120, 2*60*120, dtype=np.dtype('uint16'))
        rangeary2 = np.mod(tmpary, 255).astype('uint8').reshape((60, 120))
        pilimg = Image.fromarray(rangeary2)
        filepath = os.path.join(self.outputdir, "rangetif02.tif")
        pilimg.save(filepath)
        del pilimg, tmpary

        range_series = self.tsc.loadImagesAsSeries(self.outputdir, inputformat="tif-stack", shuffle=shuffle)
        range_series_ary = range_series.pack()
        range_series_ary_xpose = range_series.pack(transpose=True)

        assert_equals((60, 120, 1), range_series.dims.count)
        assert_equals((2, 60, 120), range_series_ary.shape)
        assert_equals((2, 120, 60), range_series_ary_xpose.shape)
        assert_true(np.array_equal(rangeary, range_series_ary[0]))
        assert_true(np.array_equal(rangeary2, range_series_ary[1]))
        assert_true(np.array_equal(rangeary.T, range_series_ary_xpose[0]))
        assert_true(np.array_equal(rangeary2.T, range_series_ary_xpose[1]))

    @unittest.skipIf(not _have_image, "PIL/pillow not installed or not functional")
    def test_loadMultipleTifsAsSeriesNoShuffle(self):
        self.__run_loadMultipleTifsAsSeries(False)

    @unittest.skipIf(not _have_image, "PIL/pillow not installed or not functional")
    def test_loadMultipleTifsAsSeriesWithShuffle(self):
        self.__run_loadMultipleTifsAsSeries(True)
# Load thunder
from pyspark import SparkContext, SparkConf
from thunder import Colorize, ThunderContext
image = Colorize.image
import os

# Load scikit-image
from skimage.viewer import ImageViewer as skImageViewer

# Load Spark context
conf = SparkConf() \
    .setAppName("Display face") \
    .set("spark.executor.memory", "5g")
sc = SparkContext(conf=conf)

# Load Thunder context
tsc = ThunderContext(sc)

# Load image using thunder
data = tsc.loadImages(os.path.dirname(os.path.realpath(__file__)) + '/mush.png', inputFormat='png')
img = data.first()[1]

# Display image using scikit-image
viewer = skImageViewer(img[:, :, 0])
viewer.show()
class TestLoadIrregularImages(PySparkTestCaseWithOutputDir):
    def setUp(self):
        super(TestLoadIrregularImages, self).setUp()
        self.tsc = ThunderContext(self.sc)

    def _generate_array(self, dtype):
        self.ary = arange(256, dtype=dtypeFunc(dtype)).reshape((16, 4, 4))  # 16 pages of 4x4 images

    def _write_tiffs(self):
        import thunder.rdds.fileio.tifffile as tifffile
        writer1 = tifffile.TiffWriter(os.path.join(self.outputdir, "tif01.tif"))
        writer1.save(self.ary[:8].transpose((0, 2, 1)), photometric="minisblack")  # write out 8 pages
        writer1.close()
        del writer1
        writer2 = tifffile.TiffWriter(os.path.join(self.outputdir, "tif02.tif"))
        writer2.save(self.ary.transpose((0, 2, 1)), photometric="minisblack")  # write out all 16 pages
        writer2.close()
        del writer2

    def _write_stacks(self):
        with open(os.path.join(self.outputdir, "stack01.bin"), "w") as f:
            self.ary[:8].tofile(f)
        with open(os.path.join(self.outputdir, "stack02.bin"), "w") as f:
            self.ary.tofile(f)

    def _run_tst(self, imgType, dtype):
        self._generate_array(dtype)
        if imgType.lower().startswith('tif'):
            self._write_tiffs()
            inputFormat, ext, dims = "tif", "tif", None
        elif imgType.lower().startswith("stack"):
            self._write_stacks()
            inputFormat, ext, dims = "stack", "bin", (16, 4, 4)
        else:
            raise ValueError("Unknown imgType: %s" % imgType)

        # with nplanes=2, this should yield a 12 record Images object, which after converting to
        # a series and packing should be a 12 x 4 x 4 x 2 array.
        # renumber=True is required in this case in order to ensure sensible results.
        series = self.tsc.loadImagesAsSeries(self.outputdir, inputFormat=inputFormat, ext=ext,
                                             blockSize=(2, 1, 1), blockSizeUnits="pixels",
                                             nplanes=2, dims=dims, renumber=True)
        packedAry = series.pack()

        assert_equals((12, 4, 4, 2), packedAry.shape)
        assert_true(array_equal(self.ary[0:2], packedAry[0].T))
        assert_true(array_equal(self.ary[2:4], packedAry[1].T))
        assert_true(array_equal(self.ary[4:6], packedAry[2].T))
        assert_true(array_equal(self.ary[6:8], packedAry[3].T))
        # first image was only 4 2-plane records
        assert_true(array_equal(self.ary[0:2], packedAry[4].T))
        assert_true(array_equal(self.ary[2:4], packedAry[5].T))
        assert_true(array_equal(self.ary[4:6], packedAry[6].T))
        assert_true(array_equal(self.ary[6:8], packedAry[7].T))
        assert_true(array_equal(self.ary[8:10], packedAry[8].T))
        assert_true(array_equal(self.ary[10:12], packedAry[9].T))
        assert_true(array_equal(self.ary[12:14], packedAry[10].T))
        assert_true(array_equal(self.ary[14:16], packedAry[11].T))

    def test_loadMultipleSignedIntTifsAsSeries(self):
        self._run_tst('tif', 'int16')

    def test_loadMultipleUnsignedIntTifsAsSeries(self):
        self._run_tst('tif', 'uint16')

    # can't currently have binary stack files of different sizes, since we have
    # fixed `dims` for all stacks. leaving in place b/c it seems like something
    # to support soon.
    # def test_loadMultipleBinaryStacksAsSeries(self):
    #     self._run_tst('stack', 'uint16')
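# A minimal numpy-only sketch (an illustration, not part of the test suite) of
# the plane-splitting behavior checked above: with nplanes=2, the 8-page first
# file splits into 4 two-plane records and the 16-page second file into 8,
# for 12 records total.
from numpy import arange, array_equal

ary = arange(256, dtype='int16').reshape((16, 4, 4))
# first file holds pages 0..7 (4 records), second file holds all 16 pages (8 records)
records = [ary[i:i+2] for i in range(0, 8, 2)] + [ary[i:i+2] for i in range(0, 16, 2)]
assert len(records) == 12
assert array_equal(records[0], ary[0:2])  # matches packedAry[0].T in the test above
assert array_equal(records[4], ary[0:2])  # second file restarts at the first pages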
class TestContextLoading(PySparkTestCaseWithOutputDir):
    def setUp(self):
        super(TestContextLoading, self).setUp()
        self.tsc = ThunderContext(self.sc)

    @staticmethod
    def _findTestResourcesDir(resourcesDirName="resources"):
        testDirPath = os.path.dirname(os.path.realpath(__file__))
        testResourcesDirPath = os.path.join(testDirPath, resourcesDirName)
        if not os.path.isdir(testResourcesDirPath):
            raise IOError("Test resources directory "+testResourcesDirPath+" not found")
        return testResourcesDirPath

    def test_loadStacksAsSeriesWithShuffle(self):
        rangeAry = arange(64*128, dtype=dtypeFunc('int16'))
        filePath = os.path.join(self.outputdir, "rangeary.stack")
        rangeAry.tofile(filePath)
        expectedAry = rangeAry.reshape((128, 64), order='F')

        rangeSeries = self.tsc.loadImagesAsSeries(filePath, dims=(128, 64))
        assert_equals('float32', rangeSeries._dtype)  # check before any potential first() calls update this val
        rangeSeriesAry = rangeSeries.pack()

        assert_equals((128, 64), rangeSeries.dims.count)
        assert_equals((128, 64), rangeSeriesAry.shape)
        assert_equals('float32', str(rangeSeriesAry.dtype))
        assert_true(array_equal(expectedAry, rangeSeriesAry))

    def test_load3dStackAsSeriesWithShuffle(self):
        rangeAry = arange(32*64*4, dtype=dtypeFunc('int16'))
        filePath = os.path.join(self.outputdir, "rangeary.stack")
        rangeAry.tofile(filePath)
        expectedAry = rangeAry.reshape((32, 64, 4), order='F')

        rangeSeries = self.tsc.loadImagesAsSeries(filePath, dims=(32, 64, 4))
        assert_equals('float32', rangeSeries._dtype)
        rangeSeriesAry = rangeSeries.pack()

        assert_equals((32, 64, 4), rangeSeries.dims.count)
        assert_equals((32, 64, 4), rangeSeriesAry.shape)
        assert_equals('float32', str(rangeSeriesAry.dtype))
        assert_true(array_equal(expectedAry, rangeSeriesAry))

    def __run_loadMultipleStacksAsSeries(self):
        rangeAry = arange(64*128, dtype=dtypeFunc('int16'))
        filePath = os.path.join(self.outputdir, "rangeary01.bin")
        rangeAry.tofile(filePath)
        expectedAry = rangeAry.reshape((128, 64), order='F')
        rangeAry2 = arange(64*128, 2*64*128, dtype=dtypeFunc('int16'))
        filePath = os.path.join(self.outputdir, "rangeary02.bin")
        rangeAry2.tofile(filePath)
        expectedAry2 = rangeAry2.reshape((128, 64), order='F')

        rangeSeries = self.tsc.loadImagesAsSeries(self.outputdir, dims=(128, 64))
        assert_equals('float32', rangeSeries._dtype)
        rangeSeriesAry = rangeSeries.pack()
        rangeSeriesAry_xpose = rangeSeries.pack(transpose=True)

        assert_equals((128, 64), rangeSeries.dims.count)
        assert_equals((2, 128, 64), rangeSeriesAry.shape)
        assert_equals((2, 64, 128), rangeSeriesAry_xpose.shape)
        assert_equals('float32', str(rangeSeriesAry.dtype))
        assert_true(array_equal(expectedAry, rangeSeriesAry[0]))
        assert_true(array_equal(expectedAry2, rangeSeriesAry[1]))
        assert_true(array_equal(expectedAry.T, rangeSeriesAry_xpose[0]))
        assert_true(array_equal(expectedAry2.T, rangeSeriesAry_xpose[1]))

    def test_loadMultipleMultipointStacksAsSeries(self):
        rangeAry = arange(64*128, dtype=dtypeFunc('int16'))
        filePath = os.path.join(self.outputdir, "rangeary01.bin")
        rangeAry.tofile(filePath)
        expectedAry = rangeAry.reshape((32, 32, 8), order='F')
        rangeAry2 = arange(64*128, 2*64*128, dtype=dtypeFunc('int16'))
        filePath = os.path.join(self.outputdir, "rangeary02.bin")
        rangeAry2.tofile(filePath)
        expectedAry2 = rangeAry2.reshape((32, 32, 8), order='F')

        rangeSeries = self.tsc.loadImagesAsSeries(self.outputdir, dims=(32, 32, 8), nplanes=2)
        assert_equals('float32', rangeSeries._dtype)
        rangeSeriesAry = rangeSeries.pack()

        assert_equals((32, 32, 2), rangeSeries.dims.count)
        assert_equals((8, 32, 32, 2), rangeSeriesAry.shape)
        assert_equals('float32', str(rangeSeriesAry.dtype))
        assert_true(array_equal(expectedAry[:, :, :2], rangeSeriesAry[0]))
        assert_true(array_equal(expectedAry[:, :, 2:4], rangeSeriesAry[1]))
        assert_true(array_equal(expectedAry[:, :, 4:6], rangeSeriesAry[2]))
        assert_true(array_equal(expectedAry[:, :, 6:], rangeSeriesAry[3]))
        assert_true(array_equal(expectedAry2[:, :, :2], rangeSeriesAry[4]))
        assert_true(array_equal(expectedAry2[:, :, 2:4], rangeSeriesAry[5]))
        assert_true(array_equal(expectedAry2[:, :, 4:6], rangeSeriesAry[6]))
        assert_true(array_equal(expectedAry2[:, :, 6:], rangeSeriesAry[7]))

    @unittest.skipIf(not _have_image, "PIL/pillow not installed or not functional")
    def __run_loadTifAsSeries(self):
        tmpAry = arange(60*120, dtype=dtypeFunc('uint16'))
        rangeAry = mod(tmpAry, 255).astype('uint8').reshape((60, 120))
        pilImg = Image.fromarray(rangeAry)
        filePath = os.path.join(self.outputdir, "rangetif01.tif")
        pilImg.save(filePath)
        del pilImg, tmpAry

        rangeSeries = self.tsc.loadImagesAsSeries(self.outputdir, inputFormat="tif-stack")
        assert_equals('float16', rangeSeries._dtype)  # check before any potential first() calls update this val
        rangeSeriesAry = rangeSeries.pack()

        assert_equals((60, 120), rangeSeries.dims.count)  # 2d tif now loaded as 2d image; was 3d with singleton z dim
        assert_equals((60, 120), rangeSeriesAry.shape)
        assert_equals('float16', str(rangeSeriesAry.dtype))
        assert_true(array_equal(rangeAry, rangeSeriesAry))

    @unittest.skipIf(not _have_image, "PIL/pillow not installed or not functional")
    def test_loadTestTifAsSeriesWithShuffle(self):
        testResourcesDir = TestContextLoading._findTestResourcesDir()
        imagePath = os.path.join(testResourcesDir, "multilayer_tif", "dotdotdot_lzw.tif")
        testimg_pil = Image.open(imagePath)
        testimg_arys = list()
        testimg_arys.append(array(testimg_pil))  # original shape 70, 75
        testimg_pil.seek(1)
        testimg_arys.append(array(testimg_pil))
        testimg_pil.seek(2)
        testimg_arys.append(array(testimg_pil))

        rangeSeries = self.tsc.loadImagesAsSeries(imagePath, inputFormat="tif-stack")
        assert_true(rangeSeries._dtype.startswith("float"))
        rangeSeriesAry = rangeSeries.pack()
        rangeSeriesAry_xpose = rangeSeries.pack(transpose=True)

        assert_equals((70, 75, 3), rangeSeries.dims.count)
        assert_equals((70, 75, 3), rangeSeriesAry.shape)
        assert_equals((3, 75, 70), rangeSeriesAry_xpose.shape)
        assert_true(rangeSeriesAry.dtype.kind == "f")
        assert_true(array_equal(testimg_arys[0], rangeSeriesAry[:, :, 0]))
        assert_true(array_equal(testimg_arys[1], rangeSeriesAry[:, :, 1]))
        assert_true(array_equal(testimg_arys[2], rangeSeriesAry[:, :, 2]))
        assert_true(array_equal(testimg_arys[0].T, rangeSeriesAry_xpose[0]))
        assert_true(array_equal(testimg_arys[1].T, rangeSeriesAry_xpose[1]))
        assert_true(array_equal(testimg_arys[2].T, rangeSeriesAry_xpose[2]))

    @unittest.skipIf(not _have_image, "PIL/pillow not installed or not functional")
    def test_loadMultipleTifsAsSeriesWithShuffle(self):
        tmpAry = arange(60*120, dtype=dtypeFunc('uint16'))
        rangeAry = mod(tmpAry, 255).astype('uint8').reshape((60, 120))
        pilImg = Image.fromarray(rangeAry)
        filePath = os.path.join(self.outputdir, "rangetif01.tif")
        pilImg.save(filePath)
        tmpAry = arange(60*120, 2*60*120, dtype=dtypeFunc('uint16'))
        rangeAry2 = mod(tmpAry, 255).astype('uint8').reshape((60, 120))
        pilImg = Image.fromarray(rangeAry2)
        filePath = os.path.join(self.outputdir, "rangetif02.tif")
        pilImg.save(filePath)
        del pilImg, tmpAry

        rangeSeries = self.tsc.loadImagesAsSeries(self.outputdir, inputFormat="tif-stack")
        assert_equals('float16', rangeSeries._dtype)
        rangeSeriesAry = rangeSeries.pack()
        rangeSeriesAry_xpose = rangeSeries.pack(transpose=True)

        assert_equals((60, 120), rangeSeries.dims.count)  # 2d tif now loaded as 2d image; was 3d with singleton z dim
        assert_equals((2, 60, 120), rangeSeriesAry.shape)
        assert_equals((2, 120, 60), rangeSeriesAry_xpose.shape)
        assert_equals('float16', str(rangeSeriesAry.dtype))
        assert_true(array_equal(rangeAry, rangeSeriesAry[0]))
        assert_true(array_equal(rangeAry2, rangeSeriesAry[1]))
        assert_true(array_equal(rangeAry.T, rangeSeriesAry_xpose[0]))
        assert_true(array_equal(rangeAry2.T, rangeSeriesAry_xpose[1]))

    @unittest.skipIf(not _have_image, "PIL/pillow not installed or not functional")
    def test_loadMultipleMultipointTifsAsSeries(self):
        testResourcesDir = TestContextLoading._findTestResourcesDir()
        imagesPath = os.path.join(testResourcesDir, "multilayer_tif", "dotdotdot_lzw*.tif")
        # load only one file, second is a copy of this one
        testimg_pil = Image.open(os.path.join(testResourcesDir, "multilayer_tif", "dotdotdot_lzw.tif"))
        testimg_arys = [array(testimg_pil)]
        for idx in xrange(1, 3):
            testimg_pil.seek(idx)
            testimg_arys.append(array(testimg_pil))

        rangeSeries = self.tsc.loadImagesAsSeries(imagesPath, inputFormat="tif-stack", nplanes=1)
        assert_equals((70, 75), rangeSeries.dims.count)
        rangeSeriesAry = rangeSeries.pack()

        assert_equals((6, 70, 75), rangeSeriesAry.shape)
        for idx in xrange(6):
            assert_true(array_equal(testimg_arys[idx % 3], rangeSeriesAry[idx]))

    @staticmethod
    def _tempFileWithPaths(f, blob):
        f.write(blob)
        f.flush()
        return f.name

    def test_loadParams(self):
        params = json.dumps({"name": "test1", "value": [1, 2, 3]})
        f = tempfile.NamedTemporaryFile()
        path = TestContextLoading._tempFileWithPaths(f, params)
        d = self.tsc.loadParams(path)
        assert(d.names() == ["test1"])
        assert(array_equal(d.values(), [1, 2, 3]))

        params = json.dumps([{"name": "test0", "value": [1, 2, 3]},
                             {"name": "test1", "value": [4, 5, 6]}])
        f = tempfile.NamedTemporaryFile()
        path = TestContextLoading._tempFileWithPaths(f, params)
        d = self.tsc.loadParams(path)
        assert(d.names() == ["test0", "test1"])
        assert(array_equal(d.values(), [[1, 2, 3], [4, 5, 6]]))
        assert(array_equal(d.values("test0"), [1, 2, 3]))

    def test_loadSeriesFromArray(self):
        target = array([[0, 1], [0, 2]])
        d1 = self.tsc.loadSeriesFromArray([[0, 1], [0, 2]])
        d2 = self.tsc.loadSeriesFromArray(array([[0, 1], [0, 2]]))
        assert(array_equal(d1.collectValuesAsArray(), target))
        assert(d1.keys().collect() == [(0,), (1,)])
        assert(array_equal(d2.collectValuesAsArray(), target))
        assert(d2.keys().collect() == [(0,), (1,)])

        target = array([[0, 1]])
        d1 = self.tsc.loadSeriesFromArray([0, 1])
        d2 = self.tsc.loadSeriesFromArray(array([0, 1]))
        assert(array_equal(d1.collectValuesAsArray(), target))
        assert(d1.keys().collect() == [(0,)])
        assert(array_equal(d2.collectValuesAsArray(), target))
        assert(d2.keys().collect() == [(0,)])

    def test_loadImagesFromArray(self):
        target = array([[[0, 1], [0, 2]]])
        d1 = self.tsc.loadImagesFromArray([[0, 1], [0, 2]])
        d2 = self.tsc.loadImagesFromArray(array([[0, 1], [0, 2]]))
        assert(array_equal(d1.collectValuesAsArray(), target))
        assert(d1.keys().collect() == [0])
        assert(array_equal(d2.collectValuesAsArray(), target))
        assert(d2.keys().collect() == [0])

        target = array([[[0, 1], [0, 2]], [[0, 1], [0, 2]]])
        d1 = self.tsc.loadImagesFromArray([[[0, 1], [0, 2]], [[0, 1], [0, 2]]])
        d2 = self.tsc.loadImagesFromArray(array([[[0, 1], [0, 2]], [[0, 1], [0, 2]]]))
        assert(array_equal(d1.collectValuesAsArray(), target))
        assert(d1.keys().collect() == [0, 1])
        assert(array_equal(d2.collectValuesAsArray(), target))
        assert(d2.keys().collect() == [0, 1])
class TestContextWriting(PySparkTestCaseWithOutputDir):
    def setUp(self):
        super(TestContextWriting, self).setUp()
        self.tsc = ThunderContext(self.sc)

    def test_export_npy(self):
        from numpy import load
        a = array([[1, 2], [2, 3]])

        filename = self.outputdir + "/test.npy"
        self.tsc.export(a, filename)
        aa = load(filename)
        assert(array_equal(aa, a))

        filename = self.outputdir + "/test"
        self.tsc.export(a, filename, outputFormat="npy", overwrite=True)
        aa = load(filename + ".npy")
        assert(array_equal(aa, a))

    def test_export_mat(self):
        from scipy.io import loadmat
        a = array([[1, 2], [2, 3]])

        filename = self.outputdir + "/test.mat"
        self.tsc.export(a, filename)
        aa = loadmat(filename)
        assert(array_equal(aa['test'], a))

        filename = self.outputdir + "/test"
        self.tsc.export(a, filename, outputFormat="mat", overwrite=True)
        aa = loadmat(filename + ".mat")
        assert(array_equal(aa['test'], a))

        filename = self.outputdir + "/test"
        self.tsc.export(a, filename, outputFormat="mat", varname="tmp", overwrite=True)
        aa = loadmat(filename + ".mat")
        assert(array_equal(aa['tmp'], a))

    def test_export_txt(self):
        from numpy import loadtxt
        a = array([[1, 2], [2, 3]])

        filename = self.outputdir + "/test.txt"
        self.tsc.export(a, filename)
        aa = loadtxt(filename)
        assert(array_equal(aa, a))

        filename = self.outputdir + "/test"
        self.tsc.export(a, filename, outputFormat="txt", overwrite=True)
        aa = loadtxt(filename + ".txt")
        assert(array_equal(aa, a))
usage="%prog datafile outputdir k [options]") parser.add_option("--nmfmethod", choices=["als"], default="als") parser.add_option("--maxiter", type=float, default=20) parser.add_option("--tol", type=float, default=0.001) parser.add_option("--w_hist", action="store_true", default=False) parser.add_option("--recon_hist", action="store_true", default=False) opts, args = parser.parse_args() try: datafile = args[0] outputdir = args[1] k = int(args[2]) except IndexError: parser.print_usage() raise Exception("too few arguments") tsc = ThunderContext.start(appName="nmf") data = tsc.loadSeries(datafile).cache() nmf = NMF(k=k, method=opts.nmfmethod, maxIter=opts.maxiter, tol=opts.tol, wHist=opts.w_hist, reconHist=opts.recon_hist) nmf.fit(data) outputdir += "-nmf" tsc.export(nmf.w, outputdir, "w", "matlab") tsc.export(nmf.h, outputdir, "h", "matlab") if opts.w_hist: tsc.export(nmf.wConvergence, outputdir, "w_convergence", "matlab") if opts.recon_hist: tsc.export(nmf.reconErr, outputdir, "rec_err", "matlab")
if use_existing_parameters == 1:
    with open(Exp_Folder + filename_save_prefix + '_save_NMF_variables') as f:
        NMF_components_ind, num_NMF_colors_ind, color_map_ind, max_iterations_ind, tolerence_level_ind,\
            NMF_components_eachexp, num_NMF_colors_eachexp, color_map_eachexp, max_iterations_eachexp, tolerence_level_eachexp,\
            NMF_components_allexp, num_NMF_colors_allexp, color_map_allexp, max_iterations_allexp, tolerence_level_allexp,\
            colors_NMF = pickle.load(f)

# Go into the main function that does NMF for individual trials
from NMF_thunder_analysis import run_analysis_individualodors
from NMF_thunder_analysis import run_analysis_eachodor
from NMF_thunder_analysis import run_analysis_allodor

from thunder import ThunderContext

print 'Starting Thunder Now. Check console for details'
tsc = ThunderContext.start(appName="thunderNMF")

if files_to_do_NMF[0] == 1:
    run_analysis_individualodors(Exp_Folder, filename_save_prefix, NMF_components_ind, num_NMF_colors_ind,
                                 color_map_ind, tsc, redo_NMF, stimulus_on_time, stimulus_off_time,
                                 time_baseline, colors_NMF, max_iterations_ind, tolerence_level_ind, remake_colormap)
if files_to_do_NMF[1] == 1:
    run_analysis_eachodor(Exp_Folder, filename_save_prefix, NMF_components_eachexp, num_NMF_colors_eachexp,
                          color_map_eachexp, tsc, redo_NMF, stimulus_on_time, stimulus_off_time,
                          time_baseline, colors_NMF, max_iterations_eachexp, tolerence_level_eachexp, remake_colormap)
if files_to_do_NMF[2] == 1:
    run_analysis_allodor(Exp_Folder, filename_save_prefix, NMF_components_allexp, num_NMF_colors_allexp,
                         color_map_allexp, tsc, redo_NMF, stimulus_on_time, stimulus_off_time,
                         time_baseline, colors_NMF, max_iterations_allexp, tolerence_level_allexp, remake_colormap)

############# Save all input parameters
with open(Exp_Folder + filename_save_prefix + '_save_NMF_variables', 'w') as f:
def CompressImages(inputs, output, confObj):
    debugMode = False
    st = datetime.now()

    imageExt = confObj['ext']
    imageHeight = confObj['dims'][0]
    imageWidth = confObj['dims'][1]
    refImgId = confObj['refImageId']
    diffImageFolder = confObj['DiffImageFolder']

    if debugMode:
        print confObj

    import glob
    totImages = len(glob.glob(inputs + "*." + imageExt))

    if os.path.exists(output):
        shutil.rmtree(output)

    conf = SparkConf().setAppName('ImgCompress')
    sc = SparkContext(conf=conf)
    imageHeight = sc.broadcast(imageHeight)
    imageWidth = sc.broadcast(imageWidth)

    tsc = ThunderContext(sc)
    tscImages = tsc.loadImages(inputs, (imageHeight.value, imageWidth.value), imageExt, imageExt).cache()

    floatingPixelRdd = tscImages.rdd.flatMapValues(lambda r: r).zipWithIndex()\
        .map(lambda l: ((l[0][0], (l[1] - (l[0][0] * int(imageHeight.value)))), l[0][1]))\
        .flatMapValues(lambda r: r).zipWithIndex()\
        .map(lambda l: ((l[0][0][1], (l[1] - (l[0][0][0] * int(imageWidth.value) * int(imageHeight.value)
                                              + l[0][0][1] * int(imageWidth.value)))),
                        (l[0][0][0], l[0][1])))
    if debugMode:
        floatingPixelRdd.saveAsTextFile(output + "\\Stage-1-FloatingPixel")

    temporalVoxelRdd = floatingPixelRdd.groupByKey().map(lambda x: (x[0], list(x[1]))).cache()
    if debugMode:
        temporalVoxelRdd.saveAsTextFile(output + "\\Stage-2-TemporalVoxel")

    iMapImageDict = {}
    for imgIndex in range(totImages):
        imgIndexBD = sc.broadcast(imgIndex)

        # ------------------------------- HELPER FUNCTIONS -------------------------------------
        def ReAdjustImapList(l):
            intList = l[1]
            mKey = intList.pop(imgIndexBD.value)[1]
            return (mKey, intList)

        def calculateMedianForEachImage(l):
            intRdd = sc.parallelize(l[1]).flatMap(lambda l: l).groupByKey().map(lambda x: (x[0], list(x[1])))
            intRddSorted = intRdd.map(lambda l: (l[0], sorted(l[1])))
            intRddMedian = intRddSorted.map(lambda l: (l[0], l[1][int(len(l[1]) / 2)]))
            return intRddMedian.collect()
        # ------------------------------- HELPER FUNCTIONS -------------------------------------

        imapRdd = temporalVoxelRdd.map(lambda l: ReAdjustImapList(l)).sortBy(lambda l: l[0], False)\
            .groupByKey().map(lambda x: (x[0], list(x[1])))
        if debugMode:
            imapRdd.saveAsTextFile(output + "\\Stage-3__IMAP-Stage-1_imgIdx-" + str(imgIndex))

        imapRdd = imapRdd.flatMapValues(lambda l: l).flatMapValues(lambda l: l)\
            .map(lambda l: ((l[0], l[1][0]), l[1][1]))\
            .groupByKey().map(lambda x: (x[0], list(x[1])))\
            .map(lambda l: (l[0], sorted(l[1])))\
            .map(lambda l: (l[0], l[1][int(len(l[1]) / 2)]))\
            .map(lambda l: (l[0][0], (l[0][1], l[1])))
        if debugMode:
            imapRdd.saveAsTextFile(output + "\\Stage-3__IMAP-Stage-2_imgIdx-" + str(imgIndex))

        imapRdd = imapRdd.groupByKey().map(lambda x: (x[0], sorted(list(x[1]), key=lambda k: k[0])))
        if debugMode:
            imapRdd.saveAsTextFile(output + "\\Stage-3__IMAP-Stage-3_imgIdx-" + str(imgIndex))

        imapDict = imapRdd.collectAsMap()
        iMapImageDict[imgIndex] = imapDict

    iMapDictBD = sc.broadcast(iMapImageDict)
    refImageIdBD = sc.broadcast(refImgId)

    def CalcMapValue(pix, iMapVal):
        pImgIdx = pix[0]
        residual = 0
        for iIdx in iMapVal:
            if pImgIdx == iIdx[0]:
                residual = int(iIdx[1]) - pix[1]
                break
        return (pix[0], residual)

    def ApplyIMap(l):
        voxel = l[1]
        refIntensity = l[1][refImageIdBD.value][1]
        iMapValues = iMapDictBD.value[refImageIdBD.value][refIntensity]
        residualValues = []
        for pixel in voxel:
            if pixel[0] != refImageIdBD.value:
                residualValues.append(CalcMapValue(pixel, iMapValues))
            else:
                residualValues.append(pixel)
        return (l[0], residualValues)

    diffFVRdd = temporalVoxelRdd.map(lambda l: ApplyIMap(l))
    if debugMode:
        diffFVRdd.saveAsTextFile(output + "\\Stage-4__Diff-Stage-1")

    residualImages = diffFVRdd.flatMapValues(lambda l: l)\
        .map(lambda l: ((l[1][0], l[0][0]), (l[0][1], l[1][1])))\
        .groupByKey().map(lambda x: (x[0], sorted(list(x[1]), key=lambda k: (k[0]))))\
        .map(lambda l: (l[0][0], (l[0][1], l[1])))\
        .groupByKey().map(lambda x: (x[0], sorted(list(x[1]), key=lambda k: (k[0]))))\
        .map(lambda l: (l[0], removeRCIndex(l[1])))
    residualImages.saveAsTextFile(output + "\\" + diffImageFolder)

    for imgIdx in range(totImages):
        iMapImageDict[str(imgIdx)] = dict([(str(k), str(v)) for k, v in iMapImageDict[imgIdx].items()])
        del iMapImageDict[imgIdx]

    import json
    with open(output + "\\imap.json", 'w') as f:
        json.dump(iMapImageDict, f)
    with open(output + '\\conf.json', 'w') as f:
        json.dump(confObj, f)

    # clean up
    sc.stop()

    en = datetime.now()
    td = en - st
    print td
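# Standalone illustration of the median convention used twice above: the middle
# element of a sorted list, i.e. l[1][int(len(l[1]) / 2)] (for even-length
# lists this picks the upper middle). Not part of the pipeline itself.
vals = [7, 1, 5, 3]
median = sorted(vals)[int(len(vals) / 2)]  # sorted -> [1, 3, 5, 7]; index 2 -> 5
assert median == 5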
from thunder import ThunderContext, RegressionModel, PCA

if __name__ == "__main__":
    parser = optparse.OptionParser(description="fit a regression model",
                                   usage="%prog datafile modelfile outputdir [options]")
    parser.add_option("--regressmode", choices=("mean", "linear", "bilinear"), help="form of regression")
    parser.add_option("--k", type=int, default=2)

    opts, args = parser.parse_args()
    try:
        datafile = args[0]
        modelfile = args[1]
        outputdir = args[2]
    except IndexError:
        parser.print_usage()
        raise Exception("too few arguments")

    tsc = ThunderContext.start(appName="regresswithpca")

    data = tsc.loadSeries(datafile)
    model = RegressionModel.load(modelfile, opts.regressmode)  # do regression
    betas, stats, resid = model.fit(data)
    pca = PCA(opts.k).fit(betas)  # do PCA
    traj = model.fit(data, pca.comps)  # get trajectories

    outputdir += "-regress"
    tsc.export(pca.comps, outputdir, "comps", "matlab")
    tsc.export(pca.latent, outputdir, "latent", "matlab")
    tsc.export(pca.scores, outputdir, "scores", "matlab")
    tsc.export(traj, outputdir, "traj", "matlab")
Exp_Folder = '/Users/seetha/Desktop/Ruey_Habenula/Habenula/Short_Stimulus/Fish104_Block2_Blue&UV1c/'
filename_save_prefix = 'Test1'

from thunder import ThunderContext

print 'Starting Thunder Now. Check console for details'
tsc = ThunderContext.start(appName="thunderNMF")

import os
filesep = os.path.sep
import matplotlib.pyplot as plt
import numpy as np

from thunder_NMF import run_NMF
from thunder_NMF import make_NMF_maps
from thunder_NMF_plots import plot_NMF_maps
from thunder import Colorize
image = Colorize.image

Stimulus_Directories = [f for f in os.listdir(Exp_Folder)
                        if os.path.isdir(os.path.join(Exp_Folder, f)) and f.find('Figures') < 0]
#Stimulus_Directories
ii = 0
Trial_Directories = [f for f in os.listdir(os.path.join(Exp_Folder, Stimulus_Directories[ii]))
                     if os.path.isdir(os.path.join(Exp_Folder, Stimulus_Directories[ii], f)) and f.find('Figures') < 0]
Trial_Directories
jj = 0

stim_start = 10  # Stimulus starting time point
stim_end = 14  # Stimulus ending time point
def execute(self):
    """ Execute this pull request """
    printer.status("Executing pull request %s from user %s" % (self.id, self.login))

    base, module = self.clone()
    f = open(base + 'info.json', 'r')
    info = json.loads(f.read())

    sys.path.append(module)
    run = importlib.import_module('run')

    spark = os.getenv('SPARK_HOME')
    if spark is None or spark == '':
        raise Exception('must assign the environment variable SPARK_HOME with the location of Spark')
    sys.path.append(os.path.join(spark, 'python'))
    sys.path.append(os.path.join(spark, 'python/lib/py4j-0.8.2.1-src.zip'))

    from thunder import ThunderContext
    tsc = ThunderContext.start(master="local", appName="neurofinder")

    datasets = ['data-0', 'data-1', 'data-2', 'data-3', 'data-4', 'data-5']
    centers = [5, 7, 9, 11, 13, 15]
    metrics = {'accuracy': [], 'overlap': [], 'distance': [], 'count': [], 'area': []}

    try:
        for ii, name in enumerate(datasets):
            data, ts, truth = tsc.makeExample('sources', dims=(200, 200), centers=centers[ii],
                                              noise=1.0, returnParams=True)
            sources = run.run(data)

            accuracy = truth.similarity(sources, metric='distance', thresh=10, minDistance=10)
            overlap = truth.overlap(sources, minDistance=10)
            distance = truth.distance(sources, minDistance=10)
            count = sources.count
            area = mean(sources.areas)

            metrics['accuracy'].append({"dataset": name, "value": accuracy})
            metrics['overlap'].append({"dataset": name, "value": nanmean(overlap)})
            metrics['distance'].append({"dataset": name, "value": nanmean(distance)})
            metrics['count'].append({"dataset": name, "value": count})
            metrics['area'].append({"dataset": name, "value": area})

            im = sources.masks(base=data.mean())
            self.post_image(im, name)

        for k in metrics.keys():
            overall = mean([v['value'] for v in metrics[k]])
            metrics[k].append({"dataset": "overall", "value": overall})

        msg = "Execution successful"
        printer.success()
        self.update_status("executed")

    except Exception:
        metrics = None
        msg = "Execution failed"
        printer.error("failed, returning error")
        print(traceback.format_exc())

    self.send_message(msg)

    return metrics, info
Exp_Folder = '/Users/seetha/Desktop/KCTD/Fish14_KCTDHUC_5dpf/Tiff/Cropped/Registered/Thresholded_OB/Registered_Stimulus/'
filename_save_prefix = 'ThresholdedOB_T81'

from thunder import ThunderContext

print 'Starting Thunder Now. Check console for details'
tsc = ThunderContext.start(appName="thunderpca")

import os
filesep = os.path.sep
import time
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import seaborn as sns

from thunder import KMeans
from thunder import Colorize
from thunder_kmeans_plots import plot_kmeans_maps
from thunder_kmeans import make_kmeans_maps
from kmeans_thunder_analysis import run_kmeans_thunder

#Stimulus_Directories = [f for f in os.listdir(Exp_Folder) if os.path.isdir(os.path.join(Exp_Folder, f)) and f.find('Figures')<0]
##Stimulus_Directories
#ii = 1
#Trial_Directories = [f for f in os.listdir(os.path.join(Exp_Folder, Stimulus_Directories[ii]))\
#if os.path.isdir(os.path.join(Exp_Folder, Stimulus_Directories[ii], f)) and f.find('Figures')<0]
#Trial_Directories
#jj = 0
def execute(self, lock, pipe):
    """ Execute this pull request """
    lock.acquire()

    base, module = self.clone()
    f = open(base + 'info.json', 'r')
    info = json.loads(f.read())

    printer.status("Executing pull request %s from user %s" % (self.id, self.login))
    printer.status("Branch name: %s" % self.branch)
    printer.status("Algorithm name: %s" % info['algorithm'])

    sys.path.append(module)
    run = importlib.import_module('run', module)

    spark_home = os.getenv('SPARK_HOME')
    if spark_home is None or spark_home == '':
        raise Exception('must assign the environment variable SPARK_HOME with the location of Spark')
    sys.path.append(os.path.join(spark_home, 'python'))
    sys.path.append(os.path.join(spark_home, 'python/lib/py4j-0.8.2.1-src.zip'))

    with quiet():
        from thunder import ThunderContext
        from thunder.utils.launch import findThunderEgg
        tsc = ThunderContext.start(master=self.get_master(), appName="neurofinder")
        tsc.addPyFile(findThunderEgg())
        log4j = tsc._sc._jvm.org.apache.log4j
        log4j.LogManager.getRootLogger().setLevel(log4j.Level.ERROR)
        time.sleep(5)

    base_path = 'neuro.datasets.private/challenges/neurofinder.test'
    datasets = ['00.00.test', '00.01.test', '01.00.test', '01.01.test',
                '02.00.test', '02.01.test', '03.00.test']
    metrics = {'score': [], 'recall': [], 'precision': [], 'overlap': [], 'exactness': []}

    try:
        for ii, name in enumerate(datasets):
            printer.status("Processing data set %s" % name)

            data_path = 's3n://' + base_path + '/' + name
            data_info = self.load_info(base_path, name)
            data = tsc.loadImages(data_path + '/images/', recursive=True, npartitions=600)
            truth = tsc.loadSources(data_path + '/sources/sources.json')
            sources = run.run(data, info=data_info)

            threshold = 6.0 / data_info['pixels-per-micron']

            recall, precision, score = truth.similarity(sources, metric='distance', minDistance=threshold)
            stats = truth.overlap(sources, method='rates', minDistance=threshold)
            if sum(~isnan(stats)) > 0:
                overlap, exactness = tuple(nanmean(stats, axis=0))
            else:
                overlap, exactness = 0.0, 1.0

            contributors = str(", ".join(data_info["contributors"]))
            animal = data_info["animal"]
            region = data_info["region"]
            lab = data_info["lab"]

            base = {"dataset": name, "contributors": contributors,
                    "lab": lab, "region": region, "animal": animal}

            m = {"value": score}
            m.update(base)
            metrics['score'].append(m)
            m = {"value": recall}
            m.update(base)
            metrics['recall'].append(m)
            m = {"value": precision}
            m.update(base)
            metrics['precision'].append(m)
            m = {"value": overlap}
            m.update(base)
            metrics['overlap'].append(m)
            m = {"value": exactness}
            m.update(base)
            metrics['exactness'].append(m)

            base = data.mean()
            im = sources.masks(outline=True, base=base.clip(0, percentile(base, 99.9)))
            self.post_image(im, name)

        for k in metrics.keys():
            overall = mean([v['value'] for v in metrics[k]])
            metrics[k].append({"dataset": "overall", "value": overall,
                               "contributors": "", "region": "", "animal": ""})

        msg = "Execution successful"
        printer.success()
        self.update_status("executed")

    except Exception:
        metrics = None
        msg = "Execution failed"
        printer.error("failed, returning error")
        print(traceback.format_exc())

    self.send_message(msg)

    tsc.stop()
    sys.path.remove(module)
    pipe.send((metrics, info))
    lock.release()
""" Example standalone app for calculating series statistics """ import optparse from thunder import ThunderContext if __name__ == "__main__": parser = optparse.OptionParser(description="compute summary statistics on time series data", usage="%prog datafile outputdir mode [options]") parser.add_option("--preprocess", action="store_true", default=False) opts, args = parser.parse_args() try: datafile = args[0] outputdir = args[1] mode = args[2] except IndexError: parser.print_usage() raise Exception("too few arguments") tsc = ThunderContext.start(appName="stats") data = tsc.loadSeries(datafile).cache() vals = data.seriesStat(mode) outputdir += "-stats" tsc.export(vals, outputdir, "stats_" + mode, "matlab")
) print "Found {0} faces!".format(len(faces)) # Draw a rectangle around the faces for (x, y, w, h) in faces: cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), -1) return img # Load images using thundear and pass it to OpenCV haar cascase one by one if __name__ == "__main__": # Define Spark and Thunder context conf = SparkConf().setAppName("Collaborative Filter").set("spark.executor.memory", "5g") sc = SparkContext(conf=conf) tsc = ThunderContext(sc) # Load all images in data directory data = tsc.loadImages("/home/vj/Desktop/CS-Project/data", inputFormat="png") # Loop through each image and convert them to gray grayImages = data.apply(lambda (k, v): (k, convertToGray(v))) # Loop through all the gray images and find faces FaceImages = grayImages.apply(lambda (k, v): (k, detectFaces(v))) print (data.dims) print (data.nrecords) cv2.imshow("image1", grayImages[0]) cv2.imshow("Face detected1", FaceImages[0]) cv2.imshow("image2", grayImages[1]) cv2.imshow("Face detected2", FaceImages[1])
class TestContextLoading(PySparkTestCaseWithOutputDir):
    def setUp(self):
        super(TestContextLoading, self).setUp()
        self.tsc = ThunderContext(self.sc)

    @staticmethod
    def _findTestResourcesDir(resourcesdirname="resources"):
        testdirpath = os.path.dirname(os.path.realpath(__file__))
        testresourcesdirpath = os.path.join(testdirpath, resourcesdirname)
        if not os.path.isdir(testresourcesdirpath):
            raise IOError("Test resources directory " + testresourcesdirpath + " not found")
        return testresourcesdirpath

    def __run_loadStacksAsSeries(self, shuffle):
        rangeary = np.arange(64 * 128, dtype=np.dtype('int16'))
        filepath = os.path.join(self.outputdir, "rangeary.stack")
        rangeary.tofile(filepath)
        expectedary = rangeary.reshape((128, 64), order='F')

        range_series = self.tsc.loadImagesAsSeries(filepath, dims=(128, 64), shuffle=shuffle)
        assert_equals('float32', range_series._dtype)  # check before any potential first() calls update this val
        range_series_ary = range_series.pack()

        assert_equals((128, 64), range_series.dims.count)
        assert_equals((128, 64), range_series_ary.shape)
        assert_equals('float32', str(range_series_ary.dtype))
        assert_true(np.array_equal(expectedary, range_series_ary))

    def test_loadStacksAsSeriesNoShuffle(self):
        self.__run_loadStacksAsSeries(False)

    def test_loadStacksAsSeriesWithShuffle(self):
        self.__run_loadStacksAsSeries(True)

    def __run_load3dStackAsSeries(self, shuffle):
        rangeary = np.arange(32 * 64 * 4, dtype=np.dtype('int16'))
        filepath = os.path.join(self.outputdir, "rangeary.stack")
        rangeary.tofile(filepath)
        expectedary = rangeary.reshape((32, 64, 4), order='F')

        range_series = self.tsc.loadImagesAsSeries(filepath, dims=(32, 64, 4), shuffle=shuffle)
        assert_equals('float32', range_series._dtype)
        range_series_ary = range_series.pack()

        assert_equals((32, 64, 4), range_series.dims.count)
        assert_equals((32, 64, 4), range_series_ary.shape)
        assert_equals('float32', str(range_series_ary.dtype))
        assert_true(np.array_equal(expectedary, range_series_ary))

    def test_load3dStackAsSeriesNoShuffle(self):
        self.__run_load3dStackAsSeries(False)

    def test_load3dStackAsSeriesWithShuffle(self):
        self.__run_load3dStackAsSeries(True)

    def __run_loadMultipleStacksAsSeries(self, shuffle):
        rangeary = np.arange(64 * 128, dtype=np.dtype('int16'))
        filepath = os.path.join(self.outputdir, "rangeary01.stack")
        rangeary.tofile(filepath)
        expectedary = rangeary.reshape((128, 64), order='F')
        rangeary2 = np.arange(64 * 128, 2 * 64 * 128, dtype=np.dtype('int16'))
        filepath = os.path.join(self.outputdir, "rangeary02.stack")
        rangeary2.tofile(filepath)
        expectedary2 = rangeary2.reshape((128, 64), order='F')

        range_series = self.tsc.loadImagesAsSeries(self.outputdir, dims=(128, 64), shuffle=shuffle)
        assert_equals('float32', range_series._dtype)
        range_series_ary = range_series.pack()
        range_series_ary_xpose = range_series.pack(transpose=True)

        assert_equals((128, 64), range_series.dims.count)
        assert_equals((2, 128, 64), range_series_ary.shape)
        assert_equals((2, 64, 128), range_series_ary_xpose.shape)
        assert_equals('float32', str(range_series_ary.dtype))
        assert_true(np.array_equal(expectedary, range_series_ary[0]))
        assert_true(np.array_equal(expectedary2, range_series_ary[1]))
        assert_true(np.array_equal(expectedary.T, range_series_ary_xpose[0]))
        assert_true(np.array_equal(expectedary2.T, range_series_ary_xpose[1]))

    def test_loadMultipleStacksAsSeriesNoShuffle(self):
        self.__run_loadMultipleStacksAsSeries(False)

    def test_loadMultipleStacksAsSeriesWithShuffle(self):
        self.__run_loadMultipleStacksAsSeries(True)

    def __run_loadTifAsSeries(self, shuffle):
        tmpary = np.arange(60 * 120, dtype=np.dtype('uint16'))
        rangeary = np.mod(tmpary, 255).astype('uint8').reshape((60, 120))
        pilimg = Image.fromarray(rangeary)
        filepath = os.path.join(self.outputdir, "rangetif01.tif")
        pilimg.save(filepath)
        del pilimg, tmpary

        range_series = self.tsc.loadImagesAsSeries(self.outputdir, inputformat="tif-stack", shuffle=shuffle)
        assert_equals('float16', range_series._dtype)  # check before any potential first() calls update this val
        range_series_ary = range_series.pack()

        assert_equals((60, 120, 1), range_series.dims.count)
        assert_equals((60, 120), range_series_ary.shape)
        assert_equals('float16', str(range_series_ary.dtype))
        assert_true(np.array_equal(rangeary, range_series_ary))

    @unittest.skipIf(not _have_image, "PIL/pillow not installed or not functional")
    def test_loadTifAsSeriesNoShuffle(self):
        self.__run_loadTifAsSeries(False)

    @unittest.skipIf(not _have_image, "PIL/pillow not installed or not functional")
    def test_loadTifAsSeriesWithShuffle(self):
        self.__run_loadTifAsSeries(True)

    def __run_loadTestTifAsSeries(self, shuffle):
        testresourcesdir = TestContextLoading._findTestResourcesDir()
        imagepath = os.path.join(testresourcesdir, "multilayer_tif", "dotdotdot_lzw.tif")
        testimg_pil = Image.open(imagepath)
        testimg_arys = list()
        testimg_arys.append(pil_to_array(testimg_pil))  # original shape 70, 75
        testimg_pil.seek(1)
        testimg_arys.append(pil_to_array(testimg_pil))
        testimg_pil.seek(2)
        testimg_arys.append(pil_to_array(testimg_pil))

        range_series = self.tsc.loadImagesAsSeries(imagepath, inputformat="tif-stack", shuffle=shuffle)
        assert_true(range_series._dtype.startswith("float"))
        range_series_ary = range_series.pack()
        range_series_ary_xpose = range_series.pack(transpose=True)

        assert_equals((70, 75, 3), range_series.dims.count)
        assert_equals((70, 75, 3), range_series_ary.shape)
        assert_equals((3, 75, 70), range_series_ary_xpose.shape)
        assert_true(range_series_ary.dtype.kind == "f")
        assert_true(np.array_equal(testimg_arys[0], range_series_ary[:, :, 0]))
        assert_true(np.array_equal(testimg_arys[1], range_series_ary[:, :, 1]))
        assert_true(np.array_equal(testimg_arys[2], range_series_ary[:, :, 2]))
        assert_true(np.array_equal(testimg_arys[0].T, range_series_ary_xpose[0]))
        assert_true(np.array_equal(testimg_arys[1].T, range_series_ary_xpose[1]))
        assert_true(np.array_equal(testimg_arys[2].T, range_series_ary_xpose[2]))

    @unittest.skipIf(not _have_image, "PIL/pillow not installed or not functional")
    def test_loadTestTifAsSeriesNoShuffle(self):
        self.__run_loadTestTifAsSeries(False)

    @unittest.skipIf(not _have_image, "PIL/pillow not installed or not functional")
    def test_loadTestTifAsSeriesWithShuffle(self):
        self.__run_loadTestTifAsSeries(True)

    def __run_loadMultipleTifsAsSeries(self, shuffle):
        tmpary = np.arange(60 * 120, dtype=np.dtype('uint16'))
        rangeary = np.mod(tmpary, 255).astype('uint8').reshape((60, 120))
        pilimg = Image.fromarray(rangeary)
        filepath = os.path.join(self.outputdir, "rangetif01.tif")
        pilimg.save(filepath)
        tmpary = np.arange(60 * 120, 2 * 60 * 120, dtype=np.dtype('uint16'))
        rangeary2 = np.mod(tmpary, 255).astype('uint8').reshape((60, 120))
        pilimg = Image.fromarray(rangeary2)
        filepath = os.path.join(self.outputdir, "rangetif02.tif")
        pilimg.save(filepath)
        del pilimg, tmpary

        range_series = self.tsc.loadImagesAsSeries(self.outputdir, inputformat="tif-stack", shuffle=shuffle)
        assert_equals('float16', range_series._dtype)
        range_series_ary = range_series.pack()
        range_series_ary_xpose = range_series.pack(transpose=True)

        assert_equals((60, 120, 1), range_series.dims.count)
        assert_equals((2, 60, 120), range_series_ary.shape)
        assert_equals((2, 120, 60), range_series_ary_xpose.shape)
        assert_equals('float16', str(range_series_ary.dtype))
        assert_true(np.array_equal(rangeary, range_series_ary[0]))
        assert_true(np.array_equal(rangeary2, range_series_ary[1]))
        assert_true(np.array_equal(rangeary.T, range_series_ary_xpose[0]))
        assert_true(np.array_equal(rangeary2.T, range_series_ary_xpose[1]))

    @unittest.skipIf(not _have_image, "PIL/pillow not installed or not functional")
    def test_loadMultipleTifsAsSeriesNoShuffle(self):
        self.__run_loadMultipleTifsAsSeries(False)

    @unittest.skipIf(not _have_image, "PIL/pillow not installed or not functional")
    def test_loadMultipleTifsAsSeriesWithShuffle(self):
        self.__run_loadMultipleTifsAsSeries(True)
parser.add_option("--nmfmethod", choices=["als"], default="als") parser.add_option("--maxiter", type=float, default=20) parser.add_option("--tol", type=float, default=0.001) parser.add_option("--w_hist", action="store_true", default=False) parser.add_option("--recon_hist", action="store_true", default=False) opts, args = parser.parse_args() try: datafile = args[0] outputdir = args[1] k = int(args[2]) except IndexError: parser.print_usage() raise Exception("too few arguments") tsc = ThunderContext.start(appName="nmf") data = tsc.loadSeries(datafile).cache() nmf = NMF(k=k, method=opts.nmfmethod, maxIter=opts.maxiter, tol=opts.tol, wHist=opts.w_hist, reconHist=opts.recon_hist) nmf.fit(data) outputdir += "-nmf" tsc.export(nmf.w, outputdir, "w", "matlab") tsc.export(nmf.h, outputdir, "h", "matlab") if opts.w_hist: tsc.export(nmf.wConvergence, outputdir, "w_convergence", "matlab")
""" Example standalone app for independent components analysis """

import argparse
from thunder import ThunderContext, ICA, export

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="do independent components analysis")
    parser.add_argument("datafile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("k", type=int)
    parser.add_argument("c", type=int)
    parser.add_argument("--svdmethod", choices=("direct", "em"), default="direct", required=False)
    parser.add_argument("--maxiter", type=float, default=100, required=False)
    parser.add_argument("--tol", type=float, default=0.000001, required=False)
    parser.add_argument("--seed", type=int, default=0, required=False)

    args = parser.parse_args()

    tsc = ThunderContext.start(appName="ica")

    data = tsc.loadSeries(args.datafile).cache()
    model = ICA(k=args.k, c=args.c, svdmethod=args.svdmethod, maxiter=args.maxiter,
                tol=args.tol, seed=args.seed)
    result = model.fit(data)

    outputdir = args.outputdir + "-ica"
    export(result.a, outputdir, "a", "matlab")
    export(result.sigs, outputdir, "sigs", "matlab")
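
# Hypothetical invocation of the ICA app above (script name and paths are
# placeholders, not from the original source); the positional k and c feed
# straight into ICA(k=..., c=...):
#
#     spark-submit ica_app.py /path/to/series /path/to/results 10 4 --svdmethod em
#
# The fitted outputs a and sigs are exported in MATLAB format into
# /path/to/results-ica.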
""" Example standalone app for calculating series statistics """ import optparse from thunder import ThunderContext if __name__ == "__main__": parser = optparse.OptionParser( description="compute summary statistics on time series data", usage="%prog datafile outputdir mode [options]") parser.add_option("--preprocess", action="store_true", default=False) opts, args = parser.parse_args() try: datafile = args[0] outputdir = args[1] mode = args[2] except IndexError: parser.print_usage() raise Exception("too few arguments") tsc = ThunderContext.start(appName="stats") data = tsc.loadSeries(datafile).cache() vals = data.seriesStat(mode) outputdir += "-stats" tsc.export(vals, outputdir, "stats_" + mode, "matlab")
stimulus_pulse = 1

if stimulus_pulse == 1:
    stimulus_on_time = [10, 28, 47, 65, 83, 101]
    stimulus_off_time = [14, 32, 51, 69, 87, 105]
    color_mat = ['#00FFFF', '#0000A0', '#800080', '#FF00FF', '#800000', '#A52A2A']

# Go into the main functions that do pca for individual trials
import pickle
from pca_thunder_analysis import run_analysis_individualodors
from pca_thunder_analysis import run_analysis_eachodor
from pca_thunder_analysis import run_analysis_allodor
from thunder import ThunderContext

print('Starting Thunder now. Check console for details')
tsc = ThunderContext.start(appName="thunderpca")

if files_to_do_PCA[0] == 1:
    run_analysis_individualodors(Exp_Folder, filename_save_prefix_forPCA, filename_save_prefix_for_textfile,
                                 pca_components_ind, num_pca_colors_ind, num_samples_ind, thresh_pca_ind,
                                 color_map_ind, tsc, redo_pca, reconstruct_pca,
                                 stimulus_on_time, stimulus_off_time, color_mat, required_pcs, time_baseline)
if files_to_do_PCA[1] == 1:
    run_analysis_eachodor(Exp_Folder, filename_save_prefix_forPCA, filename_save_prefix_for_textfile,
                          pca_components_eachodor, num_pca_colors_eachodor, num_samples_eachodor,
                          thresh_pca_eachodor, color_map_eachodor, tsc, redo_pca, reconstruct_pca,
                          stimulus_on_time, stimulus_off_time, color_mat, required_pcs, time_baseline)
if files_to_do_PCA[2] == 1:
    run_analysis_allodor(Exp_Folder, filename_save_prefix_forPCA, filename_save_prefix_for_textfile,
                         pca_components_allodor, num_pca_colors_allodor, num_samples_allodor,
                         thresh_pca_allodor, color_map_allodor, tsc, redo_pca, reconstruct_pca,
                         stimulus_on_time, stimulus_off_time, color_mat, required_pcs, time_baseline)

############# Save all input parameters
with open(Exp_Folder + filename_save_prefix_forPCA + '_save_pca_variables', 'w') as f:
    pickle.dump([pca_components_ind, num_pca_colors_ind, num_samples_ind, thresh_pca_ind, color_map_ind,\
import optparse
from thunder import ThunderContext, ICA, export

if __name__ == "__main__":
    parser = optparse.OptionParser(description="do independent components analysis",
                                   usage="%prog datafile outputdir k c [options]")
    parser.add_option("--svdmethod", choices=("direct", "em"), default="direct")
    parser.add_option("--maxiter", type=float, default=100)
    parser.add_option("--tol", type=float, default=0.000001)
    parser.add_option("--seed", type=int, default=0)

    opts, args = parser.parse_args()
    try:
        datafile = args[0]
        outputdir = args[1]
        k = int(args[2])
        c = int(args[3])
    except IndexError:
        parser.print_usage()
        raise Exception("too few arguments")

    tsc = ThunderContext.start(appName="ica")

    data = tsc.loadSeries(datafile).cache()
    model = ICA(k=k, c=c, svdmethod=opts.svdmethod, maxiter=opts.maxiter, tol=opts.tol, seed=opts.seed)
    result = model.fit(data)

    outputdir += "-ica"
    export(result.a, outputdir, "a", "matlab")
    export(result.sigs, outputdir, "sigs", "matlab")
""" Example standalone app for mass-univariate regression """ import optparse from thunder import ThunderContext, RegressionModel if __name__ == "__main__": parser = optparse.OptionParser(description="fit a regression model", usage="%prog datafile modelfile outputdir [options]") parser.add_option("--regressmode", choices=("mean", "linear", "bilinear"), default="linear", help="form of regression") opts, args = parser.parse_args() try: datafile = args[0] modelfile = args[1] outputdir = args[2] except IndexError: parser.print_usage() raise Exception("too few arguments") tsc = ThunderContext.start(appName="regress") data = tsc.loadText(datafile) result = RegressionModel.load(modelfile, opts.regressmode).fit(data) outputdir += "-regress" tsc.export(result.select('stats'), outputdir, "stats", "matlab") tsc.export(result.select('betas'), outputdir, "betas", "matlab")
import os
import shutil
from datetime import datetime

from pyspark import SparkConf, SparkContext
from thunder import ThunderContext


def CompressImages(inputs, output, confObj):
    debugMode = False
    st = datetime.now()

    imageExt = confObj['ext']
    imageHeight = confObj['dims'][0]
    imageWidth = confObj['dims'][1]
    refImgId = confObj['refImageId']
    diffImageFolder = confObj['DiffImageFolder']
    if debugMode:
        print(confObj)

    import glob
    totImages = len(glob.glob(inputs + "*." + imageExt))

    if os.path.exists(output):
        shutil.rmtree(output)

    conf = SparkConf().setAppName('ImgCompress')
    sc = SparkContext(conf=conf)
    imageHeight = sc.broadcast(imageHeight)
    imageWidth = sc.broadcast(imageWidth)

    tsc = ThunderContext(sc)
    tscImages = tsc.loadImages(inputs, (imageHeight.value, imageWidth.value), imageExt, imageExt).cache()

    # re-key every pixel by its (row, offset) position, carrying (imageIndex, intensity) as the value
    floatingPixelRdd = tscImages.rdd.flatMapValues(lambda r: r).zipWithIndex()\
        .map(lambda l: ((l[0][0], (l[1] - (l[0][0] * int(imageHeight.value)))), l[0][1]))\
        .flatMapValues(lambda r: r).zipWithIndex()\
        .map(lambda l: ((l[0][0][1],
                         (l[1] - (l[0][0][0] * int(imageWidth.value) * int(imageHeight.value)
                                  + l[0][0][1] * int(imageWidth.value)))),
                        (l[0][0][0], l[0][1])))
    if debugMode:
        floatingPixelRdd.saveAsTextFile(output + "\\Stage-1-FloatingPixel")

    # group all intensities for one spatial location across images
    temporalVoxelRdd = floatingPixelRdd.groupByKey().map(lambda x: (x[0], list(x[1]))).cache()
    if debugMode:
        temporalVoxelRdd.saveAsTextFile(output + "\\Stage-2-TemporalVoxel")

    iMapImageDict = {}
    for imgIndex in range(totImages):
        imgIndexBD = sc.broadcast(imgIndex)

        # ------------------------------- HELPER FUNCTIONS -------------------------------
        def ReAdjustImapList(l):
            intList = l[1]
            mKey = intList.pop(imgIndexBD.value)[1]
            return (mKey, intList)

        # NOTE: defined but never called below; kept from the original source
        def calculateMedianForEachImage(l):
            intRdd = sc.parallelize(l[1]).flatMap(lambda l: l).groupByKey().map(lambda x: (x[0], list(x[1])))
            intRddSorted = intRdd.map(lambda l: (l[0], sorted(l[1])))
            intRddMedian = intRddSorted.map(lambda l: (l[0], l[1][int(len(l[1]) / 2)]))
            return intRddMedian.collect()
        # ------------------------------- HELPER FUNCTIONS -------------------------------

        imapRdd = temporalVoxelRdd.map(lambda l: ReAdjustImapList(l)).sortBy(lambda l: l[0], False)\
            .groupByKey().map(lambda x: (x[0], list(x[1])))
        if debugMode:
            imapRdd.saveAsTextFile(output + "\\Stage-3__IMAP-Stage-1_imgIdx-" + str(imgIndex))

        imapRdd = imapRdd.flatMapValues(lambda l: l).flatMapValues(lambda l: l)\
            .map(lambda l: ((l[0], l[1][0]), l[1][1]))\
            .groupByKey().map(lambda x: (x[0], list(x[1])))\
            .map(lambda l: (l[0], sorted(l[1])))\
            .map(lambda l: (l[0], l[1][int(len(l[1]) / 2)]))\
            .map(lambda l: (l[0][0], (l[0][1], l[1])))
        if debugMode:
            imapRdd.saveAsTextFile(output + "\\Stage-3__IMAP-Stage-2_imgIdx-" + str(imgIndex))

        imapRdd = imapRdd.groupByKey().map(lambda x: (x[0], sorted(list(x[1]), key=lambda k: k[0])))
        if debugMode:
            imapRdd.saveAsTextFile(output + "\\Stage-3__IMAP-Stage-3_imgIdx-" + str(imgIndex))

        imapDict = imapRdd.collectAsMap()
        iMapImageDict[imgIndex] = imapDict

    iMapDictBD = sc.broadcast(iMapImageDict)
    refImageIdBD = sc.broadcast(refImgId)

    def CalcMapValue(pix, iMapVal):
        pImgIdx = pix[0]
        residual = 0
        for iIdx in iMapVal:
            if pImgIdx == iIdx[0]:
                residual = int(iIdx[1]) - pix[1]
                break
        return (pix[0], residual)

    def ApplyIMap(l):
        voxel = l[1]
        refIntensity = l[1][refImageIdBD.value][1]
        iMapValues = iMapDictBD.value[refImageIdBD.value][refIntensity]
        residualValues = []
        for pixel in voxel:
            if pixel[0] != refImageIdBD.value:
                residualValues.append(CalcMapValue(pixel, iMapValues))
            else:
                residualValues.append(pixel)
        return (l[0], residualValues)

    # replace every non-reference intensity with its residual against the mapped value
    diffFVRdd = temporalVoxelRdd.map(lambda l: ApplyIMap(l))
    if debugMode:
        diffFVRdd.saveAsTextFile(output + "\\Stage-4__Diff-Stage-1")

    # regroup residual pixels back into per-image row order
    # (removeRCIndex is assumed to be defined elsewhere in this module)
    residualImages = diffFVRdd.flatMapValues(lambda l: l)\
        .map(lambda l: ((l[1][0], l[0][0]), (l[0][1], l[1][1])))\
        .groupByKey().map(lambda x: (x[0], sorted(list(x[1]), key=lambda k: k[0])))\
        .map(lambda l: (l[0][0], (l[0][1], l[1])))\
        .groupByKey().map(lambda x: (x[0], sorted(list(x[1]), key=lambda k: k[0])))\
        .map(lambda l: (l[0], removeRCIndex(l[1])))
    residualImages.saveAsTextFile(output + "\\" + diffImageFolder)

    # stringify keys and values so the nested dict is JSON-serializable
    for ingIdx in range(totImages):
        iMapImageDict[str(ingIdx)] = dict([(str(k), str(v)) for k, v in iMapImageDict[ingIdx].items()])
        del iMapImageDict[ingIdx]

    import json
    with open(output + "\\imap.json", 'w') as f:
        json.dump(iMapImageDict, f)
    with open(output + '\\conf.json', 'w') as f:
        json.dump(confObj, f)

    # clean up
    sc.stop()

    en = datetime.now()
    td = en - st
    print(td)
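
# A hypothetical driver for CompressImages above; the dict keys mirror the
# confObj lookups at the top of the function, but every concrete value and
# path here is a placeholder, not from the original source.
if __name__ == "__main__":
    conf = {
        'ext': 'tif',                    # extension globbed for under `inputs`
        'dims': (512, 512),              # (imageHeight, imageWidth) of each frame
        'refImageId': 0,                 # index of the reference image for the intensity map
        'DiffImageFolder': 'residuals',  # output subfolder for the residual images
    }
    # `inputs` must end in a separator, since the function globs inputs + "*." + ext
    CompressImages("/data/frames/", "/data/compressed", conf)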