        self.comps = svd.v

        return self


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="do principal components analysis")
    parser.add_argument("datafile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("k", type=int)
    parser.add_argument("--svdmethod", choices=("direct", "em"), default="direct", required=False)
    parser.add_argument("--preprocess", choices=("raw", "dff", "dff-highpass", "sub"),
                        default="raw", required=False)

    args = parser.parse_args()

    sc = SparkContext(appName="pca")

    data = load(sc, args.datafile, args.preprocess).cache()

    result = PCA(args.k, args.svdmethod).fit(data)

    outputdir = args.outputdir + "-pca"
    save(result.comps, outputdir, "comps", "matlab")
    save(result.latent, outputdir, "latent", "matlab")
    save(result.scores, outputdir, "scores", "matlab")
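# A hypothetical invocation via spark-submit, assuming this script is saved as
# pca.py; the data path, output directory, and k below are illustrative only:
#
#   spark-submit pca.py hdfs:///data/series.txt /results 3 --svdmethod em --preprocess dff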
    parser = argparse.ArgumentParser(description="cross correlate time series with a signal")
    parser.add_argument("datafile", type=str)
    parser.add_argument("sigfile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("lag", type=int)
    parser.add_argument("--preprocess", choices=("raw", "dff", "sub", "dff-highpass", "dff-percentile",
                        "dff-detrendnonlin", "dff-detrend-percentile"), default="raw", required=False)

    args = parser.parse_args()

    sc = SparkContext(appName="crosscorr")

    data = load(sc, args.datafile, args.preprocess).cache()
    outputdir = args.outputdir + "-crosscorr"

    # post-process data with pca if lag greater than 0
    vals = CrossCorr(args.sigfile, args.lag).calc(data)
    if args.lag != 0:
        out = PCA(2).fit(vals)
        save(out.comps, outputdir, "comps", "matlab")
        save(out.latent, outputdir, "latent", "matlab")
        save(out.scores, outputdir, "scores", "matlab")
    else:
        save(vals, outputdir, "betas", "matlab")
        # reduce by key to get the average time series for each neighborhood
        means = neighbors.reduceByKey(lambda x, y: x + y).mapValues(
            lambda x: x / ((2 * self.neighborhood + 1) ** 2))

        # join with the original time series data to compute correlations
        result = data.join(means)

        # get correlations
        corr = result.mapValues(lambda x: corrcoef(x[0], x[1])[0, 1])

        # force sorting, but reverse keys for correct ordering
        return corr.map(lambda (k, v): (k[::-1], v)).sortByKey().map(lambda (k, v): (k[::-1], v))


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="correlate time series with neighbors")
    parser.add_argument("datafile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("sz", type=int)
    parser.add_argument("--preprocess", choices=("raw", "dff", "sub", "dff-highpass", "dff-percentile",
                        "dff-detrendnonlin", "dff-detrend-percentile"), default="raw", required=False)

    args = parser.parse_args()

    tsc = ThunderContext.start(appName="localcorr")

    data = tsc.loadText(args.datafile, filter=args.preprocess).cache()

    corrs = LocalCorr(args.sz).calc(data)

    outputdir = args.outputdir + "-localcorr"
    save(corrs, outputdir, "corr", "matlab")
        self.w = w
        self.a = a
        self.sigs = sigs

        return self


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="do independent components analysis")
    parser.add_argument("datafile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("k", type=int)
    parser.add_argument("c", type=int)
    parser.add_argument("--svdmethod", choices=("direct", "em"), default="direct", required=False)
    parser.add_argument("--maxiter", type=float, default=100, required=False)
    parser.add_argument("--tol", type=float, default=0.000001, required=False)
    parser.add_argument("--seed", type=int, default=0, required=False)
    parser.add_argument("--preprocess", choices=("raw", "dff", "sub", "dff-highpass", "dff-percentile",
                        "dff-detrendnonlin", "dff-detrend-percentile"), default="raw", required=False)

    args = parser.parse_args()

    sc = SparkContext(appName="ica")

    data = load(sc, args.datafile, args.preprocess).cache()

    result = ICA(args.k, args.c, args.svdmethod, args.maxiter, args.tol, args.seed).fit(data)

    outputdir = args.outputdir + "-ica"
    save(result.w, outputdir, "w", "matlab")
    save(result.sigs, outputdir, "sigs", "matlab")
    else:
        r2 = 1 - sse / sst

    return b2[1:], r2, resid


REGRESSION_MODELS = {
    'linear': LinearRegressionModel,
    'bilinear': BilinearRegressionModel,
    'mean': MeanRegressionModel
}


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="fit a regression model")
    parser.add_argument("datafile", type=str)
    parser.add_argument("modelfile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("regressmode", choices=("mean", "linear", "bilinear"), help="form of regression")
    parser.add_argument("--preprocess", choices=("raw", "dff", "sub", "dff-highpass", "dff-percentile",
                        "dff-detrendnonlin", "dff-detrend-percentile"), default="raw", required=False)

    args = parser.parse_args()

    sc = SparkContext(appName="regress")

    data = load(sc, args.datafile, args.preprocess)

    stats, betas, resid = RegressionModel.load(args.modelfile, args.regressmode).fit(data)

    outputdir = args.outputdir + "-regress"
    save(stats, outputdir, "stats", "matlab")
    save(betas, outputdir, "betas", "matlab")
from thunder.regression import RegressionModel
from thunder.factorization import PCA
from thunder.utils import load
from thunder.utils import save


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="fit a regression model")
    parser.add_argument("datafile", type=str)
    parser.add_argument("modelfile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("regressmode", choices=("mean", "linear", "bilinear"), help="form of regression")
    parser.add_argument("--k", type=int, default=2)
    parser.add_argument("--preprocess", choices=("raw", "dff", "sub", "dff-highpass", "dff-percentile",
                        "dff-detrendnonlin", "dff-detrend-percentile"), default="raw", required=False)

    args = parser.parse_args()

    sc = SparkContext(appName="regresswithpca")

    data = load(sc, args.datafile, args.preprocess)
    model = RegressionModel.load(args.modelfile, args.regressmode)

    betas, stats, resid = model.fit(data)  # do regression
    pca = PCA(args.k).fit(betas)  # do PCA
    traj = model.fit(data, pca.comps)  # get trajectories

    outputdir = args.outputdir + "-regress"
    save(pca.comps, outputdir, "comps", "matlab")
    save(pca.latent, outputdir, "latent", "matlab")
    save(pca.scores, outputdir, "scores", "matlab")
    save(traj, outputdir, "traj", "matlab")
parser.add_argument("datafile", type=str) parser.add_argument("outputdir", type=str) parser.add_argument("k", type=int) parser.add_argument("--nmfmethod", choices="als", default="als", required=False) parser.add_argument("--maxiter", type=float, default=20, required=False) parser.add_argument("--tol", type=float, default=0.001, required=False) parser.add_argument("--w_hist", type=bool, default=False, required=False) parser.add_argument("--recon_hist", type=bool, default=False, required=False) parser.add_argument("--preprocess", choices=("raw", "dff", "dff-highpass", "sub", "dff-percentile"), default="dff-percentile", required=False) args = parser.parse_args() sc = SparkContext(args.master, "nmf") if args.master != "local": egg = glob.glob(os.path.join(os.environ['THUNDER_EGG'], "*.egg")) sc.addPyFile(egg[0]) data = load(sc, args.datafile, args.preprocess).cache() nmf = NMF(k=args.k, method=args.nmfmethod, maxiter=args.maxiter, tol=args.tol, w_hist=args.w_hist, recon_hist=args.recon_hist) nmf.calc(data) outputdir = args.outputdir + "-nmf" save(nmf.w, outputdir, "w", "matlab") save(nmf.h, outputdir, "h", "matlab") if args.w_hist: save(nmf.w_convergence, outputdir, "w_convergence", "matlab") if args.recon_hist: save(nmf.rec_err, outputdir, "rec_err", "matlab")
        data.center(0)

        svd = SVD(k=self.k, method=self.svdmethod)
        svd.calc(data)

        self.scores = svd.u
        self.latent = svd.s
        self.comps = svd.v

        return self


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="do principal components analysis")
    parser.add_argument("datafile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("k", type=int)
    parser.add_argument("--svdmethod", choices=("direct", "em"), default="direct", required=False)
    parser.add_argument("--preprocess", choices=("raw", "dff", "dff-highpass", "sub"),
                        default="raw", required=False)

    args = parser.parse_args()

    tsc = ThunderContext.start(appName="pca")

    data = tsc.loadText(args.datafile, args.preprocess).cache()

    result = PCA(args.k, args.svdmethod).fit(data)

    outputdir = args.outputdir + "-pca"
    save(result.comps, outputdir, "comps", "matlab")
    save(result.latent, outputdir, "latent", "matlab")
    save(result.scores, outputdir, "scores", "matlab")
        values = zeros((self.n, len(data.first()[1])))

        for idx, indlist in enumerate(self.inds):
            if len(indlist) > 0:
                values[idx, :] = self.select(data, idx).map(lambda (k, x): x).sum() / len(indlist)
                keys[idx, :] = mean(map(lambda (k, v): k,
                                        indtosub(map(lambda k: (k, 0), indlist), dims.max)), axis=0)

        self.keys = keys
        self.values = values

        return self


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="query data by averaging values for given indices")
    parser.add_argument("datafile", type=str)
    parser.add_argument("indsfile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("--preprocess", choices=("raw", "dff", "sub", "dff-highpass", "dff-percentile",
                        "dff-detrendnonlin", "dff-detrend-percentile"), default="raw", required=False)

    args = parser.parse_args()

    tsc = ThunderContext.start(appName="query")

    data = tsc.loadText(args.datafile, args.preprocess).cache()

    qry = Query(args.indsfile).calc(data)

    outputdir = args.outputdir + "-query"
    save(qry.keys, outputdir, "centers", "matlab")
    save(qry.values, outputdir, "ts", "matlab")
            ts[i, :] = self.select(data, i).map(lambda (k, x): x).sum() / len(self.inds[i])

        return ts


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="query time series data by averaging values for given indices")
    parser.add_argument("datafile", type=str)
    parser.add_argument("indsfile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("--preprocess", choices=("raw", "dff", "sub", "dff-highpass", "dff-percentile",
                        "dff-detrendnonlin", "dff-detrend-percentile"), default="raw", required=False)

    args = parser.parse_args()

    sc = SparkContext(appName="query")

    data = load(sc, args.datafile, args.preprocess).cache()

    ts = Query(args.indsfile).calc(data)

    outputdir = args.outputdir + "-query"
    save(ts, outputdir, "ts", "matlab")
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="do non-negative matrix factorization")
    parser.add_argument("datafile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("k", type=int)
    parser.add_argument("--nmfmethod", choices=("als",), default="als", required=False)
    parser.add_argument("--maxiter", type=float, default=20, required=False)
    parser.add_argument("--tol", type=float, default=0.001, required=False)
    # boolean flags: argparse's type=bool treats any non-empty string as True
    parser.add_argument("--w_hist", action="store_true", required=False)
    parser.add_argument("--recon_hist", action="store_true", required=False)
    parser.add_argument("--preprocess", choices=("raw", "dff", "sub", "dff-highpass", "dff-percentile",
                        "dff-detrendnonlin", "dff-detrend-percentile"), default="raw", required=False)

    args = parser.parse_args()

    tsc = ThunderContext.start(appName="nmf")

    data = tsc.loadText(args.datafile, args.preprocess).cache()

    nmf = NMF(k=args.k, method=args.nmfmethod, maxiter=args.maxiter, tol=args.tol,
              w_hist=args.w_hist, recon_hist=args.recon_hist)
    nmf.calc(data)

    outputdir = args.outputdir + "-nmf"
    save(nmf.w, outputdir, "w", "matlab")
    save(nmf.h, outputdir, "h", "matlab")
    if args.w_hist:
        save(nmf.w_convergence, outputdir, "w_convergence", "matlab")
    if args.recon_hist:
        save(nmf.recon_err, outputdir, "rec_err", "matlab")
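# A hypothetical invocation, assuming this script is saved as nmf.py; paths and
# parameter values are illustrative. Note that --w_hist and --recon_hist are
# plain flags (store_true), so they take no value:
#
#   spark-submit nmf.py hdfs:///data/series.txt /results 5 --maxiter 30 --w_hist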
import os
import argparse
import glob
from pyspark import SparkContext
from thunder.timeseries import Fourier
from thunder.utils import load
from thunder.utils import save


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="compute a fourier transform on each time series")
    parser.add_argument("master", type=str)
    parser.add_argument("datafile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("freq", type=int)
    parser.add_argument("--preprocess", choices=("raw", "dff", "dff-highpass", "sub"),
                        default="raw", required=False)

    args = parser.parse_args()

    sc = SparkContext(args.master, "fourier")

    if args.master != "local":
        egg = glob.glob(os.path.join(os.environ['THUNDER_EGG'], "*.egg"))
        sc.addPyFile(egg[0])

    data = load(sc, args.datafile, args.preprocess).cache()

    out = Fourier(freq=args.freq).calc(data)

    outputdir = args.outputdir + "-fourier"
    save(out, outputdir, "fourier", "matlab")
        return t


CLASSIFIERS = {
    'gaussnaivebayes': GaussNaiveBayesClassifier,
    'ttest': TTestClassifier
}


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="fit a classification model")
    parser.add_argument("datafile", type=str)
    parser.add_argument("paramfile", type=str)
    parser.add_argument("outputdir", type=str)
    # choices follow the keys of CLASSIFIERS above
    parser.add_argument("classifymode", choices=("gaussnaivebayes", "ttest"), help="form of classifier")
    parser.add_argument("--featureset", type=array, default=None, required=False)
    parser.add_argument("--cv", type=int, default=0, required=False)
    parser.add_argument("--preprocess", choices=("raw", "dff", "sub", "dff-highpass", "dff-percentile",
                        "dff-detrendnonlin", "dff-detrend-percentile"), default="raw", required=False)

    args = parser.parse_args()

    tsc = ThunderContext.start("classify")

    data = tsc.loadText(args.datafile, args.preprocess)

    clf = MassUnivariateClassifier.load(args.paramfile, args.classifymode, cv=args.cv)
    perf = clf.classify(data, args.featureset)

    outputdir = args.outputdir + "-classify"
    save(perf, outputdir, "perf", "matlab")
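# A hypothetical invocation, assuming this script is saved as classify.py and
# that classifymode takes one of the CLASSIFIERS keys; paths are illustrative:
#
#   spark-submit classify.py hdfs:///data/series.txt params.mat /results gaussnaivebayes --cv 5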
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="cross correlate time series with a signal")
    parser.add_argument("master", type=str)
    parser.add_argument("datafile", type=str)
    parser.add_argument("sigfile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("lag", type=int)
    parser.add_argument("--preprocess", choices=("raw", "dff", "dff-highpass", "sub"),
                        default="raw", required=False)

    args = parser.parse_args()

    sc = SparkContext(args.master, "crosscorr")

    if args.master != "local":
        egg = glob.glob(os.path.join(os.environ['THUNDER_EGG'], "*.egg"))
        sc.addPyFile(egg[0])

    data = load(sc, args.datafile, args.preprocess).cache()
    outputdir = args.outputdir + "-crosscorr"

    # post-process data with pca if lag greater than 0
    vals = CrossCorr(args.sigfile, args.lag).calc(data)
    if args.lag != 0:
        out = PCA(2).fit(vals)
        save(out.comps, outputdir, "comps", "matlab")
        save(out.latent, outputdir, "latent", "matlab")
        save(out.scores, outputdir, "scores", "matlab")
    else:
        save(vals, outputdir, "betas", "matlab")
        # loop over indices, averaging time series
        ts = zeros((self.n, len(data.first()[1])))
        for i in range(0, self.n):
            ts[i, :] = self.select(data, i).map(lambda (k, x): x).sum() / len(self.inds[i])

        return ts


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="query time series data by averaging values for given indices")
    parser.add_argument("master", type=str)
    parser.add_argument("datafile", type=str)
    parser.add_argument("indsfile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("--preprocess", choices=("raw", "dff", "dff-percentile", "dff-highpass", "sub"),
                        default="raw", required=False)

    args = parser.parse_args()

    sc = SparkContext(args.master, "query")

    if args.master != "local":
        egg = glob.glob(os.path.join(os.environ['THUNDER_EGG'], "*.egg"))
        sc.addPyFile(egg[0])

    data = load(sc, args.datafile, args.preprocess).cache()

    ts = Query(args.indsfile).calc(data)

    outputdir = args.outputdir + "-query"
    save(ts, outputdir, "ts", "matlab")
from thunder.regression import RegressionModel
from thunder.factorization import PCA
from thunder.utils import ThunderContext
from thunder.utils import save


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="fit a regression model")
    parser.add_argument("datafile", type=str)
    parser.add_argument("modelfile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("regressmode", choices=("mean", "linear", "bilinear"), help="form of regression")
    parser.add_argument("--k", type=int, default=2)
    parser.add_argument("--preprocess", choices=("raw", "dff", "sub", "dff-highpass", "dff-percentile",
                        "dff-detrendnonlin", "dff-detrend-percentile"), default="raw", required=False)

    args = parser.parse_args()

    tsc = ThunderContext.start(appName="regresswithpca")

    data = tsc.loadText(args.datafile, args.preprocess)
    model = RegressionModel.load(args.modelfile, args.regressmode)

    betas, stats, resid = model.fit(data)  # do regression
    pca = PCA(args.k).fit(betas)  # do PCA
    traj = model.fit(data, pca.comps)  # get trajectories

    outputdir = args.outputdir + "-regress"
    save(pca.comps, outputdir, "comps", "matlab")
    save(pca.latent, outputdir, "latent", "matlab")
    save(pca.scores, outputdir, "scores", "matlab")
    save(traj, outputdir, "traj", "matlab")
    if args.master != "local":
        egg = glob.glob(os.path.join(os.environ['THUNDER_EGG'], "*.egg"))
        sc.addPyFile(egg[0])

    # load data file
    datafile = os.path.join(args.datafolder, args.imagename)
    outputdir = os.path.join(args.datafolder, "spark")
    data = load(sc, datafile, args.preprocess, 4)

    # drop key
    data = data.map(lambda (k, v): (k[0:3], v))
    data.cache()

    # compute mean map
    vals = Stats("mean").calc(data)
    save(vals, outputdir, "mean_vals", "matlab")

    # compute local corr
    if args.neighbourhood != 0:
        cor = LocalCorr(neighborhood=args.neighbourhood).calc(data)
        save(cor, outputdir, "local_corr", "matlab")

    # if stim argument is not default
    if args.stim != '-':
        # parse into different stim names
        p = re.compile('-')
        stims = p.split(args.stim)
        # compute regression
        for i in range(len(stims)):
            modelfile = os.path.join(args.datafolder, args.basename + stims[i])
choices=("raw", "dff", "dff-highpass", "sub"), default="raw", required=False) parser.add_argument("--regressmodelfile", type=str) parser.add_argument("--regressmode", choices=("linear", "bilinear"), help="form of regression") args = parser.parse_args() sc = SparkContext(args.master, "tuning") if args.master != "local": egg = glob.glob(os.path.join(os.environ['THUNDER_EGG'], "*.egg")) sc.addPyFile(egg[0]) data = load(sc, args.datafile, args.preprocess) tuningmodel = TuningModel.load(args.tuningmodelfile, args.tuningmode) if args.regressmodelfile is not None: # use regression results regressmodel = RegressionModel.load(args.regressmodelfile, args.regressmode) betas, stats, resid = regressmodel.fit(data) params = tuningmodel.fit(betas) else: # use data params = tuningmodel.fit(data) outputdir = args.outputdir + "-tuning" save(params, outputdir, "params", "matlab")
import argparse
from pyspark import SparkContext
from thunder.timeseries import Stats
from thunder.utils import load
from thunder.utils import save


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="compute summary statistics on time series data")
    parser.add_argument("datafile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("mode", choices=("mean", "median", "std", "norm"), help="which summary statistic")
    parser.add_argument("--preprocess", choices=("raw", "dff", "dff-highpass", "sub"),
                        default="raw", required=False)

    args = parser.parse_args()

    sc = SparkContext(appName="stats")

    data = load(sc, args.datafile, args.preprocess).cache()

    vals = Stats(args.mode).calc(data)

    outputdir = args.outputdir + "-stats"
    save(vals, outputdir, "stats_" + args.mode, "matlab")
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="fit a parametric tuning curve to regression results")
    parser.add_argument("datafile", type=str)
    parser.add_argument("tuningmodelfile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("tuningmode", choices=("circular", "gaussian"), help="form of tuning curve")
    parser.add_argument("--regressmodelfile", type=str)
    parser.add_argument("--regressmode", choices=("linear", "bilinear"), help="form of regression")
    parser.add_argument("--preprocess", choices=("raw", "dff", "sub", "dff-highpass", "dff-percentile",
                        "dff-detrendnonlin", "dff-detrend-percentile"), default="raw", required=False)

    args = parser.parse_args()

    sc = SparkContext(appName="tuning")

    data = load(sc, args.datafile, args.preprocess)

    tuningmodel = TuningModel.load(args.tuningmodelfile, args.tuningmode)
    if args.regressmodelfile is not None:
        # use regression results
        regressmodel = RegressionModel.load(args.regressmodelfile, args.regressmode)
        betas, stats, resid = regressmodel.fit(data)
        params = tuningmodel.fit(betas)
    else:
        # use data
        params = tuningmodel.fit(data)

    outputdir = args.outputdir + "-tuning"
    save(params, outputdir, "params", "matlab")
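# A hypothetical invocation, assuming this script is saved as tuning.py; file
# names are illustrative. The optional regression arguments switch the fit from
# raw data to regression coefficients:
#
#   spark-submit tuning.py hdfs:///data/series.txt tuningmodel.mat /results circular \
#       --regressmodelfile regressmodel.mat --regressmode linear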
    def __init__(self, statistic):
        self.func = {
            'median': lambda x: median(x),
            'mean': lambda x: mean(x),
            'std': lambda x: std(x),
            'norm': lambda x: norm(x - mean(x)),
        }[statistic]

    def get(self, y):
        """Compute the statistic"""
        return self.func(y)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="compute summary statistics on time series data")
    parser.add_argument("datafile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("mode", choices=("mean", "median", "std", "norm"), help="which summary statistic")
    parser.add_argument("--preprocess", choices=("raw", "dff", "sub", "dff-highpass", "dff-percentile",
                        "dff-detrendnonlin", "dff-detrend-percentile"), default="raw", required=False)

    args = parser.parse_args()

    tsc = ThunderContext.start(appName="stats")

    data = tsc.loadText(args.datafile, args.preprocess).cache()

    vals = Stats(args.mode).calc(data)

    outputdir = args.outputdir + "-stats"
    save(vals, outputdir, "stats_" + args.mode, "matlab")
            for (i, j) in newpoints:
                centers[i] = j
            iter += 1

        return KMeansModel(centers)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="do kmeans clustering")
    parser.add_argument("datafile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("k", type=int)
    parser.add_argument("--maxiter", type=float, default=20, required=False)
    parser.add_argument("--tol", type=float, default=0.001, required=False)
    parser.add_argument("--preprocess", choices=("raw", "dff", "sub", "dff-highpass", "dff-percentile",
                        "dff-detrendnonlin", "dff-detrend-percentile"), default="raw", required=False)

    args = parser.parse_args()

    sc = SparkContext(appName="kmeans")

    data = load(sc, args.datafile, args.preprocess).cache()

    model = KMeans(k=args.k, maxiter=args.maxiter, tol=args.tol).train(data)
    labels = model.predict(data)

    outputdir = args.outputdir + "-kmeans"
    save(model.centers, outputdir, "centers", "matlab")
    save(labels, outputdir, "labels", "matlab")
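# A hypothetical invocation, assuming this script is saved as kmeans.py; paths
# and parameter values are illustrative:
#
#   spark-submit kmeans.py hdfs:///data/series.txt /results 10 --maxiter 50 --tol 0.0001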
        self.keys = keys
        self.values = values

        return self


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="query data by averaging values for given indices")
    parser.add_argument("datafile", type=str)
    parser.add_argument("indsfile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("--preprocess", choices=("raw", "dff", "sub", "dff-highpass", "dff-percentile",
                        "dff-detrendnonlin", "dff-detrend-percentile"), default="raw", required=False)

    args = parser.parse_args()

    tsc = ThunderContext.start(appName="query")

    data = tsc.loadText(args.datafile, args.preprocess).cache()

    qry = Query(args.indsfile).calc(data)

    outputdir = args.outputdir + "-query"
    save(qry.keys, outputdir, "centers", "matlab")
    save(qry.values, outputdir, "ts", "matlab")