return corr if __name__ == "__main__": parser = argparse.ArgumentParser( description="correlate time series with neighbors") parser.add_argument("master", type=str) parser.add_argument("datafile", type=str) parser.add_argument("outputdir", type=str) parser.add_argument("sz", type=int) parser.add_argument("--preprocess", choices=("raw", "dff", "dff-highpass", "sub"), default="raw", required=False) args = parser.parse_args() sc = SparkContext(args.master, "localcorr") if args.master != "local": egg = glob.glob(os.path.join(os.environ['THUNDER_EGG'], "*.egg")) sc.addPyFile(egg[0]) data = load(sc, args.datafile, args.preprocess).cache() corrs = localcorr(data, args.sz) outputdir = args.outputdir + "-localcorr" save(corrs, outputdir, "corr", "matlab")
# join with the original time series data to compute correlations result = data.join(means) # get correlations corr = result.mapValues(lambda x: corrcoef(x[0], x[1])[0, 1]) return corr if __name__ == "__main__": parser = argparse.ArgumentParser(description="correlate time series with neighbors") parser.add_argument("master", type=str) parser.add_argument("datafile", type=str) parser.add_argument("outputdir", type=str) parser.add_argument("sz", type=int) parser.add_argument("--preprocess", choices=("raw", "dff", "dff-highpass", "sub"), default="raw", required=False) args = parser.parse_args() egg = glob.glob(os.path.join(os.environ["THUNDER_EGG"], "*.egg")) sc = SparkContext(args.master, "localcorr", pyFiles=egg) data = load(sc, args.datafile, args.preprocess).cache() corrs = localcorr(data, args.sz) outputdir = args.outputdir + "-localcorr" if not os.path.exists(outputdir): os.makedirs(outputdir) save(corrs, outputdir, "corr", "matlab")
# ---- script body; assumes sc and args are defined above this chunk ----
if args.master != "local":
    egg = glob.glob(os.path.join(os.environ['THUNDER_EGG'], "*.egg"))
    sc.addPyFile(egg[0])

# load data file
datafile = os.path.join(args.datafolder, args.imagename)
outputdir = os.path.join(args.datafolder, "spark")
data = load(sc, datafile, args.preprocess, 4)

# drop the last key coordinate
# FIX: `lambda (k, v):` tuple-unpacking syntax is Python-2-only (removed by
# PEP 3113); index the key/value pair instead, which works on both 2 and 3
data = data.map(lambda kv: (kv[0][0:3], kv[1]))
data.cache()

# compute mean map
vals = stats(data, "mean")
save(vals, outputdir, "mean_vals", "matlab")

# compute local correlations when a neighbourhood size was given
if args.neighbourhood != 0:
    cor = localcorr(data, args.neighbourhood)
    save(cor, outputdir, "local_corr", "matlab")

# if stim argument is not default, fit a regression per stimulus
if args.stim != '-':
    # parse into different stim names
    p = re.compile('-')
    stims = p.split(args.stim)
    # compute regression for each stimulus
    for i in range(len(stims)):
        modelfile = os.path.join(args.datafolder, args.basename + stims[i])
        # NOTE(review): loop body is truncated in this chunk; continues below
if __name__ == "__main__":
    # command-line entry point: fit a regression model to each time series
    parser = argparse.ArgumentParser(description="fit a regression model")
    for positional in ("master", "datafile", "modelfile", "outputdir"):
        parser.add_argument(positional, type=str)
    parser.add_argument("regressmode", choices=("linear", "bilinear"),
                        help="form of regression")
    parser.add_argument("--preprocess", choices=("raw", "dff", "dff-highpass", "sub"),
                        default="raw", required=False)
    args = parser.parse_args()

    sc = SparkContext(args.master, "regress")
    # distribute the packaged thunder egg when not running locally
    if args.master != "local":
        egg = glob.glob(os.path.join(os.environ['THUNDER_EGG'], "*.egg"))
        sc.addPyFile(egg[0])

    data = load(sc, args.datafile, args.preprocess)
    stats, betas = regress(data, args.modelfile, args.regressmode)

    outputdir = args.outputdir + "-regress"
    save(stats, outputdir, "stats", "matlab")
    save(betas, outputdir, "betas", "matlab")
# ---- command-line entry point: k-means clustering of time series ----
parser = argparse.ArgumentParser(description="do kmeans clustering")
parser.add_argument("master", type=str)
parser.add_argument("datafile", type=str)
parser.add_argument("outputdir", type=str)
parser.add_argument("k", type=int)
parser.add_argument("--maxiter", type=float, default=20, required=False)
parser.add_argument("--tol", type=float, default=0.001, required=False)
parser.add_argument("--preprocess", choices=("raw", "dff", "dff-highpass", "sub"),
                    default="raw", required=False)
args = parser.parse_args()

sc = SparkContext(args.master, "kmeans")
# distribute the packaged thunder egg when not running locally
if args.master != "local":
    egg = glob.glob(os.path.join(os.environ['THUNDER_EGG'], "*.egg"))
    sc.addPyFile(egg[0])

data = load(sc, args.datafile, args.preprocess).cache()
labels, centers = kmeans(data, k=args.k, maxiter=args.maxiter, tol=args.tol)

outputdir = args.outputdir + "-kmeans"
save(labels, outputdir, "labels", "matlab")
save(centers, outputdir, "centers", "matlab")
params = tuningmodel.fit(betas) else: # use data params = tuningmodel.fit(data) return params if __name__ == "__main__": parser = argparse.ArgumentParser(description="fit a parametric tuning curve to regression results") parser.add_argument("master", type=str) parser.add_argument("datafile", type=str) parser.add_argument("tuningmodelfile", type=str) parser.add_argument("outputdir", type=str) parser.add_argument("tuningmode", choices=("circular", "gaussian"), help="form of tuning curve") parser.add_argument("--preprocess", choices=("raw", "dff", "dff-highpass", "sub"), default="raw", required=False) parser.add_argument("--regressmodelfile", type=str) parser.add_argument("--regressmode", choices=("linear", "bilinear"), help="form of regression") args = parser.parse_args() egg = glob.glob(os.path.join(os.environ['THUNDER_EGG'], "*.egg")) sc = SparkContext(args.master, "tuning", pyFiles=egg) data = load(sc, args.datafile, args.preprocess).cache() params = tuning(data, args.tuningmodelfile, args.tuningmode, args.regressmodelfile, args.regressmode) outputdir = args.outputdir + "-tuning" if not os.path.exists(outputdir): os.makedirs(outputdir) save(params, outputdir, "params", "matlab")
betas, stats, resid = model.fit(data) return stats, betas if __name__ == "__main__": parser = argparse.ArgumentParser(description="fit a regression model") parser.add_argument("master", type=str) parser.add_argument("datafile", type=str) parser.add_argument("modelfile", type=str) parser.add_argument("outputdir", type=str) parser.add_argument("regressmode", choices=("linear", "bilinear"), help="form of regression") parser.add_argument("--preprocess", choices=("raw", "dff", "dff-highpass", "sub"), default="raw", required=False) args = parser.parse_args() sc = SparkContext(args.master, "regress") if args.master != "local": egg = glob.glob(os.path.join(os.environ['THUNDER_EGG'], "*.egg")) sc.addPyFile(egg[0]) data = load(sc, args.datafile, args.preprocess) stats, betas = regress(data, args.modelfile, args.regressmode) outputdir = args.outputdir + "-regress" save(stats, outputdir, "stats", "matlab") save(betas, outputdir, "betas", "matlab")
# ---- continuation of the tuning script's argument setup (parser created above) ----
parser.add_argument("tuningmodelfile", type=str)
parser.add_argument("outputdir", type=str)
parser.add_argument("tuningmode", choices=("circular", "gaussian"),
                    help="form of tuning curve")
parser.add_argument("--preprocess", choices=("raw", "dff", "dff-highpass", "sub"),
                    default="raw", required=False)
parser.add_argument("--regressmodelfile", type=str)
parser.add_argument("--regressmode", choices=("linear", "bilinear"),
                    help="form of regression")
args = parser.parse_args()

sc = SparkContext(args.master, "tuning")
# distribute the packaged thunder egg when not running locally
if args.master != "local":
    egg = glob.glob(os.path.join(os.environ['THUNDER_EGG'], "*.egg"))
    sc.addPyFile(egg[0])

data = load(sc, args.datafile, args.preprocess).cache()
params = tuning(data, args.tuningmodelfile, args.tuningmode,
                args.regressmodelfile, args.regressmode)

outputdir = args.outputdir + "-tuning"
save(params, outputdir, "params", "matlab")
co = out.mapValues(lambda x: x[0]) ph = out.mapValues(lambda x: x[1]) return co, ph if __name__ == "__main__": parser = argparse.ArgumentParser(description="compute a fourier transform on each time series") parser.add_argument("master", type=str) parser.add_argument("datafile", type=str) parser.add_argument("outputdir", type=str) parser.add_argument("freq", type=int) parser.add_argument("--preprocess", choices=("raw", "dff", "dff-highpass", "sub"), default="raw", required=False) args = parser.parse_args() sc = SparkContext(args.master, "fourier") if args.master != "local": egg = glob.glob(os.path.join(os.environ['THUNDER_EGG'], "*.egg")) sc.addPyFile(egg[0]) data = load(sc, args.datafile, args.preprocess).cache() co, ph = fourier(data, args.freq) outputdir = args.outputdir + "-fourier" save(co, outputdir, "co", "matlab") save(ph, outputdir, "ph", "matlab")
if __name__ == "__main__":
    # command-line entry point: average time series over given index sets
    parser = argparse.ArgumentParser(
        description="query time series data by averaging values for given indices")
    parser.add_argument("master", type=str)
    parser.add_argument("datafile", type=str)
    parser.add_argument("indsfile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("--preprocess", choices=("raw", "dff", "dff-highpass", "sub"),
                        default="raw", required=False)
    args = parser.parse_args()

    sc = SparkContext(args.master, "query")
    # distribute the packaged thunder egg when not running locally
    if args.master != "local":
        egg = glob.glob(os.path.join(os.environ['THUNDER_EGG'], "*.egg"))
        sc.addPyFile(egg[0])

    data = load(sc, args.datafile, args.preprocess).cache()
    ts = query(data, args.indsfile)

    outputdir = args.outputdir + "-query"
    save(ts, outputdir, "ts", "matlab")
description="do principal components analysis") parser.add_argument("master", type=str) parser.add_argument("datafile", type=str) parser.add_argument("outputdir", type=str) parser.add_argument("k", type=int) parser.add_argument("--svdmethod", choices=("direct", "em"), default="direct", required=False) parser.add_argument("--preprocess", choices=("raw", "dff", "dff-highpass", "sub"), default="raw", required=False) args = parser.parse_args() sc = SparkContext(args.master, "pca") if args.master != "local": egg = glob.glob(os.path.join(os.environ['THUNDER_EGG'], "*.egg")) sc.addPyFile(egg[0]) data = load(sc, args.datafile, args.preprocess).cache() scores, latent, comps = pca(data, args.k, args.svdmethod) outputdir = args.outputdir + "-pca" save(comps, outputdir, "comps", "matlab") save(latent, outputdir, "latent", "matlab") save(scores, outputdir, "scores", "matlab")
# ---- continuation of the regresswithpca script's argument setup ----
parser.add_argument("modelfile", type=str)
parser.add_argument("outputdir", type=str)
parser.add_argument("regressmode", choices=("linear", "bilinear"),
                    help="form of regression")
parser.add_argument("--k", type=int, default=2)
parser.add_argument("--preprocess", choices=("raw", "dff", "dff-highpass", "sub"),
                    default="raw", required=False)
args = parser.parse_args()

sc = SparkContext(args.master, "regresswithpca")
# distribute the packaged thunder egg when not running locally
if args.master != "local":
    egg = glob.glob(os.path.join(os.environ['THUNDER_EGG'], "*.egg"))
    sc.addPyFile(egg[0])

data = load(sc, args.datafile, args.preprocess).cache()
stats, comps, latent, scores, traj = regresswithpca(
    data, args.modelfile, args.regressmode, args.k)

outputdir = args.outputdir + "-regress"
save(stats, outputdir, "stats", "matlab")
save(comps, outputdir, "comps", "matlab")
save(latent, outputdir, "latent", "matlab")
save(scores, outputdir, "scores", "matlab")
save(traj, outputdir, "traj", "matlab")
if __name__ == "__main__":
    # command-line entry point: summary statistic of each time series
    parser = argparse.ArgumentParser(
        description="compute summary statistics on time series data")
    parser.add_argument("master", type=str)
    parser.add_argument("datafile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("mode", choices=("mean", "median", "std", "norm"),
                        help="which summary statistic")
    parser.add_argument("--preprocess", choices=("raw", "dff", "dff-highpass", "sub"),
                        default="raw", required=False)
    args = parser.parse_args()

    sc = SparkContext(args.master, "stats")
    # distribute the packaged thunder egg when not running locally
    if args.master != "local":
        egg = glob.glob(os.path.join(os.environ['THUNDER_EGG'], "*.egg"))
        sc.addPyFile(egg[0])

    data = load(sc, args.datafile, args.preprocess).cache()
    vals = stats(data, args.mode)

    outputdir = args.outputdir + "-stats"
    save(vals, outputdir, "stats_" + args.mode, "matlab")
# do classification perf = clf.classify(data, featureset) return perf if __name__ == "__main__": parser = argparse.ArgumentParser(description="fit a regression model") parser.add_argument("master", type=str) parser.add_argument("datafile", type=str) parser.add_argument("paramfile", type=str) parser.add_argument("outputdir", type=str) parser.add_argument("classifymode", choices="naivebayes", help="form of classifier") parser.add_argument("--preprocess", choices=("raw", "dff", "dff-highpass", "sub"), default="raw", required=False) args = parser.parse_args() sc = SparkContext(args.master, "classify") if args.master != "local": egg = glob.glob(os.path.join(os.environ['THUNDER_EGG'], "*.egg")) sc.addPyFile(egg[0]) data = load(sc, args.datafile, args.preprocess) perf = classify(data, args.paramfile, args.classifymode) outputdir = args.outputdir + "-classify" save(perf, outputdir, "perf", "matlab")
traj = model.fit(data, comps) return stats, comps, latent, scores, traj if __name__ == "__main__": parser = argparse.ArgumentParser(description="fit a regression model") parser.add_argument("master", type=str) parser.add_argument("datafile", type=str) parser.add_argument("modelfile", type=str) parser.add_argument("outputdir", type=str) parser.add_argument("regressmode", choices=("linear", "bilinear"), help="form of regression") parser.add_argument("--preprocess", choices=("raw", "dff", "dff-highpass", "sub"), default="raw", required=False) args = parser.parse_args() egg = glob.glob(os.path.join(os.environ["THUNDER_EGG"], "*.egg")) sc = SparkContext(args.master, "regress", pyFiles=egg) data = load(sc, args.datafile, args.preprocess).cache() stats, comps, latent, scores, traj = regresswithpca(data, args.modelfile, args.regressmode) outputdir = args.outputdir + "-regress" if not os.path.exists(outputdir): os.makedirs(outputdir) save(stats, outputdir, "stats", "matlab") save(comps, outputdir, "comps", "matlab") save(latent, outputdir, "latent", "matlab") save(scores, outputdir, "scores", "matlab") save(traj, outputdir, "traj", "matlab")
# ---- continuation of the ica script's argument setup (parser created above) ----
parser.add_argument("--maxiter", type=float, default=100, required=False)
parser.add_argument("--tol", type=float, default=0.000001, required=False)
parser.add_argument("--preprocess", choices=("raw", "dff", "dff-highpass", "sub"),
                    default="raw", required=False)
parser.add_argument("--seed", type=int, default=0, required=False)
args = parser.parse_args()

sc = SparkContext(args.master, "ica")
# distribute the packaged thunder egg when not running locally
if args.master != "local":
    egg = glob.glob(os.path.join(os.environ['THUNDER_EGG'], "*.egg"))
    sc.addPyFile(egg[0])

data = load(sc, args.datafile, args.preprocess).cache()
w, sigs = ica(data, args.k, args.c, svdmethod=args.svdmethod,
              maxiter=args.maxiter, tol=args.tol, seed=args.seed)

outputdir = args.outputdir + "-ica"
save(w, outputdir, "w", "matlab")
save(sigs, outputdir, "sigs", "matlab")
""" scores, latent, comps = svd(data, k, meansubtract=0, method=svdmethod) return scores, latent, comps if __name__ == "__main__": parser = argparse.ArgumentParser(description="do principal components analysis") parser.add_argument("master", type=str) parser.add_argument("datafile", type=str) parser.add_argument("outputdir", type=str) parser.add_argument("k", type=int) parser.add_argument("--svdmethod", choices=("direct", "em"), default="direct", required=False) parser.add_argument("--preprocess", choices=("raw", "dff", "dff-highpass", "sub"), default="raw", required=False) args = parser.parse_args() sc = SparkContext(args.master, "pca") if args.master != "local": egg = glob.glob(os.path.join(os.environ['THUNDER_EGG'], "*.egg")) sc.addPyFile(egg[0]) data = load(sc, args.datafile, args.preprocess).cache() scores, latent, comps = pca(data, args.k, args.svdmethod) outputdir = args.outputdir + "-pca" save(comps, outputdir, "comps", "matlab") save(latent, outputdir, "latent", "matlab") save(scores, outputdir, "scores", "matlab")
if __name__ == "__main__":
    # command-line entry point: independent components analysis
    parser = argparse.ArgumentParser(description="do independent components analysis")
    parser.add_argument("master", type=str)
    parser.add_argument("datafile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("k", type=int)
    parser.add_argument("c", type=int)
    parser.add_argument("--svdmethod", choices=("direct", "em"),
                        default="direct", required=False)
    parser.add_argument("--maxiter", type=float, default=100, required=False)
    parser.add_argument("--tol", type=float, default=0.000001, required=False)
    parser.add_argument("--preprocess", choices=("raw", "dff", "dff-highpass", "sub"),
                        default="raw", required=False)
    parser.add_argument("--seed", type=int, default=0, required=False)
    args = parser.parse_args()

    sc = SparkContext(args.master, "ica")
    # distribute the packaged thunder egg when not running locally
    if args.master != "local":
        egg = glob.glob(os.path.join(os.environ['THUNDER_EGG'], "*.egg"))
        sc.addPyFile(egg[0])

    data = load(sc, args.datafile, args.preprocess).cache()
    w, sigs = ica(data, args.k, args.c, svdmethod=args.svdmethod,
                  maxiter=args.maxiter, tol=args.tol, seed=args.seed)

    outputdir = args.outputdir + "-ica"
    save(w, outputdir, "w", "matlab")
    save(sigs, outputdir, "sigs", "matlab")
indsb = data.context.broadcast(method.inds[i]) ts[i, :] = data.filter(lambda (k, _): k in indsb.value).map( lambda (k, x): x).mean() return ts if __name__ == "__main__": parser = argparse.ArgumentParser(description="query time series data by averaging values for given indices") parser.add_argument("master", type=str) parser.add_argument("datafile", type=str) parser.add_argument("indsfile", type=str) parser.add_argument("outputdir", type=str) parser.add_argument("--preprocess", choices=("raw", "dff", "dff-highpass", "sub"), default="raw", required=False) args = parser.parse_args() sc = SparkContext(args.master, "query") if args.master != "local": egg = glob.glob(os.path.join(os.environ['THUNDER_EGG'], "*.egg")) sc.addPyFile(egg[0]) data = load(sc, args.datafile, args.preprocess).cache() ts = query(data, args.indsfile) outputdir = args.outputdir + "-query" save(ts, outputdir, "ts", "matlab")
labels = data.mapValues(lambda p: closestpoint(p, centers)) return labels, centers if __name__ == "__main__": parser = argparse.ArgumentParser(description="do kmeans clustering") parser.add_argument("master", type=str) parser.add_argument("datafile", type=str) parser.add_argument("outputdir", type=str) parser.add_argument("k", type=int) parser.add_argument("--maxiter", type=float, default=20, required=False) parser.add_argument("--tol", type=float, default=0.001, required=False) parser.add_argument("--preprocess", choices=("raw", "dff", "dff-highpass", "sub"), default="raw", required=False) args = parser.parse_args() sc = SparkContext(args.master, "kmeans") if args.master != "local": egg = glob.glob(os.path.join(os.environ['THUNDER_EGG'], "*.egg")) sc.addPyFile(egg[0]) data = load(sc, args.datafile, args.preprocess).cache() labels, centers = kmeans(data, k=args.k, maxiter=args.maxiter, tol=args.tol) outputdir = args.outputdir + "-kmeans" save(labels, outputdir, "labels", "matlab") save(centers, outputdir, "centers", "matlab")
method = SigProcessingMethod.load("stats", statistic=statistic) vals = method.calc(data) return vals if __name__ == "__main__": parser = argparse.ArgumentParser(description="compute summary statistics on time series data") parser.add_argument("master", type=str) parser.add_argument("datafile", type=str) parser.add_argument("outputdir", type=str) parser.add_argument("mode", choices=("mean", "median", "std", "norm"), help="which summary statistic") parser.add_argument("--preprocess", choices=("raw", "dff", "dff-highpass", "sub"), default="raw", required=False) args = parser.parse_args() egg = glob.glob(os.path.join(os.environ["THUNDER_EGG"], "*.egg")) sc = SparkContext(args.master, "ref", pyFiles=egg) data = load(sc, args.datafile, args.preprocess).cache() vals = stats(data, args.mode) outputdir = (args.outputdir + "-stats",) outputdir = args.outputdir + "-stats" if not os.path.exists(outputdir): os.makedirs(outputdir) save(vals, outputdir, "stats_" + args.mode, "matlab")
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="fit a regression model")
    parser.add_argument("master", type=str)
    parser.add_argument("datafile", type=str)
    parser.add_argument("paramfile", type=str)
    parser.add_argument("outputdir", type=str)
    # FIX: choices="naivebayes" made argparse do a substring test (any
    # substring such as "naive" or "bayes" was accepted); a one-element
    # tuple restricts the argument to the exact value
    parser.add_argument("classifymode", choices=("naivebayes",),
                        help="form of classifier")
    parser.add_argument("--preprocess", choices=("raw", "dff", "dff-highpass", "sub"),
                        default="raw", required=False)
    args = parser.parse_args()

    sc = SparkContext(args.master, "classify")
    # distribute the packaged thunder egg when not running locally
    if args.master != "local":
        egg = glob.glob(os.path.join(os.environ['THUNDER_EGG'], "*.egg"))
        sc.addPyFile(egg[0])

    data = load(sc, args.datafile, args.preprocess)
    perf = classify(data, args.paramfile, args.classifymode)

    outputdir = args.outputdir + "-classify"
    save(perf, outputdir, "perf", "matlab")