def loadinputdata(self, datafile, savefile=None):
    """Load a data set with dff-percentile preprocessing and remember its paths.

    Sets self.rdd to the loaded data, records self.datafile, optionally
    records self.savefile, and points self.modelfile at a 'stim' file
    assumed to live alongside the data file.
    """
    self.rdd = load(self.sc, datafile, preprocessmethod="dff-percentile")
    self.datafile = datafile
    if savefile is not None:
        self.savefile = savefile
    # stimulus/model file is expected next to the data file
    self.modelfile = os.path.join(os.path.split(self.datafile)[0], 'stim')
for (i, j) in newpoints: centers[i] = j iter += 1 return KMeansModel(centers) if __name__ == "__main__": parser = argparse.ArgumentParser(description="do kmeans clustering") parser.add_argument("datafile", type=str) parser.add_argument("outputdir", type=str) parser.add_argument("k", type=int) parser.add_argument("--maxiter", type=float, default=20, required=False) parser.add_argument("--tol", type=float, default=0.001, required=False) parser.add_argument("--preprocess", choices=("raw", "dff", "sub", "dff-highpass", "dff-percentile" "dff-detrendnonlin", "dff-detrend-percentile"), default="raw", required=False) args = parser.parse_args() sc = SparkContext(appName="kmeans") data = load(sc, args.datafile, args.preprocess).cache() model = KMeans(k=args.k, maxiter=args.maxiter, tol=args.tol).train(data) labels = model.predict(data) outputdir = args.outputdir + "-kmeans" save(model.centers, outputdir, "centers", "matlab") save(labels, outputdir, "labels", "matlab")
else: r2 = 1 - sse / sst return b2[1:], r2, resid REGRESSION_MODELS = { 'linear': LinearRegressionModel, 'bilinear': BilinearRegressionModel, 'mean': MeanRegressionModel } if __name__ == "__main__": parser = argparse.ArgumentParser(description="fit a regression model") parser.add_argument("datafile", type=str) parser.add_argument("modelfile", type=str) parser.add_argument("outputdir", type=str) parser.add_argument("regressmode", choices=("mean", "linear", "bilinear"), help="form of regression") parser.add_argument("--preprocess", choices=("raw", "dff", "sub", "dff-highpass", "dff-percentile" "dff-detrendnonlin", "dff-detrend-percentile"), default="raw", required=False) args = parser.parse_args() sc = SparkContext(appName="regress") data = load(sc, args.datafile, args.preprocess) stats, betas, resid = RegressionModel.load(args.modelfile, args.regressmode).fit(data) outputdir = args.outputdir + "-regress" save(stats, outputdir, "stats", "matlab") save(betas, outputdir, "betas", "matlab")
def fromfile(self):
    """Load the optomotor-response example data set from S3.

    Raises Exception unless the Spark master URL looks like an EC2 cluster,
    since the data lives in an S3 bucket reachable from there.
    """
    master = self.sc.master
    # NOTE(review): this only checks for the substring 'ec' in the master
    # URL -- presumably meant to detect EC2; confirm it shouldn't be 'ec2'.
    if 'ec' not in master:
        raise Exception("must be running on EC2 to load the example data sets")
    base = 's3n://zebrafish.datasets/optomotor-response/1/'
    return load(self.sc, base + 'data/dat_plane*.txt', npartitions=1000)
def fromfile(self):
    """Load the example fish data set relative to this loader's base path."""
    fish_path = os.path.join(self.path, 'data/fish.txt')
    return load(self.sc, fish_path)
parser.add_argument("--tuningmode", choices=("circular", "gaussian"),
                    default="gaussian", required=False,
                    help="form of tuning curve")
parser.add_argument("--basename", type=str, default="-", required=False)
parser.add_argument("--stim", type=str, default="-", required=False)

args = parser.parse_args()

sc = SparkContext(args.master, "myscript")
if args.master != "local":
    # ship the thunder egg to the workers when running on a cluster
    egg = glob.glob(os.path.join(os.environ['THUNDER_EGG'], "*.egg"))
    sc.addPyFile(egg[0])

# load data file
datafile = os.path.join(args.datafolder, args.imagename)
outputdir = os.path.join(args.datafolder, "spark")
data = load(sc, datafile, args.preprocess, 4)

# drop key
# BUG FIX: tuple-parameter lambdas (`lambda (k, v): ...`) were removed in
# Python 3 (PEP 3113); index the pair explicitly -- same behavior on 2 and 3.
data = data.map(lambda kv: (kv[0][0:3], kv[1]))
data.cache()

# compute mean map
vals = Stats("mean").calc(data)
save(vals, outputdir, "mean_vals", "matlab")

# compute local cor
if args.neighbourhood != 0:
    cor = LocalCorr(neighborhood=args.neighbourhood).calc(data)
    save(cor, outputdir, "local_corr", "matlab")

# if stim argument is not default
def fromfile(self):
    """Load the example iris data set relative to this loader's base path."""
    iris_path = os.path.join(self.path, 'data/iris.txt')
    return load(self.sc, iris_path)