コード例 #1
0
 def loadinputdata(self, datafile, savefile=None):
     """Load the input data and remember the file locations involved.

     Parameters
     ----------
     datafile : str
         Path to the input data; also anchors the derived model file path.
     savefile : str, optional
         Output location; stored on the instance only when provided.
     """
     self.rdd = load(self.sc, datafile, preprocessmethod="dff-percentile")
     self.datafile = datafile
     if savefile is not None:
         self.savefile = savefile
     # the stimulus ("stim") model file lives alongside the data file
     self.modelfile = os.path.join(os.path.split(datafile)[0], 'stim')
コード例 #2
0
ファイル: thunderdatatest.py プロジェクト: EricSchles/thunder
 def loadinputdata(self, datafile, savefile=None):
     """Read the data set and record the related paths on the instance.

     datafile : path of the input data (also used to locate the model file)
     savefile : optional save location; only assigned when given
     """
     loaded = load(self.sc, datafile, preprocessmethod="dff-percentile")
     self.rdd = loaded
     self.datafile = datafile
     if savefile is not None:
         self.savefile = savefile
     # the stimulus/model file sits in the same directory as the data
     basedir = os.path.split(self.datafile)[0]
     self.modelfile = os.path.join(basedir, 'stim')
コード例 #3
0
ファイル: kmeans.py プロジェクト: NEILKUANG/thunder
            for (i, j) in newpoints:
                centers[i] = j

            iter += 1

        return KMeansModel(centers)


if __name__ == "__main__":
    # Command-line driver: cluster the data with k-means and save the
    # resulting centers and per-point labels in MATLAB format.
    parser = argparse.ArgumentParser(description="do kmeans clustering")
    parser.add_argument("datafile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("k", type=int)
    parser.add_argument("--maxiter", type=float, default=20, required=False)
    parser.add_argument("--tol", type=float, default=0.001, required=False)
    # BUGFIX: the original tuple was missing a comma after "dff-percentile",
    # so adjacent string literals fused into the single invalid choice
    # "dff-percentiledff-detrendnonlin", making both real options unusable.
    parser.add_argument("--preprocess", choices=("raw", "dff", "sub", "dff-highpass", "dff-percentile",
                        "dff-detrendnonlin", "dff-detrend-percentile"), default="raw", required=False)

    args = parser.parse_args()

    sc = SparkContext(appName="kmeans")

    # load (and cache) the data, fit the model, then label every point
    data = load(sc, args.datafile, args.preprocess).cache()
    model = KMeans(k=args.k, maxiter=args.maxiter, tol=args.tol).train(data)
    labels = model.predict(data)

    outputdir = args.outputdir + "-kmeans"
    save(model.centers, outputdir, "centers", "matlab")
    save(labels, outputdir, "labels", "matlab")
コード例 #4
0
ファイル: regress.py プロジェクト: vpomponiu/thunder
        else:
            r2 = 1 - sse / sst

        return b2[1:], r2, resid

# Registry mapping the command-line ``regressmode`` choice to the model
# class implementing it (the classes are defined earlier in this file).
REGRESSION_MODELS = {
    'linear': LinearRegressionModel,
    'bilinear': BilinearRegressionModel,
    'mean': MeanRegressionModel
}

if __name__ == "__main__":
    # Command-line driver: fit the requested regression model to the data
    # and save the fit statistics and coefficients in MATLAB format.
    parser = argparse.ArgumentParser(description="fit a regression model")
    parser.add_argument("datafile", type=str)
    parser.add_argument("modelfile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("regressmode", choices=("mean", "linear", "bilinear"), help="form of regression")
    # BUGFIX: a missing comma after "dff-percentile" concatenated two choices
    # into the single invalid option "dff-percentiledff-detrendnonlin".
    parser.add_argument("--preprocess", choices=("raw", "dff", "sub", "dff-highpass", "dff-percentile",
                        "dff-detrendnonlin", "dff-detrend-percentile"), default="raw", required=False)

    args = parser.parse_args()

    sc = SparkContext(appName="regress")

    data = load(sc, args.datafile, args.preprocess)
    # fit() also returns residuals; they are computed but not saved here
    stats, betas, resid = RegressionModel.load(args.modelfile, args.regressmode).fit(data)

    outputdir = args.outputdir + "-regress"
    save(stats, outputdir, "stats", "matlab")
    save(betas, outputdir, "betas", "matlab")
コード例 #5
0
ファイル: datasets.py プロジェクト: NEILKUANG/thunder
 def fromfile(self):
     """Load the example optomotor-response data set from its S3 bucket.

     Raises
     ------
     Exception
         If the SparkContext master does not look like an EC2 cluster;
         the error message promises the data is only loadable from EC2.
     """
     # BUGFIX: match 'ec2' rather than the bare substring 'ec', which
     # would false-positive on arbitrary hostnames containing "ec"
     if 'ec2' not in self.sc.master:
         raise Exception("must be running on EC2 to load the example data sets")
     path = 's3n://zebrafish.datasets/optomotor-response/1/'
     return load(self.sc, path + 'data/dat_plane*.txt', npartitions=1000)
コード例 #6
0
ファイル: datasets.py プロジェクト: NEILKUANG/thunder
 def fromfile(self):
     """Load the example fish data set stored under this data set's path."""
     datapath = os.path.join(self.path, 'data/fish.txt')
     return load(self.sc, datapath)
コード例 #7
0
    parser.add_argument("--tuningmode", choices=("circular", "gaussian"), default="gaussian", required=False, help="form of tuning curve")
    parser.add_argument("--basename", type=str, default="-", required=False)
    parser.add_argument("--stim", type=str, default="-", required=False)

    args = parser.parse_args()

    sc = SparkContext(args.master, "myscript")

    # when not running locally, ship the packaged egg to the workers;
    # THUNDER_EGG must name the directory containing the built *.egg
    if args.master != "local":
        egg = glob.glob(os.path.join(os.environ['THUNDER_EGG'], "*.egg"))
        sc.addPyFile(egg[0])
    
    # load data file
    datafile = os.path.join(args.datafolder, args.imagename)
    outputdir = os.path.join(args.datafolder,"spark")
    data = load(sc, datafile, args.preprocess, 4)
    
    # drop key: keep only the first three key components before caching
    # NOTE(review): tuple-parameter unpacking in this lambda is Python 2
    # only syntax; confirm the target interpreter before porting
    data = data.map(lambda (k, v): (k[0:3], v))
    data.cache()

    # compute mean map
    vals = Stats("mean").calc(data)
    save(vals,outputdir,"mean_vals","matlab")

    # compute local cor (skipped when the neighbourhood size is zero)
    if args.neighbourhood != 0:
        cor = LocalCorr(neighborhood=args.neighbourhood).calc(data)
        save(cor,outputdir,"local_corr","matlab")

    # if stim argument is not default
コード例 #8
0
ファイル: datasets.py プロジェクト: uklibaite/thunder
 def fromfile(self):
     """Load the example iris data set stored under this data set's path."""
     datapath = os.path.join(self.path, 'data/iris.txt')
     return load(self.sc, datapath)