Example #1
        self.comps = svd.v

        return self


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="do principal components analysis")
    parser.add_argument("datafile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("k", type=int)
    parser.add_argument("--svdmethod",
                        choices=("direct", "em"),
                        default="direct",
                        required=False)
    parser.add_argument("--preprocess",
                        choices=("raw", "dff", "dff-highpass", "sub"),
                        default="raw",
                        required=False)

    args = parser.parse_args()

    sc = SparkContext(appName="pca")

    data = load(sc, args.datafile, args.preprocess).cache()
    result = PCA(args.k, args.svdmethod).fit(data)

    outputdir = args.outputdir + "-pca"
    save(result.comps, outputdir, "comps", "matlab")
    save(result.latent, outputdir, "latent", "matlab")
    save(result.scores, outputdir, "scores", "matlab")
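
For reference, the scores, latent, and comps saved above follow the usual SVD naming, which the fit method excerpted in Example #8 makes explicit. A minimal single-machine sketch of the same decomposition with NumPy (an illustration of the convention, not Thunder's distributed implementation; mat and k are hypothetical):

import numpy as np

def pca_svd(mat, k):
    """Local analogue of PCA(k).fit: center the columns, then truncated SVD."""
    centered = mat - mat.mean(axis=0)
    u, s, vt = np.linalg.svd(centered, full_matrices=False)
    scores = u[:, :k]   # left singular vectors (result.scores)
    latent = s[:k]      # singular values (result.latent)
    comps = vt[:k, :]   # right singular vectors (result.comps)
    return scores, latent, comps
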
Example #2
    parser = argparse.ArgumentParser(description="fit a regression model")
    parser.add_argument("datafile", type=str)
    parser.add_argument("sigfile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("lag", type=int)
    parser.add_argument("--preprocess",
                        choices=("raw", "dff", "sub", "dff-highpass",
                                 "dff-percentile"
                                 "dff-detrendnonlin",
                                 "dff-detrend-percentile"),
                        default="raw",
                        required=False)

    args = parser.parse_args()

    sc = SparkContext(appName="crosscorr")

    data = load(sc, args.datafile, args.preprocess).cache()

    outputdir = args.outputdir + "-crosscorr"

    # post-process data with pca if lag greater than 0
    vals = CrossCorr(args.sigfile, args.lag).calc(data)
    if args.lag != 0:
        out = PCA(2).fit(vals)
        save(out.comps, outputdir, "comps", "matlab")
        save(out.latent, outputdir, "latent", "matlab")
        save(out.scores, outputdir, "scores", "matlab")
    else:
        save(vals, outputdir, "betas", "matlab")
Example #3
        # reduce by key to get the average time series for each neighborhood
        means = neighbors.reduceByKey(lambda x, y: x + y).mapValues(lambda x: x / ((2*self.neighborhood+1)**2))

        # join with the original time series data to compute correlations
        result = data.join(means)

        # get correlations
        corr = result.mapValues(lambda x: corrcoef(x[0], x[1])[0, 1])

        # force sorting, but reverse keys for correct ordering
        return corr.map(lambda (k, v): (k[::-1], v)).sortByKey().map(lambda (k, v): (k[::-1], v))


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="correlate time series with neighbors")
    parser.add_argument("datafile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("sz", type=int)
    parser.add_argument("--preprocess", choices=("raw", "dff", "sub", "dff-highpass", "dff-percentile"
                        "dff-detrendnonlin", "dff-detrend-percentile"), default="raw", required=False)

    args = parser.parse_args()

    tsc = ThunderContext.start(appName="localcorr")

    data = tsc.loadText(args.datafile, filter=args.preprocess).cache()
    corrs = LocalCorr(args.sz).calc(data)

    outputdir = args.outputdir + "-localcorr"
    save(corrs, outputdir, "corr", "matlab")
Example #4
        self.w = w
        self.a = a
        self.sigs = sigs

        return self


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="do independent components analysis")
    parser.add_argument("datafile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("k", type=int)
    parser.add_argument("c", type=int)
    parser.add_argument("--svdmethod", choices=("direct", "em"), default="direct", required=False)
    parser.add_argument("--maxiter", type=float, default=100, required=False)
    parser.add_argument("--tol", type=float, default=0.000001, required=False)
    parser.add_argument("--seed", type=int, default=0, required=False)
    parser.add_argument("--preprocess", choices=("raw", "dff", "sub", "dff-highpass", "dff-percentile"
                        "dff-detrendnonlin", "dff-detrend-percentile"), default="raw", required=False)

    args = parser.parse_args()
    
    sc = SparkContext(appName="ica")

    data = load(sc, args.datafile, args.preprocess).cache()
    result = ICA(args.k, args.c, args.svdmethod, args.maxiter, args.tol, args.seed).fit(data)

    outputdir = args.outputdir + "-ica"
    save(result.w, outputdir, "w", "matlab")
    save(result.sigs, outputdir, "sigs", "matlab")
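
For orientation, the three ICA outputs relate through the standard linear model: w unmixes the data into independent signals, and a, its pseudoinverse, is the mixing matrix. A tiny NumPy check of that convention (shapes and values are made up):

import numpy as np

rng = np.random.RandomState(0)
w = rng.randn(4, 4)                        # stand-in unmixing matrix (result.w)
data = rng.randn(4, 200)                   # stand-in data matrix
sigs = np.dot(w, data)                     # independent signals (result.sigs)
a = np.linalg.pinv(w)                      # mixing matrix (result.a)
assert np.allclose(np.dot(a, sigs), data)  # data is recovered as a . sigs
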
Example #5
        else:
            r2 = 1 - sse / sst

        return b2[1:], r2, resid

REGRESSION_MODELS = {
    'linear': LinearRegressionModel,
    'bilinear': BilinearRegressionModel,
    'mean': MeanRegressionModel
}

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="fit a regression model")
    parser.add_argument("datafile", type=str)
    parser.add_argument("modelfile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("regressmode", choices=("mean", "linear", "bilinear"), help="form of regression")
    parser.add_argument("--preprocess", choices=("raw", "dff", "sub", "dff-highpass", "dff-percentile"
                        "dff-detrendnonlin", "dff-detrend-percentile"), default="raw", required=False)

    args = parser.parse_args()

    sc = SparkContext(appName="regress")

    data = load(sc, args.datafile, args.preprocess)
    stats, betas, resid = RegressionModel.load(args.modelfile, args.regressmode).fit(data)

    outputdir = args.outputdir + "-regress"
    save(stats, outputdir, "stats", "matlab")
    save(betas, outputdir, "betas", "matlab")
Example #6
from thunder.regression import RegressionModel
from thunder.factorization import PCA
from thunder.utils import load
from thunder.utils import save


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="fit a regression model")
    parser.add_argument("datafile", type=str)
    parser.add_argument("modelfile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("regressmode", choices=("mean", "linear", "bilinear"), help="form of regression")
    parser.add_argument("--k", type=int, default=2)
    parser.add_argument("--preprocess", choices=("raw", "dff", "sub", "dff-highpass", "dff-percentile"
                        "dff-detrendnonlin", "dff-detrend-percentile"), default="raw", required=False)

    args = parser.parse_args()
    
    sc = SparkContext(appName="regresswithpca")

    data = load(sc, args.datafile, args.preprocess)
    model = RegressionModel.load(args.modelfile, args.regressmode)  # do regression
    betas, stats, resid = model.fit(data)
    pca = PCA(args.k).fit(betas)  # do PCA
    traj = model.fit(data, pca.comps)  # get trajectories

    outputdir = args.outputdir + "-regress"
    save(pca.comps, outputdir, "comps", "matlab")
    save(pca.latent, outputdir, "latent", "matlab")
    save(pca.scores, outputdir, "scores", "matlab")
    save(traj, outputdir, "traj", "matlab")
Example #7
    parser.add_argument("datafile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("k", type=int)
    parser.add_argument("--nmfmethod", choices="als", default="als", required=False)
    parser.add_argument("--maxiter", type=float, default=20, required=False)
    parser.add_argument("--tol", type=float, default=0.001, required=False)
    parser.add_argument("--w_hist", type=bool, default=False, required=False)
    parser.add_argument("--recon_hist", type=bool, default=False, required=False)
    parser.add_argument("--preprocess", choices=("raw", "dff", "dff-highpass", "sub", "dff-percentile"),
                        default="dff-percentile", required=False)

    args = parser.parse_args()

    sc = SparkContext(args.master, "nmf")

    if args.master != "local":
        egg = glob.glob(os.path.join(os.environ['THUNDER_EGG'], "*.egg"))
        sc.addPyFile(egg[0])

    data = load(sc, args.datafile, args.preprocess).cache()
    nmf = NMF(k=args.k, method=args.nmfmethod, maxiter=args.maxiter, tol=args.tol, w_hist=args.w_hist,
              recon_hist=args.recon_hist)
    nmf.calc(data)

    outputdir = args.outputdir + "-nmf"
    save(nmf.w, outputdir, "w", "matlab")
    save(nmf.h, outputdir, "h", "matlab")
    if args.w_hist:
        save(nmf.w_convergence, outputdir, "w_convergence", "matlab")
    if args.recon_hist:
        save(nmf.rec_err, outputdir, "rec_err", "matlab")
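
The only nmfmethod offered is "als"; a common reading is alternating least squares with projection onto the nonnegative orthant, sketched below on a local array (a heuristic sketch under that assumption, not necessarily Thunder's exact update or convergence test):

import numpy as np

def nmf_als(A, k, maxiter=20, tol=0.001):
    """Factor A ~ w . h with nonnegative w and h by projected alternating least squares."""
    rng = np.random.RandomState(0)
    w = rng.rand(A.shape[0], k)
    prev = np.inf
    for _ in range(int(maxiter)):
        h = np.maximum(np.linalg.lstsq(w, A, rcond=None)[0], 0)        # update h, clip at 0
        w = np.maximum(np.linalg.lstsq(h.T, A.T, rcond=None)[0].T, 0)  # update w, clip at 0
        err = np.linalg.norm(A - np.dot(w, h))                         # reconstruction error
        if abs(prev - err) < tol:
            break
        prev = err
    return w, h
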
Example #8
        data.center(0)
        svd = SVD(k=self.k, method=self.svdmethod)
        svd.calc(data)

        self.scores = svd.u
        self.latent = svd.s
        self.comps = svd.v

        return self


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="do principal components analysis")
    parser.add_argument("datafile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("k", type=int)
    parser.add_argument("--svdmethod", choices=("direct", "em"), default="direct", required=False)
    parser.add_argument("--preprocess", choices=("raw", "dff", "dff-highpass", "sub"), default="raw", required=False)

    args = parser.parse_args()

    tsc = ThunderContext.start(appName="pca")

    data = tsc.loadText(args.datafile, args.preprocess).cache()
    result = PCA(args.k, args.svdmethod).fit(data)

    outputdir = args.outputdir + "-pca"
    save(result.comps, outputdir, "comps", "matlab")
    save(result.latent, outputdir, "latent", "matlab")
    save(result.scores, outputdir, "scores", "matlab")
Example #9
        values = zeros((self.n, len(data.first()[1])))
        for idx, indlist in enumerate(self.inds):
            if len(indlist) > 0:
                values[idx, :] = self.select(data, idx).map(lambda (k, x): x).sum() / len(indlist)
                keys[idx, :] = mean(map(lambda (k, v): k, indtosub(map(lambda k: (k, 0), indlist), dims.max)), axis=0)

        self.keys = keys
        self.values = values

        return self


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="query data by averaging values for given indices")
    parser.add_argument("datafile", type=str)
    parser.add_argument("indsfile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("--preprocess", choices=("raw", "dff", "sub", "dff-highpass", "dff-percentile"
                        "dff-detrendnonlin", "dff-detrend-percentile"), default="raw", required=False)

    args = parser.parse_args()

    tsc = ThunderContext.start(appName="query")

    data = tsc.loadText(args.datafile, args.preprocess).cache()
    qry = Query(args.indsfile).calc(data)

    outputdir = args.outputdir + "-query"
    save(qry.keys, outputdir, "centers", "matlab")
    save(qry.values, outputdir, "ts", "matlab")
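
Query, as used here, averages the selected time series for each index list (saved as "ts") and the mean subscript coordinates of those indices (saved as "centers"). With local data as a dict from key to array, the values half reduces to (hypothetical names):

import numpy as np

def query_values(records, index_lists):
    """Average the arrays in records (key -> 1-D array) over each list of keys."""
    return np.array([np.mean([records[k] for k in inds], axis=0)
                     for inds in index_lists])
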
Example #10
                ts[i, :] = self.select(
                    data, i).map(lambda (k, x): x).sum() / len(self.inds[i])

        return ts


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="query time series data by averaging values for given indices")
    parser.add_argument("datafile", type=str)
    parser.add_argument("indsfile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("--preprocess",
                        choices=("raw", "dff", "sub", "dff-highpass",
                                 "dff-percentile"
                                 "dff-detrendnonlin",
                                 "dff-detrend-percentile"),
                        default="raw",
                        required=False)

    args = parser.parse_args()

    sc = SparkContext(appName="query")

    data = load(sc, args.datafile, args.preprocess).cache()
    ts = Query(args.indsfile).calc(data)

    outputdir = args.outputdir + "-query"
    save(ts, outputdir, "ts", "matlab")
Example #11
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="do non-negative matrix factorization")
    parser.add_argument("datafile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("k", type=int)
    parser.add_argument("--nmfmethod", choices="als", default="als", required=False)
    parser.add_argument("--maxiter", type=float, default=20, required=False)
    parser.add_argument("--tol", type=float, default=0.001, required=False)
    parser.add_argument("--w_hist", type=bool, default=False, required=False)
    parser.add_argument("--recon_hist", type=bool, default=False, required=False)
    parser.add_argument("--preprocess", choices=("raw", "dff", "sub", "dff-highpass", "dff-percentile"
                        "dff-detrendnonlin", "dff-detrend-percentile"), default="raw", required=False)

    args = parser.parse_args()

    tsc = ThunderContext.start(appName="nmf")

    data = tsc.loadText(args.datafile, args.preprocess).cache()
    nmf = NMF(k=args.k, method=args.nmfmethod, maxiter=args.maxiter, tol=args.tol, w_hist=args.w_hist,
              recon_hist=args.recon_hist)
    nmf.calc(data)

    outputdir = args.outputdir + "-nmf"
    save(nmf.w, outputdir, "w", "matlab")
    save(nmf.h, outputdir, "h", "matlab")
    if args.w_hist:
        save(nmf.w_convergence, outputdir, "w_convergence", "matlab")
    if args.recon_hist:
        save(nmf.recon_err, outputdir, "rec_err", "matlab")
Example #12
import argparse
import glob
from pyspark import SparkContext
from thunder.timeseries import Fourier
from thunder.utils import load
from thunder.utils import save



if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="compute a fourier transform on each time series")
    parser.add_argument("master", type=str)
    parser.add_argument("datafile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("freq", type=int)
    parser.add_argument("--preprocess", choices=("raw", "dff", "dff-highpass", "sub"), default="raw", required=False)

    args = parser.parse_args()

    sc = SparkContext(args.master, "fourier")

    if args.master != "local":
        egg = glob.glob(os.path.join(os.environ['THUNDER_EGG'], "*.egg"))
        sc.addPyFile(egg[0])

    data = load(sc, args.datafile, args.preprocess).cache()
    out = Fourier(freq=args.freq).calc(data)

    outputdir = args.outputdir + "-fourier"
    save(out, outputdir, "fourier", "matlab")
Example #13
        return t


CLASSIFIERS = {
    'gaussnaivebayes': GaussNaiveBayesClassifier,
    'ttest': TTestClassifier
}

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="fit a regression model")
    parser.add_argument("datafile", type=str)
    parser.add_argument("paramfile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("classifymode", choices="naivebayes", help="form of classifier")
    parser.add_argument("--featureset", type=array, default="None", required=False)
    parser.add_argument("--cv", type=int, default="0", required=False)
    parser.add_argument("--preprocess", choices=("raw", "dff", "sub", "dff-highpass", "dff-percentile"
                        "dff-detrendnonlin", "dff-detrend-percentile"), default="raw", required=False)

    args = parser.parse_args()

    tsc = ThunderContext.start("classify")

    data = tsc.loadText(args.datafile, args.preprocess)
    clf = MassUnivariateClassifier.load(args.paramfile, args.classifymode, cv=args.cv)
    perf = clf.classify(data, args.featureset)

    outputdir = args.outputdir + "-classify"
    save(perf, outputdir, "perf", "matlab")
Example #14
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="fit a regression model")
    parser.add_argument("master", type=str)
    parser.add_argument("datafile", type=str)
    parser.add_argument("sigfile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("lag", type=int)
    parser.add_argument("--preprocess", choices=("raw", "dff", "dff-highpass", "sub"), default="raw", required=False)

    args = parser.parse_args()

    sc = SparkContext(args.master, "crosscorr")

    if args.master != "local":
        egg = glob.glob(os.path.join(os.environ['THUNDER_EGG'], "*.egg"))
        sc.addPyFile(egg[0])
    
    data = load(sc, args.datafile, args.preprocess).cache()

    outputdir = args.outputdir + "-crosscorr"

    # post-process data with pca if lag greater than 0
    vals = CrossCorr(args.sigfile, args.lag).calc(data)
    if args.lag != 0:
        out = PCA(2).fit(vals)
        save(out.comps, outputdir, "comps", "matlab")
        save(out.latent, outputdir, "latent", "matlab")
        save(out.scores, outputdir, "scores", "matlab")
    else:
        save(vals, outputdir, "betas", "matlab")
Example #15
        # loop over indices, averaging time series
        ts = zeros((self.n, len(data.first()[1])))
        for i in range(0, self.n):
            ts[i, :] = self.select(data, i).map(lambda (k, x): x).sum() / len(self.inds[i])

        return ts


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="query time series data by averaging values for given indices")
    parser.add_argument("master", type=str)
    parser.add_argument("datafile", type=str)
    parser.add_argument("indsfile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("--preprocess", choices=("raw", "dff", "dff-percentile", "dff-highpass", "sub"), default="raw", required=False)

    args = parser.parse_args()

    sc = SparkContext(args.master, "query")

    if args.master != "local":
        egg = glob.glob(os.path.join(os.environ['THUNDER_EGG'], "*.egg"))
        sc.addPyFile(egg[0])

    data = load(sc, args.datafile, args.preprocess).cache()
    ts = Query(args.indsfile).calc(data)

    outputdir = args.outputdir + "-query"
    save(ts, outputdir, "ts", "matlab")
Example #16
from thunder.regression import RegressionModel
from thunder.factorization import PCA
from thunder.utils import ThunderContext
from thunder.utils import save


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="fit a regression model")
    parser.add_argument("datafile", type=str)
    parser.add_argument("modelfile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("regressmode", choices=("mean", "linear", "bilinear"), help="form of regression")
    parser.add_argument("--k", type=int, default=2)
    parser.add_argument("--preprocess", choices=("raw", "dff", "sub", "dff-highpass", "dff-percentile"
                        "dff-detrendnonlin", "dff-detrend-percentile"), default="raw", required=False)

    args = parser.parse_args()
    
    tsc = ThunderContext.start(appName="regresswithpca")

    data = tsc.loadText(args.datafile, args.preprocess)
    model = RegressionModel.load(args.modelfile, args.regressmode)  # do regression
    betas, stats, resid = model.fit(data)
    pca = PCA(args.k).fit(betas)  # do PCA
    traj = model.fit(data, pca.comps)  # get trajectories

    outputdir = args.outputdir + "-regress"
    save(pca.comps, outputdir, "comps", "matlab")
    save(pca.latent, outputdir, "latent", "matlab")
    save(pca.scores, outputdir, "scores", "matlab")
    save(traj, outputdir, "traj", "matlab")
    if args.master != "local":
        egg = glob.glob(os.path.join(os.environ['THUNDER_EGG'], "*.egg"))
        sc.addPyFile(egg[0])
    
    # load data file
    datafile = os.path.join(args.datafolder, args.imagename)
    outputdir = os.path.join(args.datafolder,"spark")
    data = load(sc, datafile, args.preprocess, 4)
    
    # drop key
    data = data.map(lambda (k, v): (k[0:3], v))
    data.cache()

    # compute mean map
    vals = Stats("mean").calc(data)
    save(vals, outputdir, "mean_vals", "matlab")

    # compute local cor
    if args.neighbourhood != 0:
        cor = LocalCorr(neighborhood=args.neighbourhood).calc(data)
        save(cor, outputdir, "local_corr", "matlab")

    # if stim argument is not default
    if args.stim != '-':
        # parse into different stim names
        p = re.compile('-')
        stims = p.split(args.stim)

        # compute regression
        for i in range(len(stims)):
            modelfile = os.path.join(args.datafolder, args.basename + stims[i])
Example #18
                        choices=("raw", "dff", "dff-highpass", "sub"),
                        default="raw",
                        required=False)
    parser.add_argument("--regressmodelfile", type=str)
    parser.add_argument("--regressmode",
                        choices=("linear", "bilinear"),
                        help="form of regression")

    args = parser.parse_args()

    sc = SparkContext(args.master, "tuning")

    if args.master != "local":
        egg = glob.glob(os.path.join(os.environ['THUNDER_EGG'], "*.egg"))
        sc.addPyFile(egg[0])

    data = load(sc, args.datafile, args.preprocess)
    tuningmodel = TuningModel.load(args.tuningmodelfile, args.tuningmode)
    if args.regressmodelfile is not None:
        # use regression results
        regressmodel = RegressionModel.load(args.regressmodelfile,
                                            args.regressmode)
        betas, stats, resid = regressmodel.fit(data)
        params = tuningmodel.fit(betas)
    else:
        # use data
        params = tuningmodel.fit(data)

    outputdir = args.outputdir + "-tuning"
    save(params, outputdir, "params", "matlab")
Example #19
import os
import argparse
import glob
from pyspark import SparkContext
from thunder.timeseries import Stats
from thunder.utils import load
from thunder.utils import save

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="compute summary statistics on time series data")
    parser.add_argument("datafile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("mode",
                        choices=("mean", "median", "std", "norm"),
                        help="which summary statistic")
    parser.add_argument("--preprocess",
                        choices=("raw", "dff", "dff-highpass", "sub"),
                        default="raw",
                        required=False)

    args = parser.parse_args()

    sc = SparkContext(appName="stats")

    data = load(sc, args.datafile, args.preprocess).cache()
    vals = Stats(args.mode).calc(data)

    outputdir = args.outputdir + "-stats"
    save(vals, outputdir, "stats_" + args.mode, "matlab")
Example #20

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="fit a parametric tuning curve to regression results")
    parser.add_argument("datafile", type=str)
    parser.add_argument("tuningmodelfile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("tuningmode", choices=("circular", "gaussian"), help="form of tuning curve")
    parser.add_argument("--regressmodelfile", type=str)
    parser.add_argument("--regressmode", choices=("linear", "bilinear"), help="form of regression")
    parser.add_argument("--preprocess", choices=("raw", "dff", "sub", "dff-highpass", "dff-percentile"
                        "dff-detrendnonlin", "dff-detrend-percentile"), default="raw", required=False)

    args = parser.parse_args()
    
    sc = SparkContext(appName="tuning")

    data = load(sc, args.datafile, args.preprocess)
    tuningmodel = TuningModel.load(args.tuningmodelfile, args.tuningmode)
    if args.regressmodelfile is not None:
        # use regression results
        regressmodel = RegressionModel.load(args.regressmodelfile, args.regressmode)
        betas, stats, resid = regressmodel.fit(data)
        params = tuningmodel.fit(betas)
    else:
        # use data
        params = tuningmodel.fit(data)

    outputdir = args.outputdir + "-tuning"
    save(params, outputdir, "params", "matlab")
Example #21
    def __init__(self, statistic):
        self.func = {
            'median': lambda x: median(x),
            'mean': lambda x: mean(x),
            'std': lambda x: std(x),
            'norm': lambda x: norm(x - mean(x)),
        }[statistic]

    def get(self, y):
        """Compute the statistic"""

        return self.func(y)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="compute summary statistics on time series data")
    parser.add_argument("datafile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("mode", choices=("mean", "median", "std", "norm"), help="which summary statistic")
    parser.add_argument("--preprocess", choices=("raw", "dff", "sub", "dff-highpass", "dff-percentile"
                        "dff-detrendnonlin", "dff-detrend-percentile"), default="raw", required=False)

    args = parser.parse_args()

    tsc = ThunderContext.start(appName="stats")

    data = tsc.loadText(args.datafile, args.preprocess).cache()
    vals = Stats(args.mode).calc(data)

    outputdir = args.outputdir + "-stats"
    save(vals, outputdir, "stats_" + args.mode, "matlab")
Example #22
            for (i, j) in newpoints:
                centers[i] = j

            iter += 1

        return KMeansModel(centers)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="do kmeans clustering")
    parser.add_argument("datafile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("k", type=int)
    parser.add_argument("--maxiter", type=float, default=20, required=False)
    parser.add_argument("--tol", type=float, default=0.001, required=False)
    parser.add_argument("--preprocess", choices=("raw", "dff", "sub", "dff-highpass", "dff-percentile"
                        "dff-detrendnonlin", "dff-detrend-percentile"), default="raw", required=False)

    args = parser.parse_args()

    sc = SparkContext(appName="kmeans")

    data = load(sc, args.datafile, args.preprocess).cache()
    model = KMeans(k=args.k, maxiter=args.maxiter, tol=args.tol).train(data)
    labels = model.predict(data)

    outputdir = args.outputdir + "-kmeans"
    save(model.centers, outputdir, "centers", "matlab")
    save(labels, outputdir, "labels", "matlab")
Example #23
        self.keys = keys
        self.values = values

        return self


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="query data by averaging values for given indices")
    parser.add_argument("datafile", type=str)
    parser.add_argument("indsfile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("--preprocess",
                        choices=("raw", "dff", "sub", "dff-highpass",
                                 "dff-percentile"
                                 "dff-detrendnonlin",
                                 "dff-detrend-percentile"),
                        default="raw",
                        required=False)

    args = parser.parse_args()

    tsc = ThunderContext.start(appName="query")

    data = tsc.loadText(args.datafile, args.preprocess).cache()
    qry = Query(args.indsfile).calc(data)

    outputdir = args.outputdir + "-query"
    save(qry.keys, outputdir, "centers", "matlab")
    save(qry.values, outputdir, "ts", "matlab")