class StatsTest(ThunderDataTest):

    def __init__(self, sc):
        ThunderDataTest.__init__(self, sc)
        self.method = Stats("std")

    def runtest(self):
        vals = self.method.calc(self.rdd)
        vals.count()
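# Presumed driver usage for the benchmark harness above; how
# ThunderDataTest populates self.rdd is assumed to happen in the shared
# base class and is not shown here:
#
#   test = StatsTest(sc)
#   test.runtest()  # count() forces evaluation of the lazy RDD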
def test_stats(self):
    # assumes numpy's array, allclose, mean, median, and std plus a
    # vector norm (e.g. numpy.linalg.norm) are imported at module level
    data_local = [
        array([1.0, 2.0, -4.0, 5.0]),
        array([2.0, 2.0, -4.0, 5.0]),
        array([3.0, 2.0, -4.0, 5.0]),
        array([4.0, 2.0, -4.0, 5.0]),
    ]
    data = self.sc.parallelize(list(zip(range(1, 5), data_local)))
    data_local = array(data_local)

    vals = Stats("mean").calc(data).values()
    assert allclose(vals.collect(), mean(data_local, axis=1))

    vals = Stats("median").calc(data).values()
    assert allclose(vals.collect(), median(data_local, axis=1))

    vals = Stats("std").calc(data).values()
    assert allclose(vals.collect(), std(data_local, axis=1))

    vals = Stats("norm").calc(data).values()
    # collect once rather than re-collecting on every loop iteration
    collected = vals.collect()
    for i in range(4):
        assert allclose(collected[i], norm(data_local[i, :] - mean(data_local[i, :])))
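# A minimal sketch of the Stats interface the test above assumes: calc
# maps an RDD of (key, series) records to (key, statistic) pairs. This
# is an illustration under assumed semantics, not thunder's actual
# implementation; the class name StatsSketch is hypothetical.
from numpy import mean, median, std
from numpy.linalg import norm


class StatsSketch(object):

    def __init__(self, mode):
        # "norm" is taken to mean the norm of the mean-subtracted
        # series, matching the expectation checked in test_stats
        self.func = {
            "mean": mean,
            "median": median,
            "std": std,
            "norm": lambda x: norm(x - mean(x)),
        }[mode]

    def calc(self, data):
        # data: RDD of (key, ndarray) pairs; keys pass through unchanged
        return data.mapValues(self.func)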
import argparse
from pyspark import SparkContext
from thunder.timeseries import Stats
from thunder.utils import load
from thunder.utils import save

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="compute summary statistics on time series data")
    parser.add_argument("datafile", type=str)
    parser.add_argument("outputdir", type=str)
    parser.add_argument("mode", choices=("mean", "median", "std", "norm"),
                        help="which summary statistic")
    parser.add_argument("--preprocess", choices=("raw", "dff", "dff-highpass", "sub"),
                        default="raw", required=False)
    args = parser.parse_args()

    sc = SparkContext(appName="stats")

    # load the data, applying the requested preprocessing, and cache it
    data = load(sc, args.datafile, args.preprocess).cache()

    # compute the chosen statistic for every time series
    vals = Stats(args.mode).calc(data)

    # write results as MATLAB output under <outputdir>-stats
    outputdir = args.outputdir + "-stats"
    save(vals, outputdir, "stats_" + args.mode, "matlab")
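# Example invocation via spark-submit (paths and filenames here are
# illustrative, not taken from the project):
#
#   spark-submit stats.py data/series.txt results std --preprocess raw
#
# With outputdir "results", output lands under "results-stats" as a
# MATLAB file named "stats_std".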