class StatsTest(ThunderDataTest):
    """Test case that runs the "std" statistic over the test's data."""

    def __init__(self, sc):
        """Initialise the base test with ``sc`` and select the statistic.

        ``sc`` is passed straight through to ``ThunderDataTest.__init__``.
        """
        # Explicit base-class call is kept as in the original.
        ThunderDataTest.__init__(self, sc)
        self.method = Stats("std")

    def runtest(self):
        """Apply the configured statistic to ``self.rdd`` and count the result.

        The count itself is discarded; presumably it is only called to force
        the computation to actually run -- confirm against the test harness.
        """
        self.method.calc(self.rdd).count()
Esempio n. 2
0
class StatsTest(ThunderDataTest):
    """Data test exercising ``Stats("std")``."""

    def __init__(self, sc):
        """Set up the base test with ``sc`` and store the statistic to run."""
        ThunderDataTest.__init__(self, sc)
        self.method = Stats("std")

    def runtest(self):
        """Compute the statistic on ``self.rdd`` and call ``count()`` on it.

        Nothing is returned; the count value is thrown away.
        """
        computed = self.method.calc(self.rdd)
        computed.count()
Esempio n. 3
0
    def test_stats(self):
        """Check the "mean", "median", "std" and "norm" statistics.

        Four keyed records (keys 1..4) are parallelized, each statistic is
        computed through ``Stats(name).calc`` and the collected values are
        compared with the same statistic computed per row on a local array.
        """
        rows = [
            array([1.0, 2.0, -4.0, 5.0]),
            array([2.0, 2.0, -4.0, 5.0]),
            array([3.0, 2.0, -4.0, 5.0]),
            array([4.0, 2.0, -4.0, 5.0]),
        ]

        # list() makes the pairs concrete: zip() is lazy on Python 3 and
        # already a list on Python 2, where the extra copy is harmless.
        data = self.sc.parallelize(list(zip(range(1, 5), rows)))
        data_local = array(rows)

        def computed(name):
            # Drop the keys and collect the values once.  Indexing the pair
            # replaces ``lambda (_, v): v``, whose tuple-parameter unpacking
            # is a SyntaxError on Python 3 (PEP 3113).
            return Stats(name).calc(data).map(lambda kv: kv[1]).collect()

        assert allclose(computed("mean"), mean(data_local, axis=1))
        assert allclose(computed("median"), median(data_local, axis=1))
        assert allclose(computed("std"), std(data_local, axis=1))

        # "norm" is compared row by row against the norm of the mean-centred
        # row.  Collect once up front instead of once per loop iteration.
        norms = computed("norm")
        for i in range(4):
            centred = data_local[i, :] - mean(data_local[i, :])
            assert allclose(norms[i], norm(centred))
Esempio n. 4
0
    def test_stats(self):
        """Compare each ``Stats`` mode with a locally computed reference.

        Covers "mean", "median", "std" and "norm" on four keyed records
        (keys 1..4).  The reference values are computed row-wise on the
        local copy of the same data.
        """
        data_local = [
            array([1.0, 2.0, -4.0, 5.0]),
            array([2.0, 2.0, -4.0, 5.0]),
            array([3.0, 2.0, -4.0, 5.0]),
            array([4.0, 2.0, -4.0, 5.0]),
        ]

        # Materialise the (key, row) pairs; zip() is an iterator on Python 3.
        data = self.sc.parallelize(list(zip(range(1, 5), data_local)))
        data_local = array(data_local)

        # ``lambda kv: kv[1]`` keeps only the value of each pair.  It replaces
        # ``lambda (_, v): v``, which does not parse on Python 3 (PEP 3113).
        vals = Stats("mean").calc(data).map(lambda kv: kv[1])
        assert allclose(vals.collect(), mean(data_local, axis=1))

        vals = Stats("median").calc(data).map(lambda kv: kv[1])
        assert allclose(vals.collect(), median(data_local, axis=1))

        vals = Stats("std").calc(data).map(lambda kv: kv[1])
        assert allclose(vals.collect(), std(data_local, axis=1))

        vals = Stats("norm").calc(data).map(lambda kv: kv[1])
        # Collect a single time; the original re-collected on every iteration.
        collected = vals.collect()
        for i, row in enumerate(data_local):
            # Expected value: norm of the row after subtracting its own mean.
            assert allclose(collected[i], norm(row - mean(row)))
Esempio n. 5
0
import os
import argparse
import glob
from pyspark import SparkContext
from thunder.timeseries import Stats
from thunder.utils import load
from thunder.utils import save

if __name__ == "__main__":
    # Command line: input data, output location prefix, the statistic to
    # compute, and an optional preprocessing mode (defaults to "raw").
    cli = argparse.ArgumentParser(description="compute summary statistics on time series data")
    cli.add_argument("datafile", type=str)
    cli.add_argument("outputdir", type=str)
    cli.add_argument(
        "mode",
        choices=("mean", "median", "std", "norm"),
        help="which summary statistic",
    )
    cli.add_argument(
        "--preprocess",
        choices=("raw", "dff", "dff-highpass", "sub"),
        default="raw",
        required=False,
    )
    opts = cli.parse_args()

    sc = SparkContext(appName="stats")

    # Load the input with the requested preprocessing, mark it cached, and
    # compute the chosen statistic over it.
    records = load(sc, opts.datafile, opts.preprocess).cache()
    summary = Stats(opts.mode).calc(records)

    # Results are saved under "<outputdir>-stats" with the name
    # "stats_<mode>", passing "matlab" as the last argument to save().
    save(summary, opts.outputdir + "-stats", "stats_" + opts.mode, "matlab")
 def __init__(self, sc):
     """Initialise through ThunderDataTest and select the "std" statistic.

     sc is forwarded unchanged to ThunderDataTest.__init__
     (presumably a SparkContext -- confirm against the base class).
     """
     ThunderDataTest.__init__(self, sc)
     # Statistic object stored for later use by the test.
     self.method = Stats("std")
Esempio n. 7
0
 def __init__(self, sc):
     """Run the ThunderDataTest initialiser with sc, then store Stats("std").

     NOTE(review): sc looks like a SparkContext; verify against the caller.
     """
     ThunderDataTest.__init__(self, sc)
     self.method = Stats("std")