def test_normalization_bywindow(self):
    # Exercise TimeSeries.normalize with the 'window' and 'window-fast'
    # methods on a single float16 record. For each case the result is
    # compared (atol=1e-3) against (y - b) / (b + 0.1), where b is the
    # hard-coded expected baseline for that method/window pair.
    series = array([1, 2, 3, 4, 5], dtype='float16')
    data = TimeSeries(self.sc.parallelize([(0, series)]), dtype='float16')

    # (method, window, expected baseline, whether the output dtype is checked)
    cases = (
        ('window', 2, [1.2, 1.4, 2.4, 3.4, 4.2], True),
        ('window', 6, [1.6, 1.8, 1.8, 1.8, 2.6], False),
        ('window-fast', 2, [1, 1, 2, 3, 4], True),
        ('window-fast', 5, [1, 1, 2, 3, 4], False),
    )

    for method, width, baseline_values, check_dtype in cases:
        normalized = data.normalize(method, window=width).first()[1]
        if check_dtype:
            # The test expects float64 output even though the input is float16.
            assert_equals('float64', str(normalized.dtype))
        baseline = array(baseline_values)
        expected = (series - baseline) / (baseline + 0.1)
        assert allclose(normalized, expected, atol=1e-3)
def test_normalization_bypercentile(self):
    # Normalize a single float16 record with the 'percentile' method
    # (perc=20) and check that the output keeps the float16 dtype and
    # matches the hard-coded expected values within atol=1e-3.
    samples = array([1, 2, 3, 4, 5], dtype='float16')
    data = TimeSeries(self.sc.parallelize([(0, samples)]), dtype='float16')

    normalized = data.normalize('percentile', perc=20).first()[1]

    assert_equals('float16', str(normalized.dtype))
    expected = array([-0.42105, 0.10526, 0.63157, 1.15789, 1.68421])
    assert allclose(normalized, expected, atol=1e-3)
def test_normalization_bymean(self):
    # Normalize a single float16 record with the 'mean' method and check
    # that the output keeps the float16 dtype and matches the hard-coded
    # expected values within atol=1e-3.
    rdd = self.sc.parallelize([(0, array([1, 2, 3, 4, 5], dtype='float16'))])
    data = TimeSeries(rdd, dtype='float16')
    out = data.normalize('mean')
    vals = out.first()[1]
    assert_equals('float16', str(vals.dtype))
    # Compare the record already fetched into `vals` instead of calling
    # `out.first()` a second time, so the dtype check and the value check
    # are made against the same object.
    expected = array([-0.64516, -0.32258, 0.0, 0.32258, 0.64516])
    assert allclose(vals, expected, atol=1e-3)