def test_parsing(self):
    """Check that a factory-built SimpleThreshold and a JSON-built one agree.

    The node is first assembled through the detector builder and its string
    form is pinned; the same node is then created with
    ``NodeFactory.from_json`` and both string forms are compared.
    """
    builder = NodeFactory.detector("test_id", "SimpleThreshold")
    for param_id, param_value in (
        ("inside", False),
        ("strict", False),
        ("lower", 1),
        ("upper", None),
    ):
        builder.set_param_value(param_id, param_value)
    threshold = builder.build()
    self.assertEqual(
        str(threshold), "SimpleThreshold(1,None,False,False)[test_id]")

    # The JSON description carries no "upper" entry, whereas the builder
    # above sets it to None explicitly; both must render identically.
    json_params = [
        {"id": "lower", "value": 1},
        {"id": "inside", "value": False},
        {"id": "strict", "value": False},
    ]
    obj = {
        "id": "test_id",
        "group": "detector",
        "type": "SimpleThreshold",
        "params": json_params,
    }
    th_from_json = NodeFactory.from_json(obj)
    self.assertEqual(str(threshold), str(th_from_json))
def test_exog_seesaw(self):
    """Run SARIMAX (p='1', d=1, output='resid') with an exogenous series.

    The endogenous input is a 10-point linear series and the exogenous input
    alternates between 1 and -1. The first eight output values must be zero
    to two decimal places, and the debug info must expose exactly the
    expected keys.
    """
    # Timestamps 0..9; value is 1 on even timestamps and -1 on odd ones.
    exog = Series.from_array(
        [[t, 1 if t % 2 == 0 else -1] for t in range(10)])
    endog = TestSeriesBuilder.linear(10, 0, 1).build()

    factory = NodeFactory.transformer('test', 'SARIMAX')
    for param_id, param_value in (('p', '1'), ('d', 1), ('output', 'resid')):
        factory.set_param_value(param_id, param_value)
    sarimax = factory.build()

    result, debug_info = sarimax.transform([endog, exog], True)

    expected_series = [0] * 8
    actual_series = list(result.values)
    # Index into the actual values so a too-short result still fails loudly.
    for position, expected_value in enumerate(expected_series):
        self.assertAlmostEqual(expected_value, actual_series[position], 2)

    # With debug info
    self.assertEqual({'summary', 'offset_start', 'exog_coeff_x1'},
                     set(debug_info.keys()))
def test_base_derivative_non_linear(self):
    """Derivative (lag='1', metric='sub') over a non-linear five-point series.

    The result is expected to start at the second timestamp and to hold the
    difference between each value and the one before it.
    """
    self.maxDiff = None
    series1 = Series.from_array([[0, 1], [1, 2], [2, 4], [3, 0], [4, 16]])

    factory = NodeFactory.transformer('test', 'Derivative')
    factory.set_param_value('lag', '1')
    factory.set_param_value('metric', 'sub')
    diff = factory.build()

    result, _ = diff.transform([series1], False)

    expected = Series.from_array(
        [[1, 1], [2, 2], [3, -4], [4, 16]]).pdseries
    self.assertEqual(list(expected.index), list(result.index))
    self.assertEqual(list(expected.values), list(result.values))
def build_test_node(id, sources, source_is_input=False):
    """Return the dict description of an Identity transformer node.

    Parameters are taken from
    ``NodeFactory.node_description('transformer', 'Identity')``: every
    boolean parameter gets the negation of its described value, and every
    other parameter keeps its described value unchanged.

    Args:
        id: identifier stored under the ``'id'`` key of the node.
        sources: iterable of references; each becomes one ``'sources'`` entry.
        source_is_input: when truthy the entries are typed ``'input'``,
            otherwise ``'node'``.

    Returns:
        A dict with the keys ``id``, ``group``, ``type``, ``params`` and
        ``sources``.
    """
    description = NodeFactory.node_description('transformer', 'Identity')

    params = []
    for param in description['params']:
        value = param['value']
        if isinstance(value, bool):
            # Flip boolean values relative to the node description.
            value = not value
        params.append({'id': param['id'], 'value': value})

    source_type = 'input' if source_is_input else 'node'
    source_refs = [{'type': source_type, 'ref': source} for source in sources]

    return {
        'id': id,
        'group': 'transformer',
        'type': 'Identity',
        'params': params,
        'sources': source_refs,
    }
def test_analysis_histogram_heatmap(self):
    """Compare a debug analysis of a SimpleThreshold pipeline with a fixture.

    A single SimpleThreshold detector (upper=1, no lower bound) reads the
    'input' series; the analyzer output is compared with the JSON stored in
    ``resources/analysis/expected_histogram_heatmap.json``.
    """
    self.maxDiff = None
    series = Series.from_array([
        [1628294400, 0],
        [1628337600, 0],
        [1628380800, 1],
        [1628424000, 1],
        [1628467200, 1],
        [1628510400, 0],
        [1628553600, 0],
        [1628596800, 0],
    ])

    factory = NodeFactory.detector('test_node', 'SimpleThreshold')
    for param_id, param_value in (
        ('inside', False),
        ('strict', False),
        ('lower', None),
        ('upper', 1),
    ):
        factory.set_param_value(param_id, param_value)
    factory.add_source(InputRef('input'))
    node = factory.build()

    analyzer = Analyzer(pipeline=Pipeline([node]), debug=True)
    actual_output = analyzer.analyze({'input': series}).output_format()

    here = os.path.dirname(__file__)
    expected_file = os.path.join(
        here, 'resources/analysis/expected_histogram_heatmap.json')
    # To refresh the fixture, write json.dumps(actual_output, indent=2)
    # into expected_file.
    expected_output = json.loads(Path(expected_file).read_text())
    self.assertEqual(expected_output, actual_output)
def test_linear_series_lags(self):
    """Identity transformer with every statistic switched on.

    The 10-point linear series must come through unchanged. With debug
    enabled the debug info keys must equal EXPECTED_KEYS and the mean and
    standard deviation are checked; with debug disabled no debug keys are
    returned.
    """
    series = TestSeriesBuilder.linear(10, 0, 1).build()

    factory = NodeFactory.transformer('test', 'Identity')
    for param_id, param_value in (
        ('mean', True),
        ('stddev', True),
        ('adf_test', True),
        ('normality_test', True),
        ('histogram', True),
        ('histogram_bins', 10),
        ('cum_histogram', True),
        ('cum_histogram_bins', 10),
        ('acf', True),
        ('acf_lags', 10),
        ('pacf', True),
        ('pacf_lags', 10),
    ):
        factory.set_param_value(param_id, param_value)
    identity = factory.build()

    result, debug_info = identity.transform([series], True)

    expected_series = list(range(10))
    actual_series = list(result.values)
    # Index into the actual values so a too-short result still fails loudly.
    for position, expected_value in enumerate(expected_series):
        self.assertEqual(expected_value, actual_series[position])

    # With debug info
    self.assertEqual(EXPECTED_KEYS, set(debug_info.keys()))
    self.assertEqual(4.5, debug_info['Mean'])
    self.assertAlmostEqual(3.02, debug_info['Std. dev.'], 1)

    # No debug info
    result, debug_info = identity.transform([series], False)
    self.assertEqual([], list(debug_info.keys()))
def test_node_list(self):
    """``NodeFactory.nodes_list()`` must match ``resources/expected_nodes.json``."""
    self.maxDiff = None
    actual = NodeFactory.nodes_list()

    here = os.path.dirname(__file__)
    expected_file = os.path.join(here, 'resources/expected_nodes.json')
    # To refresh the fixture, write json.dumps(actual, indent=2)
    # into expected_file.
    expected = json.loads(Path(expected_file).read_text())

    self.assertEqual(expected, actual)
def test_rolling_aggregate_mean(self):
    """RollingAggregate with window 2, not centered, min_periods 0, 'mean'.

    The built node and the expected output values are handed to
    ``self.case``.
    """
    factory = NodeFactory.transformer('test', 'RollingAggregate')
    for param_id, param_value in (
        ('window', 2),
        ('center', False),
        ('min_periods', 0),
        ('agg_method', 'mean'),
    ):
        factory.set_param_value(param_id, param_value)
    factory.add_source(InputRef('input'))
    ram = factory.build()

    expected_values = [
        0.0, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5,
        9.0, 8.5, 7.5, 6.5, 5.5, 4.5, 3.5, 2.5, 1.5, 0.5,
    ]
    self.case(ram, expected_values)
def test_divide_discard_fill_zero_div(self):
    """Divide with ``zero_div`` set to 0.

    The second divisor is 0; the expected output holds 0.0 at that position
    and the plain quotient elsewhere, on the index of the first series.
    """
    numerators = Series.from_array([[1, 1], [2, 2], [3, 3]])
    denominators = Series.from_array([[1, 2], [2, 0], [3, 5]])

    factory = NodeFactory.transformer('test', 'Divide')
    factory.set_param_value('zero_div', 0)
    divide = factory.build()

    result, _ = divide.transform([numerators, denominators], False)

    expected_series = pd.Series(
        [1 / 2, 0.0, 3 / 5], index=numerators.pdseries.index)
    self.assertEqual(list(expected_series.index), list(result.index))
    self.assertEqual(list(expected_series.values), list(result.values))
def test_dropout_mean_prop(self):
    """Dropout node: 'mean' aggregation combined with 'prop'.

    Uses context window 2, dropout window 1, no centering, and the
    'dropout-context' combine order; the node and the expected values are
    handed to ``self.case``.
    """
    factory = NodeFactory.transformer('test', 'Dropout')
    for param_id, param_value in (
        ('context_window', 2),
        ('dropout_window', 1),
        ('center', False),
        ('min_periods', None),
        ('agg_method', 'mean'),
        ('combine_method', 'prop'),
        ('combine_method_order', 'dropout-context'),
    ):
        factory.set_param_value(param_id, param_value)
    factory.add_source(InputRef('input'))
    ram = factory.build()

    expected_values = [1, 1, 3, 1, 1 / 3, 1, 1]
    self.case(ram, expected_values)
def test_exponential_moving_average_mean(self):
    """ExponentialMovingAverage with span '4', non-recursive, 'mean'.

    The node, the expected output values and the expected debug info
    (``alpha`` of 0.4) are handed to ``self.case``.
    """
    factory = NodeFactory.transformer('test', 'ExponentialMovingAverage')
    for param_id, param_value in (
        ('span', '4'),
        ('min_periods', ''),
        ('recursive', False),
        ('agg_method', 'mean'),
    ):
        factory.set_param_value(param_id, param_value)
    factory.add_source(InputRef('input'))
    ewma = factory.build()

    expected_values = [2.095, 2.487, 2.283, 1.755, 1.041]
    expected_debug_info = {'alpha': 0.4}
    self.case(ewma, expected_values, expected_debug_info)
def test_multi_rolling_aggregate_correlation(self):
    """MultiRollingAggregate with 'correlation_pearson' over a window of '2'.

    The expected index is the first series' index without its first entry,
    and the expected values are 1, 1, -1, -1.
    """
    factory = NodeFactory.transformer('test', 'MultiRollingAggregate')
    for param_id, param_value in (
        ('window', '2'),
        ('center', False),
        ('min_periods', '0'),
        ('agg_method', 'correlation_pearson'),
    ):
        factory.set_param_value(param_id, param_value)
    for input_name in ('lhs', 'rhs'):
        factory.add_source(InputRef(input_name))
    ram = factory.build()

    s1 = Series.from_array([[0, 1], [1, 2], [2, 3], [3, 4], [4, 5]])
    s2 = Series.from_array([[0, 1], [1, 2], [2, 3], [3, 2], [4, 1]])

    expected_index = list(s1.pdseries.index)[1:]
    expected_values = [1, 1, -1, -1]
    self.case(ram, s1, s2, expected_index, expected_values)
def test_multi_rolling_aggregate_mismatching_start(self):
    """MultiRollingAggregate 'proportion' on series with different starts.

    The second series begins one timestamp later than the first; the
    expected index is the second series' index.
    """
    factory = NodeFactory.transformer('test', 'MultiRollingAggregate')
    for param_id, param_value in (
        ('window', '1'),
        ('center', False),
        ('min_periods', '0'),
        ('agg_method', 'proportion'),
    ):
        factory.set_param_value(param_id, param_value)
    for input_name in ('lhs', 'rhs'):
        factory.add_source(InputRef(input_name))
    ram = factory.build()

    s1 = Series.from_array([[0, 1], [1, 2], [2, 3], [3, 4]])
    s2 = Series.from_array([[1, 5], [2, 5], [3, 5]])

    expected_index = list(s2.pdseries.index)
    expected_values = [2 / 5, 3 / 5, 4 / 5]
    self.case(ram, s1, s2, expected_index, expected_values)
def test_dropout_centered_mean_sub(self):
    """Centered Dropout node: 'mean' aggregation combined with 'sub'.

    Uses context window 4, dropout window 2, and the 'context-dropout'
    combine order; the node and the expected values are handed to
    ``self.case``.
    """
    factory = NodeFactory.transformer('test', 'Dropout')
    for param_id, param_value in (
        ('context_window', 4),
        ('dropout_window', 2),
        ('center', True),
        ('min_periods', None),
        ('agg_method', 'mean'),
        ('combine_method', 'sub'),
        ('combine_method_order', 'context-dropout'),
    ):
        factory.set_param_value(param_id, param_value)
    factory.add_source(InputRef('input'))
    ram = factory.build()

    expected_values = [1, 0, -2, 0, 1]
    self.case(ram, expected_values)
def test_multi_rolling_aggregate_proportion_window_min_periods_default(
        self):
    """MultiRollingAggregate 'proportion', window '2', empty ``min_periods``.

    The expected index is the first series' index without its first entry.
    """
    factory = NodeFactory.transformer('test', 'MultiRollingAggregate')
    for param_id, param_value in (
        ('window', '2'),
        ('center', False),
        ('min_periods', ''),
        ('agg_method', 'proportion'),
    ):
        factory.set_param_value(param_id, param_value)
    for input_name in ('lhs', 'rhs'):
        factory.add_source(InputRef(input_name))
    ram = factory.build()

    s1 = Series.from_array([[0, 1], [1, 2], [2, 3], [3, 4]])
    s2 = Series.from_array([[0, 5], [1, 5], [2, 5], [3, 5]])

    expected_index = list(s1.pdseries.index[1:])
    expected_values = [0.3, 0.5, 0.7]
    self.case(ram, s1, s2, expected_index, expected_values)
def test_seesaw(self):
    """SARIMAX (p='1', output='resid') on a series alternating 1 and -1.

    The first five output values must be zero to two decimal places.
    """
    series = Series.from_array(
        [[0, 1], [1, -1], [2, 1], [3, -1], [4, 1], [5, -1]])

    factory = NodeFactory.transformer('test', 'SARIMAX')
    factory.set_param_value('p', '1')
    factory.set_param_value('output', 'resid')
    sarimax = factory.build()

    # Debug output is requested but not inspected by this test.
    result, _ = sarimax.transform([series], True)

    expected_series = [0] * 5
    actual_series = list(result.values)
    # Index into the actual values so a too-short result still fails loudly.
    for position, expected_value in enumerate(expected_series):
        self.assertAlmostEqual(expected_value, actual_series[position], 2)
def test_analysis_without_debug(self):
    """Analysis output with ``debug=False``.

    A RollingAggregate ('max', window 5) node reads the 'input' series. The
    expected output has an empty anomaly list and, under ``series``, only
    the 'input' entry.
    """
    self.maxDiff = None
    series = self.build_triangle()

    factory = NodeFactory.transformer('test_node', 'RollingAggregate')
    for param_id, param_value in (
        ('window', 5),
        ('center', False),
        ('min_periods', 0),
        ('agg_method', 'max'),
    ):
        factory.set_param_value(param_id, param_value)
    factory.add_source(InputRef('input'))
    node = factory.build()

    analyzer = Analyzer(pipeline=Pipeline([node]), debug=False)
    analysis = analyzer.analyze({'input': series})

    # Expected points: timestamps 0, 1000, ..., 19000 paired with the float
    # values 0.0 up to 9.0 and back down to 0.0.
    triangle_values = list(range(10)) + list(range(9, -1, -1))
    expected_points = [
        [position * 1000, float(value)]
        for position, value in enumerate(triangle_values)
    ]
    expected = {
        "anomalies": [],
        "series": {
            "input": expected_points,
        },
    }
    self.assertEqual(expected, analysis.output_format())
def test_std_normalize(self):
    """StdNormalize on the triangle series.

    The input is first checked to be 0..9 followed by 9..0, then the series,
    the node and the expected normalized values are handed to ``self.case``.
    """
    series = self.build_triangle()
    expected_input = list(range(10)) + list(range(9, -1, -1))
    self.assertEqual(expected_input, series.as_list())

    factory = NodeFactory.transformer('std_normalize', 'StdNormalize')
    factory.add_source(InputRef('input'))
    stdnormalize = factory.build()

    expected_values = [
        -1.5270292013639366,
        -1.1876893788386174,
        -0.8483495563132981,
        -0.5090097337879789,
        -0.16966991126265962,
        0.16966991126265962,
        0.5090097337879789,
        0.8483495563132981,
        1.1876893788386174,
        1.5270292013639366,
        1.5270292013639366,
        1.1876893788386174,
        0.8483495563132981,
        0.5090097337879789,
        0.16966991126265962,
        -0.16966991126265962,
        -0.5090097337879789,
        -0.8483495563132981,
        -1.1876893788386174,
        -1.5270292013639366,
    ]
    self.case(series, stdnormalize, expected_values)
def test_base_derivative_lag2(self):
    """Derivative (lag='2', metric='sub') over a five-point linear series.

    The result is expected to start at timestamp 2 with every value equal
    to 2.
    """
    series1 = TestSeriesBuilder.linear(5, 0, 1).build()

    factory = NodeFactory.transformer('test', 'Derivative')
    factory.set_param_value('lag', '2')
    factory.set_param_value('metric', 'sub')
    diff = factory.build()

    result, _ = diff.transform([series1], False)

    expected = Series.from_array([[2, 2], [3, 2], [4, 2]]).pdseries
    self.assertEqual(list(expected.index), list(result.index))
    self.assertEqual(list(expected.values), list(result.values))
def test_boxcox_fixed_lambda(self):
    """BoxCox with ``lambda`` set to 1.

    The expected debug info reports 1 for both ``lambda`` and
    ``fitted_lambda``; series, node, expected values and expected debug
    info are handed to ``self.case``.
    """
    self.maxDiff = None
    series = self.build_triangle()

    factory = NodeFactory.transformer('boxcox', 'BoxCox')
    factory.set_param_value('lambda', 1)
    factory.add_source(InputRef('input'))
    boxcox = factory.build()

    expected_values = [
        0.0,
        5.0,
        10.000000000000002,
        14.999999999999998,
        20.0,
        20.0,
        18.999999999999996,
        17.999999999999996,
        16.999999999999996,
        16.0,
        14.999999999999998,
        14.0,
        12.999999999999996,
        12.0,
        11.0,
        10.000000000000002,
        9.000000000000002,
        8.000000000000002,
        6.999999999999998,
        5.999999999999999,
        5.0,
        3.999999999999999,
        3.0,
        2.0000000000000004,
        1.0,
    ]
    expected_debug = {'lambda': 1, 'fitted_lambda': 1}
    self.case(series, boxcox, expected_values, expected_debug)
def test_boxcox_implied_lambda(self):
    """BoxCox without an explicit ``lambda`` parameter.

    The expected debug info reports ``lambda`` as None together with a
    specific ``fitted_lambda``; series, node, expected values and expected
    debug info are handed to ``self.case``.
    """
    series = self.build_triangle()

    # 'lambda' is deliberately left unset on the factory.
    factory = NodeFactory.transformer('boxcox', 'BoxCox')
    factory.add_source(InputRef('input'))
    boxcox = factory.build()

    expected_values = [
        0.0,
        3.8069844496343204,
        6.792535931588043,
        9.45326887138231,
        11.915823446529231,
        11.915823446529231,
        11.435878585907972,
        10.950062851581707,
        10.457989331193566,
        9.959222501860888,
        9.45326887138231,
        8.939565111800931,
        8.417462798503934,
        7.88620846728669,
        7.344917074353449,
        6.792535931588043,
        6.227794497220499,
        5.649132457366781,
        5.054593159635213,
        4.441659075192572,
        3.8069844496343204,
        3.1459316503179107,
        2.45169446968289,
        1.7134270251968304,
        0.9114396216016829,
    ]
    expected_debug = {
        'lambda': None,
        'fitted_lambda': 0.7569500835813396,
    }
    self.case(series, boxcox, expected_values, expected_debug)
def test_linear_series_lags_fitted(self):
    """AutoRegression (lags='2', output='predicted') on a linear series.

    The output must match the input from its third value onwards to two
    decimal places. With debug enabled the only debug key is 'summary';
    with debug disabled no debug keys are returned.
    """
    series = TestSeriesBuilder.linear(10, 0, 1).build()

    factory = NodeFactory.transformer('test', 'AutoRegression')
    for param_id, param_value in (
        ('lags', '2'),
        ('period', ''),
        ('output', 'predicted'),
    ):
        factory.set_param_value(param_id, param_value)
    ar = factory.build()

    result, debug_info = ar.transform([series], True)

    expected_series = series.as_list()[2:]
    actual_series = list(result.values)
    # Index into the actual values so a too-short result still fails loudly.
    for position, expected_value in enumerate(expected_series):
        self.assertAlmostEqual(expected_value, actual_series[position], 2)

    # With debug info
    self.assertEqual({'summary'}, set(debug_info.keys()))

    # No debug info
    result, debug_info = ar.transform([series], False)
    self.assertEqual([], list(debug_info.keys()))
def test_fft_filter(self):
    """FFTFilter with cutoff '50%' and output 'resid' on the triangle series.

    The input is first checked to be 0..9 followed by 9..0, then the series,
    the node and the expected output values are handed to ``self.case``.
    """
    series = self.build_triangle()
    expected_input = list(range(10)) + list(range(9, -1, -1))
    self.assertEqual(expected_input, series.as_list())

    factory = NodeFactory.transformer('fft_filter', 'FFTFilter')
    factory.set_param_value('cutoff', '50%')
    factory.set_param_value('output', 'resid')
    factory.add_source(InputRef('input'))
    fft_filter = factory.build()

    expected_values = [
        0.002508563093691407,
        0.9927198663527221,
        2.0113390739957944,
        2.9857119332223716,
        4.015838444032453,
        4.984161555967546,
        6.014288066777628,
        6.988660926004205,
        8.00728013364728,
        8.997491436906309,
        8.99749143690631,
        8.007280133647278,
        6.988660926004206,
        6.014288066777628,
        4.984161555967547,
        4.015838444032454,
        2.985711933222371,
        2.0113390739957935,
        0.9927198663527218,
        0.002508563093691407,
    ]
    self.case(series, fft_filter, expected_values)
def test_linear_series_lags(self):
    """SARIMAX (p='1', d=1, output='resid') on a 10-point linear series.

    The first eight output values must be zero to two decimal places. With
    debug enabled the debug keys are 'summary' and 'offset_start'; with
    debug disabled no debug keys are returned.
    """
    series = TestSeriesBuilder.linear(10, 0, 1).build()

    # Only 'p', 'd' and 'output' are set; 'q' is left untouched.
    factory = NodeFactory.transformer('test', 'SARIMAX')
    for param_id, param_value in (('p', '1'), ('d', 1), ('output', 'resid')):
        factory.set_param_value(param_id, param_value)
    sarimax = factory.build()

    result, debug_info = sarimax.transform([series], True)

    expected_series = [0] * 8
    actual_series = list(result.values)
    # Index into the actual values so a too-short result still fails loudly.
    for position, expected_value in enumerate(expected_series):
        self.assertAlmostEqual(expected_value, actual_series[position], 2)

    # With debug info
    self.assertEqual({'summary', 'offset_start'}, set(debug_info.keys()))

    # No debug info
    result, debug_info = sarimax.transform([series], False)
    self.assertEqual([], list(debug_info.keys()))
def test_analysis_with_debug(self):
    """Compare a debug analysis of a RollingAggregate pipeline with a fixture.

    A RollingAggregate ('max', window 5) node reads the 'input' series; the
    analyzer output is compared with the JSON stored in
    ``resources/analysis/expected_simplified.json``.
    """
    self.maxDiff = None
    series = self.build_triangle()

    factory = NodeFactory.transformer('test_node', 'RollingAggregate')
    for param_id, param_value in (
        ('window', 5),
        ('center', False),
        ('min_periods', 0),
        ('agg_method', 'max'),
    ):
        factory.set_param_value(param_id, param_value)
    factory.add_source(InputRef('input'))
    node = factory.build()

    analyzer = Analyzer(pipeline=Pipeline([node]), debug=True)
    actual_output = analyzer.analyze({'input': series}).output_format()

    here = os.path.dirname(__file__)
    expected_file = os.path.join(
        here, 'resources/analysis/expected_simplified.json')
    # To refresh the fixture, write json.dumps(actual_output, indent=2)
    # into expected_file.
    expected_output = json.loads(Path(expected_file).read_text())
    self.assertEqual(expected_output, actual_output)
def prepare_factory(self):
    """Return a 'SimpleThreshold' detector factory with id 'test'.

    The factory has the 'input' reference registered as its source.
    """
    threshold_factory = NodeFactory.detector('test', 'SimpleThreshold')
    input_ref = InputRef('input')
    threshold_factory.add_source(input_ref)
    return threshold_factory
def prepare_factory(self):
    """Return a 'Quantile' detector factory with id 'test'.

    The factory has the 'input' reference registered as its source.
    """
    quantile_factory = NodeFactory.detector('test', 'Quantile')
    input_ref = InputRef('input')
    quantile_factory.add_source(input_ref)
    return quantile_factory