def test_chi_squared1():
    """Check ``relative_chi_squared`` applied after ``roll_norm_hist_mean_cov``.

    The example histograms are read from JSON, split per feature, passed
    through ``roll_norm_hist_mean_cov`` (window=5, shift=1) and then through
    ``relative_chi_squared`` (axis=1). Selected ``chi2`` entries are compared
    against reference values.
    """
    hist_list = [
        "date:country",
        "date:bankrupt",
        "date:num_employees",
        "date:A_score",
        "date:A_score:num_employees",
    ]

    pipeline = Pipeline(
        modules=[
            JsonReader(
                file_path=resources.data("example_histogram.json"),
                store_key="example_hist",
            ),
            HistSplitter(
                read_key="example_hist", store_key="output_hist", features=hist_list
            ),
            ApplyFunc(
                apply_to_key="output_hist",
                apply_funcs=[
                    dict(
                        func=roll_norm_hist_mean_cov,
                        hist_name="histogram",
                        window=5,
                        shift=1,
                        suffix="",
                        entire=True,
                    )
                ],
            ),
            ApplyFunc(
                apply_to_key="output_hist",
                apply_funcs=[dict(func=relative_chi_squared, suffix="", axis=1)],
            ),
        ]
    )
    datastore = pipeline.transform(datastore={})

    assert "output_hist" in datastore
    for f in ["A_score", "A_score:num_employees", "bankrupt", "country", "num_employees"]:
        assert f in datastore["output_hist"]

    # (feature, row position, expected chi2).
    # The integers are used as row positions, so they are read through
    # ``.values``. Plain ``series[int]`` only falls back to positional access
    # on a non-integer index, and pandas has deprecated that fallback.
    expected = [
        ("A_score", 6, 3.275000000000001),
        ("A_score:num_employees", -2, 2.1333333333333315),
        ("bankrupt", 6, 0.19687500000000002),
        ("country", 5, 0.8999999999999994),
        ("num_employees", 5, 0.849999999999999),
    ]
    for feature, position, chi2 in expected:
        series = datastore["output_hist"][feature]["chi2"]
        np.testing.assert_almost_equal(series.values[position], chi2)
def test_chi_squared2():
    """Check ``relative_chi_squared`` applied after ``expand_norm_hist_mean_cov``.

    The example histograms are read from JSON, split per feature, passed
    through ``expand_norm_hist_mean_cov`` (shift=1) and then through
    ``relative_chi_squared`` (axis=1). Selected ``chi2`` entries are compared
    against reference values.
    """
    hist_list = [
        "date:country",
        "date:bankrupt",
        "date:num_employees",
        "date:A_score",
        "date:A_score:num_employees",
    ]

    pipeline = Pipeline(
        modules=[
            JsonReader(
                file_path=resources.data("example_histogram.json"),
                store_key="example_hist",
            ),
            HistSplitter(
                read_key="example_hist", store_key="output_hist", features=hist_list
            ),
            ApplyFunc(
                apply_to_key="output_hist",
                apply_funcs=[
                    dict(
                        func=expand_norm_hist_mean_cov,
                        hist_name="histogram",
                        shift=1,
                        suffix="",
                        entire=True,
                    )
                ],
            ),
            ApplyFunc(
                apply_to_key="output_hist",
                apply_funcs=[dict(func=relative_chi_squared, suffix="", axis=1)],
            ),
        ]
    )
    datastore = pipeline.transform(datastore={})

    assert "output_hist" in datastore
    for f in ["A_score", "A_score:num_employees", "bankrupt", "country", "num_employees"]:
        assert f in datastore["output_hist"]

    # (feature, row position, expected chi2).
    # Negative integers are row positions counted from the end, so they are
    # read through ``.values``. Plain ``series[int]`` only falls back to
    # positional access on a non-integer index, and pandas has deprecated
    # that fallback.
    expected = [
        ("A_score", -1, 4.066666666666674),
        ("A_score:num_employees", -2, 3.217532467532462),
        ("bankrupt", -1, 0.11718750000000011),
        ("country", -1, 0.6093749999999999),
        ("num_employees", -1, 1.1858766233766194),
    ]
    for feature, position, chi2 in expected:
        series = datastore["output_hist"][feature]["chi2"]
        np.testing.assert_almost_equal(series.values[position], chi2)
def test_apply_dynamic_traffic_light_bounds():
    """Check the traffic-light bound columns produced by ``DynamicBounds``.

    Std and mean are computed for metrics ``a`` and ``b``, ``pull`` is applied
    on top, and ``DynamicBounds`` stores its result under ``"tl"``. The first
    row of every bound column is compared against reference values.
    """
    store = {"to_profile": {"asc_numbers": get_test_data()}}
    rules = {"*_pull": [7, 4, -4, -7]}

    stats = ApplyFunc(
        apply_to_key="to_profile", features=["asc_numbers"], metrics=["a", "b"]
    )
    stats.add_apply_func(np.std, suffix="_std")
    stats.add_apply_func(np.mean, suffix="_mean")

    pulls = ApplyFunc(apply_to_key="to_profile", features=["asc_numbers"])
    pulls.add_apply_func(
        pull, suffix="_pull", axis=1, suffix_mean="_mean", suffix_std="_std"
    )

    bounds = DynamicBounds(
        read_key="to_profile",
        store_key="tl",
        rules=rules,
        suffix_mean="_mean",
        suffix_std="_std",
    )

    result = Pipeline(modules=[stats, pulls, bounds]).transform(store)

    assert "tl" in result
    assert "asc_numbers" in result["tl"]
    frame = result["tl"]["asc_numbers"]

    # Expected value in the first row of each traffic-light bound column.
    expected_first_row = {
        "traffic_light_a_red_high": 251.5624903,
        "traffic_light_a_yellow_high": 164.96428019,
        "traffic_light_a_yellow_low": -65.96428019,
        "traffic_light_a_red_low": -152.56249033,
        "traffic_light_b_red_high": 5.0,
        "traffic_light_b_yellow_high": 3.5,
        "traffic_light_b_yellow_low": -0.5,
        "traffic_light_b_red_low": -2.0,
    }

    # All columns must exist before any value is inspected.
    for column in expected_first_row:
        assert column in frame.columns

    for column, value in expected_first_row.items():
        np.testing.assert_almost_equal(frame[column].values[0], value)
def test_pull():
    """Check the ``a_pull`` / ``b_pull`` columns produced by ``pull``.

    The first module adds ``_std`` and ``_mean`` columns (``entire=True``);
    the second applies ``pull`` to columns ``a`` and ``b``.
    """
    store = {"to_profile": {"asc_numbers": get_test_data()}}

    stats = ApplyFunc(apply_to_key="to_profile")
    stats.add_apply_func(np.std, suffix="_std", entire=True)
    stats.add_apply_func(np.mean, suffix="_mean", entire=True)

    pulls = ApplyFunc(apply_to_key="to_profile", features=["asc_numbers"])
    pulls.add_apply_func(
        pull,
        suffix="_pull",
        axis=1,
        suffix_mean="_mean",
        suffix_std="_std",
        cols=["a", "b"],
    )

    result = Pipeline(modules=[stats, pulls]).transform(store)
    frame = result["to_profile"]["asc_numbers"]

    # Expected pull in the first row, per column.
    for column, value in (("a_pull", -1.714816), ("b_pull", -1.0)):
        np.testing.assert_almost_equal(frame[column].values[0], value)
def test_variance_comparer():
    """Check ``mae_pull`` values computed on ``test_comparer_df``.

    Std and mean are computed with ``entire=True`` for both features, then
    ``pull`` is applied row-wise (axis=1).
    """
    feature_names = ["the_feature", "dummy_feature"]
    store = {"to_profile": test_comparer_df}

    # Each module receives its own copy of the feature list.
    stats = ApplyFunc(apply_to_key="to_profile", features=list(feature_names))
    stats.add_apply_func(np.std, suffix="_std", entire=True)
    stats.add_apply_func(np.mean, suffix="_mean", entire=True)

    pulls = ApplyFunc(apply_to_key="to_profile", features=list(feature_names))
    pulls.add_apply_func(
        pull, suffix="_pull", axis=1, suffix_mean="_mean", suffix_std="_std"
    )

    result = Pipeline(modules=[stats, pulls]).transform(store)
    profiled = result["to_profile"]

    the_feature_pull = profiled["the_feature"]["mae_pull"].values
    np.testing.assert_almost_equal(the_feature_pull[2], -0.1017973, decimal=5)
    np.testing.assert_almost_equal(the_feature_pull[3], 1.934149074, decimal=6)

    dummy_feature_pull = profiled["dummy_feature"]["mae_pull"].values
    np.testing.assert_almost_equal(dummy_feature_pull[0], -0.6107839182)
def test_chi_squared1():
    """Check ``relative_chi_squared`` applied after ``roll_norm_hist_mean_cov``.

    The example histograms are read from JSON, split per feature, passed
    through ``roll_norm_hist_mean_cov`` (window=5, shift=1) and then through
    ``relative_chi_squared`` (axis=1). Selected ``chi2`` entries are compared
    against reference values.
    """
    hist_list = [
        "date:country",
        "date:bankrupt",
        "date:num_employees",
        "date:A_score",
        "date:A_score:num_employees",
    ]

    pipeline = Pipeline(
        modules=[
            JsonReader(
                file_path=resources.data("example_histogram.json"),
                store_key="example_hist",
            ),
            HistSplitter(
                read_key="example_hist", store_key="output_hist", features=hist_list
            ),
            ApplyFunc(
                apply_to_key="output_hist",
                apply_funcs=[
                    dict(
                        func=roll_norm_hist_mean_cov,
                        hist_name="histogram",
                        window=5,
                        shift=1,
                        suffix="",
                        entire=True,
                    )
                ],
            ),
            ApplyFunc(
                apply_to_key="output_hist",
                apply_funcs=[dict(func=relative_chi_squared, suffix="", axis=1)],
            ),
        ]
    )
    datastore = pipeline.transform(datastore={})

    assert "output_hist" in datastore
    for f in [
        "A_score",
        "A_score:num_employees",
        "bankrupt",
        "country",
        "num_employees",
    ]:
        assert f in datastore["output_hist"]

    # (feature, row position, expected chi2).
    # The integers are used as row positions, so they are read through
    # ``.values``. Plain ``series[int]`` only falls back to positional access
    # on a non-integer index, and pandas has deprecated that fallback.
    expected = [
        ("A_score", 6, 4.25),
        ("A_score:num_employees", -2, 2.1333333333333315),
        ("bankrupt", 6, 0.40000000000000024),
        ("country", 5, 0.8999999999999994),
        ("num_employees", 5, 0.849999999999999),
    ]
    for feature, position, chi2 in expected:
        series = datastore["output_hist"][feature]["chi2"]
        np.testing.assert_almost_equal(series.values[position], chi2)
def test_chi_squared2():
    """Check ``relative_chi_squared`` applied after ``expand_norm_hist_mean_cov``.

    The example histograms are read from JSON, split per feature, passed
    through ``expand_norm_hist_mean_cov`` (shift=1) and then through
    ``relative_chi_squared`` (axis=1). Selected ``chi2`` entries are compared
    against reference values.
    """
    hist_list = [
        "date:country",
        "date:bankrupt",
        "date:num_employees",
        "date:A_score",
        "date:A_score:num_employees",
    ]

    pipeline = Pipeline(
        modules=[
            JsonReader(
                file_path=resources.data("example_histogram.json"),
                store_key="example_hist",
            ),
            HistSplitter(
                read_key="example_hist", store_key="output_hist", features=hist_list
            ),
            ApplyFunc(
                apply_to_key="output_hist",
                apply_funcs=[
                    dict(
                        func=expand_norm_hist_mean_cov,
                        hist_name="histogram",
                        shift=1,
                        suffix="",
                        entire=True,
                    )
                ],
            ),
            ApplyFunc(
                apply_to_key="output_hist",
                apply_funcs=[dict(func=relative_chi_squared, suffix="", axis=1)],
            ),
        ]
    )
    datastore = pipeline.transform(datastore={})

    assert "output_hist" in datastore
    for f in [
        "A_score",
        "A_score:num_employees",
        "bankrupt",
        "country",
        "num_employees",
    ]:
        assert f in datastore["output_hist"]

    # (feature, row position, expected chi2).
    # Negative integers are row positions counted from the end, so they are
    # read through ``.values``. Plain ``series[int]`` only falls back to
    # positional access on a non-integer index, and pandas has deprecated
    # that fallback.
    expected = [
        ("A_score", -1, 9.891821919006366),
        ("A_score:num_employees", -2, 3.217532467532462),
        ("bankrupt", -1, 0.23767605633802757),
        ("country", -1, 1.3717532467532458),
        ("num_employees", -1, 1.1858766233766194),
    ]
    for feature, position, chi2 in expected:
        series = datastore["output_hist"][feature]["chi2"]
        np.testing.assert_almost_equal(series.values[position], chi2)
def test_report_traffic_light_bounds():
    """Smoke test: build a report section with static and dynamic bounds.

    The pipeline computes expanding mean/std, the pull, static bounds
    (``ComputeTLBounds`` stored under ``"static_tlb"``), dynamic bounds
    (``pull_bounds`` applied and stored under ``"dtlb"``) and finally runs
    ``SectionGenerator``. Nothing is asserted; the test only verifies that
    the pipeline runs without raising.
    """
    datastore = dict()
    datastore["to_profile"] = {"asc_numbers": get_test_data()}

    conf = {
        "monitoring_rules": {
            "the_feature:mae": [8, 4, 2, 0.15],
            "mse": [0.2, 0.11, 0.09, 0],
            "mae": [1, 0, 0, 0],
            "*_pull": [7, 4, -4, -7],
        },
        "pull_rules": {"*_pull": [7, 4, -4, -7]},
    }

    m1 = ApplyFunc(
        apply_to_key="to_profile", features=["asc_numbers"], metrics=["a", "b"]
    )
    # Each suffix must name the statistic it stores: ``pull`` below is told
    # to find the mean under "_mean" and the std under "_std".
    m1.add_apply_func(expanding_mean, suffix="_mean", entire=True)
    m1.add_apply_func(expanding_std, suffix="_std", entire=True)

    m2 = ApplyFunc(apply_to_key="to_profile", features=["asc_numbers"])
    m2.add_apply_func(
        pull, suffix="_pull", axis=1, suffix_mean="_mean", suffix_std="_std"
    )

    ctlb = ComputeTLBounds(
        read_key="to_profile",
        store_key="static_tlb",
        monitoring_rules=conf["monitoring_rules"],
    )

    m3 = ComputeTLBounds(
        read_key="to_profile",
        monitoring_rules=conf["pull_rules"],
        apply_funcs_key="dynamic_tlb",
        func=pull_bounds,
        metrics_wide=True,
        axis=1,
    )

    # Applies the functions that m3 registered under "dynamic_tlb".
    m4 = ApplyFunc(
        apply_to_key=m3.read_key,
        assign_to_key="dtlb",
        apply_funcs_key="dynamic_tlb",
    )

    rg = SectionGenerator(
        read_key="to_profile",
        store_key="section",
        section_name="Profiles",
        dynamic_bounds="dtlb",
        static_bounds="static_tlb",
    )

    pipeline = Pipeline(modules=[m1, m2, ctlb, m3, m4, rg])
    datastore = pipeline.transform(datastore)
def test_rolling_window_funcs():
    """Check ``rolling_mean`` and ``rolling_lr`` outputs for metric ``a``.

    Three functions are registered on a single ``ApplyFunc`` module; the
    resulting columns are compared element-wise against expected sequences
    that start with NaN entries.
    """
    store = dict(to_profile={"asc_numbers": get_test_data()})

    module = ApplyFunc(
        apply_to_key="to_profile", features=["asc_numbers"], metrics=["a", "b"]
    )
    # (function, keyword arguments), registered in this order.
    registrations = [
        (rolling_mean, dict(suffix="_rolling_3_mean", entire=True, window=3, shift=0)),
        (rolling_lr, dict(suffix="_rolling_10_slope", entire=True, window=10, index=0)),
        (
            rolling_lr,
            dict(suffix="_rolling_10_intercept", entire=True, window=10, index=1),
        ),
    ]
    for function, options in registrations:
        module.add_apply_func(function, **options)

    result = Pipeline(modules=[module]).transform(store)
    frame = result["to_profile"]["asc_numbers"]

    expected_columns = [
        ("a_rolling_3_mean", [np.nan, np.nan, *range(1, 99)]),
        ("a_rolling_10_slope", [np.nan] * 9 + [1.0] * 91),
        ("a_rolling_10_intercept", [np.nan] * 9 + [float(i) for i in range(91)]),
    ]
    for column, expected in expected_columns:
        np.testing.assert_array_almost_equal(frame[column].tolist(), expected)
def test_hist_compare():
    """Check ``hist_compare`` between each histogram and its expanding sum.

    ``expanding_hist`` (shift=1, suffix ``"sum"``) is applied first; then
    ``hist_compare`` compares ``histogram`` with ``histogram_sum`` and the
    result is stored under ``"comparison"``. The last ``chi2`` value for
    ``num_employees`` is compared against a reference value.
    """
    hist_list = [
        "date:country",
        "date:bankrupt",
        "date:num_employees",
        "date:A_score",
        "date:A_score:num_employees",
    ]

    pipeline = Pipeline(
        modules=[
            JsonReader(
                file_path=resources.data("example_histogram.json"),
                store_key="example_hist",
            ),
            HistSplitter(
                read_key="example_hist", store_key="output_hist", features=hist_list
            ),
            ApplyFunc(
                apply_to_key="output_hist",
                apply_funcs=[
                    dict(
                        func=expanding_hist,
                        shift=1,
                        suffix="sum",
                        entire=True,
                        hist_name="histogram",
                    )
                ],
            ),
            ApplyFunc(
                apply_to_key="output_hist",
                assign_to_key="comparison",
                apply_funcs=[
                    dict(
                        func=hist_compare,
                        hist_name1="histogram",
                        hist_name2="histogram_sum",
                        suffix="",
                        axis=1,
                    )
                ],
            ),
        ]
    )
    datastore = pipeline.transform(datastore={})

    df = datastore["comparison"]["num_employees"]
    # chi2 is a computed float: compare with a tolerance rather than bit-exact
    # equality, as the other chi2 tests in this file do.
    np.testing.assert_almost_equal(df["chi2"].values[-1], 0.7017543859649122)
def test_expanding_hist():
    """Check the bin entries of the last ``histogram_sum`` for ``num_employees``.

    ``expanding_hist`` is applied with shift=1 and suffix ``"sum"``; the bin
    entries of the final row's histogram are compared against a reference
    array.
    """
    features = [
        "date:country",
        "date:bankrupt",
        "date:num_employees",
        "date:A_score",
        "date:A_score:num_employees",
    ]

    reader = JsonReader(
        file_path=resources.data("example_histogram.json"),
        store_key="example_hist",
    )
    splitter = HistSplitter(
        read_key="example_hist", store_key="output_hist", features=features
    )
    summer = ApplyFunc(
        apply_to_key="output_hist",
        apply_funcs=[
            dict(
                func=expanding_hist,
                shift=1,
                suffix="sum",
                entire=True,
                hist_name="histogram",
            )
        ],
    )

    result = Pipeline(modules=[reader, splitter, summer]).transform(datastore={})

    last_sum = result["output_hist"]["num_employees"]["histogram_sum"].values[-1]
    bin_entries = last_sum.hist.bin_entries()

    # Reference bin entries, ten values per row.
    check = np.array([
        11., 1., 1., 0., 0., 0., 0., 0., 0., 0.,
        1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
        0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
        0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 1., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 1.,
    ])
    np.testing.assert_array_almost_equal(bin_entries, check)
def test_expand_norm_hist_mean_cov():
    """Check ``histogram_mean`` produced by ``expand_norm_hist_mean_cov``.

    The function is applied with shift=1 on the split example histograms;
    the second-to-last ``histogram_mean`` entry of ``num_employees`` is
    compared against a reference array.
    """
    features = [
        "date:country",
        "date:bankrupt",
        "date:num_employees",
        "date:A_score",
        "date:A_score:num_employees",
    ]

    reader = JsonReader(
        file_path=resources.data("example_histogram.json"),
        store_key="example_hist",
    )
    splitter = HistSplitter(
        read_key="example_hist", store_key="output_hist", features=features
    )
    expander = ApplyFunc(
        apply_to_key="output_hist",
        apply_funcs=[
            dict(
                func=expand_norm_hist_mean_cov,
                hist_name="histogram",
                shift=1,
                suffix="",
                entire=True,
            )
        ],
    )

    result = Pipeline(modules=[reader, splitter, expander]).transform(datastore={})

    assert "output_hist" in result
    split_hists = result["output_hist"]
    for name in ("A_score", "A_score:num_employees", "bankrupt", "country", "num_employees"):
        assert name in split_hists

    mean = split_hists["num_employees"]["histogram_mean"].values[-2]

    # Reference mean, ten values per row.
    check = np.array([
        0.56666667, 0.03333333, 0.03333333, 0., 0., 0., 0., 0., 0., 0.,
        0.06666667, 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0.06666667, 0., 0., 0.,
        0., 0., 0., 0., 0.06666667, 0., 0., 0., 0., 0.,
        0., 0., 0., 0.03333333, 0.06666667, 0.06666667,
    ])
    np.testing.assert_array_almost_equal(mean, check)
def test_normalized_hist_mean_cov():
    """Check mean, covariance and binning from ``normalized_hist_mean_cov``.

    Every row of the ``A_score`` result must carry the same reference mean,
    binning and covariance.
    """
    features = [
        "date:country",
        "date:bankrupt",
        "date:num_employees",
        "date:A_score",
        "date:A_score:num_employees",
    ]

    reader = JsonReader(
        file_path=resources.data("example_histogram.json"),
        store_key="example_hist",
    )
    splitter = HistSplitter(
        read_key="example_hist", store_key="output_hist", features=features
    )
    normalizer = ApplyFunc(
        apply_to_key="output_hist",
        assign_to_key="output_hist",
        apply_funcs=[dict(func=normalized_hist_mean_cov, suffix="")],
    )

    result = Pipeline(modules=[reader, splitter, normalizer]).transform(datastore={})

    assert "output_hist" in result
    split_hists = result["output_hist"]
    for name in ("A_score", "A_score:num_employees", "bankrupt", "country", "num_employees"):
        assert name in split_hists

    frame = split_hists["A_score"]

    expected_mean = [0.3125, 0.03125, 0.1875, 0.40625, 0.0625]
    expected_binning = [1.5, 2.5, 3.5, 4.5, 5.5]
    expected_cov = np.array(
        [
            [0.22916667, -0.01041667, -0.0625, -0.13541667, -0.02083333],
            [-0.01041667, 0.015625, 0.01041667, -0.01354167, -0.00208333],
            [-0.0625, 0.01041667, 0.12916667, -0.06458333, -0.0125],
            [-0.13541667, -0.01354167, -0.06458333, 0.240625, -0.02708333],
            [-0.02083333, -0.00208333, -0.0125, -0.02708333, 0.0625],
        ]
    )

    rows = zip(
        frame["histogram_mean"].values,
        frame["histogram_cov"].values,
        frame["histogram_binning"].values,
    )
    for row_mean, row_cov, row_binning in rows:
        np.testing.assert_array_almost_equal(row_mean, expected_mean)
        np.testing.assert_array_almost_equal(row_binning, expected_binning)
        np.testing.assert_array_almost_equal(row_cov, expected_cov)
def test_traffic_light_summary_combination():
    """Check the ``_AGGREGATE_`` entry added to the alerts by ``AlertsSummary``.

    Traffic-light bounds are computed and applied, ``traffic_light_summary``
    is run row-wise into ``"alerts"``, and ``AlertsSummary`` reads that key.
    The last row of the aggregate is compared against expected counts.
    """
    monitoring_rules = {
        "the_feature:mae": [8, 4, 2, 0.15],
        "dummy_feature:*": [0, 0, 0, 0],
        "mse": [0.2, 0.11, 0.09, 0],
        "mae": [0, 0, 0, 0],
        "*": [0, 0, 0, 0],
    }
    store = {"test_data": test_comparer_df}

    compute_bounds = ComputeTLBounds(
        read_key="test_data",
        store_key="traffic_light_bounds",
        apply_funcs_key="traffic_light_funcs",
        ignore_features=["dummy_feature"],
        monitoring_rules=monitoring_rules,
        prefix="tl_",
    )
    apply_bounds = ApplyFunc(
        apply_to_key=compute_bounds.read_key,
        assign_to_key="output_data",
        apply_funcs_key="traffic_light_funcs",
    )
    summarize = ApplyFunc(
        apply_to_key="output_data",
        apply_funcs=[dict(func=traffic_light_summary, axis=1, suffix="")],
        assign_to_key="alerts",
    )
    aggregate = AlertsSummary(read_key="alerts")

    steps = [compute_bounds, apply_bounds, summarize, aggregate]
    result = Pipeline(modules=steps).transform(store)

    assert "_AGGREGATE_" in result["alerts"]
    summary = result["alerts"]["_AGGREGATE_"]

    # Expected values in the last row of the aggregate.
    expected_last_row = {"worst": 2, "n_green": 1, "n_yellow": 0, "n_red": 1}
    for column, value in expected_last_row.items():
        assert summary[column].values[-1] == value
def test_normalized_hist_mean_cov():
    """Check mean, covariance and binning from ``normalized_hist_mean_cov``.

    Every row of the ``A_score`` result must carry the same reference mean,
    binning and covariance.
    """
    features = [
        "date:country",
        "date:bankrupt",
        "date:num_employees",
        "date:A_score",
        "date:A_score:num_employees",
    ]

    reader = JsonReader(
        file_path=resources.data("example_histogram.json"),
        store_key="example_hist",
    )
    splitter = HistSplitter(
        read_key="example_hist", store_key="output_hist", features=features
    )
    normalizer = ApplyFunc(
        apply_to_key="output_hist",
        assign_to_key="output_hist",
        apply_funcs=[dict(func=normalized_hist_mean_cov, suffix="")],
    )

    result = Pipeline(modules=[reader, splitter, normalizer]).transform(datastore={})

    assert "output_hist" in result
    split_hists = result["output_hist"]
    for name in ("A_score", "A_score:num_employees", "bankrupt", "country", "num_employees"):
        assert name in split_hists

    frame = split_hists["A_score"]

    expected_mean = [0.3125, 0.03125, 0.1875, 0.40625, 0.0625]
    expected_binning = [1.5, 2.5, 3.5, 4.5, 5.5]
    expected_cov = np.array(
        [
            [0.22916667, -0.01041667, -0.0625, -0.13541667, -0.02083333],
            [-0.01041667, 0.015625, 0.01041667, -0.01354167, -0.00208333],
            [-0.0625, 0.01041667, 0.12916667, -0.06458333, -0.0125],
            [-0.13541667, -0.01354167, -0.06458333, 0.240625, -0.02708333],
            [-0.02083333, -0.00208333, -0.0125, -0.02708333, 0.0625],
        ]
    )

    rows = zip(
        frame["histogram_mean"].values,
        frame["histogram_cov"].values,
        frame["histogram_binning"].values,
    )
    for row_mean, row_cov, row_binning in rows:
        np.testing.assert_array_almost_equal(row_mean, expected_mean)
        np.testing.assert_array_almost_equal(row_binning, expected_binning)
        np.testing.assert_array_almost_equal(row_cov, expected_cov)
def test_traffic_light_summary():
    """Check ``traffic_light_summary`` output for ``the_feature``.

    Traffic-light bounds are computed and applied, then
    ``traffic_light_summary`` is run row-wise into ``"alerts"``. The last row
    for ``the_feature`` is compared against expected counts.
    """
    monitoring_rules = {
        "the_feature:mae": [8, 4, 2, 0.15],
        "dummy_feature:*": [0, 0, 0, 0],
        "mse": [0.2, 0.11, 0.09, 0],
        "mae": [0, 0, 0, 0],
        "*": [0, 0, 0, 0],
    }
    store = {"test_data": test_comparer_df}

    compute_bounds = ComputeTLBounds(
        read_key="test_data",
        store_key="traffic_light_bounds",
        apply_funcs_key="traffic_light_funcs",
        ignore_features=["dummy_feature"],
        monitoring_rules=monitoring_rules,
        prefix="tl_",
    )
    apply_bounds = ApplyFunc(
        apply_to_key=compute_bounds.read_key,
        assign_to_key="output_data",
        apply_funcs_key="traffic_light_funcs",
    )
    summarize = ApplyFunc(
        apply_to_key="output_data",
        apply_funcs=[dict(func=traffic_light_summary, axis=1, suffix="")],
        assign_to_key="alerts",
    )

    steps = [compute_bounds, apply_bounds, summarize]
    result = Pipeline(modules=steps).transform(store)

    summary = result["alerts"]["the_feature"]

    # Expected values in the last row of the summary.
    expected_last_row = {"worst": 2, "n_green": 1, "n_yellow": 0, "n_red": 1}
    for column, value in expected_last_row.items():
        assert summary[column].values[-1] == value
def test_apply_func_module():
    """Check ``ApplyFunc.transform`` with numpy reducers and a custom function.

    ``np.std`` and ``np.mean`` are registered with ``entire=True`` and a
    plain ``x + 1`` function without options; results are written under
    ``"profiled"`` and the first-row mean/std of ``a`` and ``b`` are checked.
    """

    # The helper keeps the name ``func``: no suffix is passed, so the name may
    # influence output column naming (the numpy reducers yield ``*_std`` and
    # ``*_mean``).
    def func(x):
        return x + 1

    store = {"to_profile": {"asc_numbers": get_test_data()}}

    module = ApplyFunc(
        apply_to_key="to_profile", store_key="profiled", features=["asc_numbers"]
    )
    for reducer in (np.std, np.mean):
        module.add_apply_func(reducer, entire=True)
    module.add_apply_func(func)

    result = module.transform(store)
    frame = result["profiled"]["asc_numbers"]

    # Means are compared exactly, standard deviations with a tolerance.
    np.testing.assert_equal(frame["a_mean"].values[0], 49.5)
    np.testing.assert_equal(frame["b_mean"].values[0], 1.5)
    np.testing.assert_almost_equal(frame["a_std"].values[0], 28.86607)
    np.testing.assert_almost_equal(frame["b_std"].values[0], 0.5)
def test_integration_alerting():
    """Count traffic-light outcomes for ``the_feature`` over all rows.

    Bounds are computed and applied; the resulting table is then scanned for
    the values 0, 1 and 2, which are tallied as green, yellow and red.
    """
    monitoring_rules = {
        "the_feature:mae": [8, 4, 2, 0.15],
        "dummy_feature:*": [0, 0, 0, 0],
        "mse": [0.2, 0.11, 0.09, 0],
        "mae": [0, 0, 0, 0],
        "*": [0, 0, 0, 0],
    }
    store = {"test_data": test_comparer_df}

    compute_bounds = ComputeTLBounds(
        read_key="test_data",
        store_key="traffic_light_bounds",
        apply_funcs_key="traffic_light_funcs",
        ignore_features=["dummy_feature"],
        monitoring_rules=monitoring_rules,
    )
    apply_bounds = ApplyFunc(
        apply_to_key=compute_bounds.read_key,
        assign_to_key="output_data",
        apply_funcs_key="traffic_light_funcs",
    )

    result = Pipeline(modules=[compute_bounds, apply_bounds]).transform(store)
    lights = result[apply_bounds.store_key]["the_feature"]

    # One column per colour; each row holds the number of cells equal to the
    # colour's code (its position in the list).
    counts_per_row = pd.DataFrame(
        {
            f"n_{color}": (lights.values == code).sum(axis=1)
            for code, color in enumerate(["green", "yellow", "red"])
        }
    )
    totals = counts_per_row.sum(axis=0)

    assert totals["n_green"] == 5
    assert totals["n_yellow"] == 1
    assert totals["n_red"] == 4
def test_apply_static_traffic_light_bounds():
    """Check the traffic-light bound columns produced by ``StaticBounds``.

    Std and mean are computed for metrics ``a`` and ``b``, ``pull`` is applied
    on top, and ``StaticBounds`` stores its result under ``"tl"``. The second
    row of every bound column is compared against reference values.
    """
    store = {"to_profile": {"asc_numbers": get_test_data()}}
    rules = {"*_pull": [7, 4, -4, -7]}

    stats = ApplyFunc(
        apply_to_key="to_profile", features=["asc_numbers"], metrics=["a", "b"]
    )
    stats.add_apply_func(np.std, suffix="_std")
    stats.add_apply_func(np.mean, suffix="_mean")

    pulls = ApplyFunc(apply_to_key="to_profile", features=["asc_numbers"])
    pulls.add_apply_func(
        pull, suffix="_pull", axis=1, suffix_mean="_mean", suffix_std="_std"
    )

    bounds = StaticBounds(
        read_key="to_profile",
        store_key="tl",
        rules=rules,
        suffix_mean="_mean",
        suffix_std="_std",
    )

    result = Pipeline(modules=[stats, pulls, bounds]).transform(store)

    assert "tl" in result
    assert "asc_numbers" in result["tl"]
    frame = result["tl"]["asc_numbers"]

    # Expected value in the second row of each traffic-light bound column.
    expected_second_row = {
        "traffic_light_a_red_high": 251.5624903,
        "traffic_light_a_yellow_high": 164.96428019,
        "traffic_light_a_yellow_low": -65.96428019,
        "traffic_light_a_red_low": -152.56249033,
        "traffic_light_b_red_high": 5.0,
        "traffic_light_b_yellow_high": 3.5,
        "traffic_light_b_yellow_low": -0.5,
        "traffic_light_b_red_low": -2.0,
    }

    # All columns must exist before any value is inspected.
    for column in expected_second_row:
        assert column in frame.columns

    for column, value in expected_second_row.items():
        np.testing.assert_almost_equal(frame[column].values[1], value)
def test_expand_norm_hist_mean_cov():
    """Check ``histogram_mean`` produced by ``expand_norm_hist_mean_cov``.

    The function is applied with shift=1 on the split example histograms;
    the second-to-last ``histogram_mean`` entry of ``num_employees`` is
    compared against a reference array.
    """
    features = [
        "date:country",
        "date:bankrupt",
        "date:num_employees",
        "date:A_score",
        "date:A_score:num_employees",
    ]

    reader = JsonReader(
        file_path=resources.data("example_histogram.json"),
        store_key="example_hist",
    )
    splitter = HistSplitter(
        read_key="example_hist", store_key="output_hist", features=features
    )
    expander = ApplyFunc(
        apply_to_key="output_hist",
        apply_funcs=[
            dict(
                func=expand_norm_hist_mean_cov,
                hist_name="histogram",
                shift=1,
                suffix="",
                entire=True,
            )
        ],
    )

    result = Pipeline(modules=[reader, splitter, expander]).transform(datastore={})

    assert "output_hist" in result
    split_hists = result["output_hist"]
    for name in ("A_score", "A_score:num_employees", "bankrupt", "country", "num_employees"):
        assert name in split_hists

    mean = split_hists["num_employees"]["histogram_mean"].values[-2]

    # Reference mean, ten values per row.
    check = np.array([
        0.56666667, 0.03333333, 0.03333333, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.06666667, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.06666667, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.06666667, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.03333333, 0.06666667, 0.06666667,
    ])
    np.testing.assert_array_almost_equal(mean, check)
def test_expanding_hist():
    """Check the bin entries of the last ``histogram_sum`` for ``num_employees``.

    ``expanding_hist`` is applied with shift=1 and suffix ``"sum"``; the bin
    entries of the final row's histogram are compared against a reference
    array.
    """
    features = [
        "date:country",
        "date:bankrupt",
        "date:num_employees",
        "date:A_score",
        "date:A_score:num_employees",
    ]

    reader = JsonReader(
        file_path=resources.data("example_histogram.json"),
        store_key="example_hist",
    )
    splitter = HistSplitter(
        read_key="example_hist", store_key="output_hist", features=features
    )
    summer = ApplyFunc(
        apply_to_key="output_hist",
        apply_funcs=[
            dict(
                func=expanding_hist,
                shift=1,
                suffix="sum",
                entire=True,
                hist_name="histogram",
            )
        ],
    )

    result = Pipeline(modules=[reader, splitter, summer]).transform(datastore={})

    last_sum = result["output_hist"]["num_employees"]["histogram_sum"].values[-1]
    bin_entries = last_sum.bin_entries()

    # Reference bin entries, ten values per row.
    check = np.array([
        11.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 1.0,
    ])
    np.testing.assert_array_almost_equal(bin_entries, check)