def test_correct_data_is_marked_as_change_point(self):
    seasonality_size = 5
    seasonal_data = np.arange(seasonality_size)

    data = np.tile(seasonal_data, 3)
    data = np.append(data, [0, 100, 200, 300, 400])  # change distribution

    X_train = pd.Series(data, name="ts")

    training_seasons = 3
    training_size = seasonality_size * training_seasons

    cpd = SsaChangePointDetector(confidence=95,
                                 change_history_length=8,
                                 training_window_size=training_size,
                                 seasonal_window_size=seasonality_size + 1) << {'result': 'ts'}

    cpd.fit(X_train, verbose=1)
    data = cpd.transform(X_train)

    # index 16 holds the first value (100) that clearly departs from the
    # repeating 0..4 seasonal pattern, so the alert should fire there
    self.assertEqual(data.loc[16, 'result.Alert'], 1.0)

    # and it should be the only alert raised over the whole series
    data = data.loc[data['result.Alert'] == 1.0]
    self.assertEqual(len(data), 1)
def test_multiple_user_specified_columns_is_not_allowed(self):
    path = get_dataset('timeseries').as_filepath()
    data = FileDataStream.read_csv(path)

    try:
        pipeline = Pipeline([
            SsaChangePointDetector(columns=['t2', 't3'],
                                   change_history_length=5)
        ])
        pipeline.fit_transform(data)
    except RuntimeError as e:
        self.assertTrue('Only one column is allowed' in str(e))
        return

    self.fail()
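# A minimal sketch, not part of the original suite: the single-column form
# that the check above permits. The {'t2_cp': 't2'} mapping, the detector
# parameters, and the 't2_cp.Alert' output column are taken from the
# documented example; the test name itself is hypothetical.
def test_single_user_specified_column_sketch(self):
    path = get_dataset('timeseries').as_filepath()
    data = FileDataStream.read_csv(path)

    pipeline = Pipeline([
        SsaChangePointDetector(columns={'t2_cp': 't2'},  # one input column -> one output column
                               change_history_length=4,
                               training_window_size=8,
                               seasonal_window_size=3)
    ])
    result = pipeline.fit_transform(data)
    self.assertTrue('t2_cp.Alert' in result.columns)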
LightGbmRanker(minimum_example_count_per_group=1,
               minimum_example_count_per_leaf=1),
'NGramFeaturizer': NGramFeaturizer(word_feature_extractor=n_gram()),
'SkipFilter': SkipFilter(count=5),
'TakeFilter': TakeFilter(count=100000),
'IidSpikeDetector': IidSpikeDetector(columns=['F0']),
'IidChangePointDetector': IidChangePointDetector(columns=['F0']),
'SsaSpikeDetector': SsaSpikeDetector(columns=['F0'],
                                     seasonal_window_size=2),
'SsaChangePointDetector': SsaChangePointDetector(columns=['F0'],
                                                 seasonal_window_size=2),
'SsaForecaster': SsaForecaster(columns=['F0'],
                               window_size=2,
                               series_length=5,
                               train_size=5,
                               horizon=1),
'TensorFlowScorer': TensorFlowScorer(
    model_location=os.path.join(this, '..', 'nimbusml', 'examples',
                                'frozen_saved_model.pb'),
    columns={'c': ['a', 'b']}),
}

MULTI_OUTPUT_EX = [
    'FastLinearClassifier',
    'FastLinearRegressor',
'PcaAnomalyDetector': PcaAnomalyDetector(rank=3),
'PcaTransformer': PcaTransformer(rank=2),
'PixelExtractor': Pipeline([
    Loader(columns={'ImgPath': 'Path'}),
    PixelExtractor(columns={'ImgPixels': 'ImgPath'}),
]),
'PrefixColumnConcatenator': PrefixColumnConcatenator(columns={'Features': 'Sepal_'}),
'Resizer': Pipeline([
    Loader(columns={'ImgPath': 'Path'}),
    Resizer(image_width=227, image_height=227,
            columns={'ImgResize': 'ImgPath'})
]),
'SkipFilter': SkipFilter(count=5),
'SsaSpikeDetector': SsaSpikeDetector(columns=['Sepal_Length'],
                                     seasonal_window_size=2),
'SsaChangePointDetector': SsaChangePointDetector(columns=['Sepal_Length'],
                                                 seasonal_window_size=2),
'SsaForecaster': SsaForecaster(columns=['Sepal_Length'],
                               window_size=2,
                               series_length=5,
                               train_size=5,
                               horizon=1),
'RangeFilter': RangeFilter(min=5.0, max=5.1, columns=['Sepal_Length']),
'TakeFilter': TakeFilter(count=100),
'TensorFlowScorer': TensorFlowScorer(
    model_location=os.path.join(script_dir, '..', 'nimbusml', 'examples',
                                'frozen_saved_model.pb'),
    columns={'c': ['a', 'b']}),
# 10      0
# 11      1
# 12      2
# 13      3
# 14      4
# 15      0
# 16    100
# 17    200
# 18    300
# 19    400

training_seasons = 3
training_size = seasonality_size * training_seasons

cpd = SsaChangePointDetector(confidence=95,
                             change_history_length=8,
                             training_window_size=training_size,
                             seasonal_window_size=seasonality_size + 1) << {'result': 'ts'}

cpd.fit(X_train, verbose=1)
data = cpd.transform(X_train)

print(data)

#     ts  result.Alert  result.Raw Score  result.P-Value Score  result.Martingale Score
# 0    0           0.0         -2.531824          5.000000e-01             1.470334e-06
# 1    1           0.0         -0.008832          5.818072e-03             8.094459e-05
# 2    2           0.0          0.763040          1.374071e-01             2.588526e-04
# 3    3           0.0          0.693811          2.797713e-01             4.365186e-04
# 4    4           0.0          1.442079          1.838294e-01             1.074242e-03
# 5    0           0.0         -1.844414          1.707238e-01             2.825599e-03
# 6    1           0.0          0.219578          4.364025e-01             3.193633e-03
# data input (as a FileDataStream)
path = get_dataset('timeseries').as_filepath()

data = FileDataStream.read_csv(path)
print(data.head())
#      t1    t2      t3
# 0  0.01  0.01  0.0100
# 1  0.02  0.02  0.0200
# 2  0.03  0.03  0.0200
# 3  0.03  0.03  0.0250
# 4  0.03  0.03  0.0005

# define the training pipeline
pipeline = Pipeline([
    SsaChangePointDetector(columns={'t2_cp': 't2'},
                           change_history_length=4,
                           training_window_size=8,
                           seasonal_window_size=3)
])

result = pipeline.fit_transform(data)
print(result)
#      t1    t2      t3  t2_cp.Alert  t2_cp.Raw Score  t2_cp.P-Value Score  t2_cp.Martingale Score
# 0  0.01  0.01  0.0100          0.0        -0.111334         5.000000e-01                0.001213
# 1  0.02  0.02  0.0200          0.0        -0.076755         4.862075e-01                0.001243
# 2  0.03  0.03  0.0200          0.0        -0.034871         3.856320e-03                0.099119
# 3  0.03  0.03  0.0250          0.0        -0.012559         8.617091e-02                0.482400
# 4  0.03  0.03  0.0005          0.0        -0.015723         2.252377e-01                0.988788
# 5  0.03  0.05  0.0100          0.0        -0.001133         1.767711e-01                2.457946
# 6  0.05  0.07  0.0500          0.0         0.006265         9.170460e-02                0.141898
# 7  0.07  0.09  0.0900          0.0         0.002383         2.701134e-01                0.050747
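# A short follow-up sketch (column name taken from the output above): rows
# where the detector raised an alert can be pulled out by filtering on the
# 't2_cp.Alert' column, mirroring the check used in the unit test.
alerts = result.loc[result['t2_cp.Alert'] == 1.0]
print(alerts)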