def test_correct_data_is_marked_as_change_point(self):
    """IidChangePointDetector should alert exactly once, at the first
    sample after the level shift (index 8)."""
    # Eight 5s followed by eight 7s: the change begins at index 8.
    series_values = [5] * 8 + [7] * 8
    train_series = pd.Series(series_values, name="ts")

    detector = IidChangePointDetector(
        confidence=95, change_history_length=4) << {'result': 'ts'}
    output = detector.fit_transform(train_series)

    # The first shifted sample must be flagged ...
    self.assertEqual(output.loc[8, 'result.Alert'], 1.0)
    # ... and no other row may carry an alert.
    alerts = output[output['result.Alert'] == 1.0]
    self.assertEqual(len(alerts), 1)
def test_multiple_user_specified_columns_is_not_allowed(self):
    """Configuring IidChangePointDetector with two input columns must
    raise a RuntimeError mentioning the one-column restriction."""
    path = get_dataset('timeseries').as_filepath()
    data = FileDataStream.read_csv(path)

    # Build the pipeline OUTSIDE the assertion scope: the original
    # try/except wrapped construction too, so an error raised there
    # with a matching message would have passed the test falsely.
    pipeline = Pipeline([
        IidChangePointDetector(columns=['t2', 't3'],
                               change_history_length=5)
    ])

    # assertRaisesRegex checks both the exception type and the message,
    # replacing the manual try/except/self.fail() pattern.
    with self.assertRaisesRegex(RuntimeError, 'Only one column is allowed'):
        pipeline.fit_transform(data)
###############################################################################
# IidChangePointDetector
import pandas as pd
from nimbusml.timeseries import IidChangePointDetector

# Build a toy series with one level shift: eight 5s, then eight 7s,
# so the change point is at index 8.
values = [5] * 8 + [7] * 8
X_train = pd.Series(values, name="ts")

# Detect change points on the 'ts' column; results land in 'result.*'.
cpd = IidChangePointDetector(
    confidence=95, change_history_length=4) << {'result': 'ts'}
data = cpd.fit_transform(X_train)

print(data)
# Expected output:
# ts result.Alert result.Raw Score result.P-Value Score result.Martingale Score
# 0   5   0.0   5.0   5.000000e-01       0.001213
# 1   5   0.0   5.0   5.000000e-01       0.001213
# 2   5   0.0   5.0   5.000000e-01       0.001213
# 3   5   0.0   5.0   5.000000e-01       0.001213
# 4   5   0.0   5.0   5.000000e-01       0.001213
# 5   5   0.0   5.0   5.000000e-01       0.001213
# 6   5   0.0   5.0   5.000000e-01       0.001213
# 7   5   0.0   5.0   5.000000e-01       0.001213
# 8   7   1.0   7.0   1.000000e-08   10298.666376  <-- alert is on, predicted changepoint
# 9   7   0.0   7.0   1.328455e-01   33950.164799
# 10  7   0.0   7.0   2.613750e-01   60866.342063
# 11  7   0.0   7.0   3.776152e-01   78362.038772
# Fragment of a name -> minimally-configured-instance map used by the test
# harness. Each entry pins the constructor arguments needed for the
# component to run on the harness's tiny datasets.
'LightGbmRegressor': LightGbmRegressor(minimum_example_count_per_group=1,
                                       minimum_example_count_per_leaf=1),
'LightGbmRanker': LightGbmRanker(minimum_example_count_per_group=1,
                                 minimum_example_count_per_leaf=1),
'NGramFeaturizer': NGramFeaturizer(word_feature_extractor=n_gram()),
'SkipFilter': SkipFilter(count=5),
'TakeFilter': TakeFilter(count=100000),
# Time-series detectors/forecasters all operate on the single column 'F0'.
'IidSpikeDetector': IidSpikeDetector(columns=['F0']),
'IidChangePointDetector': IidChangePointDetector(columns=['F0']),
'SsaSpikeDetector': SsaSpikeDetector(columns=['F0'],
                                     seasonal_window_size=2),
'SsaChangePointDetector': SsaChangePointDetector(columns=['F0'],
                                                 seasonal_window_size=2),
'SsaForecaster': SsaForecaster(columns=['F0'],
                               window_size=2,
                               series_length=5,
                               train_size=5,
                               horizon=1),
# Scores against a frozen TF graph shipped with the examples; inputs 'a'
# and 'b' are mapped to output column 'c'.
'TensorFlowScorer': TensorFlowScorer(model_location=os.path.join(
                                         this, '..', 'nimbusml', 'examples',
                                         'frozen_saved_model.pb'),
                                     columns={'c': ['a', 'b']}),
# Fragment of a name -> transform/pipeline map driven over the Iris-style
# dataset (Sepal_Length, Sepal_Width, Petal_Length, ... columns).
'FromKey': Pipeline([
    ToKey(columns=['Sepal_Length']),
    FromKey(columns=['Sepal_Length'])
]),
# GlobalContrastRowScaler currently requires a vector input to work
'GlobalContrastRowScaler': Pipeline([
    ColumnConcatenator() << {
        'concated_columns': [
            'Petal_Length',
            'Sepal_Width',
            'Sepal_Length']},
    GlobalContrastRowScaler(columns={'normed_columns': 'concated_columns'})
]),
'Handler': Handler(replace_with='Mean',
                   columns={'NewVals': 'Petal_Length'}),
'IidSpikeDetector': IidSpikeDetector(columns=['Sepal_Length']),
'IidChangePointDetector': IidChangePointDetector(columns=['Sepal_Length']),
'Indicator': Indicator(columns={'Has_Nan': 'Petal_Length'}),
'KMeansPlusPlus': KMeansPlusPlus(n_clusters=3,
                                 feature=['Sepal_Width', 'Sepal_Length']),
'LightGbmRanker': LightGbmRanker(feature=['Class', 'dep_day', 'duration'],
                                 label='rank',
                                 group_id='group'),
'Loader': Loader(columns={'ImgPath': 'Path'}),
# LpScaler, like GlobalContrastRowScaler above, needs a concatenated
# vector column as input.
'LpScaler': Pipeline([
    ColumnConcatenator() << {
        'concated_columns': [
            'Petal_Length',
            'Sepal_Width',
            'Sepal_Length']},
    LpScaler(columns={'normed_columns': 'concated_columns'})
]),
'MutualInformationSelector': Pipeline([
# data input (as a FileDataStream)
path = get_dataset('timeseries').as_filepath()
data = FileDataStream.read_csv(path)
print(data.head())
# Expected head:
#      t1    t2      t3
# 0  0.01  0.01  0.0100
# 1  0.02  0.02  0.0200
# 2  0.03  0.03  0.0200
# 3  0.03  0.03  0.0250
# 4  0.03  0.03  0.0005

# Training pipeline: watch column 't2' and emit the detector's output
# columns under the 't2_cp' prefix.
pipeline = Pipeline([
    IidChangePointDetector(columns={'t2_cp': 't2'},
                           change_history_length=4)
])
result = pipeline.fit_transform(data)
print(result)
# Expected output:
#      t1     t2       t3  t2_cp.Alert  t2_cp.Raw Score  t2_cp.P-Value Score  t2_cp.Martingale Score
# 0  0.01   0.01   0.0100          0.0             0.01         5.000000e-01            1.212573e-03
# 1  0.02   0.02   0.0200          0.0             0.02         4.960106e-01            1.221347e-03
# 2  0.03   0.03   0.0200          0.0             0.03         1.139087e-02            3.672914e-02
# 3  0.03   0.03   0.0250          0.0             0.03         2.058296e-01            8.164447e-02
# 4  0.03   0.03   0.0005          0.0             0.03         2.804577e-01            1.373786e-01
# 5  0.03   0.05   0.0100          1.0             0.05         1.448886e-06            1.315014e+04
# 6  0.05   0.07   0.0500          0.0             0.07         2.616611e-03            4.941587e+04
# 7  0.07   0.09   0.0900          0.0             0.09         3.053187e-02            2.752614e+05
# 8  0.09  99.00  99.0000          0.0            99.00         1.000000e-08            1.389396e+12
# 9  1.10   0.10   0.1000          1.0             0.10         3.778296e-01            1.854344e+07