def rise_benchmarking():
    """Run the RISE benchmark experiments over ``benchmark_datasets``.

    For every dataset two experiments are submitted via
    ``exp.run_experiment`` (both with ``overwrite=True`` and
    ``train_file=False``):

    * ``"PythonRISE"`` -- ``fb.RandomIntervalSpectralForest`` with 100
      estimators.
    * ``"PythonRISEComposite"`` -- a ``TimeSeriesForestClassifier`` with 100
      estimators whose base estimator is a pipeline of: one random interval
      (``min_length=5``), a union of row-wise ``acf_coefs`` and
      ``powerspectrum`` features, a ``Tabularizer`` and a
      ``DecisionTreeClassifier``.

    Reads the module-level names ``benchmark_datasets``, ``data_dir`` and
    ``results_dir``; the dataset index and name are printed before each pair
    of runs.
    """
    # enumerate yields the index and the dataset name together, replacing the
    # manual ``range(len(...))`` indexing.
    for i, dataset in enumerate(benchmark_datasets):
        print(str(i) + " problem = " + dataset)

        # Direct implementation.
        rise = fb.RandomIntervalSpectralForest(n_estimators=100)
        exp.run_experiment(
            overwrite=True,
            problem_path=data_dir,
            results_path=results_dir,
            cls_name="PythonRISE",
            classifier=rise,
            dataset=dataset,
            train_file=False,
        )

        # Composite version assembled from pipeline building blocks.
        steps = [
            ("segment", RandomIntervalSegmenter(n_intervals=1, min_length=5)),
            (
                "transform",
                FeatureUnion(
                    [
                        (
                            "acf",
                            RowTransformer(
                                FunctionTransformer(func=acf_coefs, validate=False)
                            ),
                        ),
                        (
                            "ps",
                            RowTransformer(
                                FunctionTransformer(
                                    func=powerspectrum, validate=False
                                )
                            ),
                        ),
                    ]
                ),
            ),
            ("tabularise", Tabularizer()),
            ("clf", DecisionTreeClassifier()),
        ]
        base_estimator = Pipeline(steps)
        rise = TimeSeriesForestClassifier(
            estimator=base_estimator, n_estimators=100
        )
        exp.run_experiment(
            overwrite=True,
            problem_path=data_dir,
            results_path=results_dir,
            cls_name="PythonRISEComposite",
            classifier=rise,
            dataset=dataset,
            train_file=False,
        )
def test_different_pipelines():
    """Check a segment + feature-union pipeline against the feature extractor.

    A ``RandomIntervalSegmenter`` (``n_intervals='sqrt'``) followed by a
    ``FeatureUnion`` of row-wise mean, std and slope transformers is compared
    with a ``RandomIntervalFeatureExtractor`` configured with the same three
    features and the same seed. Both the transformed output and the fitted
    ``intervals_`` must be equal.
    """
    seed = 1233
    X_train, _ = make_classification_problem()

    segmenter = RandomIntervalSegmenter(n_intervals="sqrt", random_state=seed)
    # One row-wise transformer per summary function, listed in the same order
    # as the ``features`` passed to the extractor below.
    named_funcs = [
        ("mean", np.mean),
        ("std", np.std),
        ("slope", time_series_slope),
    ]
    union = FeatureUnion(
        [
            (label, RowTransformer(FunctionTransformer(func=fn, validate=False)))
            for label, fn in named_funcs
        ]
    )
    pipe = Pipeline([("segment", segmenter), ("transform", union)])
    from_pipeline = pipe.fit_transform(X_train)

    extractor = RandomIntervalFeatureExtractor(
        n_intervals="sqrt",
        features=[np.mean, np.std, time_series_slope],
        random_state=seed,
    )
    from_extractor = extractor.fit_transform(X_train)

    np.testing.assert_array_equal(from_pipeline, from_extractor)
    # The first pipeline step is the segmenter.
    _, fitted_segmenter = pipe.steps[0]
    np.testing.assert_array_equal(
        fitted_segmenter.intervals_, extractor.intervals_
    )
def tsf_benchmarking():
    """Run the time series forest benchmark experiments.

    For every entry of ``benchmark_datasets`` two experiments are submitted
    via ``exp.run_experiment`` (both with ``overwrite=False`` and
    ``train_file=False``):

    * ``"PythonTSF"`` -- ``ib.TimeSeriesForest`` with 100 estimators.
    * ``"PythonTSFComposite"`` -- a ``TimeSeriesForestClassifier`` with 100
      estimators whose base estimator is a pipeline of: random interval
      segmentation (``n_intervals="sqrt"``), a union of row-wise mean, std
      and slope features built with ``make_row_transformer``, and a
      ``DecisionTreeClassifier``.

    Reads the module-level names ``benchmark_datasets``, ``data_dir`` and
    ``results_dir``; the dataset index and name are printed before each pair
    of runs.
    """
    # enumerate yields the index and the dataset name together, replacing the
    # manual ``range(0, len(...))`` indexing.
    for i, dataset in enumerate(benchmark_datasets):
        print(str(i) + " problem = " + dataset)

        # Direct implementation.
        tsf = ib.TimeSeriesForest(n_estimators=100)
        exp.run_experiment(
            overwrite=False,
            problem_path=data_dir,
            results_path=results_dir,
            cls_name="PythonTSF",
            classifier=tsf,
            dataset=dataset,
            train_file=False,
        )

        # Composite version assembled from pipeline building blocks.
        steps = [
            ("segment", RandomIntervalSegmenter(n_intervals="sqrt")),
            (
                "transform",
                FeatureUnion(
                    [
                        (
                            "mean",
                            make_row_transformer(
                                FunctionTransformer(func=np.mean, validate=False)
                            ),
                        ),
                        (
                            "std",
                            make_row_transformer(
                                FunctionTransformer(func=np.std, validate=False)
                            ),
                        ),
                        (
                            "slope",
                            make_row_transformer(
                                FunctionTransformer(
                                    func=time_series_slope, validate=False
                                )
                            ),
                        ),
                    ]
                ),
            ),
            ("clf", DecisionTreeClassifier()),
        ]
        base_estimator = Pipeline(steps)
        tsf = TimeSeriesForestClassifier(estimator=base_estimator, n_estimators=100)
        exp.run_experiment(
            overwrite=False,
            problem_path=data_dir,
            results_path=results_dir,
            cls_name="PythonTSFComposite",
            classifier=tsf,
            dataset=dataset,
            train_file=False,
        )
def test_different_pipelines():
    """Check a segment + feature-union pipeline against the feature extractor.

    A single-interval ``RandomIntervalSegmenter`` followed by a
    ``FeatureUnion`` of ``SeriesToPrimitivesRowTransformer``-wrapped mean, std
    and slope functions is compared with a ``RandomIntervalFeatureExtractor``
    configured with the same three features and the same seed. Both the
    transformed output and the fitted ``intervals_`` must be equal.
    """
    seed = 1233
    X_train, _ = make_classification_problem()

    segmenter = RandomIntervalSegmenter(n_intervals=1, random_state=seed)
    # One row-wise transformer per summary function, listed in the same order
    # as the ``features`` passed to the extractor below.
    named_funcs = [
        ("mean", np.mean),
        ("std", np.std),
        ("slope", time_series_slope),
    ]
    union = FeatureUnion(
        [
            (
                label,
                SeriesToPrimitivesRowTransformer(
                    FunctionTransformer(func=fn, validate=False),
                    check_transformer=False,
                ),
            )
            for label, fn in named_funcs
        ]
    )
    pipe = Pipeline([("segment", segmenter), ("transform", union)])
    from_pipeline = pipe.fit_transform(X_train)

    extractor = RandomIntervalFeatureExtractor(
        n_intervals=1,
        features=[np.mean, np.std, time_series_slope],
        random_state=seed,
    )
    from_extractor = extractor.fit_transform(X_train)

    np.testing.assert_array_equal(from_pipeline, from_extractor)
    # The first pipeline step is the segmenter.
    _, fitted_segmenter = pipe.steps[0]
    np.testing.assert_array_equal(
        fitted_segmenter.intervals_, extractor.intervals_
    )
def test_equivalent_model_specifications(n_intervals, n_estimators):
    """Test composable TSF vs an equivalent model.

    Two single-tree pipelines are fitted on the unit-test training split and
    their predictions on the test split are asserted to be equal:

    1. ``RandomIntervalSegmenter`` -> ``FeatureUnion`` of the module-level
       ``mean_transformer`` and ``std_transformer`` ->
       ``DecisionTreeClassifier``.
    2. ``RandomIntervalFeatureExtractor`` with ``np.mean`` and ``np.std`` ->
       ``DecisionTreeClassifier``.

    All randomised components share the same seed.

    Parameters
    ----------
    n_intervals
        Forwarded to both the segmenter and the feature extractor.
    n_estimators
        Not used in this body; kept so the signature stays unchanged for
        whatever supplies the arguments (presumably pytest parametrisation
        -- confirm against the decorators).
    """
    random_state = 1234
    X_train, y_train = load_unit_test(split="train")
    X_test, y_test = load_unit_test(split="test")

    # Due to tie-breaking/floating point rounding in the final decision tree
    # classifier, the results depend on the exact column order of the input
    # data.

    # Compare pipeline predictions outside of ensemble.
    steps = [
        (
            "segment",
            RandomIntervalSegmenter(
                n_intervals=n_intervals, random_state=random_state
            ),
        ),
        (
            "transform",
            FeatureUnion([("mean", mean_transformer), ("std", std_transformer)]),
        ),
        ("clf", DecisionTreeClassifier(random_state=random_state)),
    ]
    clf1 = Pipeline(steps)
    clf1.fit(X_train, y_train)
    a = clf1.predict(X_test)

    steps = [
        (
            "transform",
            RandomIntervalFeatureExtractor(
                n_intervals=n_intervals,
                features=[np.mean, np.std],
                random_state=random_state,
            ),
        ),
        ("clf", DecisionTreeClassifier(random_state=random_state)),
    ]
    clf2 = Pipeline(steps)
    clf2.fit(X_train, y_train)
    b = clf2.predict(X_test)

    # A bare ``np.array_equal(a, b)`` only returns a bool that was being
    # discarded, so the test could never fail; assert the equality instead.
    np.testing.assert_array_equal(a, b)
def tsf_benchmarking():
    """Run the time series forest benchmark experiments.

    For every entry of ``benchmark_datasets`` two experiments are submitted
    via ``exp.run_experiment`` (both with ``overwrite=False`` and
    ``train_file=False``):

    * ``"PythonTSF"`` -- ``ib.TimeSeriesForest`` with 100 estimators.
    * ``"PythonTSFComposite"`` -- a ``TimeSeriesForestClassifier`` with 100
      estimators whose base estimator is a pipeline of: random interval
      segmentation (``n_intervals='sqrt'``), a union of row-wise mean, std
      and slope features built with ``RowTransformer``, and a
      ``DecisionTreeClassifier``.

    Reads the module-level names ``benchmark_datasets``, ``data_dir`` and
    ``results_dir``; the dataset index and name are printed before each pair
    of runs.
    """
    # enumerate yields the index and the dataset name together, replacing the
    # manual ``range(len(...))`` indexing.
    for i, dataset in enumerate(benchmark_datasets):
        print(str(i) + " problem = " + dataset)

        # Direct implementation.
        tsf = ib.TimeSeriesForest(n_estimators=100)
        exp.run_experiment(
            overwrite=False,
            problem_path=data_dir,
            results_path=results_dir,
            cls_name="PythonTSF",
            classifier=tsf,
            dataset=dataset,
            train_file=False,
        )

        # Composite version assembled from pipeline building blocks.
        steps = [
            ("segment", RandomIntervalSegmenter(n_intervals="sqrt")),
            (
                "transform",
                FeatureUnion(
                    [
                        (
                            "mean",
                            RowTransformer(
                                FunctionTransformer(func=np.mean, validate=False)
                            ),
                        ),
                        (
                            "std",
                            RowTransformer(
                                FunctionTransformer(func=np.std, validate=False)
                            ),
                        ),
                        (
                            "slope",
                            RowTransformer(
                                FunctionTransformer(
                                    func=time_series_slope, validate=False
                                )
                            ),
                        ),
                    ]
                ),
            ),
            ("clf", DecisionTreeClassifier()),
        ]
        base_estimator = Pipeline(steps)
        tsf = TimeSeriesForestClassifier(estimator=base_estimator, n_estimators=100)
        exp.run_experiment(
            overwrite=False,
            problem_path=data_dir,
            results_path=results_dir,
            cls_name="PythonTSFComposite",
            classifier=tsf,
            dataset=dataset,
            train_file=False,
        )
def main():
    """Walk through several classifiers on the Italy power demand dataset.

    Steps, each printing its result to stdout:

    1. Load the train and test splits and print shapes, a preview, the
       frame summary and the label counts.
    2. Fit a ``RandomForestClassifier`` on tabularized data, first by calling
       ``tabularize`` by hand and then through ``make_pipeline`` with a
       ``Tabularizer`` step.
    3. Fit a ``RandomForestClassifier`` behind a ``TSFreshFeatureExtractor``.
    4. Fit a ``DecisionTreeClassifier`` behind random interval segmentation
       and a union of row-wise mean/std/slope features.
    5. Fit a ``TimeSeriesForestClassifier``.
    """
    # 1. Loading and splitting the dataset
    X_train, y_train = load_italy_power_demand(split='train', return_X_y=True)
    X_test, y_test = load_italy_power_demand(split='test', return_X_y=True)
    print('Shape of X, y train and test dataset', X_train.shape, y_train.shape,
          X_test.shape, y_test.shape, '\n')
    print('X_train:', X_train.head(), '\n')
    # ``info()`` writes its summary to stdout itself and returns None (pandas
    # DataFrame behaviour), so it must not be passed to ``print``: doing so
    # emitted the summary *before* the header and then printed "None".
    print('\nX_train info')
    X_train.info()
    print('\n')
    labels, counts = np.unique(y_train, return_counts=True)
    print(
        '\nThere are', labels,
        'labels in this dataset, one corresponds to winter and the other '
        'to summer. The counter of each one is',
        counts, '\n')

    # 2. Creating a Model, Fit and Predict Sklearn Classifier
    # Sktime: tabularizing the data
    X_train_tab = tabularize(X_train)
    X_test_tab = tabularize(X_test)
    print('\n X_train tabularized\n', X_train_tab.head(), '\n')

    # 2.1 Sklearn RandomForest classifier
    classifier = RandomForestClassifier(n_estimators=100)
    classifier.fit(X_train_tab, y_train)
    y_pred = classifier.predict(X_test_tab)
    print('Accuracy sklearn RandomForestClassifier',
          round(accuracy_score(y_test, y_pred), 4), '\n')

    # 2.2 Same sklearn model as above but using make_pipeline with the
    # sktime Tabularizer
    classifier = make_pipeline(Tabularizer(),
                               RandomForestClassifier(n_estimators=100),
                               verbose=True)
    classifier.fit(X_train, y_train)
    print(
        'Accuracy sklearn RandomForestClassifier using sklearn make_pipeline '
        'in which the first step is to sktime Tabularize()',
        round(classifier.score(X_test, y_test), 4), '\n')

    # 3. Sklearn using make_pipeline with the sktime TSFreshFeatureExtractor
    classifier = make_pipeline(TSFreshFeatureExtractor(show_warnings=False),
                               RandomForestClassifier(n_estimators=100))
    classifier.fit(X_train, y_train)
    print(
        'Accuracy sklearn RandomForestClassifier using sklearn make_pipeline '
        'in which the first step is to sktime TSFreshFeatureExtractor that '
        'automatically extracts and filters several key statistical features '
        'from the nested X_train time series',
        round(classifier.score(X_test, y_test), 4), '\n')

    # 4. Using time series algorithms and classifiers from sklearn/sktime
    steps = [
        ('segment', RandomIntervalSegmenter(n_intervals='sqrt')),  # sktime
        (
            'transform',
            FeatureUnion([  # sklearn
                ('mean',
                 RowTransformer(
                     FunctionTransformer(func=np.mean,
                                         validate=False))),  # sktime
                ('std',
                 RowTransformer(
                     FunctionTransformer(func=np.std,
                                         validate=False))),  # sktime
                ('slope',
                 RowTransformer(
                     FunctionTransformer(func=time_series_slope,
                                         validate=False)))  # sktime
            ])),
        ('clf', DecisionTreeClassifier())  # from sklearn
    ]
    time_series_tree = Pipeline(steps, verbose=True)  # sklearn
    time_series_tree.fit(X_train, y_train)
    print(
        'Accuracy sklearn DecisionTreeClassifier using sklearn Pipeline() as '
        'well as segmentation and transformation techniques from sktime and '
        'sklearn',
        round(time_series_tree.score(X_test, y_test), 4))

    # 5. Using the sktime time series forest
    tsf = TimeSeriesForestClassifier(n_estimators=100, verbose=True)
    tsf.fit(X_train, y_train)
    print('Accuracy sktime TimeSeriesForestClassifier',
          round(tsf.score(X_test, y_test), 4))