def test_pipeline(network=None):
    """Smoke-test a classifier inside an sktime ``Pipeline``.

    Loads the ItalyPowerDemand train/test splits, wraps ``network`` in a
    single-step pipeline, fits it on the first 10 training cases and prints
    the score obtained on the first 10 test cases.

    Parameters
    ----------
    network : estimator, optional
        Classifier placed in the pipeline. When omitted, a fresh
        ``CNNClassifier()`` is created on every call. (Building it in the
        signature would construct it once at definition time, i.e. on
        import, and share the same fitted instance between calls.)
    """
    print("Start test_pipeline()")

    from sktime.pipeline import Pipeline

    if network is None:
        network = CNNClassifier()

    # just a simple (useless) pipeline for the purposes of testing
    # that the keras network is compatible with that system
    steps = [('clf', network)]
    clf = Pipeline(steps)

    X_train, y_train = load_italy_power_demand(split='TRAIN',
                                               return_X_y=True)
    X_test, y_test = load_italy_power_demand(split='TEST', return_X_y=True)

    # Only a small slice is used; this is a compatibility check, not an
    # accuracy benchmark.
    clf.fit(X_train[:10], y_train[:10])
    print(clf.score(X_test[:10], y_test[:10]))
    print("End test_pipeline()")
def test_different_pipelines():
    """Compare a modular pipeline with RandomIntervalFeatureExtractor.

    A segmenter followed by a FeatureUnion of row-wise mean, std and slope
    is fitted on the GunPoint data with a fixed random state; its output
    and its intervals must equal those of a RandomIntervalFeatureExtractor
    configured with the same features and random state.
    """
    seed = 1233
    X_train, y_train = load_gunpoint(return_X_y=True)

    def rowwise(func):
        # Apply ``func`` row by row, without sklearn input validation.
        return RowwiseTransformer(
            FunctionTransformer(func=func, validate=False))

    union = FeatureUnion([
        ('mean', rowwise(np.mean)),
        ('std', rowwise(np.std)),
        ('slope', rowwise(time_series_slope)),
    ])
    pipe = Pipeline(
        [
            ('segment', RandomIntervalSegmenter(n_intervals='sqrt')),
            ('transform', union),
        ],
        random_state=seed,
    )
    from_pipeline = pipe.fit_transform(X_train)

    extractor = RandomIntervalFeatureExtractor(
        n_intervals='sqrt',
        features=[np.mean, np.std, time_series_slope],
        random_state=seed,
    )
    from_extractor = extractor.fit_transform(X_train)

    np.testing.assert_array_equal(from_pipeline, from_extractor)
    # The segmenter is the first pipeline step.
    segmenter = pipe.steps[0][1]
    np.testing.assert_array_equal(segmenter.intervals_,
                                  extractor.intervals_)
def test_pipeline(network=None):
    """Smoke-test a classifier inside an sktime ``Pipeline``.

    Loads the GunPoint train/test splits, wraps ``network`` in a
    single-step pipeline, fits it on the first 10 training cases and prints
    the score obtained on the first 10 test cases.

    Parameters
    ----------
    network : estimator, optional
        Classifier placed in the pipeline. When omitted, a fresh
        ``catch22ForestClassifier()`` is created on every call. (Building
        it in the signature would construct it once at definition time,
        i.e. on import, and share the same fitted instance between calls.)
    """
    print("Start test_pipeline()")

    from sktime.pipeline import Pipeline

    if network is None:
        network = catch22ForestClassifier()

    # just a simple (useless) pipeline
    steps = [('clf', network)]
    clf = Pipeline(steps)

    X_train, y_train = load_gunpoint(split='TRAIN', return_X_y=True)
    X_test, y_test = load_gunpoint(split='TEST', return_X_y=True)

    # Only a small slice is used; this is a compatibility check, not an
    # accuracy benchmark.
    clf.fit(X_train[:10], y_train[:10])
    print(clf.score(X_test[:10], y_test[:10]))
    print("End test_pipeline()")
def test_RowwiseTransformer_pipeline():
    """Check RowwiseTransformer against a hand-written sklearn equivalent.

    Two otherwise identical ColumnTransformer + RandomForest pipelines are
    fitted on the module-level ``X_train``/``y_train``: one computes the
    row mean of column 'ts' with a plain function, the other with
    ``RowwiseTransformer(np.mean)``. Their predictions on ``X_test`` must
    be equal.
    """
    def mean_of_rows(X):
        return pd.DataFrame([np.mean(row) for row in X])

    def first_of_rows(X):
        return pd.DataFrame([row[0] for row in X])

    def fit_and_predict(mean_transformer):
        # Everything except the 'mean' transformer is the same in both
        # variants, including the forest's random state.
        column_transformer = ColumnTransformer([
            ('mean', mean_transformer, 'ts'),
            ('first',
             FunctionTransformer(func=first_of_rows, validate=False),
             'ts_copy'),
        ])
        estimator = RandomForestClassifier(n_estimators=2, random_state=1)
        model = Pipeline(steps=[
            ('feature_extract', column_transformer),
            ('rfestimator', estimator),
        ])
        model.fit(X_train, y_train)
        return model.predict(X_test)

    # using pure sklearn
    expected = fit_and_predict(
        FunctionTransformer(func=mean_of_rows, validate=False))

    # using sktime with sklearn pipeline
    got = fit_and_predict(
        RowwiseTransformer(FunctionTransformer(func=np.mean,
                                               validate=False)))

    np.testing.assert_array_equal(expected, got)
def test_pipeline_predictions(n_intervals, n_estimators):
    """Fit a modular and a compact interval pipeline and compare outputs.

    Both pipelines end in a DecisionTreeClassifier and share the same
    random state; they are fitted on the module-level ``X_train`` /
    ``y_train`` and predict on ``X_test``.

    Parameters
    ----------
    n_intervals
        Passed to the segmenter / feature extractor.
    n_estimators
        Not used in the body of this test.
    """
    seed = 1234

    def rowwise(func):
        return RowwiseTransformer(
            FunctionTransformer(func=func, validate=False))

    # Due to tie-breaking/floating point rounding in the final decision
    # tree classifier, the results depend on the exact column order of the
    # input data.

    # Compare pipeline predictions outside of ensemble.
    modular = Pipeline(
        [
            ('segment', RandomIntervalSegmenter(n_intervals=n_intervals)),
            ('transform', FeatureUnion([
                ('mean', rowwise(np.mean)),
                ('std', rowwise(np.std)),
                ('slope', rowwise(time_series_slope)),
            ])),
            ('clf', DecisionTreeClassifier()),
        ],
        random_state=seed,
    )
    modular.fit(X_train, y_train)
    modular_pred = modular.predict(X_test)

    compact = Pipeline(
        [
            ('transform', RandomIntervalFeatureExtractor(
                n_intervals=n_intervals,
                features=[np.mean, np.std, time_series_slope])),
            ('clf', DecisionTreeClassifier()),
        ],
        random_state=seed,
    )
    compact.fit(X_train, y_train)
    compact_pred = compact.predict(X_test)

    # NOTE(review): the boolean returned here is discarded, so this test
    # passes whether or not the predictions match. Confirm whether an
    # assertion is intended given the column-order caveat above.
    np.array_equal(modular_pred, compact_pred)
def _test_pipeline_predictions(n_intervals=None, random_state=None):
    """Fit a modular and a compact interval pipeline and compare outputs.

    The modular pipeline segments the series and extracts row-wise mean
    and std through a FeatureUnion; the compact one uses
    RandomIntervalFeatureExtractor with the same two features. Both end in
    a DecisionTreeClassifier, are fitted on the module-level ``X_train`` /
    ``y_train`` and predict on ``X_test``.

    Parameters
    ----------
    n_intervals
        Passed to the segmenter / feature extractor.
    random_state
        Passed to both pipelines.
    """
    def rowwise(func):
        return RowwiseTransformer(
            FunctionTransformer(func=func, validate=False))

    modular = Pipeline(
        [
            ('segment', RandomIntervalSegmenter(n_intervals=n_intervals,
                                                check_input=False)),
            ('transform', FeatureUnion([
                ('mean', rowwise(np.mean)),
                ('std', rowwise(np.std)),
            ])),
            ('clf', DecisionTreeClassifier()),
        ],
        random_state=random_state,
    )
    modular.fit(X_train, y_train)
    modular_pred = modular.predict(X_test)

    compact = Pipeline(
        [
            ('transform', RandomIntervalFeatureExtractor(
                n_intervals=n_intervals, features=[np.mean, np.std])),
            ('clf', DecisionTreeClassifier()),
        ],
        random_state=random_state,
    )
    compact.fit(X_train, y_train)
    compact_pred = compact.predict(X_test)

    # NOTE(review): the boolean returned here is discarded, so nothing is
    # actually asserted about the two prediction arrays.
    np.array_equal(modular_pred, compact_pred)
def test_ColumnTransformer_pipeline():
    """Run a ColumnTransformer -> Tabulariser -> RandomForest pipeline.

    Columns 'ts' and 'ts_copy' are passed through identity transformers
    (series to series). The pipeline is fitted on the module-level
    ``X_train``/``y_train``; predictions on ``X_test`` must have one entry
    per test case and cover the same set of labels as ``y_test``.
    """
    def identity(X):
        return X

    # using Identity function transformers (transform series to series)
    column_transformer = ColumnTransformer([
        ('ts', FunctionTransformer(func=identity, validate=False), 'ts'),
        ('ts_copy',
         FunctionTransformer(func=identity, validate=False),
         'ts_copy'),
    ])
    model = Pipeline(steps=[
        ('feature_extract', column_transformer),
        ('tabularise', Tabulariser()),
        ('rfestimator', RandomForestClassifier(n_estimators=2)),
    ])
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    assert y_pred.shape[0] == y_test.shape[0]
    predicted_labels = np.unique(y_pred)
    true_labels = np.unique(y_test)
    np.testing.assert_array_equal(predicted_labels, true_labels)
def test_TimeSeriesForest_predictions(n_estimators, n_intervals):
    """Check that an explicit base estimator reproduces the default forest.

    A TimeSeriesForestClassifier built around an explicit
    RandomIntervalFeatureExtractor + DecisionTreeClassifier pipeline must
    give the same class probabilities as the default classifier with
    ``n_intervals`` set through ``set_params``, for the same random state.
    Data come from the module-level ``X_train``/``y_train``/``X_test``.
    """
    seed = 1234

    # fully modular implementation using pipeline with FeatureUnion
    # steps = [
    #     ('segment', RandomIntervalSegmenter(n_intervals=n_intervals)),
    #     ('transform', FeatureUnion([
    #         ('mean', RowwiseTransformer(FunctionTransformer(func=np.mean, validate=False))),
    #         ('std', RowwiseTransformer(FunctionTransformer(func=np.std, validate=False))),
    #         ('slope', RowwiseTransformer(FunctionTransformer(func=time_series_slope, validate=False)))
    #     ])),
    #     ('clf', DecisionTreeClassifier())
    # ]
    # base_estimator = Pipeline(steps)

    extractor = RandomIntervalFeatureExtractor(
        n_intervals=n_intervals,
        features=[np.mean, np.std, time_series_slope],
    )
    base_estimator = Pipeline([
        ('transform', extractor),
        ('clf', DecisionTreeClassifier()),
    ])
    explicit = TimeSeriesForestClassifier(base_estimator=base_estimator,
                                          random_state=seed,
                                          n_estimators=n_estimators)
    explicit.fit(X_train, y_train)
    explicit_proba = explicit.predict_proba(X_test)

    # default, semi-modular implementation using
    # RandomIntervalFeatureExtractor internally
    default = TimeSeriesForestClassifier(random_state=seed,
                                         n_estimators=n_estimators)
    default.set_params(base_estimator__transform__n_intervals=n_intervals)
    default.fit(X_train, y_train)
    default_proba = default.predict_proba(X_test)

    np.testing.assert_array_equal(explicit_proba, default_proba)
def test_heterogenous_pipeline_column_ensmbler():
    """Smoke-test ColumnEnsembleClassifier with mixed estimators.

    Uses the BasicMotions train/test splits. One column is handled by a
    segment + FeatureUnion + decision-tree pipeline, the others by KNN and
    BOSS classifiers. The ensemble is fitted and scored; the score itself
    is not checked.
    """
    X_train, y_train = load_basic_motions("TRAIN", return_X_y=True)
    X_test, y_test = load_basic_motions("TEST", return_X_y=True)

    def rowwise(func):
        return RowwiseTransformer(
            FunctionTransformer(func=func, validate=False))

    n_intervals = 3
    interval_tree = Pipeline(
        [
            ('segment', RandomIntervalSegmenter(n_intervals=n_intervals)),
            ('transform', FeatureUnion([
                ('mean', rowwise(np.mean)),
                ('std', rowwise(np.std)),
            ])),
            ('clf', DecisionTreeClassifier()),
        ],
        random_state=1,
    )

    # dims 0-4, each assigned its own classifier.
    estimators = [
        ("RandomIntervalTree", interval_tree, [0]),
        ("KNN4", KNNTSC(n_neighbors=1), [4]),
        ("BOSSEnsemble1 ", BOSSEnsemble(ensemble_size=3), [1]),
        ("KNN2", KNNTSC(n_neighbors=1), [2]),
        ("BOSSEnsemble3", BOSSEnsemble(ensemble_size=3), [3]),
    ]
    ct = ColumnEnsembleClassifier(estimators)
    ct.fit(X_train, y_train)
    ct.score(X_test, y_test)
def bop_pipeline(X, y):
    """Grid-search a SAX + 1-nearest-neighbour pipeline and return it fitted.

    The series length is read from the first cell of ``X``. Candidate
    window sizes start at 10 and go up to the series length, in steps
    chosen so that roughly ``series_length / 4`` sizes are tried (step of
    at least 1). Word length and alphabet size are searched over fixed
    lists, with 5-fold cross-validation.

    Parameters
    ----------
    X
        Training data; ``X.iloc[0, 0]`` must expose ``shape``.
    y
        Training labels.

    Returns
    -------
    GridSearchCV
        The search object after ``fit(X, y)``.
    """
    pipeline = Pipeline([
        ('transform', SAX(remove_repeat_words=True)),
        ('clf', KNeighborsTimeSeriesClassifier(n_neighbors=1,
                                               metric=euclidean_distance)),
    ])

    series_length = X.iloc[0, 0].shape[0]
    max_window_searches = series_length / 4
    # Never step by less than one position.
    win_inc = max(1, int((series_length - 10) / max_window_searches))
    window_sizes = list(range(10, series_length + 1, win_inc))

    cv_params = {
        'transform__word_length': [8, 10, 12, 14, 16],
        'transform__alphabet_size': [2, 3, 4],
        'transform__window_size': window_sizes,
    }
    model = GridSearchCV(pipeline, cv_params, cv=5)
    model.fit(X, y)
    return model
def rise_benchmarking():
    """Run the RISE classifier and a composite equivalent on every dataset.

    For each entry of ``benchmark_datasets`` two experiments are run with
    ``exp.run_experiment`` (overwriting existing results): the built-in
    RandomIntervalSpectralForest, and a TimeSeriesForestClassifier whose
    base estimator extracts ACF and power-spectrum features from one
    random interval.
    """
    def rowwise(func):
        return RowwiseTransformer(
            FunctionTransformer(func=func, validate=False))

    for idx, dataset in enumerate(benchmark_datasets):
        print(str(idx) + " problem = " + dataset)

        rise = fb.RandomIntervalSpectralForest(n_trees=100)
        exp.run_experiment(
            overwrite=True,
            datasets_dir_path=data_dir,
            results_path=results_dir,
            cls_name="PythonRISE",
            classifier=rise,
            dataset=dataset,
            train_file=False,
        )

        base_estimator = Pipeline([
            ('segment', RandomIntervalSegmenter(n_intervals=1,
                                                min_length=5)),
            ('transform', FeatureUnion([
                ('acf', rowwise(acf_coefs)),
                ('ps', rowwise(powerspectrum)),
            ])),
            ('tabularise', Tabulariser()),
            ('clf', DecisionTreeClassifier()),
        ])
        rise = TimeSeriesForestClassifier(base_estimator=base_estimator,
                                          n_estimators=100)
        exp.run_experiment(
            overwrite=True,
            datasets_dir_path=data_dir,
            results_path=results_dir,
            cls_name="PythonRISEComposite",
            classifier=rise,
            dataset=dataset,
            train_file=False,
        )
def test_ReducedForecastingRegressor_with_TransformedTargetRegressor(
        window_length, dynamic, fh):
    """Test a reduced-regression forecaster wrapped with target transforms.

    The shampoo-sales series is split so that the last ``len(fh)`` points
    form the test part. A ReducedRegressionForecaster (built on the
    module-level ``tsr`` regressor) is combined with a deseasonalise +
    detrend pipeline through TransformedTargetForecaster, and the number
    of predictions must equal the length of the test part.
    """
    # forecasting horizon
    n_fh = len(fh)

    # load data and split into train/test series
    y = load_shampoo_sales()
    series = y.iloc[0]
    train = pd.Series([series.iloc[:-n_fh]])
    test = pd.Series([series.iloc[-n_fh:]])

    forecaster = ReducedRegressionForecaster(tsr,
                                             window_length=window_length,
                                             dynamic=dynamic)
    transformer = Pipeline([
        ('deseasonalise', Deseasonaliser(sp=12)),
        ('detrend', Detrender(order=1)),
    ])
    m = TransformedTargetForecaster(forecaster, transformer)

    # When dynamic is False, fitting without a forecasting horizon is
    # expected to raise a ValueError.
    if not dynamic:
        with pytest.raises(ValueError):
            m.fit(train)

    m.fit(train, fh=fh)
    pred = m.predict(fh=fh)
    assert len(pred) == len(test.iloc[0])
def test_RowwiseTransformer_pipeline():
    """Check RowwiseTransformer against a hand-written sklearn equivalent.

    On the BasicMotions data, two otherwise identical ColumnTransformer +
    RandomForest pipelines are fitted: one computes the per-row mean of
    'dim_0' with a plain function, the other with
    ``RowwiseTransformer(np.mean)``. Their test predictions must be equal.
    """
    X_train, y_train = load_basic_motions("TRAIN", return_X_y=True)
    X_test, y_test = load_basic_motions("TEST", return_X_y=True)

    def as_frame(X):
        # A single selected column may arrive as a Series.
        return pd.DataFrame(X) if isinstance(X, pd.Series) else X

    def rowwise_mean(X):
        X = as_frame(X)
        columns = [pd.Series(col.apply(np.mean)) for _, col in X.items()]
        return pd.concat(columns, axis=1)

    def rowwise_first(X):
        X = as_frame(X)
        columns = [pd.Series(tabularise(col).iloc[:, 0])
                   for _, col in X.items()]
        return pd.concat(columns, axis=1)

    def fit_and_predict(mean_transformer):
        # specify column as a list, otherwise pandas Series are selected
        # and passed on to the transformers
        transformer = ColumnTransformer([
            ('mean', mean_transformer, ['dim_0']),
            ('first',
             FunctionTransformer(func=rowwise_first, validate=False),
             ['dim_1']),
        ])
        estimator = RandomForestClassifier(n_estimators=2, random_state=1)
        model = Pipeline(steps=[
            ('extract', transformer),
            ('classify', estimator),
        ])
        model.fit(X_train, y_train)
        return model.predict(X_test)

    # using pure sklearn
    expected = fit_and_predict(
        FunctionTransformer(func=rowwise_mean, validate=False))

    # using sktime with sklearn pipeline
    actual = fit_and_predict(
        RowwiseTransformer(FunctionTransformer(func=np.mean,
                                               validate=False)))

    np.testing.assert_array_equal(expected, actual)
def test_FeatureUnion_pipeline():
    """Run a segment -> FeatureUnion -> decision tree pipeline end to end.

    The pipeline is fitted on the module-level ``X_train``/``y_train``;
    predictions on ``X_test`` must have one entry per test case and cover
    the same set of labels as ``y_test``.
    """
    def rowwise(func):
        return RowwiseTransformer(
            FunctionTransformer(func=func, validate=False))

    # pipeline with segmentation plus multiple feature extraction
    features = FeatureUnion([
        ('mean', rowwise(np.mean)),
        ('std', rowwise(np.std)),
    ])
    clf = Pipeline([
        ('segment', RandomIntervalSegmenter(n_intervals=3,
                                            check_input=False)),
        ('transform', features),
        ('clf', DecisionTreeClassifier()),
    ])

    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    assert y_pred.shape[0] == y_test.shape[0]
    predicted_labels = np.unique(y_pred)
    true_labels = np.unique(y_test)
    np.testing.assert_array_equal(predicted_labels, true_labels)
def test_ColumnTransformer_pipeline():
    """Run a ColumnTransformer -> Tabulariser -> RandomForest pipeline.

    On the BasicMotions data, columns 'dim_0' and 'dim_1' are passed
    through identity transformers (series to series). Predictions on the
    test split must have one entry per test case and cover the same set of
    labels as the true test labels.
    """
    X_train, y_train = load_basic_motions("TRAIN", return_X_y=True)
    X_test, y_test = load_basic_motions("TEST", return_X_y=True)

    def identity(X):
        return X

    # using Identity function transformers (transform series to series)
    column_transformer = ColumnTransformer([
        ('id0',
         FunctionTransformer(func=identity, validate=False),
         ['dim_0']),
        ('id1',
         FunctionTransformer(func=identity, validate=False),
         ['dim_1']),
    ])
    model = Pipeline(steps=[
        ('extract', column_transformer),
        ('tabularise', Tabulariser()),
        ('classify', RandomForestClassifier(n_estimators=2)),
    ])
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    assert y_pred.shape[0] == y_test.shape[0]
    predicted_labels = np.unique(y_pred)
    true_labels = np.unique(y_test)
    np.testing.assert_array_equal(predicted_labels, true_labels)
def test_different_implementations():
    """Compare RandomIntervalFeatureExtractor with modular equivalents.

    On the GunPoint data and with a fixed random state, the extractor's
    output must equal (1) a segmenter chained with a row-wise mean, and
    (2) a segmenter + FeatureUnion(mean, std) pipeline once the columns of
    both results are brought into the same sorted order.
    """
    seed = 1233
    X_train, y_train = load_gunpoint(return_X_y=True)

    def rowwise(func):
        return RowwiseTransformer(
            FunctionTransformer(func=func, validate=False))

    # Compare with chained transformations.
    segmenter = RandomIntervalSegmenter(n_intervals='sqrt',
                                        random_state=seed)
    segmented = segmenter.fit_transform(X_train)
    A = rowwise(np.mean).fit_transform(segmented)

    extractor = RandomIntervalFeatureExtractor(n_intervals='sqrt',
                                               features=[np.mean],
                                               random_state=seed)
    B = extractor.fit_transform(X_train)
    np.testing.assert_array_equal(A, B)

    # Compare with transformer pipeline using FeatureUnion.
    pipe = Pipeline(
        [
            ('segment', RandomIntervalSegmenter(n_intervals='sqrt',
                                                check_input=False)),
            ('transform', FeatureUnion([
                ('mean', rowwise(np.mean)),
                ('std', rowwise(np.std)),
            ])),
        ],
        random_state=seed,
    )
    a = pipe.fit_transform(X_train)

    # The union output holds the mean columns first, then the std columns.
    n_ints = a.shape[1] // 2

    # Rename columns for comparing re-ordered arrays.
    mean_names = a.columns[:n_ints] + '_mean'
    std_names = a.columns[n_ints:n_ints * 2] + '_std'
    a.columns = [*mean_names, *std_names]
    a = a.reindex(np.sort(a.columns), axis=1)

    extractor = RandomIntervalFeatureExtractor(n_intervals='sqrt',
                                               features=[np.mean, np.std],
                                               random_state=seed)
    b = extractor.fit_transform(X_train)
    b = b.reindex(np.sort(b.columns), axis=1)

    np.testing.assert_array_equal(a, b)
def set_classifier(cls, resampleId):
    """
    Basic way of determining the classifier to build. To differentiate
    settings just add another branch. So, for example, if you wanted tuned
    TSF, you just pass TuneTSF and set up the tuning mechanism in that
    branch. This may well get superseded, it is just how we have always
    done it.

    :param cls: String indicating which classifier you want
        (matched case-insensitively)
    :param resampleId: Value used as ``random_state`` for the classifiers
        that are given one here (pf, pt, ps, rise, tsf)
    :return: A classifier.
    :raises Exception: if ``cls`` does not match a known classifier name
    """
    name = cls.lower()

    if name == 'pf':
        return pf.ProximityForest(random_state=resampleId)
    if name == 'pt':
        return pf.ProximityTree(random_state=resampleId)
    if name == 'ps':
        return pf.ProximityStump(random_state=resampleId)
    if name == 'rise':
        return fb.RandomIntervalSpectralForest(random_state=resampleId)
    if name == 'tsf':
        return ib.TimeSeriesForest(random_state=resampleId)
    if name == 'boss':
        return db.BOSSEnsemble()
    if name == 'st':
        return st.ShapeletTransformClassifier(time_contract_in_mins=1500)
    if name == 'dtwcv':
        return nn.KNeighborsTimeSeriesClassifier(metric="dtwcv")
    if name in ('ee', 'elasticensemble'):
        return dist.ElasticEnsemble()
    if name == 'tsfcomposite':
        # It defaults to TSF
        return ensemble.TimeSeriesForestClassifier()
    if name == 'risecomposite':
        spectral_features = FeatureUnion([
            ('acf', RowwiseTransformer(
                FunctionTransformer(func=acf_coefs, validate=False))),
            ('ps', RowwiseTransformer(
                FunctionTransformer(func=powerspectrum, validate=False))),
        ])
        base_estimator = Pipeline([
            ('segment', RandomIntervalSegmenter(n_intervals=1,
                                                min_length=5)),
            ('transform', spectral_features),
            ('tabularise', Tabulariser()),
            ('clf', DecisionTreeClassifier()),
        ])
        return ensemble.TimeSeriesForestClassifier(
            base_estimator=base_estimator, n_estimators=100)

    raise Exception('UNKNOWN CLASSIFIER')
def tsf_benchmarking():
    """Run TimeSeriesForest and a composite equivalent on every dataset.

    For each entry of ``benchmark_datasets`` two experiments are run with
    ``exp.run_experiment`` (without overwriting existing results): the
    built-in TimeSeriesForest, and a TimeSeriesForestClassifier whose base
    estimator extracts mean, std and slope over random intervals.
    """
    def rowwise(func):
        return RowwiseTransformer(
            FunctionTransformer(func=func, validate=False))

    for idx, dataset in enumerate(benchmark_datasets):
        print(str(idx) + " problem = " + dataset)

        tsf = ib.TimeSeriesForest(n_trees=100)
        exp.run_experiment(
            overwrite=False,
            datasets_dir_path=data_dir,
            results_path=results_dir,
            cls_name="PythonTSF",
            classifier=tsf,
            dataset=dataset,
            train_file=False,
        )

        base_estimator = Pipeline([
            ('segment', RandomIntervalSegmenter(n_intervals='sqrt')),
            ('transform', FeatureUnion([
                ('mean', rowwise(np.mean)),
                ('std', rowwise(np.std)),
                ('slope', rowwise(time_series_slope)),
            ])),
            ('clf', DecisionTreeClassifier()),
        ])
        tsf = TimeSeriesForestClassifier(base_estimator=base_estimator,
                                         n_estimators=100)
        exp.run_experiment(
            overwrite=False,
            datasets_dir_path=data_dir,
            results_path=results_dir,
            cls_name="PythonTSFComposite",
            classifier=tsf,
            dataset=dataset,
            train_file=False,
        )
def test_Pipeline_check_input():
    """Check that ``check_input`` propagates from a Pipeline to its steps.

    A freshly built pipeline and each of its steps must report
    ``check_input`` as True; after ``set_params(check_input=False)`` the
    pipeline and each step must report False, both as an attribute and
    through ``get_params``.
    """
    extractor = RandomIntervalFeatureExtractor(features=[np.mean])
    pipe = Pipeline([('transform', extractor)])

    # Check that pipe is initiated with check_input set to True
    assert pipe.check_input is True
    assert pipe.get_params()['check_input'] is True

    # Check that all components are initiated with check_input set to True
    for _, component in pipe.steps:
        assert component.check_input is True
        assert component.get_params()['check_input'] is True

    # Check that if check_input is set on the pipeline, it is set on the
    # pipeline itself and on all of its components
    ci = False
    pipe.set_params(check_input=ci)
    assert pipe.check_input == ci
    assert pipe.get_params()['check_input'] == ci
    for _, component in pipe.steps:
        assert component.check_input == ci
        assert component.get_params()['check_input'] == ci
def make_reduction_pipeline(estimator):
    """Helper function to use tabular estimators in time series setting.

    Returns a two-step Pipeline: a Tabulariser named "transform" followed
    by ``estimator`` named "clf".
    """
    steps = [
        ("transform", Tabulariser()),
        ("clf", estimator),
    ]
    return Pipeline(steps)
def concatenateMethod(Classifier, x_train, y_train, x_test, y_test):
    """Score a classifier fitted after a ColumnConcatenator step.

    Parameters
    ----------
    Classifier
        Callable invoked as ``Classifier(n_estimators=10)`` to build the
        final estimator.
    x_train, y_train
        Data the pipeline is fitted on.
    x_test, y_test
        Data the fitted pipeline is scored on.

    Returns
    -------
    The value returned by the pipeline's ``score(x_test, y_test)``.
    """
    clf = Pipeline([
        ('concatenate', ColumnConcatenator()),
        ('classify', Classifier(n_estimators=10)),
    ])
    clf.fit(x_train, y_train)
    return clf.score(x_test, y_test)
import numpy as np import pandas as pd from sklearn.ensemble import RandomForestRegressor import pytest from sktime.datasets import load_shampoo_sales from sktime.forecasters import DummyForecaster from sktime.highlevel.strategies import Forecasting2TSRReductionStrategy from sktime.highlevel.tasks import ForecastingTask from sktime.pipeline import Pipeline from sktime.transformers.compose import Tabulariser from sktime.utils.validation.forecasting import validate_fh from sktime.utils.data_container import select_times regressor = Pipeline([('tabularise', Tabulariser()), ('clf', RandomForestRegressor(n_estimators=2))]) # Test forecasting strategy @pytest.mark.parametrize("dynamic", [True, False]) @pytest.mark.parametrize("fh", [1, np.arange(1, 4)]) def test_univariate(dynamic, fh): fh = validate_fh(fh) len_fh = len(fh) y = load_shampoo_sales(return_y_as_dataframe=True) index = np.arange(y.iloc[0, 0].shape[0]) train_times = index[:-len_fh] test_times = index[-len_fh:]
def test_Pipeline_random_state():
    """Check that ``random_state`` propagates through a Pipeline.

    A pipeline built without a random state must report None on itself and
    on every step; after ``set_params(random_state=...)`` the value must
    appear on the pipeline and on every step. Finally, repeatedly fitting
    a segment -> row-wise mean -> decision tree pipeline on GunPoint with
    the same random state must give the same predictions each time.
    """
    pipe = Pipeline([
        ('transform', RandomIntervalFeatureExtractor(features=[np.mean])),
        ('clf', DecisionTreeClassifier()),
    ])

    # Check that pipe is initiated without random_state
    assert pipe.random_state is None
    assert pipe.get_params()['random_state'] is None

    # Check that all components are initiated without random_state
    for _, component in pipe.steps:
        assert component.random_state is None
        assert component.get_params()['random_state'] is None

    # Check that if random state is set, it's set to itself and all its
    # random components
    rs = 1234
    pipe.set_params(random_state=rs)
    assert pipe.random_state == rs
    assert pipe.get_params()['random_state'] == rs
    for _, component in pipe.steps:
        assert component.random_state == rs
        assert component.get_params()['random_state'] == rs

    # Check specific results
    X_train, y_train = load_gunpoint(return_X_y=True)
    X_test, y_test = load_gunpoint("TEST", return_X_y=True)

    steps = [
        ('segment', RandomIntervalSegmenter(n_intervals=3)),
        ('extract', RowwiseTransformer(
            FunctionTransformer(func=np.mean, validate=False))),
        ('clf', DecisionTreeClassifier()),
    ]

    def fit_and_predict():
        # The same ``steps`` list (and estimator objects) is reused for
        # every pipeline built here.
        fresh = Pipeline(steps, random_state=rs)
        fresh.fit(X_train, y_train)
        return fresh.predict(X_test)

    y_pred_first = fit_and_predict()

    n_iter = 10
    for _ in range(n_iter):
        y_pred = fit_and_predict()
        np.testing.assert_array_equal(y_pred_first, y_pred)
def __init__(self,
             base_estimator=None,
             n_estimators=500,
             criterion='mse',
             max_depth=None,
             min_samples_split=2,
             min_samples_leaf=1,
             min_weight_fraction_leaf=0.,
             max_features=None,
             max_leaf_nodes=None,
             min_impurity_decrease=0.,
             min_impurity_split=None,
             bootstrap=False,
             oob_score=False,
             n_jobs=None,
             random_state=None,
             verbose=0,
             warm_start=False,
             check_input=True):
    """Set up the forest and the pipeline used as its base estimator.

    When ``base_estimator`` is None, a default pipeline is built: a
    RandomIntervalFeatureExtractor (mean, std and slope over 'sqrt'
    intervals) followed by a DecisionTreeRegressor. Otherwise the given
    estimator must be a Pipeline whose last step is a
    DecisionTreeRegressor. The tree parameters are stored both under their
    plain names and under names prefixed with the final pipeline step.

    Note that ``set_params`` is called on ``base_estimator`` itself, so a
    pipeline passed in by the caller is modified.

    Raises
    ------
    ValueError
        If ``base_estimator`` is not a Pipeline, or its last step is not a
        DecisionTreeRegressor.
    """
    if base_estimator is None:
        default_extractor = RandomIntervalFeatureExtractor(
            n_intervals='sqrt',
            features=[np.mean, np.std, time_series_slope],
        )
        base_estimator = Pipeline([
            ('transform', default_extractor),
            ('clf', DecisionTreeRegressor()),
        ])
    elif not isinstance(base_estimator, Pipeline):
        raise ValueError(
            'Base estimator must be pipeline with transforms.')
    elif not isinstance(base_estimator.steps[-1][1],
                        DecisionTreeRegressor):
        raise ValueError(
            'Last step in base estimator pipeline must be DecisionTreeRegressor.'
        )

    tree_params = {
        "criterion": criterion,
        "max_depth": max_depth,
        "min_samples_split": min_samples_split,
        "min_samples_leaf": min_samples_leaf,
        "min_weight_fraction_leaf": min_weight_fraction_leaf,
        "max_features": max_features,
        "max_leaf_nodes": max_leaf_nodes,
        "min_impurity_decrease": min_impurity_decrease,
        "min_impurity_split": min_impurity_split,
    }

    # Assign values, even though passed on to base estimator below,
    # necessary here for cloning
    for pname, pval in tree_params.items():
        setattr(self, pname, pval)

    # Rename estimator params according to name in pipeline.
    final_step_name = base_estimator.steps[-1][0]
    prefixed_params = {
        f'{final_step_name}__{pname}': pval
        for pname, pval in tree_params.items()
    }

    # Pass on params.
    super(TimeSeriesForestRegressor, self).__init__(
        base_estimator=base_estimator,
        n_estimators=n_estimators,
        estimator_params=tuple(prefixed_params),
        bootstrap=bootstrap,
        oob_score=oob_score,
        n_jobs=n_jobs,
        random_state=random_state,
        verbose=verbose,
        warm_start=warm_start,
    )

    # Assign random state to pipeline.
    base_estimator.set_params(random_state=random_state,
                              check_input=False)

    # Store renamed estimator params.
    for pname, pval in prefixed_params.items():
        setattr(self, pname, pval)

    self.check_input = check_input
rise_steps = [ ('segment', RandomIntervalSegmenter(n_intervals=1, min_length=5)), ('transform', FeatureUnion([ ('ar', RowwiseTransformer(FunctionTransformer(func=ar_coefs, validate=False))), ('acf', RowwiseTransformer( FunctionTransformer(func=acf_coefs, validate=False))), ('ps', RowwiseTransformer( FunctionTransformer(func=powerspectrum, validate=False))) ])), ('tabularise', Tabulariser()), ('clf', DecisionTreeClassifier()) ] base_estimator = Pipeline(rise_steps) # ('RISE', TimeSeriesForestClassifier(base_estimator=base_estimator, n_estimators=100, bootstrap=True)), classifiers = [('TimeSeriesForest', TimeSeriesForest()), ('ProximityForest', ProximityForest(n_trees=100)), ('BOSS', BOSSEnsemble()), ('RandomIntervalSpectralForest', RandomIntervalSpectralForest()) ] tuples = ([(name, 'Accuracy'), (name, 'F1-Score')] for name, _ in classifiers) index = pd.MultiIndex.from_tuples(itertools.chain(*tuples), names=['classifier', 'metric']) def calculate_performance(output_file): def evaluate_classifiers(dst): print("[%s] Processing dataset %s" % (datetime.now().strftime("%F %T"), dst))
traint = transformer.fit_transform(train) traint = traint.iloc[:, 0] forecaster.fit(traint) pred = forecaster.predict(fh=fh) pred = pd.DataFrame(pd.Series([pred])) pred = transformer.inverse_transform(pred) expected = pred.iloc[0, 0] check_consistent_time_indices(expected, test.iloc[0]) np.testing.assert_allclose(actual, expected) tsr = Pipeline([ # time series regressor ('tabularise', Tabulariser()), ('regress', DummyRegressor()) ]) @pytest.mark.parametrize("window_length", [3, 5, 7]) @pytest.mark.parametrize("dynamic", [True, False]) @pytest.mark.parametrize( "fh", [np.array([1]), np.array([1, 2]), np.array([5, 6])]) def test_ReducedForecastingRegressor(window_length, dynamic, fh): # define setting # forecasting horizon len_fh = len(fh) # load data and split into train/test series y = load_shampoo_sales()