Esempio n. 1
0
def test_pipeline(network=CNNClassifier()):
    """
    Smoke-test a network as the only step of an sktime Pipeline.

    Loads the Italy power demand TRAIN and TEST splits, wraps ``network`` in
    a one-step pipeline, fits it on the first ten training cases and prints
    the score obtained on the first ten test cases.

    Note that the default ``network`` is instantiated once, when this
    function is defined, not on every call.
    """
    print("Start test_pipeline()")

    from sktime.pipeline import Pipeline

    # A single-step pipeline is of no practical use; it only shows that the
    # network can be driven through the pipeline interface.
    pipeline = Pipeline([('clf', network)])

    X_train, y_train = load_italy_power_demand(split='TRAIN', return_X_y=True)
    X_test, y_test = load_italy_power_demand(split='TEST', return_X_y=True)

    # Keep the run short: only the first ten cases of each split are used.
    n_cases = 10
    pipeline.fit(X_train[:n_cases], y_train[:n_cases])

    score = pipeline.score(X_test[:n_cases], y_test[:n_cases])
    print(score)
    print("End test_pipeline()")
def test_different_pipelines():
    """
    Compare a segment + FeatureUnion pipeline with the combined extractor.

    Both are seeded with the same value and fitted on the gunpoint data;
    the transformed output and the fitted intervals must be equal.
    """
    seed = 1233
    X_train, y_train = load_gunpoint(return_X_y=True)

    def rowwise(func):
        # Apply ``func`` row by row through a FunctionTransformer.
        return RowwiseTransformer(
            FunctionTransformer(func=func, validate=False))

    segmenter = RandomIntervalSegmenter(n_intervals='sqrt')
    union = FeatureUnion([
        ('mean', rowwise(np.mean)),
        ('std', rowwise(np.std)),
        ('slope', rowwise(time_series_slope)),
    ])
    pipe = Pipeline([('segment', segmenter), ('transform', union)],
                    random_state=seed)
    from_pipeline = pipe.fit_transform(X_train)

    extractor = RandomIntervalFeatureExtractor(
        n_intervals='sqrt',
        features=[np.mean, np.std, time_series_slope],
        random_state=seed)
    from_extractor = extractor.fit_transform(X_train)

    np.testing.assert_array_equal(from_pipeline, from_extractor)
    np.testing.assert_array_equal(pipe.steps[0][1].intervals_,
                                  extractor.intervals_)
Esempio n. 3
0
def test_pipeline(network=catch22ForestClassifier()):
    """
    Smoke-test a classifier as the only step of an sktime Pipeline.

    Loads the gunpoint TRAIN and TEST splits, wraps ``network`` in a
    one-step pipeline, fits it on the first ten training cases and prints
    the score obtained on the first ten test cases.

    Note that the default ``network`` is instantiated once, when this
    function is defined, not on every call.
    """
    print("Start test_pipeline()")

    from sktime.pipeline import Pipeline

    # A single-step pipeline is of no practical use; it only exercises the
    # pipeline interface.
    pipeline = Pipeline([('clf', network)])

    X_train, y_train = load_gunpoint(split='TRAIN', return_X_y=True)
    X_test, y_test = load_gunpoint(split='TEST', return_X_y=True)

    # Keep the run short: only the first ten cases of each split are used.
    n_cases = 10
    pipeline.fit(X_train[:n_cases], y_train[:n_cases])

    score = pipeline.score(X_test[:n_cases], y_test[:n_cases])
    print(score)
    print("End test_pipeline()")
Esempio n. 4
0
def test_RowwiseTransformer_pipeline():
    """
    Check RowwiseTransformer against a hand-written row-wise mean.

    The same ColumnTransformer + random forest pipeline is fitted twice, once
    with a plain FunctionTransformer computing the mean of every row and once
    with ``RowwiseTransformer`` wrapping ``np.mean``; the predictions on the
    test data must be identical. Uses the X_train / y_train / X_test names
    from the enclosing module.
    """
    def mean_func(X):
        return pd.DataFrame([np.mean(row) for row in X])

    def first_func(X):
        return pd.DataFrame([row[0] for row in X])

    def fit_and_predict(mean_transformer):
        # Everything but the 'mean' transformer is shared between both runs.
        column_transformer = ColumnTransformer([
            ('mean', mean_transformer, 'ts'),
            ('first',
             FunctionTransformer(func=first_func, validate=False),
             'ts_copy'),
        ])
        estimator = RandomForestClassifier(n_estimators=2, random_state=1)
        model = Pipeline(steps=[
            ('feature_extract', column_transformer),
            ('rfestimator', estimator),
        ])
        model.fit(X_train, y_train)
        return model.predict(X_test)

    # using pure sklearn
    expected = fit_and_predict(
        FunctionTransformer(func=mean_func, validate=False))

    # using sktime with sklearn pipeline
    got = fit_and_predict(
        RowwiseTransformer(FunctionTransformer(func=np.mean, validate=False)))

    np.testing.assert_array_equal(expected, got)
def test_pipeline_predictions(n_intervals, n_estimators):
    """
    Check that two equivalent pipelines yield the same predictions.

    The first pipeline chains a random interval segmenter, a FeatureUnion of
    row-wise mean / std / slope and a decision tree; the second uses
    ``RandomIntervalFeatureExtractor`` with the same features followed by a
    decision tree. Both are given the same ``random_state``, fitted on the
    module-level training data and compared on the module-level test data.

    :param n_intervals: passed on to the segmenter and the feature extractor
    :param n_estimators: accepted for the caller's parametrisation; not used
        by this test
    """
    random_state = 1234

    # Due to tie-breaking/floating point rounding in the final decision tree
    # classifier, the results depend on the exact column order of the input
    # data.

    # Compare pipeline predictions outside of ensemble.
    steps = [('segment', RandomIntervalSegmenter(n_intervals=n_intervals)),
             ('transform',
              FeatureUnion([('mean',
                             RowwiseTransformer(
                                 FunctionTransformer(func=np.mean,
                                                     validate=False))),
                            ('std',
                             RowwiseTransformer(
                                 FunctionTransformer(func=np.std,
                                                     validate=False))),
                            ('slope',
                             RowwiseTransformer(
                                 FunctionTransformer(func=time_series_slope,
                                                     validate=False)))])),
             ('clf', DecisionTreeClassifier())]
    clf1 = Pipeline(steps, random_state=random_state)
    clf1.fit(X_train, y_train)
    a = clf1.predict(X_test)

    steps = [('transform',
              RandomIntervalFeatureExtractor(
                  n_intervals=n_intervals,
                  features=[np.mean, np.std, time_series_slope])),
             ('clf', DecisionTreeClassifier())]
    clf2 = Pipeline(steps, random_state=random_state)
    clf2.fit(X_train, y_train)
    b = clf2.predict(X_test)

    # np.array_equal only returns a bool; the bare call used previously
    # discarded it, so the test could never fail. Assert the equality.
    np.testing.assert_array_equal(a, b)
Esempio n. 6
0
    def _test_pipeline_predictions(n_intervals=None, random_state=None):
        """
        Check that two equivalent pipelines yield the same predictions.

        The first pipeline chains a random interval segmenter, a FeatureUnion
        of row-wise mean / std and a decision tree; the second uses
        ``RandomIntervalFeatureExtractor`` with the same features followed by
        a decision tree. Both receive the same ``random_state`` and are
        fitted and evaluated on X_train / y_train / X_test from the
        enclosing scope.

        :param n_intervals: passed on to the segmenter and the extractor
        :param random_state: seed handed to both pipelines
        """
        steps = [('segment',
                  RandomIntervalSegmenter(n_intervals=n_intervals,
                                          check_input=False)),
                 ('transform',
                  FeatureUnion([('mean',
                                 RowwiseTransformer(
                                     FunctionTransformer(func=np.mean,
                                                         validate=False))),
                                ('std',
                                 RowwiseTransformer(
                                     FunctionTransformer(func=np.std,
                                                         validate=False)))])),
                 ('clf', DecisionTreeClassifier())]
        clf1 = Pipeline(steps, random_state=random_state)
        clf1.fit(X_train, y_train)
        a = clf1.predict(X_test)

        steps = [('transform',
                  RandomIntervalFeatureExtractor(n_intervals=n_intervals,
                                                 features=[np.mean, np.std])),
                 ('clf', DecisionTreeClassifier())]
        clf2 = Pipeline(steps, random_state=random_state)
        clf2.fit(X_train, y_train)
        b = clf2.predict(X_test)

        # np.array_equal only returns a bool; the bare call used previously
        # discarded it, so nothing was checked. Assert the equality.
        np.testing.assert_array_equal(a, b)
Esempio n. 7
0
def test_ColumnTransformer_pipeline():
    """
    Run a ColumnTransformer -> Tabulariser -> random forest pipeline.

    Both selected columns go through an identity transformer (series in,
    series out). After fitting on the module-level training data, the
    predictions for the test data must have one entry per test label and
    contain the same set of classes as the test labels.
    """
    def identity(X):
        return X

    column_transformer = ColumnTransformer([
        ('ts', FunctionTransformer(func=identity, validate=False), 'ts'),
        ('ts_copy',
         FunctionTransformer(func=identity, validate=False),
         'ts_copy'),
    ])
    model = Pipeline(steps=[
        ('feature_extract', column_transformer),
        ('tabularise', Tabulariser()),
        ('rfestimator', RandomForestClassifier(n_estimators=2)),
    ])
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    assert y_pred.shape[0] == y_test.shape[0]
    np.testing.assert_array_equal(np.unique(y_pred), np.unique(y_test))
def test_TimeSeriesForest_predictions(n_estimators, n_intervals):
    """
    Compare a forest with an explicit base pipeline to the default forest.

    The first forest is handed a pipeline made of a
    ``RandomIntervalFeatureExtractor`` (mean, std, slope) and a decision
    tree; the second is built without a base estimator and only has the
    number of intervals of its 'transform' step set through ``set_params``.
    With the same seed and number of estimators both must return identical
    class probabilities.
    """
    seed = 1234

    # A fully modular variant (segmenter + FeatureUnion of row-wise mean,
    # std and slope + decision tree) could serve as the base estimator too;
    # here the extractor-based pipeline is used.
    extractor = RandomIntervalFeatureExtractor(
        n_intervals=n_intervals,
        features=[np.mean, np.std, time_series_slope])
    explicit_base = Pipeline([('transform', extractor),
                              ('clf', DecisionTreeClassifier())])

    explicit_forest = TimeSeriesForestClassifier(base_estimator=explicit_base,
                                                 random_state=seed,
                                                 n_estimators=n_estimators)
    explicit_forest.fit(X_train, y_train)
    proba_explicit = explicit_forest.predict_proba(X_test)

    # Default forest; only the interval count of its transform step is set.
    default_forest = TimeSeriesForestClassifier(random_state=seed,
                                                n_estimators=n_estimators)
    default_forest.set_params(
        base_estimator__transform__n_intervals=n_intervals)
    default_forest.fit(X_train, y_train)
    proba_default = default_forest.predict_proba(X_test)

    np.testing.assert_array_equal(proba_explicit, proba_default)
Esempio n. 9
0
def test_heterogenous_pipeline_column_ensmbler():
    """
    Fit and score a column ensemble mixing a pipeline with other classifiers.

    Smoke test on the basic motions data: it passes as long as fitting and
    scoring run without raising; the score itself is not checked.
    """
    X_train, y_train = load_basic_motions("TRAIN", return_X_y=True)
    X_test, y_test = load_basic_motions("TEST", return_X_y=True)

    n_intervals = 3

    def rowwise(func):
        # Apply ``func`` row by row through a FunctionTransformer.
        return RowwiseTransformer(
            FunctionTransformer(func=func, validate=False))

    interval_tree = Pipeline(
        [
            ('segment', RandomIntervalSegmenter(n_intervals=n_intervals)),
            ('transform', FeatureUnion([('mean', rowwise(np.mean)),
                                        ('std', rowwise(np.std))])),
            ('clf', DecisionTreeClassifier()),
        ],
        random_state=1)

    # One estimator per selected column (columns 0, 4, 1, 2 and 3),
    # alternating between classifier types.
    estimators = [
        ("RandomIntervalTree", interval_tree, [0]),
        ("KNN4", KNNTSC(n_neighbors=1), [4]),
        ("BOSSEnsemble1 ", BOSSEnsemble(ensemble_size=3), [1]),
        ("KNN2", KNNTSC(n_neighbors=1), [2]),
        ("BOSSEnsemble3", BOSSEnsemble(ensemble_size=3), [3]),
    ]
    ensemble_clf = ColumnEnsembleClassifier(estimators)

    ensemble_clf.fit(X_train, y_train)
    ensemble_clf.score(X_test, y_test)
Esempio n. 10
0
    def bop_pipeline(X, y):
        """
        Grid-search a SAX + 1-nearest-neighbour pipeline and return it fitted.

        The window sizes searched start at 10 and run up to the series
        length, which is read from the first cell of ``X``; the step is
        chosen from the series length and is never smaller than 1.

        :param X: data frame whose cells hold the series
        :param y: class labels
        :return: the fitted ``GridSearchCV`` object
        """
        pipeline = Pipeline([
            ('transform', SAX(remove_repeat_words=True)),
            ('clf', KNeighborsTimeSeriesClassifier(n_neighbors=1,
                                                   metric=euclidean_distance)),
        ])

        series_length = X.iloc[0, 0].shape[0]
        max_window_searches = series_length / 4
        # Step between candidate window sizes, never below 1.
        win_inc = max(int((series_length - 10) / max_window_searches), 1)
        window_sizes = list(range(10, series_length + 1, win_inc))

        cv_params = {
            'transform__word_length': [8, 10, 12, 14, 16],
            'transform__alphabet_size': [2, 3, 4],
            'transform__window_size': window_sizes,
        }
        model = GridSearchCV(pipeline, cv_params, cv=5)
        model.fit(X, y)
        return model
def rise_benchmarking():
    """
    Run the RISE experiments on every benchmark dataset.

    For each entry of ``benchmark_datasets`` two experiments are run, in this
    order: the dedicated ``RandomIntervalSpectralForest`` ("PythonRISE") and
    a ``TimeSeriesForestClassifier`` built on a segment / acf + power
    spectrum / tabularise / decision tree pipeline ("PythonRISEComposite").
    Existing results are overwritten.
    """
    def run(cls_name, classifier, dataset):
        # Settings shared by both experiments of a dataset.
        exp.run_experiment(overwrite=True,
                           datasets_dir_path=data_dir,
                           results_path=results_dir,
                           cls_name=cls_name,
                           classifier=classifier,
                           dataset=dataset,
                           train_file=False)

    def rowwise(func):
        return RowwiseTransformer(
            FunctionTransformer(func=func, validate=False))

    for position, dataset in enumerate(benchmark_datasets):
        print(str(position) + " problem = " + dataset)

        run("PythonRISE", fb.RandomIntervalSpectralForest(n_trees=100),
            dataset)

        steps = [
            ('segment', RandomIntervalSegmenter(n_intervals=1, min_length=5)),
            ('transform', FeatureUnion([('acf', rowwise(acf_coefs)),
                                        ('ps', rowwise(powerspectrum))])),
            ('tabularise', Tabulariser()),
            ('clf', DecisionTreeClassifier()),
        ]
        composite = TimeSeriesForestClassifier(base_estimator=Pipeline(steps),
                                               n_estimators=100)
        run("PythonRISEComposite", composite, dataset)
Esempio n. 12
0
def test_ReducedForecastingRegressor_with_TransformedTargetRegressor(
        window_length, dynamic, fh):
    """
    Fit and predict a reduced-regression forecaster on a transformed target.

    The shampoo sales series is split so that the last ``len(fh)`` points
    form the test part. The forecaster is wrapped, together with a
    deseasonalise + detrend pipeline, in a ``TransformedTargetForecaster``;
    the prediction must have as many entries as the test part.
    """
    # number of steps in the forecasting horizon
    n_ahead = len(fh)

    # load data and split into train/test series
    series = load_shampoo_sales()
    y_train = pd.Series([series.iloc[0].iloc[:-n_ahead]])
    y_test = pd.Series([series.iloc[0].iloc[-n_ahead:]])

    forecaster = ReducedRegressionForecaster(tsr,
                                             window_length=window_length,
                                             dynamic=dynamic)
    transformer = Pipeline([
        ('deseasonalise', Deseasonaliser(sp=12)),
        ('detrend', Detrender(order=1)),
    ])
    model = TransformedTargetForecaster(forecaster, transformer)

    # When dynamic is False, fitting without a forecasting horizon is
    # expected to raise.
    if not dynamic:
        with pytest.raises(ValueError):
            model.fit(y_train)

    model.fit(y_train, fh=fh)
    y_pred = model.predict(fh=fh)
    assert len(y_pred) == len(y_test.iloc[0])
Esempio n. 13
0
def test_RowwiseTransformer_pipeline():
    """
    Check RowwiseTransformer against a hand-written row-wise mean.

    On the basic motions data the same ColumnTransformer + random forest
    pipeline is fitted twice: once with a plain FunctionTransformer that
    averages every series of 'dim_0', once with ``RowwiseTransformer``
    wrapping ``np.mean``. Both must predict the same labels.
    """
    X_train, y_train = load_basic_motions("TRAIN", return_X_y=True)
    X_test, y_test = load_basic_motions("TEST", return_X_y=True)

    def as_frame(X):
        # A single selected column may arrive as a Series.
        return pd.DataFrame(X) if isinstance(X, pd.Series) else X

    def rowwise_mean(X):
        means = [pd.Series(col.apply(np.mean))
                 for _, col in as_frame(X).items()]
        return pd.concat(means, axis=1)

    def rowwise_first(X):
        firsts = [pd.Series(tabularise(col).iloc[:, 0])
                  for _, col in as_frame(X).items()]
        return pd.concat(firsts, axis=1)

    def fit_and_predict(mean_step):
        # Columns are given as lists; otherwise pandas Series would be
        # selected and passed on to the transformers.
        transformer = ColumnTransformer([
            ('mean', mean_step, ['dim_0']),
            ('first',
             FunctionTransformer(func=rowwise_first, validate=False),
             ['dim_1']),
        ])
        estimator = RandomForestClassifier(n_estimators=2, random_state=1)
        model = Pipeline(steps=[('extract', transformer),
                                ('classify', estimator)])
        model.fit(X_train, y_train)
        return model.predict(X_test)

    # using pure sklearn
    expected = fit_and_predict(
        FunctionTransformer(func=rowwise_mean, validate=False))

    # using sktime with sklearn pipeline
    actual = fit_and_predict(
        RowwiseTransformer(FunctionTransformer(func=np.mean, validate=False)))

    np.testing.assert_array_equal(expected, actual)
Esempio n. 14
0
def test_FeatureUnion_pipeline():
    """
    Run a segment -> FeatureUnion(mean, std) -> decision tree pipeline.

    After fitting on the module-level training data, the predictions for the
    test data must have one entry per test label and contain the same set of
    classes as the test labels.
    """
    def rowwise(func):
        return RowwiseTransformer(
            FunctionTransformer(func=func, validate=False))

    # segmentation followed by several row-wise feature extractors
    segmenter = RandomIntervalSegmenter(n_intervals=3, check_input=False)
    features = FeatureUnion([
        ('mean', rowwise(np.mean)),
        ('std', rowwise(np.std)),
    ])
    clf = Pipeline([
        ('segment', segmenter),
        ('transform', features),
        ('clf', DecisionTreeClassifier()),
    ])

    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    assert y_pred.shape[0] == y_test.shape[0]
    np.testing.assert_array_equal(np.unique(y_pred), np.unique(y_test))
Esempio n. 15
0
def test_ColumnTransformer_pipeline():
    """
    Run a ColumnTransformer -> Tabulariser -> random forest pipeline.

    'dim_0' and 'dim_1' of the basic motions data each go through an
    identity transformer (series in, series out). The predictions for the
    test split must have one entry per test label and contain the same set
    of classes as the test labels.
    """
    X_train, y_train = load_basic_motions("TRAIN", return_X_y=True)
    X_test, y_test = load_basic_motions("TEST", return_X_y=True)

    def identity(X):
        return X

    column_transformer = ColumnTransformer([
        ('id0', FunctionTransformer(func=identity, validate=False),
         ['dim_0']),
        ('id1', FunctionTransformer(func=identity, validate=False),
         ['dim_1']),
    ])
    model = Pipeline(steps=[
        ('extract', column_transformer),
        ('tabularise', Tabulariser()),
        ('classify', RandomForestClassifier(n_estimators=2)),
    ])
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    assert y_pred.shape[0] == y_test.shape[0]
    np.testing.assert_array_equal(np.unique(y_pred), np.unique(y_test))
def test_different_implementations():
    """
    Compare RandomIntervalFeatureExtractor with modular equivalents.

    Two comparisons are made on the gunpoint data, always with the same
    seed: first against a segmenter chained by hand with a row-wise mean,
    then against a pipeline whose FeatureUnion computes row-wise mean and
    std. In the second case the columns of both results are sorted by name
    before the comparison.
    """
    seed = 1233
    X_train, y_train = load_gunpoint(return_X_y=True)

    # Compare with chained transformations.
    segmenter = RandomIntervalSegmenter(n_intervals='sqrt', random_state=seed)
    rowwise_mean = RowwiseTransformer(
        FunctionTransformer(func=np.mean, validate=False))
    segmented = segmenter.fit_transform(X_train)
    A = rowwise_mean.fit_transform(segmented)

    mean_extractor = RandomIntervalFeatureExtractor(n_intervals='sqrt',
                                                    features=[np.mean],
                                                    random_state=seed)
    B = mean_extractor.fit_transform(X_train)

    np.testing.assert_array_equal(A, B)

    # Compare with transformer pipeline using TSFeatureUnion.
    union = FeatureUnion([
        ('mean',
         RowwiseTransformer(
             FunctionTransformer(func=np.mean, validate=False))),
        ('std',
         RowwiseTransformer(
             FunctionTransformer(func=np.std, validate=False))),
    ])
    pipe = Pipeline(
        [
            ('segment',
             RandomIntervalSegmenter(n_intervals='sqrt', check_input=False)),
            ('transform', union),
        ],
        random_state=seed)
    a = pipe.fit_transform(X_train)

    # Rename columns for comparing re-ordered arrays: the first half gets a
    # '_mean' suffix, the second half a '_std' suffix.
    half = a.shape[1] // 2
    mean_names = a.columns[:half] + '_mean'
    std_names = a.columns[half:half * 2] + '_std'
    a.columns = [*mean_names, *std_names]
    a = a.reindex(np.sort(a.columns), axis=1)

    both_extractor = RandomIntervalFeatureExtractor(
        n_intervals='sqrt',
        features=[np.mean, np.std],
        random_state=seed)
    b = both_extractor.fit_transform(X_train)
    b = b.reindex(np.sort(b.columns), axis=1)

    np.testing.assert_array_equal(a, b)
Esempio n. 17
0
def set_classifier(cls, resampleId):
    """
    Build the classifier identified by a short, case-insensitive name.

    To support a further configuration (for example a tuned TSF), add
    another entry to the lookup table below and set it up in its builder.

    :param cls: String indicating which classifier you want
    :param resampleId: passed as ``random_state`` to the classifiers that
        are seeded here (pf, pt, ps, rise, tsf)
    :return: A classifier.
    :raises Exception: if ``cls`` does not name a known classifier
    """
    def build_rise_composite():
        steps = [
            ('segment', RandomIntervalSegmenter(n_intervals=1, min_length=5)),
            ('transform', FeatureUnion([
                ('acf', RowwiseTransformer(
                    FunctionTransformer(func=acf_coefs, validate=False))),
                ('ps', RowwiseTransformer(
                    FunctionTransformer(func=powerspectrum, validate=False))),
            ])),
            ('tabularise', Tabulariser()),
            ('clf', DecisionTreeClassifier()),
        ]
        base_estimator = Pipeline(steps)
        return ensemble.TimeSeriesForestClassifier(
            base_estimator=base_estimator, n_estimators=100)

    # Builders are only called for the requested name, so nothing else is
    # constructed.
    builders = {
        'pf': lambda: pf.ProximityForest(random_state=resampleId),
        'pt': lambda: pf.ProximityTree(random_state=resampleId),
        'ps': lambda: pf.ProximityStump(random_state=resampleId),
        'rise': lambda: fb.RandomIntervalSpectralForest(
            random_state=resampleId),
        'tsf': lambda: ib.TimeSeriesForest(random_state=resampleId),
        'boss': lambda: db.BOSSEnsemble(),
        'st': lambda: st.ShapeletTransformClassifier(
            time_contract_in_mins=1500),
        'dtwcv': lambda: nn.KNeighborsTimeSeriesClassifier(metric="dtwcv"),
        'ee': lambda: dist.ElasticEnsemble(),
        'elasticensemble': lambda: dist.ElasticEnsemble(),
        # It defaults to TSF
        'tsfcomposite': lambda: ensemble.TimeSeriesForestClassifier(),
        'risecomposite': build_rise_composite,
    }

    build = builders.get(cls.lower())
    if build is None:
        raise Exception('UNKNOWN CLASSIFIER')
    return build()
def tsf_benchmarking():
    """
    Run the TSF experiments on every benchmark dataset.

    For each entry of ``benchmark_datasets`` two experiments are run, in this
    order: the dedicated ``TimeSeriesForest`` ("PythonTSF") and a
    ``TimeSeriesForestClassifier`` built on a segment / mean + std + slope /
    decision tree pipeline ("PythonTSFComposite"). Experiments are run with
    ``overwrite=False``.
    """
    def run(cls_name, classifier, dataset):
        # Settings shared by both experiments of a dataset.
        exp.run_experiment(overwrite=False,
                           datasets_dir_path=data_dir,
                           results_path=results_dir,
                           cls_name=cls_name,
                           classifier=classifier,
                           dataset=dataset,
                           train_file=False)

    def rowwise(func):
        return RowwiseTransformer(
            FunctionTransformer(func=func, validate=False))

    for position, dataset in enumerate(benchmark_datasets):
        print(str(position) + " problem = " + dataset)

        run("PythonTSF", ib.TimeSeriesForest(n_trees=100), dataset)

        steps = [
            ('segment', RandomIntervalSegmenter(n_intervals='sqrt')),
            ('transform', FeatureUnion([
                ('mean', rowwise(np.mean)),
                ('std', rowwise(np.std)),
                ('slope', rowwise(time_series_slope)),
            ])),
            ('clf', DecisionTreeClassifier()),
        ]
        composite = TimeSeriesForestClassifier(base_estimator=Pipeline(steps),
                                               n_estimators=100)
        run("PythonTSFComposite", composite, dataset)
Esempio n. 19
0
def test_Pipeline_check_input():
    """
    Check the default of ``check_input`` and its propagation to the steps.

    A new pipeline and all of its steps must report ``check_input`` as True;
    after ``set_params(check_input=False)`` on the pipeline, the pipeline
    and all of its steps must report the new value.
    """
    extractor = RandomIntervalFeatureExtractor(features=[np.mean])
    pipe = Pipeline([('transform', extractor)])

    # A newly created pipeline has check_input switched on ...
    assert pipe.check_input is True
    assert pipe.get_params()['check_input'] is True

    # ... and so has every one of its components.
    for _, component in pipe.steps:
        assert component.check_input is True
        assert component.get_params()['check_input'] is True

    # Setting check_input on the pipeline must update the pipeline itself
    # and all of its components.
    new_value = False
    pipe.set_params(check_input=new_value)

    assert pipe.check_input == new_value
    assert pipe.get_params()['check_input'] == new_value

    for _, component in pipe.steps:
        assert component.check_input == new_value
        assert component.get_params()['check_input'] == new_value
Esempio n. 20
0
def make_reduction_pipeline(estimator):
    """
    Wrap a tabular estimator so that it can be used on time series data.

    :param estimator: estimator placed after the ``Tabulariser`` step
    :return: a two-step pipeline: 'transform' (Tabulariser), then 'clf'
    """
    steps = [
        ("transform", Tabulariser()),
        ("clf", estimator),
    ]
    return Pipeline(steps)
Esempio n. 21
0
def concatenateMethod(Classifier, x_train, y_train, x_test, y_test):
    """
    Score a classifier on column-concatenated data.

    :param Classifier: class (or factory) called as
        ``Classifier(n_estimators=10)`` to build the final step
    :param x_train: training data
    :param y_train: training labels
    :param x_test: test data
    :param y_test: test labels
    :return: the pipeline's score on the test data
    """
    pipeline = Pipeline([
        ('concatenate', ColumnConcatenator()),
        ('classify', Classifier(n_estimators=10)),
    ])
    pipeline.fit(x_train, y_train)
    score = pipeline.score(x_test, y_test)
    return score
Esempio n. 22
0
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
import pytest

from sktime.datasets import load_shampoo_sales
from sktime.forecasters import DummyForecaster
from sktime.highlevel.strategies import Forecasting2TSRReductionStrategy
from sktime.highlevel.tasks import ForecastingTask
from sktime.pipeline import Pipeline
from sktime.transformers.compose import Tabulariser
from sktime.utils.validation.forecasting import validate_fh
from sktime.utils.data_container import select_times

# Regressor shared by the tests in this module: the data is tabularised
# first and then handed to a small random forest.
regressor = Pipeline([
    ('tabularise', Tabulariser()),
    ('clf', RandomForestRegressor(n_estimators=2)),
])


# Test forecasting strategy
@pytest.mark.parametrize("dynamic", [True, False])
@pytest.mark.parametrize("fh", [1, np.arange(1, 4)])
def test_univariate(dynamic, fh):
    """
    Prepare a train/test split of the shampoo sales series by position.

    ``fh`` is passed through ``validate_fh`` and its length determines how
    many of the last time points are held out.

    NOTE(review): the visible body only computes the index split and makes
    no assertions; ``dynamic`` is not used here. The function looks
    truncated in this excerpt -- confirm against the full file.
    """

    fh = validate_fh(fh)
    len_fh = len(fh)

    y = load_shampoo_sales(return_y_as_dataframe=True)

    # positions 0..n-1, where n is the length of the series in the first cell
    index = np.arange(y.iloc[0, 0].shape[0])
    # everything but the last len_fh positions is used for training
    train_times = index[:-len_fh]
    test_times = index[-len_fh:]
Esempio n. 23
0
def test_Pipeline_random_state():
    """
    Check handling of ``random_state`` by the pipeline.

    Three things are verified: a new pipeline and its steps have no random
    state; setting it on the pipeline sets it on every step; and pipelines
    rebuilt with the same seed give the same predictions on the gunpoint
    data.
    """
    pipe = Pipeline([
        ('transform', RandomIntervalFeatureExtractor(features=[np.mean])),
        ('clf', DecisionTreeClassifier()),
    ])

    # Neither the new pipeline ...
    assert pipe.random_state is None
    assert pipe.get_params()['random_state'] is None

    # ... nor any of its components has a random state yet.
    for _, component in pipe.steps:
        assert component.random_state is None
        assert component.get_params()['random_state'] is None

    # Setting the random state on the pipeline must set it on the pipeline
    # itself and on all of its components.
    rs = 1234
    pipe.set_params(random_state=rs)

    assert pipe.random_state == rs
    assert pipe.get_params()['random_state'] == rs

    for _, component in pipe.steps:
        assert component.random_state == rs
        assert component.get_params()['random_state'] == rs

    # Check specific results
    X_train, y_train = load_gunpoint(return_X_y=True)
    X_test, y_test = load_gunpoint("TEST", return_X_y=True)

    steps = [
        ('segment', RandomIntervalSegmenter(n_intervals=3)),
        ('extract',
         RowwiseTransformer(
             FunctionTransformer(func=np.mean, validate=False))),
        ('clf', DecisionTreeClassifier()),
    ]

    def fit_and_predict():
        # Build a new pipeline around the same step objects and seed.
        seeded = Pipeline(steps, random_state=rs)
        seeded.fit(X_train, y_train)
        return seeded.predict(X_test)

    reference = fit_and_predict()
    n_repeats = 10
    for _ in range(n_repeats):
        np.testing.assert_array_equal(reference, fit_and_predict())
Esempio n. 24
0
    def __init__(self,
                 base_estimator=None,
                 n_estimators=500,
                 criterion='mse',
                 max_depth=None,
                 min_samples_split=2,
                 min_samples_leaf=1,
                 min_weight_fraction_leaf=0.,
                 max_features=None,
                 max_leaf_nodes=None,
                 min_impurity_decrease=0.,
                 min_impurity_split=None,
                 bootstrap=False,
                 oob_score=False,
                 n_jobs=None,
                 random_state=None,
                 verbose=0,
                 warm_start=False,
                 check_input=True):
        """
        Set up the forest and the pipeline used as its base estimator.

        If ``base_estimator`` is None, a default pipeline is built: a
        ``RandomIntervalFeatureExtractor`` (``n_intervals='sqrt'``, features
        mean, std and ``time_series_slope``) followed by a
        ``DecisionTreeRegressor``. Otherwise ``base_estimator`` must be a
        ``Pipeline`` whose last step is a ``DecisionTreeRegressor``.

        The tree hyper-parameters (``criterion`` ... ``min_impurity_split``)
        are stored on the instance under their plain names and, in addition,
        under names prefixed with the name of the last pipeline step
        (``<step>__<param>``). The prefixed names are handed to the parent
        constructor as ``estimator_params``. ``n_estimators``, ``bootstrap``,
        ``oob_score``, ``n_jobs``, ``random_state``, ``verbose`` and
        ``warm_start`` are forwarded to the parent constructor unchanged.

        Raises
        ------
        ValueError
            If ``base_estimator`` is given but is not a ``Pipeline``, or if
            its last step is not a ``DecisionTreeRegressor``.
        """

        if base_estimator is None:
            # Default base estimator: interval features + regression tree.
            features = [np.mean, np.std, time_series_slope]
            steps = [('transform',
                      RandomIntervalFeatureExtractor(n_intervals='sqrt',
                                                     features=features)),
                     ('clf', DecisionTreeRegressor())]
            base_estimator = Pipeline(steps)

        elif not isinstance(base_estimator, Pipeline):
            raise ValueError(
                'Base estimator must be pipeline with transforms.')
        elif not isinstance(base_estimator.steps[-1][1],
                            DecisionTreeRegressor):
            raise ValueError(
                'Last step in base estimator pipeline must be DecisionTreeRegressor.'
            )

        # Assign values, even though passed on to base estimator below, necessary here for cloning
        self.criterion = criterion
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.min_samples_leaf = min_samples_leaf
        self.min_weight_fraction_leaf = min_weight_fraction_leaf
        self.max_features = max_features
        self.max_leaf_nodes = max_leaf_nodes
        self.min_impurity_decrease = min_impurity_decrease
        self.min_impurity_split = min_impurity_split

        # Rename estimator params according to name in pipeline.
        # ``estimator`` is the NAME of the last pipeline step, not the object.
        estimator = base_estimator.steps[-1][0]
        estimator_params = {
            "criterion": criterion,
            "max_depth": max_depth,
            "min_samples_split": min_samples_split,
            "min_samples_leaf": min_samples_leaf,
            "min_weight_fraction_leaf": min_weight_fraction_leaf,
            "max_features": max_features,
            "max_leaf_nodes": max_leaf_nodes,
            "min_impurity_decrease": min_impurity_decrease,
            "min_impurity_split": min_impurity_split,
        }
        # Prefix every key with '<step name>__'.
        estimator_params = {
            f'{estimator}__{pname}': pval
            for pname, pval in estimator_params.items()
        }

        # Pass on params.
        super(TimeSeriesForestRegressor, self).__init__(
            base_estimator=base_estimator,
            n_estimators=n_estimators,
            estimator_params=tuple(estimator_params.keys()),
            bootstrap=bootstrap,
            oob_score=oob_score,
            n_jobs=n_jobs,
            random_state=random_state,
            verbose=verbose,
            warm_start=warm_start,
        )

        # Assign random state to pipeline.
        # NOTE(review): set_params is called on the pipeline object itself,
        # i.e. a pipeline passed in by the caller is modified. The pipeline's
        # check_input is always set to False here, independently of the
        # ``check_input`` argument stored on ``self`` below.
        base_estimator.set_params(**{
            'random_state': random_state,
            'check_input': False
        })

        # Store renamed estimator params.
        # These are the prefixed names listed in ``estimator_params`` above.
        for pname, pval in estimator_params.items():
            self.__setattr__(pname, pval)
        self.check_input = check_input
# Steps of a composite RISE base estimator: one random interval per series,
# row-wise ar / acf / power spectrum features, tabularisation and a
# decision tree.
rise_steps = [
    ('segment', RandomIntervalSegmenter(n_intervals=1, min_length=5)),
    ('transform', FeatureUnion([
        ('ar', RowwiseTransformer(
            FunctionTransformer(func=ar_coefs, validate=False))),
        ('acf', RowwiseTransformer(
            FunctionTransformer(func=acf_coefs, validate=False))),
        ('ps', RowwiseTransformer(
            FunctionTransformer(func=powerspectrum, validate=False))),
    ])),
    ('tabularise', Tabulariser()),
    ('clf', DecisionTreeClassifier()),
]
base_estimator = Pipeline(rise_steps)

# The composite RISE entry is currently disabled:
# ('RISE', TimeSeriesForestClassifier(base_estimator=base_estimator, n_estimators=100, bootstrap=True)),
classifiers = [
    ('TimeSeriesForest', TimeSeriesForest()),
    ('ProximityForest', ProximityForest(n_trees=100)),
    ('BOSS', BOSSEnsemble()),
    ('RandomIntervalSpectralForest', RandomIntervalSpectralForest()),
]

# Two (classifier, metric) index entries per classifier. ``tuples`` is a
# generator and is consumed when the index is built.
tuples = ([(name, 'Accuracy'), (name, 'F1-Score')] for name, _ in classifiers)
index = pd.MultiIndex.from_tuples(itertools.chain.from_iterable(tuples),
                                  names=['classifier', 'metric'])


def calculate_performance(output_file):
    # NOTE(review): only the beginning of this function is visible in this
    # excerpt; ``output_file`` is not used in the visible part -- confirm its
    # role against the full file.
    def evaluate_classifiers(dst):
        # Print a progress line with the current time and the dataset name.
        print("[%s] Processing dataset %s" %
              (datetime.now().strftime("%F %T"), dst))
Esempio n. 26
0
    traint = transformer.fit_transform(train)
    traint = traint.iloc[:, 0]

    forecaster.fit(traint)
    pred = forecaster.predict(fh=fh)

    pred = pd.DataFrame(pd.Series([pred]))
    pred = transformer.inverse_transform(pred)
    expected = pred.iloc[0, 0]
    check_consistent_time_indices(expected, test.iloc[0])

    np.testing.assert_allclose(actual, expected)


# Time series regressor used by the forecasting tests below: tabularise the
# input, then fit a dummy regressor.
tsr = Pipeline([
    ('tabularise', Tabulariser()),
    ('regress', DummyRegressor()),
])


@pytest.mark.parametrize("window_length", [3, 5, 7])
@pytest.mark.parametrize("dynamic", [True, False])
@pytest.mark.parametrize(
    "fh", [np.array([1]), np.array([1, 2]),
           np.array([5, 6])])
def test_ReducedForecastingRegressor(window_length, dynamic, fh):
    # define setting
    # forecasting horizon
    len_fh = len(fh)

    # load data and split into train/test series
    y = load_shampoo_sales()