def test_different_pipelines():
    """A segmenter + FeatureUnion pipeline must match the combined
    RandomIntervalFeatureExtractor: same features and same intervals."""
    seed = 1233
    X_train, y_train = load_gunpoint(return_X_y=True)

    # Build the per-feature row-wise transformers from one spec list.
    feature_funcs = [('mean', np.mean),
                     ('std', np.std),
                     ('slope', time_series_slope)]
    union = FeatureUnion([
        (name, RowwiseTransformer(FunctionTransformer(func=func,
                                                      validate=False)))
        for name, func in feature_funcs
    ])
    pipe = Pipeline(
        [('segment', RandomIntervalSegmenter(n_intervals='sqrt')),
         ('transform', union)],
        random_state=seed)
    a = pipe.fit_transform(X_train)

    extractor = RandomIntervalFeatureExtractor(
        n_intervals='sqrt',
        features=[np.mean, np.std, time_series_slope],
        random_state=seed)
    b = extractor.fit_transform(X_train)

    np.testing.assert_array_equal(a, b)
    # Both approaches must also have drawn the same random intervals.
    np.testing.assert_array_equal(pipe.steps[0][1].intervals_,
                                  extractor.intervals_)
# Example #2
# 0
def rise_benchmarking():
    """Benchmark RISE over every benchmark dataset.

    Runs the native RandomIntervalSpectralForest and a composite,
    pipeline-based RISE equivalent; results are written to results_dir
    via exp.run_experiment. Returns nothing.
    """
    # enumerate instead of range(len(...)) — same order, clearer intent.
    for i, dataset in enumerate(benchmark_datasets):
        print(str(i) + " problem = " + dataset)
        # Native RISE implementation.
        rise = fb.RandomIntervalSpectralForest(n_trees=100)
        exp.run_experiment(overwrite=True,
                           datasets_dir_path=data_dir,
                           results_path=results_dir,
                           cls_name="PythonRISE",
                           classifier=rise,
                           dataset=dataset,
                           train_file=False)
        # Composite RISE: whole-series interval, ACF + power-spectrum
        # features, then a decision tree, bagged in a time-series forest.
        steps = [('segment',
                  RandomIntervalSegmenter(n_intervals=1, min_length=5)),
                 ('transform',
                  FeatureUnion([('acf',
                                 RowwiseTransformer(
                                     FunctionTransformer(func=acf_coefs,
                                                         validate=False))),
                                ('ps',
                                 RowwiseTransformer(
                                     FunctionTransformer(func=powerspectrum,
                                                         validate=False)))])),
                 ('tabularise', Tabulariser()),
                 ('clf', DecisionTreeClassifier())]
        base_estimator = Pipeline(steps)
        # Distinct name so the native model above is not shadowed.
        rise_composite = TimeSeriesForestClassifier(
            base_estimator=base_estimator, n_estimators=100)
        exp.run_experiment(overwrite=True,
                           datasets_dir_path=data_dir,
                           results_path=results_dir,
                           cls_name="PythonRISEComposite",
                           classifier=rise_composite,
                           dataset=dataset,
                           train_file=False)
# Example #3
# 0
def test_rowwise_transformer_function_transformer_series_to_primitives():
    """Row-wise np.mean collapses each series cell to a scalar,
    keeping the frame shape."""
    X, y = load_gunpoint(return_X_y=True)
    transformer = RowwiseTransformer(
        FunctionTransformer(func=np.mean, validate=False))
    Xt = transformer.fit_transform(X, y)
    assert Xt.shape == X.shape
    # series-to-primitive: cells are plain floats after the transform
    assert isinstance(Xt.iloc[0, 0], float)
# Example #4
# 0
def test_different_implementations():
    """Modular transformer chains/pipelines and the combined
    RandomIntervalFeatureExtractor should produce identical feature matrices.

    NOTE(review): the nested _test_TimeSeriesForest_predictions helper at the
    bottom is defined but never called, and it references X_test which is not
    defined in this function — it looks like a copy/paste artifact; confirm
    whether it should be removed or promoted to a real test.
    """
    random_seed = 1233
    X_train, y_train = load_gunpoint(return_X_y=True)

    # Compare with chained transformations.
    tran1 = RandomIntervalSegmenter(n_intervals='sqrt', random_state=random_seed)
    tran2 = RowwiseTransformer(FunctionTransformer(func=np.mean, validate=False))
    A = tran2.fit_transform(tran1.fit_transform(X_train))

    tran = RandomIntervalFeatureExtractor(n_intervals='sqrt', features=[np.mean], random_state=random_seed)
    B = tran.fit_transform(X_train)

    np.testing.assert_array_equal(A, B)

    # Compare with transformer pipeline using TSFeatureUnion.
    steps = [
        ('segment', RandomIntervalSegmenter(n_intervals='sqrt', check_input=False)),
        ('transform', TSFeatureUnion([
            ('mean', RowwiseTransformer(FunctionTransformer(func=np.mean, validate=False))),
            ('std', RowwiseTransformer(FunctionTransformer(func=np.std, validate=False))),
        ])),
    ]
    pipe = TSPipeline(steps, random_state=random_seed)
    a = pipe.fit_transform(X_train)
    n_ints = a.shape[1] // 2  # Rename columns for comparing re-ordered arrays.
    # First half of the columns are the 'mean' features, second half 'std'.
    a.columns = [*a.columns[:n_ints] + '_mean', *a.columns[n_ints:n_ints * 2] + '_std']
    a = a.reindex(np.sort(a.columns), axis=1)

    tran = RandomIntervalFeatureExtractor(n_intervals='sqrt', features=[np.mean, np.std],
                                          random_state=random_seed)
    b = tran.fit_transform(X_train)
    # Sort columns on both sides so the comparison is order-independent.
    b = b.reindex(np.sort(b.columns), axis=1)
    np.testing.assert_array_equal(a, b)
    def _test_TimeSeriesForest_predictions(n_estimators=None, n_intervals=None, random_state=None):
        # Dead code: never invoked here (see NOTE in the docstring above).

        # fully modular implementation using pipeline with FeatureUnion
        steps = [
            ('segment', RandomIntervalSegmenter(n_intervals=n_intervals, check_input=False)),
            ('transform', TSFeatureUnion([
                ('mean', RowwiseTransformer(FunctionTransformer(func=np.mean, validate=False))),
                ('std', RowwiseTransformer(FunctionTransformer(func=np.std, validate=False))),
                ('slope', RowwiseTransformer(FunctionTransformer(func=time_series_slope, validate=False)))
            ])),
            ('clf', DecisionTreeClassifier())
        ]

        base_estimator = TSPipeline(steps)
        clf1 = TimeSeriesForestClassifier(base_estimator=base_estimator,
                                          random_state=random_state,
                                          n_estimators=n_estimators)
        clf1.fit(X_train, y_train)
        a = clf1.predict_proba(X_test)

        # default, semi-modular implementation using RandomIntervalFeatureExtractor internally
        clf2 = TimeSeriesForestClassifier(random_state=random_state,
                                          n_estimators=n_estimators)
        clf2.set_params(**{'base_estimator__transform__n_intervals': n_intervals})
        clf2.fit(X_train, y_train)
        b = clf2.predict_proba(X_test)

        np.testing.assert_array_equal(a, b)
# Example #6
# 0
def test_heterogenous_pipeline_column_ensmbler():
    """Column ensemble mixing a pipeline tree with KNN and BOSS classifiers,
    one estimator per dimension, fits and scores on basic motions."""
    X_train, y_train = load_basic_motions("TRAIN", return_X_y=True)
    X_test, y_test = load_basic_motions("TEST", return_X_y=True)

    n_intervals = 3

    mean_step = ('mean',
                 RowwiseTransformer(
                     FunctionTransformer(func=np.mean, validate=False)))
    std_step = ('std',
                RowwiseTransformer(
                    FunctionTransformer(func=np.std, validate=False)))
    clf1 = Pipeline(
        [('segment', RandomIntervalSegmenter(n_intervals=n_intervals)),
         ('transform', FeatureUnion([mean_step, std_step])),
         ('clf', DecisionTreeClassifier())],
        random_state=1)

    # dims 0-3 with alternating classifiers.
    ct = ColumnEnsembleClassifier([
        ("RandomIntervalTree", clf1, [0]),
        ("KNN4", KNNTSC(n_neighbors=1), [4]),
        ("BOSSEnsemble1 ", BOSSEnsemble(ensemble_size=3), [1]),
        ("KNN2", KNNTSC(n_neighbors=1), [2]),
        ("BOSSEnsemble3", BOSSEnsemble(ensemble_size=3), [3]),
    ])

    ct.fit(X_train, y_train)
    ct.score(X_test, y_test)
# Example #7
# 0
    def _test_pipeline_predictions(n_intervals=None, random_state=None):
        """Fully modular pipeline vs RandomIntervalFeatureExtractor pipeline:
        both must yield identical predictions.

        :param n_intervals: interval setting forwarded to both pipelines.
        :param random_state: seed forwarded to both pipelines.
        """
        steps = [('segment',
                  RandomIntervalSegmenter(n_intervals=n_intervals,
                                          check_input=False)),
                 ('transform',
                  FeatureUnion([('mean',
                                 RowwiseTransformer(
                                     FunctionTransformer(func=np.mean,
                                                         validate=False))),
                                ('std',
                                 RowwiseTransformer(
                                     FunctionTransformer(func=np.std,
                                                         validate=False)))])),
                 ('clf', DecisionTreeClassifier())]
        clf1 = Pipeline(steps, random_state=random_state)
        clf1.fit(X_train, y_train)
        a = clf1.predict(X_test)

        steps = [('transform',
                  RandomIntervalFeatureExtractor(n_intervals=n_intervals,
                                                 features=[np.mean, np.std])),
                 ('clf', DecisionTreeClassifier())]
        clf2 = Pipeline(steps, random_state=random_state)
        clf2.fit(X_train, y_train)
        b = clf2.predict(X_test)
        # BUG FIX: np.array_equal returns a bool that was silently discarded,
        # so the comparison was never asserted. Assert it explicitly.
        np.testing.assert_array_equal(a, b)
def test_pipeline_predictions(n_intervals, n_estimators):
    """Compare predictions of a fully modular pipeline against the
    RandomIntervalFeatureExtractor-based pipeline, outside any ensemble.

    :param n_intervals: interval setting forwarded to both pipelines.
    :param n_estimators: unused here — presumably consumed by a parametrize
        decorator outside this view; TODO confirm.
    """
    random_state = 1234

    # Due to tie-breaking/floating point rounding in the final decision tree classifier, the results depend on the
    # exact column order of the input data

    #  Compare pipeline predictions outside of ensemble.
    steps = [('segment', RandomIntervalSegmenter(n_intervals=n_intervals)),
             ('transform',
              FeatureUnion([('mean',
                             RowwiseTransformer(
                                 FunctionTransformer(func=np.mean,
                                                     validate=False))),
                            ('std',
                             RowwiseTransformer(
                                 FunctionTransformer(func=np.std,
                                                     validate=False))),
                            ('slope',
                             RowwiseTransformer(
                                 FunctionTransformer(func=time_series_slope,
                                                     validate=False)))])),
             ('clf', DecisionTreeClassifier())]
    clf1 = Pipeline(steps, random_state=random_state)
    clf1.fit(X_train, y_train)
    a = clf1.predict(X_test)

    steps = [('transform',
              RandomIntervalFeatureExtractor(
                  n_intervals=n_intervals,
                  features=[np.mean, np.std, time_series_slope])),
             ('clf', DecisionTreeClassifier())]
    clf2 = Pipeline(steps, random_state=random_state)
    clf2.fit(X_train, y_train)
    b = clf2.predict(X_test)
    # BUG FIX: np.array_equal returns a bool that was silently discarded,
    # so the comparison was never asserted. Assert it explicitly.
    np.testing.assert_array_equal(a, b)
# Example #9
# 0
def test_rowwise_transformer_transform_inverse_transform():
    """Row-wise StandardScaler round-trips through inverse_transform."""
    X, y = load_gunpoint(return_X_y=True)
    scaler = RowwiseTransformer(StandardScaler())
    transformed = scaler.fit_transform(X)
    recovered = scaler.inverse_transform(transformed)
    assert recovered.shape == X.shape
    # series-to-series: cells stay sequence-valued after the round trip
    assert isinstance(recovered.iloc[0, 0], (pd.Series, np.ndarray))
    np.testing.assert_array_almost_equal(tabularise(X).values,
                                         tabularise(recovered).values,
                                         decimal=5)
# Example #10
# 0
def test_FeatureUnion():
    """FeatureUnion of two row-wise transforms widens the frame by one
    column block per transformer."""
    X, y = load_gunpoint(return_X_y=True)
    mean_transformer = RowwiseTransformer(
        FunctionTransformer(func=np.mean, validate=False))
    std_transformer = RowwiseTransformer(
        FunctionTransformer(func=np.std, validate=False))
    fu = FeatureUnion([('mean', mean_transformer),
                       ('std', std_transformer)])
    Xt = fu.fit_transform(X, y)
    expected_n_columns = X.shape[1] * len(fu.transformer_list)
    assert Xt.shape == (X.shape[0], expected_n_columns)
# Example #11
# 0
def test_rowwise_transformer_sklearn_transfomer():
    """Row-wise StandardScaler standardises each cell's series to
    zero mean and unit standard deviation."""
    mu = 10
    sd = 5
    # BUG FIX: `sd` was defined but a literal 5 was passed as scale,
    # leaving the variable unused; pass sd so the intent is explicit.
    X = generate_df_from_array(
        np.random.normal(loc=mu, scale=sd, size=(100,)),
        n_rows=10, n_cols=1)
    scaler = StandardScaler(with_mean=True, with_std=True)
    r = RowwiseTransformer(scaler)

    Xt = r.fit_transform(X)
    assert Xt.shape == X.shape
    assert isinstance(Xt.iloc[0, 0], (pd.Series, np.ndarray))  # check series-to-series transform
    np.testing.assert_almost_equal(Xt.iloc[0, 0].mean(), 0)  # check standardisation
    np.testing.assert_almost_equal(Xt.iloc[0, 0].std(), 1, decimal=2)
# Example #12
# 0
def test_rowwise_transformer_function_transformer_series_to_series():
    """Row-wise FunctionTransformer keeps series-valued cells series-valued."""
    X, y = load_gunpoint(return_X_y=True)

    def powerspectrum(x):
        # series-to-series transform: power of the first half of the FFT
        fft = np.fft.fft(x)
        ps = fft.real * fft.real + fft.imag * fft.imag
        return ps[:ps.shape[0] // 2]

    transformer = RowwiseTransformer(
        FunctionTransformer(func=powerspectrum, validate=False))
    Xt = transformer.fit_transform(X, y)
    assert Xt.shape == X.shape
    # cells remain sequences, not scalars
    assert isinstance(Xt.iloc[0, 0], (pd.Series, np.ndarray))
# Example #13
# 0
def test_RowwiseTransformer_pipeline():
    """sktime RowwiseTransformer inside an sklearn ColumnTransformer should
    reproduce a pure-sklearn pipeline's predictions.

    NOTE(review): relies on X_train/y_train/X_test being defined at module
    level elsewhere in this file — confirm the fixtures exist.
    """

    # PEP8 E731: named lambdas replaced with proper defs.
    def mean_func(X):
        return pd.DataFrame([np.mean(row) for row in X])

    def first_func(X):
        return pd.DataFrame([row[0] for row in X])

    # using pure sklearn
    column_transformer = ColumnTransformer(
        [('mean', FunctionTransformer(func=mean_func, validate=False), 'ts'),
         ('first', FunctionTransformer(func=first_func, validate=False), 'ts_copy')])
    estimator = RandomForestClassifier(n_estimators=2, random_state=1)
    model = Pipeline(steps=[('feature_extract', column_transformer),
                            ('rfestimator', estimator)])
    model.fit(X_train, y_train)
    expected = model.predict(X_test)

    # using sktime with sklearn pipeline; reuse first_func (the original
    # redefined an identical lambda here).
    column_transformer = ColumnTransformer(
        [('mean', RowwiseTransformer(FunctionTransformer(func=np.mean, validate=False)), 'ts'),
         ('first', FunctionTransformer(func=first_func, validate=False), 'ts_copy')])
    estimator = RandomForestClassifier(n_estimators=2, random_state=1)
    model = Pipeline(steps=[('feature_extract', column_transformer),
                            ('rfestimator', estimator)])
    model.fit(X_train, y_train)
    got = model.predict(X_test)
    np.testing.assert_array_equal(expected, got)
# Example #14
# 0
def test_FeatureUnion_pipeline():
    """Segmentation plus multi-feature extraction plus a tree classifier
    inside one pipeline produces label-compatible predictions."""
    union = FeatureUnion([
        ('mean', RowwiseTransformer(
            FunctionTransformer(func=np.mean, validate=False))),
        ('std', RowwiseTransformer(
            FunctionTransformer(func=np.std, validate=False))),
    ])
    clf = Pipeline([
        ('segment', RandomIntervalSegmenter(n_intervals=3, check_input=False)),
        ('transform', union),
        ('clf', DecisionTreeClassifier()),
    ])

    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # One prediction per test instance, drawn from the same label set.
    assert y_pred.shape[0] == y_test.shape[0]
    np.testing.assert_array_equal(np.unique(y_pred), np.unique(y_test))
def test_different_implementations():
    """Chained segment+mean transforms must match the combined
    RandomIntervalFeatureExtractor."""
    seed = 1233
    X_train, y_train = load_gunpoint(return_X_y=True)

    # Chained transformations.
    segmenter = RandomIntervalSegmenter(n_intervals='sqrt',
                                        random_state=seed)
    mean_transformer = RowwiseTransformer(
        FunctionTransformer(func=np.mean, validate=False))
    chained = mean_transformer.fit_transform(
        segmenter.fit_transform(X_train))

    # Combined extractor should produce the same features.
    extractor = RandomIntervalFeatureExtractor(n_intervals='sqrt',
                                               features=[np.mean],
                                               random_state=seed)
    combined = extractor.fit_transform(X_train)

    np.testing.assert_array_equal(chained, combined)
# Example #16
# 0
def set_classifier(cls, resampleId):
    """Basic way of determining the classifier to build.

    To differentiate settings just add another elif. So, for example, if you
    wanted tuned TSF, you just pass TuneTSF and set up the tuning mechanism in
    the elif. This may well get superseded; it is just how we have always done
    it.

    :param cls: string indicating which classifier you want (case-insensitive).
    :param resampleId: random seed passed to classifiers that accept one.
    :return: an unfitted classifier instance.
    :raises ValueError: if cls does not name a known classifier.
    """
    name = cls.lower()  # normalise once instead of per-branch
    if name == 'pf':
        return pf.ProximityForest(random_state=resampleId)
    elif name == 'pt':
        return pf.ProximityTree(random_state=resampleId)
    elif name == 'ps':
        return pf.ProximityStump(random_state=resampleId)
    elif name == 'rise':
        return fb.RandomIntervalSpectralForest(random_state=resampleId)
    elif name == 'tsf':
        return ib.TimeSeriesForest(random_state=resampleId)
    elif name == 'boss':
        return db.BOSSEnsemble()
    elif name == 'st':
        return st.ShapeletTransformClassifier(time_contract_in_mins=1500)
    elif name == 'dtwcv':
        return nn.KNeighborsTimeSeriesClassifier(metric="dtwcv")
    elif name in ('ee', 'elasticensemble'):
        return dist.ElasticEnsemble()
    elif name == 'tsfcomposite':
        # It defaults to TSF.
        return ensemble.TimeSeriesForestClassifier()
    elif name == 'risecomposite':
        # RISE as an explicit pipeline: segment, ACF + power spectrum
        # features, tabularise, decision tree — bagged in a forest.
        steps = [
            ('segment', RandomIntervalSegmenter(n_intervals=1, min_length=5)),
            ('transform', FeatureUnion([
                ('acf', RowwiseTransformer(FunctionTransformer(func=acf_coefs, validate=False))),
                ('ps', RowwiseTransformer(FunctionTransformer(func=powerspectrum, validate=False)))
            ])),
            ('tabularise', Tabulariser()),
            ('clf', DecisionTreeClassifier())
        ]
        base_estimator = Pipeline(steps)
        return ensemble.TimeSeriesForestClassifier(base_estimator=base_estimator, n_estimators=100)
    # ValueError is more specific than bare Exception and remains
    # catchable by any caller that caught Exception before.
    raise ValueError('UNKNOWN CLASSIFIER')
# Example #17
# 0
def tsf_benchmarking():
    """Benchmark TSF over every benchmark dataset.

    Runs the native TimeSeriesForest and a composite, pipeline-based TSF
    equivalent; results are written to results_dir via exp.run_experiment.
    Returns nothing.
    """
    # enumerate instead of range(len(...)) — same order, clearer intent.
    for i, dataset in enumerate(benchmark_datasets):
        print(str(i) + " problem = " + dataset)
        # Native time-series forest.
        tsf = ib.TimeSeriesForest(n_trees=100)
        exp.run_experiment(overwrite=False,
                           datasets_dir_path=data_dir,
                           results_path=results_dir,
                           cls_name="PythonTSF",
                           classifier=tsf,
                           dataset=dataset,
                           train_file=False)
        # Composite TSF: interval segmentation + mean/std/slope features
        # + decision tree, bagged in a time-series forest.
        steps = [
            ('segment', RandomIntervalSegmenter(n_intervals='sqrt')),
            ('transform',
             FeatureUnion([('mean',
                            RowwiseTransformer(
                                FunctionTransformer(func=np.mean,
                                                    validate=False))),
                           ('std',
                            RowwiseTransformer(
                                FunctionTransformer(func=np.std,
                                                    validate=False))),
                           ('slope',
                            RowwiseTransformer(
                                FunctionTransformer(func=time_series_slope,
                                                    validate=False)))])),
            ('clf', DecisionTreeClassifier())
        ]
        base_estimator = Pipeline(steps)
        # Distinct name so the native model above is not shadowed.
        tsf_composite = TimeSeriesForestClassifier(
            base_estimator=base_estimator, n_estimators=100)
        exp.run_experiment(overwrite=False,
                           datasets_dir_path=data_dir,
                           results_path=results_dir,
                           cls_name="PythonTSFComposite",
                           classifier=tsf_composite,
                           dataset=dataset,
                           train_file=False)
# Example #18
# 0
def test_RowwiseTransformer_pipeline():
    """sktime RowwiseTransformer inside a ColumnTransformer must match a
    pure-sklearn equivalent on basic motions."""
    X_train, y_train = load_basic_motions("TRAIN", return_X_y=True)
    X_test, y_test = load_basic_motions("TEST", return_X_y=True)

    # pure-sklearn reference implementations
    def rowwise_mean(X):
        if isinstance(X, pd.Series):
            X = pd.DataFrame(X)
        columns = [pd.Series(col.apply(np.mean)) for _, col in X.items()]
        return pd.concat(columns, axis=1)

    def rowwise_first(X):
        if isinstance(X, pd.Series):
            X = pd.DataFrame(X)
        columns = [pd.Series(tabularise(col).iloc[:, 0])
                   for _, col in X.items()]
        return pd.concat(columns, axis=1)

    def build_model(mean_transformer):
        # specify column as a list, otherwise pandas Series are selected
        # and passed on to the transformers
        extractor = ColumnTransformer([
            ('mean', mean_transformer, ['dim_0']),
            ('first', FunctionTransformer(func=rowwise_first, validate=False),
             ['dim_1']),
        ])
        classifier = RandomForestClassifier(n_estimators=2, random_state=1)
        return Pipeline(steps=[('extract', extractor),
                               ('classify', classifier)])

    # using pure sklearn
    model = build_model(FunctionTransformer(func=rowwise_mean, validate=False))
    model.fit(X_train, y_train)
    expected = model.predict(X_test)

    # using sktime with sklearn pipeline
    model = build_model(RowwiseTransformer(
        FunctionTransformer(func=np.mean, validate=False)))
    model.fit(X_train, y_train)
    actual = model.predict(X_test)
    np.testing.assert_array_equal(expected, actual)
# Example #19
# 0
def test_Pipeline_random_state():
    """random_state propagates from a Pipeline to all of its components and
    makes fitted results reproducible."""
    steps = [('transform', RandomIntervalFeatureExtractor(features=[np.mean])),
             ('clf', DecisionTreeClassifier())]
    pipe = Pipeline(steps)

    # A freshly built pipeline carries no random_state...
    assert pipe.random_state is None
    assert pipe.get_params()['random_state'] is None

    # ...and neither do any of its components.
    for _, estimator in pipe.steps:
        assert estimator.random_state is None
        assert estimator.get_params()['random_state'] is None

    # Setting random_state pushes it down into every random component.
    rs = 1234
    pipe.set_params(**{'random_state': rs})

    assert pipe.random_state == rs
    assert pipe.get_params()['random_state'] == rs

    for _, estimator in pipe.steps:
        assert estimator.random_state == rs
        assert estimator.get_params()['random_state'] == rs

    # Check specific results
    X_train, y_train = load_gunpoint(return_X_y=True)
    X_test, y_test = load_gunpoint("TEST", return_X_y=True)

    steps = [
        ('segment', RandomIntervalSegmenter(n_intervals=3)),
        ('extract', RowwiseTransformer(
            FunctionTransformer(func=np.mean, validate=False))),
        ('clf', DecisionTreeClassifier()),
    ]
    pipe = Pipeline(steps, random_state=rs)
    pipe.fit(X_train, y_train)
    y_pred_first = pipe.predict(X_test)
    N_ITER = 10
    # Refitting with the same seed must reproduce identical predictions.
    for _ in range(N_ITER):
        repeat = Pipeline(steps, random_state=rs)
        repeat.fit(X_train, y_train)
        np.testing.assert_array_equal(y_pred_first, repeat.predict(X_test))
    return acf(x, nlags=nlags).ravel()


def powerspectrum(x, **kwargs):
    """Return the first half of the power spectrum of *x* as a 1-D array.

    The input is flattened, transformed with the FFT, and the squared
    magnitude (real^2 + imag^2) of the lower half of the spectrum is
    returned. Extra keyword arguments are accepted and ignored.
    """
    series = np.asarray(x).ravel()
    spectrum = np.fft.fft(series)
    power = spectrum.real ** 2 + spectrum.imag ** 2
    half = power.shape[0] // 2
    return power[:half].ravel()


# RISE-style feature extraction: one whole-series interval, then AR
# coefficients, autocorrelation coefficients and power spectrum per row,
# tabularised and fed to a decision tree.
rise_steps = [
    ('segment', RandomIntervalSegmenter(n_intervals=1, min_length=5)),
    ('transform', FeatureUnion([
        ('ar', RowwiseTransformer(
            FunctionTransformer(func=ar_coefs, validate=False))),
        ('acf', RowwiseTransformer(
            FunctionTransformer(func=acf_coefs, validate=False))),
        ('ps', RowwiseTransformer(
            FunctionTransformer(func=powerspectrum, validate=False))),
    ])),
    ('tabularise', Tabulariser()),
    ('clf', DecisionTreeClassifier()),
]
base_estimator = Pipeline(rise_steps)
# ('RISE', TimeSeriesForestClassifier(base_estimator=base_estimator, n_estimators=100, bootstrap=True)),
classifiers = [
    ('TimeSeriesForest', TimeSeriesForest()),
    ('ProximityForest', ProximityForest(n_trees=100)),
    ('BOSS', BOSSEnsemble()),
    ('RandomIntervalSpectralForest', RandomIntervalSpectralForest()),
]