def test_min_length(n_intervals, min_length):
    series_len = 30
    x = np.arange(series_len)

    tran = RandomIntervalSegmenter(n_intervals=n_intervals, min_length=min_length)
    intervals = tran._rand_intervals_fixed_n(x, n_intervals=n_intervals)
    starts = intervals[:, 0]
    ends = intervals[:, 1]
    assert np.all(ends - starts >= min_length)  # minimum length
def test_rand_intervals_rand_n(random_state):
    tran = RandomIntervalSegmenter(random_state=random_state)
    series_len = 30
    x = np.arange(series_len)

    intervals = tran._rand_intervals_rand_n(x)
    assert intervals.ndim == 2
    assert np.issubdtype(intervals.dtype, np.integer)
    # assert intervals.shape[0] == np.unique(intervals, axis=0).shape[0]  # no duplicates

    starts = intervals[:, 0]
    ends = intervals[:, 1]
    assert np.all(ends <= x.size)  # within bounds
    assert np.all(starts >= 0)  # within bounds
    assert np.all(ends > starts)  # only non-empty intervals
Example #3
0
def test_heterogenous_pipeline_column_ensmbler():
    X_train, y_train = load_basic_motions("TRAIN", return_X_y=True)
    X_test, y_test = load_basic_motions("TEST", return_X_y=True)

    n_intervals = 3

    steps = [('segment', RandomIntervalSegmenter(n_intervals=n_intervals)),
             ('transform',
              FeatureUnion([('mean',
                             RowwiseTransformer(
                                 FunctionTransformer(func=np.mean,
                                                     validate=False))),
                            ('std',
                             RowwiseTransformer(
                                 FunctionTransformer(func=np.std,
                                                     validate=False)))])),
             ('clf', DecisionTreeClassifier())]
    clf1 = Pipeline(steps, random_state=1)

    # dims 0-3 with alternating classifiers.
    ct = ColumnEnsembleClassifier([
        ("RandomIntervalTree", clf1, [0]),
        ("KNN4", KNNTSC(n_neighbors=1), [4]),
        ("BOSSEnsemble1 ", BOSSEnsemble(ensemble_size=3), [1]),
        ("KNN2", KNNTSC(n_neighbors=1), [2]),
        ("BOSSEnsemble3", BOSSEnsemble(ensemble_size=3), [3]),
    ])

    ct.fit(X_train, y_train)
    ct.score(X_test, y_test)
def test_different_pipelines():
    random_seed = 1233
    X_train, y_train = load_gunpoint(return_X_y=True)
    steps = [
        ('segment', RandomIntervalSegmenter(n_intervals='sqrt')),
        ('transform',
         FeatureUnion([
             ('mean',
              RowwiseTransformer(
                  FunctionTransformer(func=np.mean, validate=False))),
             ('std',
              RowwiseTransformer(
                  FunctionTransformer(func=np.std, validate=False))),
             ('slope',
              RowwiseTransformer(
                  FunctionTransformer(func=time_series_slope,
                                      validate=False))),
         ])),
    ]
    pipe = Pipeline(steps, random_state=random_seed)
    a = pipe.fit_transform(X_train)
    tran = RandomIntervalFeatureExtractor(
        n_intervals='sqrt',
        features=[np.mean, np.std, time_series_slope],
        random_state=random_seed)
    b = tran.fit_transform(X_train)
    np.testing.assert_array_equal(a, b)
    np.testing.assert_array_equal(pipe.steps[0][1].intervals_, tran.intervals_)
def rise_benchmarking():
    for i in range(0, len(benchmark_datasets)):
        dataset = benchmark_datasets[i]
        print(str(i) + " problem = " + dataset)
        rise = fb.RandomIntervalSpectralForest(n_trees=100)
        exp.run_experiment(overwrite=True,
                           datasets_dir_path=data_dir,
                           results_path=results_dir,
                           cls_name="PythonRISE",
                           classifier=rise,
                           dataset=dataset,
                           train_file=False)
        steps = [('segment',
                  RandomIntervalSegmenter(n_intervals=1, min_length=5)),
                 ('transform',
                  FeatureUnion([('acf',
                                 RowwiseTransformer(
                                     FunctionTransformer(func=acf_coefs,
                                                         validate=False))),
                                ('ps',
                                 RowwiseTransformer(
                                     FunctionTransformer(func=powerspectrum,
                                                         validate=False)))])),
                 ('tabularise', Tabulariser()),
                 ('clf', DecisionTreeClassifier())]
        base_estimator = Pipeline(steps)
        rise = TimeSeriesForestClassifier(base_estimator=base_estimator,
                                          n_estimators=100)
        exp.run_experiment(overwrite=True,
                           datasets_dir_path=data_dir,
                           results_path=results_dir,
                           cls_name="PythonRISEComposite",
                           classifier=rise,
                           dataset=dataset,
                           train_file=False)
def test_pipeline_predictions(n_intervals, n_estimators):
    random_state = 1234

    # Due to tie-breaking/floating point rounding in the final decision tree classifier, the results depend on the
    # exact column order of the input data

    #  Compare pipeline predictions outside of ensemble.
    steps = [('segment', RandomIntervalSegmenter(n_intervals=n_intervals)),
             ('transform',
              FeatureUnion([('mean',
                             RowwiseTransformer(
                                 FunctionTransformer(func=np.mean,
                                                     validate=False))),
                            ('std',
                             RowwiseTransformer(
                                 FunctionTransformer(func=np.std,
                                                     validate=False))),
                            ('slope',
                             RowwiseTransformer(
                                 FunctionTransformer(func=time_series_slope,
                                                     validate=False)))])),
             ('clf', DecisionTreeClassifier())]
    clf1 = Pipeline(steps, random_state=random_state)
    clf1.fit(X_train, y_train)
    a = clf1.predict(X_test)

    steps = [('transform',
              RandomIntervalFeatureExtractor(
                  n_intervals=n_intervals,
                  features=[np.mean, np.std, time_series_slope])),
             ('clf', DecisionTreeClassifier())]
    clf2 = Pipeline(steps, random_state=random_state)
    clf2.fit(X_train, y_train)
    b = clf2.predict(X_test)
    np.array_equal(a, b)
def test_different_implementations():
    random_seed = 1233
    X_train, y_train = load_gunpoint(return_X_y=True)

    # Compare with chained transformations.
    tran1 = RandomIntervalSegmenter(n_intervals='sqrt',
                                    random_state=random_seed)
    tran2 = RowwiseTransformer(
        FunctionTransformer(func=np.mean, validate=False))
    A = tran2.fit_transform(tran1.fit_transform(X_train))

    tran = RandomIntervalFeatureExtractor(n_intervals='sqrt',
                                          features=[np.mean],
                                          random_state=random_seed)
    B = tran.fit_transform(X_train)

    np.testing.assert_array_equal(A, B)
def test_output_format_dim(len_series, n_instances, n_intervals):
    X = generate_df_from_array(np.ones(len_series), n_rows=n_instances, n_cols=1)

    trans = RandomIntervalSegmenter(n_intervals=n_intervals)
    Xt = trans.fit_transform(X)

    # Check number of rows and output type.
    assert isinstance(Xt, pd.DataFrame)
    assert Xt.shape[0] == X.shape[0]

    # Check number of generated intervals/columns.
    if n_intervals != 'random':
        if np.issubdtype(type(n_intervals), np.floating):
            assert Xt.shape[1] == np.maximum(1, int(len_series * n_intervals))
        elif np.issubdtype(type(n_intervals), np.integer):
            assert Xt.shape[1] == n_intervals
        elif n_intervals == 'sqrt':
            assert Xt.shape[1] == np.maximum(1, int(np.sqrt(len_series)))
        elif n_intervals == 'log':
            assert Xt.shape[1] == np.maximum(1, int(np.log(len_series)))
def test_random_state():
    X = generate_df_from_array(np.random.normal(size=10))
    random_state = 1234

    for n_intervals in [0.5, 10, 'sqrt', 'random', 'log']:
        trans = RandomIntervalSegmenter(n_intervals=n_intervals, random_state=random_state)
        first_Xt = trans.fit_transform(X)
        for _ in range(N_ITER):
            trans = RandomIntervalSegmenter(n_intervals=n_intervals, random_state=random_state)
            Xt = trans.fit_transform(X)
            np.testing.assert_array_equal(tabularize(first_Xt).values, tabularize(Xt).values)
Example #10
0
def test_Pipeline_random_state():
    steps = [('transform', RandomIntervalFeatureExtractor(features=[np.mean])),
             ('clf', DecisionTreeClassifier())]
    pipe = Pipeline(steps)

    # Check that pipe is initiated without random_state
    assert pipe.random_state is None
    assert pipe.get_params()['random_state'] is None

    # Check that all components are initiated without random_state
    for step in pipe.steps:
        assert step[1].random_state is None
        assert step[1].get_params()['random_state'] is None

    # Check that if random state is set, it's set to itself and all its random components
    rs = 1234
    pipe.set_params(**{'random_state': rs})

    assert pipe.random_state == rs
    assert pipe.get_params()['random_state'] == rs

    for step in pipe.steps:
        assert step[1].random_state == rs
        assert step[1].get_params()['random_state'] == rs

    # Check specific results
    X_train, y_train = load_gunpoint(return_X_y=True)
    X_test, y_test = load_gunpoint("TEST", return_X_y=True)

    steps = [
        ('segment', RandomIntervalSegmenter(n_intervals=3)),
        ('extract',
         RowwiseTransformer(FunctionTransformer(func=np.mean,
                                                validate=False))),
        ('clf', DecisionTreeClassifier())
    ]
    pipe = Pipeline(steps, random_state=rs)
    pipe.fit(X_train, y_train)
    y_pred_first = pipe.predict(X_test)
    N_ITER = 10
    for _ in range(N_ITER):
        pipe = Pipeline(steps, random_state=rs)
        pipe.fit(X_train, y_train)
        y_pred = pipe.predict(X_test)
        np.testing.assert_array_equal(y_pred_first, y_pred)
Example #11
0
def set_classifier(cls, resampleId):
    """
    Basic way of determining the classifier to build. To differentiate settings just and another elif. So, for example, if
    you wanted tuned TSF, you just pass TuneTSF and set up the tuning mechanism in the elif.
    This may well get superceded, it is just how e have always done it
    :param cls: String indicating which classifier you want
    :return: A classifier.

    """
    if cls.lower() == 'pf':
        return pf.ProximityForest(random_state = resampleId)
    elif cls.lower() == 'pt':
        return pf.ProximityTree(random_state = resampleId)
    elif cls.lower() == 'ps':
        return pf.ProximityStump(random_state = resampleId)
    elif cls.lower() == 'rise':
        return fb.RandomIntervalSpectralForest(random_state = resampleId)
    elif  cls.lower() == 'tsf':
        return ib.TimeSeriesForest(random_state = resampleId)
    elif cls.lower() == 'boss':
        return db.BOSSEnsemble()
    elif cls.lower() == 'st':
        return st.ShapeletTransformClassifier(time_contract_in_mins=1500)
    elif cls.lower() == 'dtwcv':
        return nn.KNeighborsTimeSeriesClassifier(metric="dtwcv")
    elif cls.lower() == 'ee' or cls.lower() == 'elasticensemble':
        return dist.ElasticEnsemble()
    elif cls.lower() == 'tsfcomposite':
        #It defaults to TSF
        return ensemble.TimeSeriesForestClassifier()
    elif cls.lower() == 'risecomposite':
        steps = [
            ('segment', RandomIntervalSegmenter(n_intervals=1, min_length=5)),
            ('transform', FeatureUnion([
                ('acf', RowwiseTransformer(FunctionTransformer(func=acf_coefs, validate=False))),
                ('ps', RowwiseTransformer(FunctionTransformer(func=powerspectrum, validate=False)))
            ])),
            ('tabularise', Tabulariser()),
            ('clf', DecisionTreeClassifier())
        ]
        base_estimator = Pipeline(steps)
        return ensemble.TimeSeriesForestClassifier(base_estimator=base_estimator, n_estimators=100)
    else:
        raise Exception('UNKNOWN CLASSIFIER')
Example #12
0
def test_FeatureUnion_pipeline():
    # pipeline with segmentation plus multiple feature extraction
    steps = [('segment', RandomIntervalSegmenter(n_intervals=3)),
             ('transform',
              FeatureUnion([('mean',
                             RowwiseTransformer(
                                 FunctionTransformer(func=np.mean,
                                                     validate=False))),
                            ('std',
                             RowwiseTransformer(
                                 FunctionTransformer(func=np.std,
                                                     validate=False)))])),
             ('clf', DecisionTreeClassifier())]
    clf = Pipeline(steps)

    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    assert y_pred.shape[0] == y_test.shape[0]
    np.testing.assert_array_equal(np.unique(y_pred), np.unique(y_test))
def tsf_benchmarking():
    for i in range(0, len(benchmark_datasets)):
        dataset = benchmark_datasets[i]
        print(str(i) + " problem = " + dataset)
        tsf = ib.TimeSeriesForest(n_trees=100)
        exp.run_experiment(overwrite=False,
                           datasets_dir_path=data_dir,
                           results_path=results_dir,
                           cls_name="PythonTSF",
                           classifier=tsf,
                           dataset=dataset,
                           train_file=False)
        steps = [
            ('segment', RandomIntervalSegmenter(n_intervals='sqrt')),
            ('transform',
             FeatureUnion([('mean',
                            RowwiseTransformer(
                                FunctionTransformer(func=np.mean,
                                                    validate=False))),
                           ('std',
                            RowwiseTransformer(
                                FunctionTransformer(func=np.std,
                                                    validate=False))),
                           ('slope',
                            RowwiseTransformer(
                                FunctionTransformer(func=time_series_slope,
                                                    validate=False)))])),
            ('clf', DecisionTreeClassifier())
        ]
        base_estimator = Pipeline(steps)
        tsf = TimeSeriesForestClassifier(base_estimator=base_estimator,
                                         n_estimators=100)
        exp.run_experiment(overwrite=False,
                           datasets_dir_path=data_dir,
                           results_path=results_dir,
                           cls_name="PythonTSFComposite",
                           classifier=tsf,
                           dataset=dataset,
                           train_file=False)
def acf_coefs(x, maxlag=100):
    x = np.asarray(x).ravel()
    nlags = np.minimum(len(x) - 1, maxlag)
    return acf(x, nlags=nlags).ravel()


def powerspectrum(x, **kwargs):
    x = np.asarray(x).ravel()
    fft = np.fft.fft(x)
    ps = fft.real * fft.real + fft.imag * fft.imag
    return ps[:ps.shape[0] // 2].ravel()


rise_steps = [
    ('segment', RandomIntervalSegmenter(n_intervals=1, min_length=5)),
    ('transform',
     FeatureUnion([
         ('ar',
          RowwiseTransformer(FunctionTransformer(func=ar_coefs,
                                                 validate=False))),
         ('acf',
          RowwiseTransformer(
              FunctionTransformer(func=acf_coefs, validate=False))),
         ('ps',
          RowwiseTransformer(
              FunctionTransformer(func=powerspectrum, validate=False)))
     ])), ('tabularise', Tabulariser()), ('clf', DecisionTreeClassifier())
]
base_estimator = Pipeline(rise_steps)
# ('RISE', TimeSeriesForestClassifier(base_estimator=base_estimator, n_estimators=100, bootstrap=True)),
def test_bad_input_args(bad_interval):
    X = generate_df_from_array(np.ones(10), n_rows=10, n_cols=2)
    with pytest.raises(ValueError):
        RandomIntervalSegmenter(n_intervals=bad_interval).fit(X)