Example #1
def test_TimeSeriesForest_predictions(n_estimators, n_intervals):
    random_state = 1234
    X_train, y_train = load_gunpoint(split="train", return_X_y=True)
    X_test, y_test = load_gunpoint(split="test", return_X_y=True)

    features = [np.mean, np.std, _slope]
    steps = [
        (
            "transform",
            RandomIntervalFeatureExtractor(
                random_state=random_state, features=features
            ),
        ),
        ("clf", DecisionTreeClassifier()),
    ]
    estimator = Pipeline(steps)

    clf1 = ComposableTimeSeriesForestClassifier(
        estimator=estimator, random_state=random_state, n_estimators=n_estimators
    )
    clf1.fit(X_train, y_train)
    a = clf1.predict_proba(X_test)

    # default, semi-modular implementation using
    # RandomIntervalFeatureExtractor internally
    clf2 = ComposableTimeSeriesForestClassifier(
        random_state=random_state, n_estimators=n_estimators
    )
    clf2.fit(X_train, y_train)
    b = clf2.predict_proba(X_test)

    np.testing.assert_array_equal(a, b)
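The test above takes n_estimators and n_intervals as arguments, which in a pytest suite would normally be supplied by parametrize decorators that this listing has dropped. A minimal sketch of such a decorator, with illustrative parameter values that are assumptions rather than the original module's:

import pytest


@pytest.mark.parametrize("n_estimators", [2, 5])
@pytest.mark.parametrize("n_intervals", [3, 5])
def test_TimeSeriesForest_predictions(n_estimators, n_intervals):
    ...  # body as shown in Example #1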
Example #2
def test_pipeline(network=catch22ForestClassifier()):
    '''
    slightly more generalised test with sktime pipelines
        load data,
        construct pipeline with classifier,
        fit,
        score
    '''

    print("Start test_pipeline()")

    from sktime.pipeline import Pipeline

    # just a simple (useless) pipeline

    steps = [('clf', network)]
    clf = Pipeline(steps)

    X_train, y_train = load_gunpoint(split='TRAIN', return_X_y=True)
    X_test, y_test = load_gunpoint(split='TEST', return_X_y=True)

    clf.fit(X_train[:10], y_train[:10])

    print(clf.score(X_test[:10], y_test[:10]))
    print("End test_pipeline()")
Example #3
def test_highLevelsktime(network=catch22ForestClassifier()):
    '''
    truly generalised test with sktime tasks/strategies
        load data, build task
        construct classifier, build strategy
        fit,
        score
    '''

    print("start test_highLevelsktime()")

    from sktime.highlevel.tasks import TSCTask
    from sktime.highlevel.strategies import TSCStrategy
    from sklearn.metrics import accuracy_score

    train = load_gunpoint(split='TRAIN')
    test = load_gunpoint(split='TEST')
    task = TSCTask(target='class_val', metadata=train)

    strategy = TSCStrategy(network)
    strategy.fit(task, train.iloc[:10])

    y_pred = strategy.predict(test.iloc[:10])
    y_test = test.iloc[:10][task.target].values.astype(float)
    print(accuracy_score(y_test, y_pred))

    print("End test_highLevelsktime()")
Example #4
def test_mrseql_on_gunpoint():
    # load training data
    X_train, y_train = load_gunpoint(split='train', return_X_y=True)
    X_test, y_test = load_gunpoint(split='test', return_X_y=True)

    sax_clf = MrSEQLClassifier(seql_mode='fs', symrep=['sax'])
    sfa_clf = MrSEQLClassifier(seql_mode='fs', symrep=['sfa'])
    ss_clf = MrSEQLClassifier(seql_mode='fs', symrep=['sax', 'sfa'])

    # fit training data
    sax_clf.fit(X_train, y_train)
    sfa_clf.fit(X_train, y_train)
    ss_clf.fit(X_train, y_train)

    # prediction
    sax_predicted = sax_clf.predict(X_test)
    sfa_predicted = sfa_clf.predict(X_test)
    ss_predicted = ss_clf.predict(X_test)

    # test feature space dimension
    # the multi-domain classifier (ss_clf) should produce as many features
    # as the others (sax_clf and sfa_clf) combined
    np.testing.assert_equal(
        ss_clf.ots_clf.coef_.shape[1],
        sfa_clf.ots_clf.coef_.shape[1] + sax_clf.ots_clf.coef_.shape[1])

    # test number of correct predictions
    np.testing.assert_equal((sax_predicted == y_test).sum(), 148)
    np.testing.assert_equal((sfa_predicted == y_test).sum(), 150)
    np.testing.assert_equal((ss_predicted == y_test).sum(), 150)
Example #5
def test_weasel_on_gunpoint():
    # load gunpoint data
    X_train, y_train = load_gunpoint(split="train", return_X_y=True)
    X_test, y_test = load_gunpoint(split="test", return_X_y=True)

    # train WEASEL
    weasel = WEASEL(random_state=1, binning_strategy="equi-depth")
    weasel.fit(X_train, y_train)

    score = weasel.score(X_test, y_test)
    # print(score)
    assert score >= 0.99
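These snippets omit their module-level imports. A likely set for the WEASEL example, assuming the current sktime package layout (module paths can differ between versions):

from sktime.classification.dictionary_based import WEASEL
from sktime.datasets import load_gunpoint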
Example #6
def test_drcif_on_gunpoint():
    # load gunpoint data
    X_train, y_train = load_gunpoint(split="train", return_X_y=True)
    X_test, y_test = load_gunpoint(split="test", return_X_y=True)
    indices = np.random.RandomState(0).permutation(10)

    # train DrCIF
    drcif = DrCIF(n_estimators=20, random_state=0)
    drcif.fit(X_train.iloc[indices], y_train[indices])

    # assert probabilities are the same
    probas = drcif.predict_proba(X_test.iloc[indices])
    testing.assert_array_equal(probas, drcif_gunpoint_probas)
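drcif_gunpoint_probas and the other *_gunpoint_probas arrays used in the following examples are hard-coded expected outputs defined elsewhere in the test modules. A hypothetical helper for regenerating such a constant, assuming DrCIF is importable from sktime.classification.interval_based (the helper itself is not sktime API):

import numpy as np

from sktime.classification.interval_based import DrCIF
from sktime.datasets import load_gunpoint


def print_expected_probas(estimator, n_cases=10):
    # Fit on the same fixed subsample as the test, then print the
    # probabilities so they can be pasted into the expected constant.
    X_train, y_train = load_gunpoint(split="train", return_X_y=True)
    X_test, _ = load_gunpoint(split="test", return_X_y=True)
    indices = np.random.RandomState(0).permutation(n_cases)

    estimator.fit(X_train.iloc[indices], y_train[indices])
    print(repr(estimator.predict_proba(X_test.iloc[indices])))


if __name__ == "__main__":
    print_expected_probas(DrCIF(n_estimators=20, random_state=0))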
Example #7
def test_cif_on_gunpoint():
    # load gunpoint data
    X_train, y_train = load_gunpoint(split="train", return_X_y=True)
    X_test, y_test = load_gunpoint(split="test", return_X_y=True)
    indices = np.random.RandomState(0).permutation(10)

    # train CIF
    cif = CanonicalIntervalForest(n_estimators=100, random_state=0)
    cif.fit(X_train.iloc[indices], y_train[indices])

    # assert probabilities are the same
    probas = cif.predict_proba(X_test.iloc[indices])
    testing.assert_array_equal(probas, cif_gunpoint_probas)
Example #8
def test_arsenal_on_gunpoint():
    # load gunpoint data
    X_train, y_train = load_gunpoint(split="train", return_X_y=True)
    X_test, y_test = load_gunpoint(split="test", return_X_y=True)
    indices = np.random.RandomState(0).permutation(10)

    # train Arsenal
    arsenal = Arsenal(num_kernels=1000, n_estimators=10, random_state=0)
    arsenal.fit(X_train.iloc[indices], y_train[indices])

    # assert probabilities are the same
    probas = arsenal.predict_proba(X_test.iloc[indices])
    testing.assert_array_equal(probas, arsenal_gunpoint_probas)
Example #9
def test_individual_tde_on_gunpoint():
    # load gunpoint data
    X_train, y_train = load_gunpoint(split="train", return_X_y=True)
    X_test, y_test = load_gunpoint(split="test", return_X_y=True)
    indices = np.random.RandomState(0).permutation(10)

    # train IndividualTDE
    indiv_tde = IndividualTDE(random_state=0)
    indiv_tde.fit(X_train.iloc[indices], y_train[indices])

    # assert probabilities are the same
    probas = indiv_tde.predict_proba(X_test.iloc[indices])
    testing.assert_array_equal(probas, individual_tde_gunpoint_probas)
Example #10
def test_rocket_on_gunpoint():
    # load gunpoint data
    X_train, y_train = load_gunpoint(split="train", return_X_y=True)
    X_test, y_test = load_gunpoint(split="test", return_X_y=True)
    indices = np.random.RandomState(0).permutation(10)

    # train ROCKET
    rocket = ROCKETClassifier(num_kernels=1000, random_state=0)
    rocket.fit(X_train.iloc[indices], y_train[indices])

    # assert probabilities are the same
    probas = rocket.predict_proba(X_test.iloc[indices])
    testing.assert_array_equal(probas, rocket_gunpoint_probas)
Example #11
def test_weasel_on_gunpoint():
    # load gunpoint data
    X_train, y_train = load_gunpoint(split="train", return_X_y=True)
    X_test, y_test = load_gunpoint(split="test", return_X_y=True)
    # indices = np.random.RandomState(0).permutation(10)

    # train WEASEL
    weasel = WEASEL(random_state=1379)
    weasel.fit(X_train, y_train)

    score = weasel.score(X_test, y_test)
    # print(score)
    assert score >= 0.99
Example #12
def test_signatures_on_gunpoint():
    # Load data
    X_train, y_train = load_gunpoint(split="train", return_X_y=True)

    # Fit a simple sig classifier
    clf = SignatureClassifier(random_state=0)
    clf.fit(X_train, y_train)

    # Test and check accuracy
    X_test, y_test = load_gunpoint(split="test", return_X_y=True)
    preds_test = clf.predict(X_test)
    accuracy = accuracy_score(preds_test, y_test)
    assert accuracy == 0.96
Example #13
def test_stsf_on_gunpoint():
    """Test of STSF on gun point."""
    # load gunpoint data
    X_train, y_train = load_gunpoint(split="train", return_X_y=True)
    X_test, y_test = load_gunpoint(split="test", return_X_y=True)
    indices = np.random.RandomState(0).permutation(10)

    stsf = SupervisedTimeSeriesForest(n_estimators=20, random_state=0)
    stsf.fit(X_train.iloc[indices], y_train[indices])

    # assert probabilities are the same
    probas = stsf.predict_proba(X_test.iloc[indices])
    testing.assert_array_equal(probas, stsf_gunpoint_probas)
Example #14
def test_tde_on_gunpoint():
    # load gunpoint data
    X_train, y_train = load_gunpoint(split='train', return_X_y=True)
    X_test, y_test = load_gunpoint(split='test', return_X_y=True)
    indices = np.random.RandomState(0).permutation(10)

    # train tde
    tde = TemporalDictionaryEnsemble(random_state=0)
    tde.fit(X_train.iloc[indices], y_train[indices])

    # assert probabilities are the same
    probas = tde.predict_proba(X_test.iloc[indices])
    testing.assert_array_equal(probas, tde_gunpoint_probas)
Example #15
def test_catch22_forest_classifier_on_gunpoint():
    # load gunpoint data
    X_train, y_train = load_gunpoint(split="train", return_X_y=True)
    X_test, y_test = load_gunpoint(split="test", return_X_y=True)
    indices = np.random.RandomState(0).permutation(10)

    # train c22f
    c22f = Catch22ForestClassifier(random_state=0)
    c22f.fit(X_train.iloc[indices], y_train[indices])

    # assert probabilities are the same
    probas = c22f.predict_proba(X_test.iloc[indices])
    testing.assert_array_equal(probas, catch22_forest_classifier_gunpoint_probas)
Example #16
def test_boss_on_gunpoint():
    # load gunpoint data
    X_train, y_train = load_gunpoint(split='train', return_X_y=True)
    X_test, y_test = load_gunpoint(split='test', return_X_y=True)
    indices = np.random.RandomState(0).permutation(10)

    # train boss
    boss = BOSSEnsemble(random_state=0)
    boss.fit(X_train.iloc[indices], y_train[indices])

    # assert probabilities are the same
    probas = boss.predict_proba(X_test.iloc[indices])
    testing.assert_array_equal(probas, boss_gunpoint_probas)
Example #17
def test_matrix_profile_classifier_on_gunpoint():
    """Test of MatrixProfileClassifier on gun point."""
    # load gunpoint data
    X_train, y_train = load_gunpoint(split="train", return_X_y=True)
    X_test, y_test = load_gunpoint(split="test", return_X_y=True)
    indices = np.random.RandomState(0).permutation(10)

    # train matrix profile classifier
    mpc = MatrixProfileClassifier(random_state=0)
    mpc.fit(X_train.iloc[indices], y_train[indices])

    # assert probabilities are the same
    probas = mpc.predict_proba(X_test.iloc[indices])
    testing.assert_array_equal(probas,
                               matrix_profile_classifier_gunpoint_probas)
Example #18
def test_cboss_on_gunpoint():
    # load gunpoint data
    X_train, y_train = load_gunpoint(split="train", return_X_y=True)
    X_test, y_test = load_gunpoint(split="test", return_X_y=True)
    indices = np.random.RandomState(0).permutation(10)

    # train cBOSS
    cboss = ContractableBOSS(n_parameter_samples=50,
                             max_ensemble_size=10,
                             random_state=0)
    cboss.fit(X_train.iloc[indices], y_train[indices])

    # assert probabilities are the same
    probas = cboss.predict_proba(X_test.iloc[indices])
    testing.assert_array_equal(probas, cboss_gunpoint_probas)
Example #19
def test_catch22_classifier_on_gunpoint():
    """Test of Catch22Classifier on gun point."""
    # load gunpoint data
    X_train, y_train = load_gunpoint(split="train", return_X_y=True)
    X_test, y_test = load_gunpoint(split="test", return_X_y=True)
    indices = np.random.RandomState(0).permutation(10)

    # train catch22 classifier
    rf = RandomForestClassifier(n_estimators=20)
    c22c = Catch22Classifier(random_state=0, estimator=rf)
    c22c.fit(X_train.iloc[indices], y_train[indices])

    # assert probabilities are the same
    probas = c22c.predict_proba(X_test.iloc[indices])
    testing.assert_array_equal(probas, catch22_classifier_gunpoint_probas)
Example #20
def test_row_transformer_function_transformer_series_to_primitives():
    X, y = load_gunpoint(return_X_y=True)
    ft = FunctionTransformer(func=np.mean, validate=False)
    t = RowTransformer(ft)
    Xt = t.fit_transform(X, y)
    assert Xt.shape == X.shape
    assert isinstance(Xt.iloc[0, 0], float)  # check series-to-primitive transforms
Example #21
def test_transformer():
    # load training data
    X, Y = load_gunpoint(split="train", return_X_y=True)

    word_length = 6
    alphabet_size = 4

    p = SFA(word_length=word_length, alphabet_size=alphabet_size,
            binning_method="equi-depth").fit(X, Y)
    print("Equi Depth")
    print(p.breakpoints)
    assert p.breakpoints.shape == (word_length, alphabet_size)
    assert np.equal(0, p.breakpoints[1, :-1]).all()  # imag component is 0

    p = SFA(word_length=word_length, alphabet_size=alphabet_size,
            binning_method="equi-width").fit(X, Y)
    print("Equi Width")
    print(p.breakpoints)
    assert p.breakpoints.shape == (word_length, alphabet_size)
    assert np.equal(0, p.breakpoints[1, :-1]).all()  # imag component is 0

    p = SFA(word_length=word_length, alphabet_size=alphabet_size,
            binning_method="information-gain").fit(X, Y)
    print("Information Gain")
    print(p.breakpoints)
    assert p.breakpoints.shape == (word_length, alphabet_size)

    print(p.breakpoints[1, :-1])
    assert np.equal(0, p.breakpoints[1, :-1]).all()  # imaginary component is 0
Example #22
def test_sfa_anova():
    # load training data
    X, Y = load_gunpoint(split="train", return_X_y=True)

    word_length = 6
    alphabet_size = 4

    for binning in ["information-gain", "equi-depth"]:
        print("SFA with ANOVA one-sided test")
        window_size = 32
        p = SFA(word_length=word_length, anova=True,
                alphabet_size=alphabet_size, window_size=window_size,
                binning_method=binning).fit(X, Y)

        print(p.breakpoints)
        print(p.support)
        print(p.dft_length)
        assert p.breakpoints.shape == (word_length, alphabet_size)

        print("SFA with first feq coefficients")
        p2 = SFA(word_length=word_length, anova=False,
                 alphabet_size=alphabet_size, window_size=window_size,
                 binning_method=binning).fit(X, Y)

        print(p2.breakpoints)
        print(p2.support)
        print(p2.dft_length)

        assert p.dft_length != p2.dft_length
        assert (p.breakpoints != p2.breakpoints).any()
Example #23
def test_typed_dict():
    # load training data
    X, y = load_gunpoint(split="train", return_X_y=True)

    word_length = 6
    alphabet_size = 4

    p = SFA(
        word_length=word_length,
        alphabet_size=alphabet_size,
        levels=2,
        typed_dict=True,
    )
    p.fit(X, y)
    word_list = p.bag_to_string(p.transform(X, y)[0][0])

    word_length = 6
    alphabet_size = 4

    p2 = SFA(
        word_length=word_length,
        alphabet_size=alphabet_size,
        levels=2,
        typed_dict=False,
    )
    p2.fit(X, y)
    word_list2 = p2.bag_to_string(p2.transform(X, y)[0][0])

    assert word_list == word_list2
Example #24
def test_sfa_anova(binning_method):
    # load training data
    X, y = load_gunpoint(split="train", return_X_y=True)

    word_length = 6
    alphabet_size = 4

    # SFA with ANOVA one-sided test
    window_size = 32
    p = SFA(
        word_length=word_length,
        anova=True,
        alphabet_size=alphabet_size,
        window_size=window_size,
        binning_method=binning_method,
    ).fit(X, y)

    assert p.breakpoints.shape == (word_length, alphabet_size)
    _ = p.transform(X, y)

    # SFA with first Fourier coefficients
    p2 = SFA(
        word_length=word_length,
        anova=False,
        alphabet_size=alphabet_size,
        window_size=window_size,
        binning_method=binning_method,
    ).fit(X, y)

    assert p.dft_length != p2.dft_length
    assert (p.breakpoints != p2.breakpoints).any()
    _ = p2.transform(X, y)
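Like Example #1, the binning_method argument above implies a pytest parametrization stripped from the listing. A plausible reconstruction, reusing the binning strategies looped over in Example #22 (the exact values are an assumption):

import pytest


@pytest.mark.parametrize("binning_method", ["equi-depth", "information-gain"])
def test_sfa_anova(binning_method):
    ...  # body as shown in Example #24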
Example #25
def test_different_implementations():
    random_seed = 1233
    X_train, y_train = load_gunpoint(return_X_y=True)

    # Compare with chained transformations.
    tran1 = RandomIntervalSegmenter(n_intervals='sqrt', random_state=random_seed)
    tran2 = RowwiseTransformer(FunctionTransformer(func=np.mean, validate=False))
    A = tran2.fit_transform(tran1.fit_transform(X_train))

    tran = RandomIntervalFeatureExtractor(n_intervals='sqrt', features=[np.mean], random_state=random_seed)
    B = tran.fit_transform(X_train)

    np.testing.assert_array_equal(A, B)

    # Compare with transformer pipeline using TSFeatureUnion.
    steps = [
        ('segment', RandomIntervalSegmenter(n_intervals='sqrt', check_input=False)),
        ('transform', TSFeatureUnion([
            ('mean', RowwiseTransformer(FunctionTransformer(func=np.mean, validate=False))),
            ('std', RowwiseTransformer(FunctionTransformer(func=np.std, validate=False))),
        ])),
    ]
    pipe = TSPipeline(steps, random_state=random_seed)
    a = pipe.fit_transform(X_train)
    n_ints = a.shape[1] // 2  # Rename columns for comparing re-ordered arrays.
    a.columns = [*a.columns[:n_ints] + '_mean', *a.columns[n_ints:n_ints * 2] + '_std']
    a = a.reindex(np.sort(a.columns), axis=1)

    tran = RandomIntervalFeatureExtractor(n_intervals='sqrt', features=[np.mean, np.std],
                                          random_state=random_seed)
    b = tran.fit_transform(X_train)
    b = b.reindex(np.sort(b.columns), axis=1)
    np.testing.assert_array_equal(a, b)
Example #26
def test_different_pipelines():
    random_seed = 1233
    X_train, y_train = load_gunpoint(return_X_y=True)
    steps = [
        ('segment', RandomIntervalSegmenter(n_intervals='sqrt')),
        ('transform',
         FeatureUnion([
             ('mean',
              RowwiseTransformer(
                  FunctionTransformer(func=np.mean, validate=False))),
             ('std',
              RowwiseTransformer(
                  FunctionTransformer(func=np.std, validate=False))),
             ('slope',
              RowwiseTransformer(
                  FunctionTransformer(func=time_series_slope,
                                      validate=False))),
         ])),
    ]
    pipe = Pipeline(steps, random_state=random_seed)
    a = pipe.fit_transform(X_train)
    tran = RandomIntervalFeatureExtractor(
        n_intervals='sqrt',
        features=[np.mean, np.std, time_series_slope],
        random_state=random_seed)
    b = tran.fit_transform(X_train)
    np.testing.assert_array_equal(a, b)
    np.testing.assert_array_equal(pipe.steps[0][1].intervals_, tran.intervals_)
Example #27
def test_FeatureUnion():
    X, y = load_gunpoint(return_X_y=True)
    feature_union = FeatureUnion([("mean", mean_transformer),
                                  ("std", std_transformer)])
    Xt = feature_union.fit_transform(X, y)
    assert Xt.shape == (X.shape[0],
                        X.shape[1] * len(feature_union.transformer_list))
Example #28
def test_dft_mft():
    # load training data
    X, Y = load_gunpoint(split="train", return_X_y=True)
    X_tab = tabularize(X, return_array=True)

    word_length = 6
    alphabet_size = 4

    print("Single DFT transformation")
    window_size = np.shape(X_tab)[1]
    p = SFA(word_length=word_length, alphabet_size=alphabet_size,
            window_size=window_size, binning_method="equi-depth").fit(X, Y)
    dft = p._discrete_fourier_transform(X_tab[0])
    mft = p._mft(X_tab[0])

    assert ((mft-dft < 0.0001).all())

    print("Windowed DFT transformation")

    for norm in [True, False]:
        for window_size in [140]:
            p = SFA(word_length=word_length, norm=norm,
                    alphabet_size=alphabet_size, window_size=window_size,
                    binning_method="equi-depth").fit(X, Y)
            mft = p._mft(X_tab[0])
            for i in range(len(X_tab[0]) - window_size + 1):
                dft_transformed = p._discrete_fourier_transform(
                    X_tab[0, i:window_size + i])
                assert (mft[i] - dft_transformed < 0.001).all()

            assert len(mft) == len(X_tab[0]) - window_size + 1
            assert len(mft[0]) == word_length
Example #29
def test_orchestration():
    data = load_gunpoint()

    dataset = DatasetRAM(dataset=data, dataset_name='gunpoint')
    task = TSCTask(target='class_val')

    # create strategies
    clf = TimeSeriesForestClassifier(n_estimators=1, random_state=1)
    strategy = TSCStrategy(clf)

    # result backend
    resultRAM = ResultRAM()
    orchestrator = Orchestrator(datasets=[dataset],
                                tasks=[task],
                                strategies=[strategy],
                                cv=SingleSplit(random_state=1),
                                result=resultRAM)

    orchestrator.run(save_strategies=False)
    result = resultRAM.load()
    actual = result[0].y_pred

    # expected output
    task = TSCTask(target='class_val')
    cv = SingleSplit(random_state=1)
    for train_idx, test_idx in cv.split(data):
        train = data.iloc[train_idx, :]
        test = data.iloc[test_idx, :]
        clf = TimeSeriesForestClassifier(n_estimators=1, random_state=1)
        strategy = TSCStrategy(clf)
        strategy.fit(task, train)
        expected = strategy.predict(test)

    np.testing.assert_array_equal(actual, expected)
Example #30
def test_tsfresh_classifier_on_gunpoint():
    """Test of TSFreshClassifier on gun point."""
    # load gunpoint data
    X_train, y_train = load_gunpoint(split="train", return_X_y=True)
    X_test, y_test = load_gunpoint(split="test", return_X_y=True)
    indices = np.random.RandomState(0).permutation(10)

    # train TSFresh classifier
    rf = RandomForestClassifier(n_estimators=20)
    tsfc = TSFreshClassifier(random_state=0,
                             default_fc_parameters="minimal",
                             estimator=rf)
    tsfc.fit(X_train.iloc[indices], y_train[indices])

    # assert probabilities are the same
    probas = tsfc.predict_proba(X_test.iloc[indices])
    testing.assert_array_equal(probas, tsfresh_classifier_gunpoint_probas)