Exemple #1
0
def test_prob_threshold_on_unit_test_data():
    """Test of ProbabilityThresholdEarlyClassifier on unit test data."""
    # load unit test data
    X_train, y_train = load_unit_test(split="train", return_X_y=True)
    X_test, y_test = load_unit_test(split="test", return_X_y=True)
    indices = np.random.RandomState(0).choice(len(y_train), 10, replace=False)

    # train probability threshold
    pt = ProbabilityThresholdEarlyClassifier(
        random_state=0,
        classification_points=[6, 16, 24],
        probability_threshold=1,
        estimator=TimeSeriesForestClassifier(n_estimators=10, random_state=0),
    )
    pt.fit(X_train, y_train)

    final_probas = np.zeros((10, 2))
    final_decisions = np.zeros(10)

    X_test = from_nested_to_3d_numpy(X_test)
    states = None
    for i in pt.classification_points:
        X = X_test[indices, :, :i]
        probas = pt.predict_proba(X)
        decisions, states = pt.decide_prediction_safety(X, probas, states)

        for n in range(10):
            if decisions[n] and final_decisions[n] == 0:
                final_probas[n] = probas[n]
                final_decisions[n] = i

    testing.assert_array_equal(final_probas, pt_unit_test_probas)
Exemple #2
0
def test_run_clustering_experiment():
    """Test running and saving results for clustering.

    Currently it just checks the files have been created, then deletes them.
    """
    dataset = "UnitTest"
    train_X, train_Y = load_unit_test("TRAIN", return_X_y=True)
    test_X, test_Y = load_unit_test("TEST", return_X_y=True)
    run_clustering_experiment(
        train_X,
        TimeSeriesKMeans(n_clusters=2),
        results_path="../Temp/",
        trainY=train_Y,
        testX=test_X,
        testY=test_Y,
        cls_name="kmeans",
        dataset_name=dataset,
        resample_id=0,
    )
    test_path = f"../Temp/kmeans/Predictions/{dataset}/testResample0.csv"
    train_path = f"../Temp/kmeans/Predictions/{dataset}/trainResample0.csv"
    assert os.path.isfile(test_path)
    assert os.path.isfile(train_path)
    os.remove(test_path)
    os.remove(train_path)
Exemple #3
0
def test_tsf_predictions(n_estimators, n_intervals):
    """Test TSF predictions."""
    random_state = 1234
    X_train, y_train = load_unit_test(split="train")
    X_test, y_test = load_unit_test(split="test")

    features = [np.mean, np.std, _slope]
    steps = [
        (
            "transform",
            RandomIntervalFeatureExtractor(random_state=random_state,
                                           features=features),
        ),
        ("clf", DecisionTreeClassifier()),
    ]
    estimator = Pipeline(steps)

    clf1 = ComposableTimeSeriesForestClassifier(estimator=estimator,
                                                random_state=random_state,
                                                n_estimators=n_estimators)
    clf1.fit(X_train, y_train)
    a = clf1.predict_proba(X_test)

    # default, semi-modular implementation using
    # RandomIntervalFeatureExtractor internally
    clf2 = ComposableTimeSeriesForestClassifier(random_state=random_state,
                                                n_estimators=n_estimators)
    clf2.fit(X_train, y_train)
    b = clf2.predict_proba(X_test)

    np.testing.assert_array_equal(a, b)
Exemple #4
0
def test_stc_on_unit_test_data():
    """Test of ShapeletTransformClassifier on unit test data."""
    # load unit test data
    X_train, y_train = load_unit_test(split="train", return_X_y=True)
    X_test, y_test = load_unit_test(split="test", return_X_y=True)
    indices = np.random.RandomState(0).choice(len(y_train), 10, replace=False)

    # train STC
    stc = ShapeletTransformClassifier(
        estimator=RotationForest(n_estimators=3),
        max_shapelets=20,
        n_shapelet_samples=500,
        batch_size=100,
        random_state=0,
        save_transformed_data=True,
    )
    stc.fit(X_train, y_train)

    # assert probabilities are the same
    probas = stc.predict_proba(X_test.iloc[indices])
    testing.assert_array_equal(probas, stc_unit_test_probas)

    # test train estimate
    train_probas = stc._get_train_probs(X_train, y_train)
    train_preds = stc.classes_[np.argmax(train_probas, axis=1)]
    assert accuracy_score(y_train, train_preds) >= 0.75
Exemple #5
0
def test_hivecote_v1_on_unit_test_data():
    """Test of HIVECOTEV1 on unit test data."""
    # load unit test data
    X_train, y_train = load_unit_test(split="train", return_X_y=True)
    X_test, y_test = load_unit_test(split="test", return_X_y=True)
    indices = np.random.RandomState(0).choice(len(y_train), 10, replace=False)

    # train HIVE-COTE v1
    hc1 = HIVECOTEV1(
        random_state=0,
        stc_params={
            "estimator": RotationForest(n_estimators=3),
            "n_shapelet_samples": 500,
            "max_shapelets": 20,
            "batch_size": 100,
        },
        tsf_params={"n_estimators": 10},
        rise_params={"n_estimators": 10},
        cboss_params={
            "n_parameter_samples": 25,
            "max_ensemble_size": 5
        },
    )
    hc1.fit(X_train.iloc[indices], y_train[indices])

    # assert probabilities are the same
    probas = hc1.predict_proba(X_test.iloc[indices])
    testing.assert_array_almost_equal(probas,
                                      hivecote_v1_unit_test_probas,
                                      decimal=2)
Exemple #6
0
def _reproduce_classification_unit_test(estimator):
    X_train, y_train = load_unit_test(split="train", return_X_y=True)
    X_test, y_test = load_unit_test(split="test", return_X_y=True)
    indices = np.random.RandomState(0).choice(len(y_train), 10, replace=False)

    estimator.fit(X_train, y_train)
    return estimator.predict_proba(X_test.iloc[indices])
    def test_classifier_on_unit_test_data(self, estimator_class):
        """Test classifier on unit test data."""
        # we only use the first estimator instance for testing
        classname = estimator_class.__name__

        # retrieve expected predict_proba output, and skip test if not available
        if classname in unit_test_proba.keys():
            expected_probas = unit_test_proba[classname]
        else:
            # skip test if no expected probas are registered
            return None

        # we only use the first estimator instance for testing
        estimator_instance = clone(
            estimator_class.create_test_instance(
                parameter_set="results_comparison"))
        # set random seed if possible
        if "random_state" in estimator_instance.get_params().keys():
            estimator_instance.set_params(random_state=0)

        # load unit test data
        X_train, y_train = load_unit_test(split="train")
        X_test, _ = load_unit_test(split="test")
        indices = np.random.RandomState(0).choice(len(y_train),
                                                  10,
                                                  replace=False)

        # train classifier and predict probas
        estimator_instance.fit(X_train, y_train)
        y_proba = estimator_instance.predict_proba(X_test.iloc[indices])

        # assert probabilities are the same
        _assert_array_almost_equal(y_proba, expected_probas, decimal=2)
def test_col_ens_on_unit_test_data():
    """Test of ColumnEnsembleClassifier on unit test data."""
    # load unit test data
    X_train, y_train = load_unit_test(split="train")
    X_test, y_test = load_unit_test(split="test")
    indices = np.random.RandomState(0).choice(len(y_train), 10, replace=False)

    # train Column ensemble with a single
    fp = FreshPRINCE(
        random_state=0,
        default_fc_parameters="minimal",
        n_estimators=10,
    )
    estimators = [("FreshPrince", fp, [0])]
    col_ens = ColumnEnsembleClassifier(estimators=estimators)
    col_ens.fit(X_train, y_train)
    # preds = col_ens.predict(X_test.iloc[indices])

    # assert preds[0] == 2
    # assert probabilities are the same
    probas = col_ens.predict_proba(X_test.iloc[indices])

    testing.assert_array_almost_equal(probas,
                                      col_ens_unit_test_probas,
                                      decimal=2)
Exemple #9
0
def test_run_clustering_experiment(tmp_path):
    """Test running and saving results for clustering.

    Currently it just checks the files have been created, then deletes them.
    """
    dataset = "UnitTest"
    train_X, train_Y = load_unit_test("TRAIN")
    test_X, test_Y = load_unit_test("TEST")
    run_clustering_experiment(
        train_X,
        TimeSeriesKMeans(n_clusters=2),
        results_path=tmp_path,
        trainY=train_Y,
        testX=test_X,
        testY=test_Y,
        cls_name="kmeans",
        dataset_name=dataset,
        resample_id=0,
    )
    test_path = tmp_path.joinpath(
        f"kmeans/Predictions/{dataset}/testResample0.csv")
    train_path = tmp_path.joinpath(
        f"kmeans/Predictions/{dataset}/trainResample0.csv")
    assert test_path.is_file()
    assert train_path.is_file()
    # remove files
    test_path.unlink()
    train_path.unlink()
Exemple #10
0
def test_run_classification_experiment(tmp_path):
    """Test running and saving results for classifiers.

    Currently it just checks the files have been created, then deletes them.
    """
    dataset = "UnitTest"
    train_X, train_Y = load_unit_test("TRAIN")
    test_X, test_Y = load_unit_test("TEST")
    run_classification_experiment(
        train_X,
        train_Y,
        test_X,
        test_Y,
        TimeSeriesForestClassifier(n_estimators=10),
        str(tmp_path),
        cls_name="TSF",
        dataset="UnitTest",
        resample_id=0,
        train_file=True,
    )
    test_path = tmp_path.joinpath(
        f"TSF/Predictions/{dataset}/testResample0.csv")
    train_path = tmp_path.joinpath(
        f"TSF/Predictions/{dataset}/trainResample0.csv")
    assert test_path.is_file()
    assert train_path.is_file()
    # remove files
    test_path.unlink()
    train_path.unlink()
Exemple #11
0
def test_cboss_on_unit_test_data():
    """Test of cBOSS on unit test data."""
    # load unit test data
    X_train, y_train = load_unit_test(split="train", return_X_y=True)
    X_test, y_test = load_unit_test(split="test", return_X_y=True)
    indices = np.random.RandomState(0).choice(len(y_train), 10, replace=False)

    # train cBOSS
    cboss = ContractableBOSS(
        n_parameter_samples=25,
        max_ensemble_size=5,
        random_state=0,
        save_train_predictions=True,
    )
    cboss.fit(X_train, y_train)

    # assert probabilities are the same
    probas = cboss.predict_proba(X_test.iloc[indices])
    testing.assert_array_almost_equal(probas,
                                      cboss_unit_test_probas,
                                      decimal=2)

    # test train estimate
    train_probas = cboss._get_train_probs(X_train, y_train)
    train_preds = cboss.classes_[np.argmax(train_probas, axis=1)]
    assert accuracy_score(y_train, train_preds) >= 0.75
Exemple #12
0
def test_hivecote_v2_on_unit_test():
    """Test of HIVECOTEV2 on unit test data."""
    # load unit test data
    X_train, y_train = load_unit_test(split="train", return_X_y=True)
    X_test, y_test = load_unit_test(split="test", return_X_y=True)
    indices = np.random.RandomState(0).choice(len(y_train), 10, replace=False)

    # train HIVE-COTE v2
    hc2 = HIVECOTEV2(
        random_state=0,
        stc_params={
            "estimator": RotationForest(n_estimators=3),
            "n_shapelet_samples": 500,
            "max_shapelets": 20,
            "batch_size": 100,
        },
        drcif_params={"n_estimators": 10},
        arsenal_params={
            "num_kernels": 100,
            "n_estimators": 5
        },
        tde_params={
            "n_parameter_samples": 10,
            "max_ensemble_size": 5,
            "randomly_selected_params": 5,
        },
    )
    hc2.fit(X_train.iloc[indices], y_train[indices])

    # assert probabilities are the same
    probas = hc2.predict_proba(X_test.iloc[indices])
    testing.assert_array_equal(probas, hivecote_v2_unit_test_probas)
Exemple #13
0
def test_tde_on_unit_test_data():
    """Test of TDE on unit test data."""
    # load unit test data
    X_train, y_train = load_unit_test(split="train", return_X_y=True)
    X_test, y_test = load_unit_test(split="test", return_X_y=True)
    indices = np.random.RandomState(0).choice(len(y_train), 10, replace=False)

    # train TDE
    tde = TemporalDictionaryEnsemble(
        n_parameter_samples=10,
        max_ensemble_size=5,
        randomly_selected_params=5,
        random_state=0,
    )
    tde.fit(X_train, y_train)

    # assert probabilities are the same
    probas = tde.predict_proba(X_test.iloc[indices]).round(6)
    testing.assert_array_equal(probas, tde_unit_test_probas)

    # test loocv train estimate
    train_probas = tde._get_train_probs(X_train, y_train)
    train_preds = tde.classes_[np.argmax(train_probas, axis=1)]
    assert accuracy_score(y_train, train_preds) >= 0.85

    # test oob estimate
    train_probas = tde._get_train_probs(X_train,
                                        y_train,
                                        train_estimate_method="oob")
    train_preds = tde.classes_[np.argmax(train_probas, axis=1)]
    assert accuracy_score(y_train, train_preds) >= 0.8
def test_fresh_prince_on_unit_test_data():
    """Test of FreshPRINCE on unit test data."""
    # load unit test data
    X_train, y_train = load_unit_test(split="train")
    X_test, y_test = load_unit_test(split="test")
    indices = np.random.RandomState(0).choice(len(y_train), 10, replace=False)

    # train FreshPRINCE classifier
    fp = FreshPRINCE(
        random_state=0,
        default_fc_parameters="minimal",
        n_estimators=10,
        save_transformed_data=True,
    )
    fp.fit(X_train, y_train)

    # assert probabilities are the same
    probas = fp.predict_proba(X_test.iloc[indices])
    testing.assert_array_almost_equal(probas,
                                      fp_classifier_unit_test_probas,
                                      decimal=2)

    # test train estimate
    train_probas = fp._get_train_probs(X_train, y_train)
    train_preds = fp.classes_[np.argmax(train_probas, axis=1)]
    assert accuracy_score(y_train, train_preds) >= 0.75
Exemple #15
0
def test_run_classification_experiment():
    """Test running and saving results for classifiers.

    Currently it just checks the files have been created, then deletes them.
    """
    dataset = "UnitTest"
    train_X, train_Y = load_unit_test("TRAIN", return_X_y=True)
    test_X, test_Y = load_unit_test("TEST", return_X_y=True)
    run_classification_experiment(
        train_X,
        train_Y,
        test_X,
        test_Y,
        TimeSeriesForestClassifier(n_estimators=10),
        "../Temp/",
        cls_name="TSF",
        dataset="UnitTest",
        resample_id=0,
        train_file=True,
    )
    test_path = f"../Temp/TSF/Predictions/{dataset}/testResample0.csv"
    train_path = f"../Temp/TSF/Predictions/{dataset}/trainResample0.csv"
    assert os.path.isfile(test_path)
    assert os.path.isfile(train_path)
    os.remove(test_path)
    os.remove(train_path)
def compare_classifiers():
    """Build pipeline classifiers and compare to published results."""
    # Data set list
    X_train, y_train = load_unit_test(split="train", return_X_y=True)
    X_test, y_test = load_unit_test(split="test", return_X_y=True)
    # Define Transformer pipeline
    print(y_train)
    print(type(y_train))
    print(type(y_train[0]))
Exemple #17
0
def test_contracted_arsenal_on_unit_test_data():
    """Test of contracted Arsenal on unit test data."""
    # load unit test data
    X_train, y_train = load_unit_test(split="train", return_X_y=True)
    X_test, y_test = load_unit_test(split="test", return_X_y=True)

    # train contracted DrCIF
    arsenal = Arsenal(time_limit_in_minutes=0.025, random_state=0)
    arsenal.fit(X_train, y_train)

    assert len(arsenal.estimators_) > 1
    assert accuracy_score(y_test, arsenal.predict(X_test)) >= 0.8
Exemple #18
0
def test_contracted_drcif_on_unit_test_data():
    """Test of contracted DrCIF on unit test data."""
    # load unit test data
    X_train, y_train = load_unit_test(split="train", return_X_y=True)
    X_test, y_test = load_unit_test(split="test", return_X_y=True)

    # train contracted DrCIF
    drcif = DrCIF(time_limit_in_minutes=0.025, random_state=0)
    drcif.fit(X_train, y_train)

    assert len(drcif.estimators_) > 1
    assert accuracy_score(y_test, drcif.predict(X_test)) >= 0.8
Exemple #19
0
def test_tsf_on_unit_test_data():
    """Test of TimeSeriesForestClassifier on unit test data."""
    # load unit test data
    X_train, y_train = load_unit_test(split="train")
    X_test, y_test = load_unit_test(split="test")
    indices = np.random.RandomState(0).choice(len(y_train), 10, replace=False)

    # train TSF
    tsf = TimeSeriesForestClassifier(n_estimators=10, random_state=0)
    tsf.fit(X_train, y_train)
    # assert probabilities are the same
    probas = tsf.predict_proba(X_test.iloc[indices])
    testing.assert_array_equal(probas, tsf_unit_test_probas)
Exemple #20
0
def test_stsf_on_unit_test_data():
    """Test of SupervisedTimeSeriesForest on unit test data."""
    # load unit test data
    X_train, y_train = load_unit_test(split="train", return_X_y=True)
    X_test, y_test = load_unit_test(split="test", return_X_y=True)
    indices = np.random.RandomState(0).choice(len(y_train), 10, replace=False)

    # train STSF
    stsf = SupervisedTimeSeriesForest(n_estimators=10, random_state=0)
    stsf.fit(X_train, y_train)

    # assert probabilities are the same
    probas = stsf.predict_proba(X_test.iloc[indices])
    testing.assert_array_equal(probas, stsf_unit_test_probas)
Exemple #21
0
def test_risf_on_unit_test_data():
    """Test of RandomIntervalSpectralEnsemble on unit test data."""
    # load unit test data
    X_train, y_train = load_unit_test(split="train")
    X_test, y_test = load_unit_test(split="test")
    indices = np.random.RandomState(0).choice(len(y_train), 10, replace=False)

    # train RISE
    rise = RandomIntervalSpectralForest(n_estimators=10, random_state=0)
    rise.fit(X_train, y_train)

    # assert probabilities are the same
    probas = rise.predict_proba(X_test.iloc[indices])
    testing.assert_array_equal(probas, rise_unit_test_probas)
Exemple #22
0
def test_dtc_on_unit_test_data():
    """Test of CanonicalIntervalForest on unit test data."""
    # load unit test data
    X_train, y_train = load_unit_test(split="train")
    X_test, y_test = load_unit_test(split="test")
    indices = np.random.RandomState(0).choice(len(y_train), 10, replace=False)

    # train CIF with the sklearn decision tree classifier
    cif = CanonicalIntervalForest(n_estimators=10,
                                  base_estimator="dtc",
                                  random_state=0)
    cif.fit(X_train, y_train)

    cif.predict_proba(X_test.iloc[indices])
def test_knn_on_unit_test():
    """Test function for elastic knn, to be reinstated soon."""
    # load arrowhead data for unit tests
    X_train, y_train = load_unit_test(split="train", return_X_y=True)
    X_test, y_test = load_unit_test(split="test", return_X_y=True)
    for i in range(0, len(distance_functions)):
        knn = KNeighborsTimeSeriesClassifier(distance=distance_functions[i], )
        knn.fit(X_train, y_train)
        pred = knn.predict(X_test)
        correct = 0
        for j in range(0, len(pred)):
            if pred[j] == y_test[j]:
                correct = correct + 1
        assert correct == expected_correct[distance_functions[i]]
Exemple #24
0
def test_pf_on_unit_test_data():
    """Test of ProximityForest on unit test data."""
    # load unit test data
    X_train, y_train = load_unit_test(split="train")
    X_test, y_test = load_unit_test(split="test")
    indices = np.random.RandomState(0).choice(len(y_train), 10, replace=False)

    # train PF
    pf = ProximityForest(n_estimators=5, random_state=0)
    pf.fit(X_train, y_train)

    # assert probabilities are the same
    probas = pf.predict_proba(X_test.iloc[indices])
    testing.assert_array_almost_equal(probas, pf_unit_test_probas, decimal=2)
Exemple #25
0
def test_cif_on_unit_test_data():
    """Test of CanonicalIntervalForest on unit test data."""
    # load unit test data
    X_train, y_train = load_unit_test(split="train", return_X_y=True)
    X_test, y_test = load_unit_test(split="test", return_X_y=True)
    indices = np.random.RandomState(0).choice(len(y_train), 10, replace=False)

    # train CIF
    cif = CanonicalIntervalForest(n_estimators=10, random_state=0)
    cif.fit(X_train, y_train)

    # assert probabilities are the same
    probas = cif.predict_proba(X_test.iloc[indices])
    testing.assert_array_equal(probas, cif_unit_test_probas)
Exemple #26
0
def test_shapedtw_on_unit_test_data():
    """Test of ShapeDTW on unit test data."""
    # load unit test data
    X_train, y_train = load_unit_test(split="train")
    X_test, y_test = load_unit_test(split="test")
    indices = np.random.RandomState(0).choice(len(y_train), 10, replace=False)

    # train ShapeDTW
    shapedtw = ShapeDTW()
    shapedtw.fit(X_train, y_train)

    # assert probabilities are the same
    probas = shapedtw.predict_proba(X_test.iloc[indices])
    testing.assert_array_almost_equal(probas, shapedtw_unit_test_probas, decimal=2)
def test_knn_bounding_matrix():
    """Test knn with custom bounding parameters."""
    X_train, y_train = load_unit_test(split="train", return_X_y=True)
    X_test, y_test = load_unit_test(split="test", return_X_y=True)
    for i in range(0, len(distance_functions)):
        knn = KNeighborsTimeSeriesClassifier(
            distance=distance_functions[i], distance_params={"window": 0.5}
        )
        knn.fit(X_train, y_train)
        pred = knn.predict(X_test)
        correct = 0
        for j in range(0, len(pred)):
            if pred[j] == y_test[j]:
                correct = correct + 1
        assert correct == expected_correct[distance_functions[i]]
Exemple #28
0
def test_col_ens_on_unit_test_data():
    """Test of ColumnEnsembleClassifier on unit test data."""
    # load unit test data
    X_train, y_train = load_unit_test(split="train")
    X_test, y_test = load_unit_test(split="test")
    indices = np.random.RandomState(0).choice(len(y_train), 10, replace=False)
    drcif = DrCIF(n_estimators=10, random_state=0)
    estimators = [("DrCIF", drcif, [0])]
    col_ens = ColumnEnsembleClassifier(estimators=estimators)
    col_ens.fit(X_train, y_train)
    # assert probabilities are the same
    probas = col_ens.predict_proba(X_test.iloc[indices])
    testing.assert_array_almost_equal(probas,
                                      col_ens_unit_test_probas,
                                      decimal=2)
Exemple #29
0
def test_individual_boss_on_unit_test():
    """Test of IndividualBOSS on unit test data."""
    # load unit test data
    X_train, y_train = load_unit_test(split="train")
    X_test, y_test = load_unit_test(split="test")
    indices = np.random.RandomState(0).choice(len(y_train), 10, replace=False)

    # train IndividualBOSS
    indiv_boss = IndividualBOSS(random_state=0)
    indiv_boss.fit(X_train, y_train)
    # assert probabilities are the same
    probas = indiv_boss.predict_proba(X_test.iloc[indices])
    testing.assert_array_almost_equal(probas,
                                      individual_boss_unit_test_probas,
                                      decimal=2)
Exemple #30
0
def test_weasel_on_unit_test_data():
    """Test of WEASEL on unit test data."""
    # load unit test data
    X_train, y_train = load_unit_test(split="train", return_X_y=True)
    X_test, y_test = load_unit_test(split="test", return_X_y=True)
    indices = np.random.RandomState(0).choice(len(y_train), 10, replace=False)

    # train WEASEL
    weasel = WEASEL(random_state=0, window_inc=4)
    weasel.fit(X_train, y_train)

    # assert probabilities are the same
    probas = weasel.predict_proba(X_test.iloc[indices])
    testing.assert_array_almost_equal(probas,
                                      weasel_unit_test_probas,
                                      decimal=2)