Example #1
0
def test_contracted_drcif_on_unit_test_data():
    """Test of contracted DrCIF on unit test data."""
    # load unit test data
    X_train, y_train = load_unit_test(split="train", return_X_y=True)
    X_test, y_test = load_unit_test(split="test", return_X_y=True)

    # train contracted DrCIF
    drcif = DrCIF(time_limit_in_minutes=0.025, random_state=0)
    drcif.fit(X_train, y_train)

    assert len(drcif.estimators_) > 1
    assert accuracy_score(y_test, drcif.predict(X_test)) >= 0.8
Example #2
0
def test_drcif_on_basic_motions():
    """Test of DrCIF on basic motions data."""
    # load basic motions data
    X_train, y_train = load_basic_motions(split="train", return_X_y=True)
    X_test, y_test = load_basic_motions(split="test", return_X_y=True)
    indices = np.random.RandomState(4).choice(len(y_train), 10, replace=False)

    # train DrCIF
    drcif = DrCIF(n_estimators=10, random_state=0)
    drcif.fit(X_train.iloc[indices], y_train[indices])

    # assert probabilities are the same
    probas = drcif.predict_proba(X_test.iloc[indices])
    testing.assert_array_equal(probas, drcif_basic_motions_probas)
Example #3
0
def test_contracted_drcif_on_unit_test_data():
    """Test of contracted DrCIF on unit test data."""
    # load unit test data
    X_train, y_train = load_unit_test(split="train")

    # train contracted DrCIF
    drcif = DrCIF(
        time_limit_in_minutes=0.25,
        contract_max_n_estimators=10,
        random_state=0,
    )
    drcif.fit(X_train, y_train)

    assert len(drcif.estimators_) > 1
Example #4
0
def test_contracted_drcif():
    """Test of contracted DrCIF on unit test data."""
    # load unit test data
    X_train, y_train = load_unit_test(split="train")

    # train contracted DrCIF
    drcif = DrCIF(
        time_limit_in_minutes=0.25,
        contract_max_n_estimators=2,
        n_intervals=2,
        att_subsample_size=2,
        random_state=0,
    )
    drcif.fit(X_train, y_train)

    assert len(drcif.estimators_) > 1
def test_col_ens_on_basic_motions():
    """Test of ColumnEnsembleClassifier on basic motions data."""
    # load basic motions data
    X_train, y_train = load_basic_motions(split="train")
    X_test, y_test = load_basic_motions(split="test")
    indices = np.random.RandomState(4).choice(len(y_train), 10, replace=False)
    fp = FreshPRINCE(
        random_state=0,
        default_fc_parameters="minimal",
        n_estimators=10,
    )
    tde = TemporalDictionaryEnsemble(
        n_parameter_samples=10,
        max_ensemble_size=5,
        randomly_selected_params=5,
        random_state=0,
    )
    drcif = DrCIF(n_estimators=10, random_state=0, save_transformed_data=True)
    estimators = [
        ("FreshPrince", fp, [0, 1, 2]),
        ("TDE", tde, [3, 4]),
        ("DrCIF", drcif, [5]),
    ]

    # train column ensemble
    col_ens = ColumnEnsembleClassifier(estimators=estimators)
    col_ens.fit(X_train, y_train)
    # preds = col_ens.predict(X_test.iloc[indices])

    # assert preds[0] == 2
    # assert probabilities are the same
    probas = col_ens.predict_proba(X_test.iloc[indices])
    testing.assert_array_almost_equal(probas,
                                      col_ens_basic_motions_probas,
                                      decimal=2)
Example #6
0
def test_col_ens_on_basic_motions():
    """Test of ColumnEnsembleClassifier on basic motions data."""
    # load basic motions data
    X_train, y_train = load_basic_motions(split="train")
    X_test, y_test = load_basic_motions(split="test")
    indices = np.random.RandomState(4).choice(len(y_train), 10, replace=False)
    tde = TemporalDictionaryEnsemble(
        n_parameter_samples=10,
        max_ensemble_size=5,
        randomly_selected_params=5,
        random_state=0,
    )
    drcif = DrCIF(n_estimators=10, random_state=0)
    estimators = [
        ("TDE", tde, [3, 4]),
        ("DrCIF", drcif, [5]),
    ]

    # train column ensemble
    col_ens = ColumnEnsembleClassifier(estimators=estimators)
    col_ens.fit(X_train, y_train)
    probas = col_ens.predict_proba(X_test.iloc[indices])
    testing.assert_array_almost_equal(probas,
                                      col_ens_basic_motions_probas,
                                      decimal=2)
Example #7
0
def test_drcif_train_estimate():
    """Test of DrCIF on unit test data."""
    # load unit test data
    X_train, y_train = load_unit_test(split="train")

    # train DrCIF
    drcif = DrCIF(
        n_estimators=2,
        n_intervals=2,
        att_subsample_size=2,
        random_state=0,
        save_transformed_data=True,
    )
    drcif.fit(X_train, y_train)

    # test train estimate
    train_probas = drcif._get_train_probs(X_train, y_train)
    assert train_probas.shape == (20, 2)
    train_preds = drcif.classes_[np.argmax(train_probas, axis=1)]
    assert accuracy_score(y_train, train_preds) >= 0.6
Example #8
0
def test_col_ens_on_unit_test_data():
    """Test of ColumnEnsembleClassifier on unit test data."""
    # load unit test data
    X_train, y_train = load_unit_test(split="train")
    X_test, y_test = load_unit_test(split="test")
    indices = np.random.RandomState(0).choice(len(y_train), 10, replace=False)
    drcif = DrCIF(n_estimators=10, random_state=0)
    estimators = [("DrCIF", drcif, [0])]
    col_ens = ColumnEnsembleClassifier(estimators=estimators)
    col_ens.fit(X_train, y_train)
    # assert probabilities are the same
    probas = col_ens.predict_proba(X_test.iloc[indices])
    testing.assert_array_almost_equal(probas,
                                      col_ens_unit_test_probas,
                                      decimal=2)
Example #9
0
def test_drcif_on_unit_test_data():
    """Test of DrCIF on unit test data."""
    # load unit test data
    X_train, y_train = load_unit_test(split="train", return_X_y=True)
    X_test, y_test = load_unit_test(split="test", return_X_y=True)
    indices = np.random.RandomState(0).choice(len(y_train), 10, replace=False)

    # train DrCIF
    drcif = DrCIF(n_estimators=10, random_state=0, save_transformed_data=True)
    drcif.fit(X_train, y_train)

    # assert probabilities are the same
    probas = drcif.predict_proba(X_test.iloc[indices])
    testing.assert_array_equal(probas, drcif_unit_test_probas)

    # test train estimate
    train_probas = drcif._get_train_probs(X_train, y_train)
    train_preds = drcif.classes_[np.argmax(train_probas, axis=1)]
    assert accuracy_score(y_train, train_preds) >= 0.85
Example #10
0
def set_classifier(cls, resample_id=None, train_file=False):
    """Construct a classifier.

    Basic way of creating the classifier to build using the default settings. This
    set up is to help with batch jobs for multiple problems to facilitate easy
    reproducibility for use with load_and_run_classification_experiment. You can pass a
    classifier object instead to run_classification_experiment.

    Parameters
    ----------
    cls : str
        String indicating which classifier you want.
    resample_id : int or None, default=None
        Classifier random seed.
    train_file : bool, default=False
        Whether a train file is being produced.

    Return
    ------
    classifier : A BaseClassifier.
        The classifier matching the input classifier name.
    """
    name = cls.lower()
    # Dictionary based
    if name == "boss" or name == "bossensemble":
        return BOSSEnsemble(random_state=resample_id)
    elif name == "cboss" or name == "contractableboss":
        return ContractableBOSS(random_state=resample_id)
    elif name == "tde" or name == "temporaldictionaryensemble":
        return TemporalDictionaryEnsemble(
            random_state=resample_id, save_train_predictions=train_file
        )
    elif name == "weasel":
        return WEASEL(random_state=resample_id)
    elif name == "muse":
        return MUSE(random_state=resample_id)
    # Distance based
    elif name == "pf" or name == "proximityforest":
        return ProximityForest(random_state=resample_id)
    elif name == "pt" or name == "proximitytree":
        return ProximityTree(random_state=resample_id)
    elif name == "ps" or name == "proximityStump":
        return ProximityStump(random_state=resample_id)
    elif name == "dtwcv" or name == "kneighborstimeseriesclassifier":
        return KNeighborsTimeSeriesClassifier(distance="dtwcv")
    elif name == "dtw" or name == "1nn-dtw":
        return KNeighborsTimeSeriesClassifier(distance="dtw")
    elif name == "msm" or name == "1nn-msm":
        return KNeighborsTimeSeriesClassifier(distance="msm")
    elif name == "ee" or name == "elasticensemble":
        return ElasticEnsemble(random_state=resample_id)
    elif name == "shapedtw":
        return ShapeDTW()
    # Feature based
    elif name == "catch22":
        return Catch22Classifier(
            random_state=resample_id, estimator=RandomForestClassifier(n_estimators=500)
        )
    elif name == "matrixprofile":
        return MatrixProfileClassifier(random_state=resample_id)
    elif name == "signature":
        return SignatureClassifier(
            random_state=resample_id,
            estimator=RandomForestClassifier(n_estimators=500),
        )
    elif name == "tsfresh":
        return TSFreshClassifier(
            random_state=resample_id, estimator=RandomForestClassifier(n_estimators=500)
        )
    elif name == "tsfresh-r":
        return TSFreshClassifier(
            random_state=resample_id,
            estimator=RandomForestClassifier(n_estimators=500),
            relevant_feature_extractor=True,
        )
    # Hybrid
    elif name == "hc1" or name == "hivecotev1":
        return HIVECOTEV1(random_state=resample_id)
    elif name == "hc2" or name == "hivecotev2":
        return HIVECOTEV2(random_state=resample_id)
    # Interval based
    elif name == "rise" or name == "randomintervalspectralforest":
        return RandomIntervalSpectralEnsemble(
            random_state=resample_id, n_estimators=500
        )
    elif name == "tsf" or name == "timeseriesforestclassifier":
        return TimeSeriesForestClassifier(random_state=resample_id, n_estimators=500)
    elif name == "cif" or name == "canonicalintervalforest":
        return CanonicalIntervalForest(random_state=resample_id, n_estimators=500)
    elif name == "stsf" or name == "supervisedtimeseriesforest":
        return SupervisedTimeSeriesForest(random_state=resample_id, n_estimators=500)
    elif name == "drcif":
        return DrCIF(
            random_state=resample_id, n_estimators=500, save_transformed_data=train_file
        )
    # Kernel based
    elif name == "rocket":
        return ROCKETClassifier(random_state=resample_id)
    elif name == "arsenal":
        return Arsenal(random_state=resample_id, save_transformed_data=train_file)
    # Shapelet based
    elif name == "stc" or name == "shapelettransformclassifier":
        return ShapeletTransformClassifier(
            random_state=resample_id, save_transformed_data=train_file
        )
    elif name == "mrseql" or name == "mrseqlclassifier":
        return MrSEQLClassifier(seql_mode="fs", symrep=["sax", "sfa"])
    else:
        raise Exception("UNKNOWN CLASSIFIER")
Example #11
0
 )
 _print_array(
     "CanonicalIntervalForest - UnitTest",
     _reproduce_classification_unit_test(
         CanonicalIntervalForest(n_estimators=10, random_state=0)
     ),
 )
 _print_array(
     "CanonicalIntervalForest - BasicMotions",
     _reproduce_classification_basic_motions(
         CanonicalIntervalForest(n_estimators=10, random_state=0)
     ),
 )
 _print_array(
     "DrCIF - UnitTest",
     _reproduce_classification_unit_test(DrCIF(n_estimators=10, random_state=0)),
 )
 _print_array(
     "DrCIF - BasicMotions",
     _reproduce_classification_basic_motions(DrCIF(n_estimators=10, random_state=0)),
 )
 _print_array(
     "RandomIntervalSpectralEnsemble - UnitTest",
     _reproduce_classification_unit_test(
         RandomIntervalSpectralEnsemble(n_estimators=10, random_state=0)
     ),
 )
 _print_array(
     "SupervisedTimeSeriesForest - UnitTest",
     _reproduce_classification_unit_test(
         SupervisedTimeSeriesForest(n_estimators=10, random_state=0)
Example #12
0
                                 att_subsample_size=4,
                                 random_state=0)),
 )
 _print_array(
     "CanonicalIntervalForest - BasicMotions",
     _reproduce_classification_basic_motions(
         CanonicalIntervalForest(n_estimators=10,
                                 n_intervals=2,
                                 att_subsample_size=4,
                                 random_state=0)),
 )
 _print_array(
     "DrCIF - UnitTest",
     _reproduce_classification_unit_test(
         DrCIF(n_estimators=10,
               n_intervals=2,
               att_subsample_size=4,
               random_state=0)),
 )
 _print_array(
     "DrCIF - BasicMotions",
     _reproduce_classification_basic_motions(
         DrCIF(n_estimators=10,
               n_intervals=2,
               att_subsample_size=4,
               random_state=0)),
 )
 _print_array(
     "RandomIntervalSpectralEnsemble - UnitTest",
     _reproduce_classification_unit_test(
         RandomIntervalSpectralEnsemble(n_estimators=10, random_state=0)),
 )
Example #13
0
         )),
 )
 _print_array(
     "CanonicalIntervalForest - UnitTest",
     _reproduce_classification_unit_test(
         CanonicalIntervalForest(n_estimators=10, random_state=0)),
 )
 _print_array(
     "CanonicalIntervalForest - BasicMotions",
     _reproduce_classification_basic_motions(
         CanonicalIntervalForest(n_estimators=10, random_state=0)),
 )
 _print_array(
     "DrCIF - UnitTest",
     _reproduce_classification_unit_test(
         DrCIF(n_estimators=10, random_state=0)),
 )
 _print_array(
     "DrCIF - BasicMotions",
     _reproduce_classification_basic_motions(
         DrCIF(n_estimators=10, random_state=0)),
 )
 _print_array(
     "RandomIntervalSpectralEnsemble - UnitTest",
     _reproduce_classification_unit_test(
         RandomIntervalSpectralEnsemble(n_estimators=10, random_state=0)),
 )
 _print_array(
     "SupervisedTimeSeriesForest - UnitTest",
     _reproduce_classification_unit_test(
         SupervisedTimeSeriesForest(n_estimators=10, random_state=0)),