Exemple #1
0
def test_cif_on_basic_motions():
    """Check CanonicalIntervalForest probabilities on a BasicMotions subsample.

    Ten distinct positions are drawn with ``RandomState(4)``; the same
    positions select the rows used for fitting and the test rows used for
    prediction. The predicted probabilities must equal the reference value
    ``cif_basic_motions_probas`` exactly.
    """
    train_X, train_y = load_basic_motions(split="train")
    test_X, _ = load_basic_motions(split="test")

    # Fixed-seed draw so the subsample is the same on every run.
    sampler = np.random.RandomState(4)
    subset = sampler.choice(len(train_y), 10, replace=False)

    forest = CanonicalIntervalForest(n_estimators=10, random_state=0)
    forest.fit(train_X.iloc[subset], train_y[subset])

    # Exact (not approximate) comparison against the stored reference.
    testing.assert_array_equal(
        forest.predict_proba(test_X.iloc[subset]), cif_basic_motions_probas
    )
Exemple #2
0
def test_cif_on_unit_test_data():
    """Check CanonicalIntervalForest probabilities on the unit test data.

    The forest is fitted on the full training split. Predictions are made
    for ten test rows whose positions are drawn with ``RandomState(0)`` from
    ``range(len(y_train))``, and must equal ``cif_unit_test_probas`` exactly.
    """
    train_X, train_y = load_unit_test(split="train", return_X_y=True)
    test_X, _ = load_unit_test(split="test", return_X_y=True)

    # Positions are sampled using the training-set length but index test rows.
    sampler = np.random.RandomState(0)
    subset = sampler.choice(len(train_y), 10, replace=False)

    forest = CanonicalIntervalForest(n_estimators=10, random_state=0)
    forest.fit(train_X, train_y)

    # Exact (not approximate) comparison against the stored reference.
    testing.assert_array_equal(
        forest.predict_proba(test_X.iloc[subset]), cif_unit_test_probas
    )
Exemple #3
0
def test_sklearn_cross_validation(data_args):
    """Check that ``cross_val_score`` runs on a CanonicalIntervalForest.

    Parameters
    ----------
    data_args : dict
        Keyword arguments forwarded to ``_make_args`` when building the
        arguments for ``fit``.
    """
    classifier = CanonicalIntervalForest.create_test_instance()
    fit_arguments = _make_args(classifier, "fit", **data_args)
    splitter = KFold(n_splits=3)

    cv_scores = cross_val_score(classifier, *fit_arguments, cv=splitter)
    assert isinstance(cv_scores, np.ndarray)
Exemple #4
0
def test_sklearn_cross_validation_iterators(data_args, cross_validation_method):
    """Check that a cross-validation iterator yields index arrays.

    Parameters
    ----------
    data_args : dict
        Keyword arguments forwarded to ``_make_args`` when building the
        arguments for ``fit``.
    cross_validation_method : object
        Splitter exposing ``split(*fit_args, groups=...)``.
    """
    classifier = CanonicalIntervalForest.create_test_instance()
    fit_arguments = _make_args(classifier, "fit", **data_args)

    # Twenty group labels: 1, 1, 2, 2, ..., 10, 10.
    group_labels = [label for label in range(1, 11) for _ in range(2)]

    splits = cross_validation_method.split(*fit_arguments, groups=group_labels)
    for train_idx, test_idx in splits:
        assert isinstance(train_idx, np.ndarray)
        assert isinstance(test_idx, np.ndarray)
Exemple #5
0
def test_sklearn_composite_classifiers(data_args, composite_classifier):
    """Check that a composite classifier can fit and predict.

    A default-constructed CanonicalIntervalForest is used only as the
    reference estimator handed to ``_make_args``; the composite itself is what
    gets fitted and queried.

    Parameters
    ----------
    data_args : dict
        Keyword arguments forwarded to ``_make_args``.
    composite_classifier : object
        Estimator exposing ``fit`` and ``predict``.
    """
    reference_clf = CanonicalIntervalForest()

    composite_classifier.fit(*_make_args(reference_clf, "fit", **data_args))

    predict_arguments = _make_args(reference_clf, "predict", **data_args)
    predictions = composite_classifier.predict(*predict_arguments)
    assert isinstance(predictions, np.ndarray)
Exemple #6
0
def test_sklearn_parameter_tuning(data_args, parameter_tuning_method):
    """Check that a parameter search can tune a CanonicalIntervalForest.

    Parameters
    ----------
    data_args : dict
        Keyword arguments forwarded to ``_make_args`` when building the
        arguments for ``fit``.
    parameter_tuning_method : callable
        Search class, called as ``parameter_tuning_method(clf, grid, cv=...)``.
    """
    classifier = CanonicalIntervalForest.create_test_instance()
    search_space = {"n_intervals": [2, 3], "att_subsample_size": [2, 3]}
    fit_arguments = _make_args(classifier, "fit", **data_args)

    tuner = parameter_tuning_method(classifier, search_space, cv=KFold(n_splits=3))
    tuner.fit(*fit_arguments)

    # The refitted best estimator must still be a CanonicalIntervalForest.
    assert isinstance(tuner.best_estimator_, CanonicalIntervalForest)
Exemple #7
0
    def _fit_estimator(self, X, y, i):
        """Clone and fit the estimator for the ``i``-th classification point.

        The clone is seeded from ``self.random_state``: ``None`` stays
        ``None``; otherwise the seed is ``random_state * 37 * (i + 1)``, with a
        ``random_state`` of 0 replaced by 255 first so the product is not zero.

        Parameters
        ----------
        X : array-like
            Training data, sliced as ``X[:, :, :self._classification_points[i]]``
            (assumes a 3D array whose last axis is time -- TODO confirm).
        y : array-like
            Target values passed unchanged to ``fit``.
        i : int
            Index into ``self._classification_points``.

        Returns
        -------
        estimator
            The fitted clone.
        """
        if self.random_state is None:
            seed = None
        else:
            base = 255 if self.random_state == 0 else self.random_state
            seed = base * 37 * (i + 1)
        rng = check_random_state(seed)

        if self.estimator is None:
            template = CanonicalIntervalForest()
        else:
            template = self.estimator
        estimator = _clone_estimator(template, rng)

        # Fit on the series truncated at this classification point.
        cutoff = self._classification_points[i]
        estimator.fit(X[:, :, :cutoff], y)

        # NOTE(review): n_jobs is assigned after fit, so it does not affect the
        # fit call above -- confirm this ordering is intended.
        if getattr(estimator, "n_jobs", None) is not None:
            estimator.n_jobs = self._threads_to_use

        return estimator
Exemple #8
0
def test_dtc_on_unit_test_data():
    """Run CanonicalIntervalForest with ``base_estimator="dtc"`` on unit test data.

    The forest is fitted on the full training split and asked for
    probabilities on ten test rows chosen with ``RandomState(0)``. The
    probabilities are not compared with anything; the test only requires that
    the calls complete.
    """
    train_X, train_y = load_unit_test(split="train")
    test_X, _ = load_unit_test(split="test")

    sampler = np.random.RandomState(0)
    subset = sampler.choice(len(train_y), 10, replace=False)

    # "dtc" selects the decision tree base estimator instead of the default.
    forest = CanonicalIntervalForest(
        n_estimators=10, base_estimator="dtc", random_state=0
    )
    forest.fit(train_X, train_y)

    forest.predict_proba(test_X.iloc[subset])
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator.

        Parameters
        ----------
        parameter_set : str, default="default"
            Name of the set of test parameters to return. Only
            ``"results_comparison"`` is treated specially; every other value
            yields the default set.

        Returns
        -------
        params : dict
            Keyword arguments for constructing a test instance, i.e.
            ``MyClass(**params)``. The single key ``"estimators"`` maps to a
            list of ``(name, estimator, columns)`` tuples.
        """
        from sktime.classification.dictionary_based import ContractableBOSS
        from sktime.classification.interval_based import (
            CanonicalIntervalForest,
            TimeSeriesForestClassifier as TSFC,
        )

        # Default set: two small time series forests, both on column 0.
        if parameter_set != "results_comparison":
            default_estimators = [
                ("tsf1", TSFC(n_estimators=2), 0),
                ("tsf2", TSFC(n_estimators=2), 0),
            ]
            return {"estimators": default_estimators}

        # Results-comparison set: seeded estimators for reproducible output.
        cboss = ContractableBOSS(
            n_parameter_samples=4, max_ensemble_size=2, random_state=0
        )
        cif = CanonicalIntervalForest(
            n_estimators=2, n_intervals=4, att_subsample_size=4, random_state=0
        )
        comparison_estimators = [("cBOSS", cboss, 5), ("CIF", cif, [3, 4])]
        return {"estimators": comparison_estimators}
Exemple #10
0
def set_classifier(cls, resample_id=None, train_file=False):
    """Construct a classifier from its name.

    Basic way of creating the classifier to build using the default settings. This
    set up is to help with batch jobs for multiple problems to facilitate easy
    reproducibility for use with load_and_run_classification_experiment. You can pass a
    classifier object instead to run_classification_experiment.

    Parameters
    ----------
    cls : str
        String indicating which classifier you want. Matching is
        case-insensitive: the name is lower-cased before comparison.
    resample_id : int or None, default=None
        Classifier random seed. Not every classifier takes one; the
        ``KNeighborsTimeSeriesClassifier`` variants, ``ShapeDTW`` and
        ``MrSEQLClassifier`` are built without it.
    train_file : bool, default=False
        Whether a train file is being produced. Forwarded as
        ``save_train_predictions`` / ``save_transformed_data`` to the
        classifiers that are built with that option below.

    Returns
    -------
    classifier : A BaseClassifier.
        The classifier matching the input classifier name.

    Raises
    ------
    Exception
        If ``cls`` does not match any known classifier name.
    """
    # All aliases below must be lower-case, since `name` is lower-cased here.
    name = cls.lower()
    # Dictionary based
    if name in ("boss", "bossensemble"):
        return BOSSEnsemble(random_state=resample_id)
    elif name in ("cboss", "contractableboss"):
        return ContractableBOSS(random_state=resample_id)
    elif name in ("tde", "temporaldictionaryensemble"):
        return TemporalDictionaryEnsemble(
            random_state=resample_id, save_train_predictions=train_file
        )
    elif name == "weasel":
        return WEASEL(random_state=resample_id)
    elif name == "muse":
        return MUSE(random_state=resample_id)
    # Distance based
    elif name in ("pf", "proximityforest"):
        return ProximityForest(random_state=resample_id)
    elif name in ("pt", "proximitytree"):
        return ProximityTree(random_state=resample_id)
    elif name in ("ps", "proximitystump"):
        # Previously compared against "proximityStump", which a lower-cased
        # name can never equal, so the long alias was unreachable.
        return ProximityStump(random_state=resample_id)
    elif name in ("dtwcv", "kneighborstimeseriesclassifier"):
        return KNeighborsTimeSeriesClassifier(distance="dtwcv")
    elif name in ("dtw", "1nn-dtw"):
        return KNeighborsTimeSeriesClassifier(distance="dtw")
    elif name in ("msm", "1nn-msm"):
        return KNeighborsTimeSeriesClassifier(distance="msm")
    elif name in ("ee", "elasticensemble"):
        return ElasticEnsemble(random_state=resample_id)
    elif name == "shapedtw":
        return ShapeDTW()
    # Feature based
    elif name == "catch22":
        return Catch22Classifier(
            random_state=resample_id, estimator=RandomForestClassifier(n_estimators=500)
        )
    elif name == "matrixprofile":
        return MatrixProfileClassifier(random_state=resample_id)
    elif name == "signature":
        return SignatureClassifier(
            random_state=resample_id,
            estimator=RandomForestClassifier(n_estimators=500),
        )
    elif name == "tsfresh":
        return TSFreshClassifier(
            random_state=resample_id, estimator=RandomForestClassifier(n_estimators=500)
        )
    elif name == "tsfresh-r":
        return TSFreshClassifier(
            random_state=resample_id,
            estimator=RandomForestClassifier(n_estimators=500),
            relevant_feature_extractor=True,
        )
    # Hybrid
    elif name in ("hc1", "hivecotev1"):
        return HIVECOTEV1(random_state=resample_id)
    elif name in ("hc2", "hivecotev2"):
        return HIVECOTEV2(random_state=resample_id)
    # Interval based
    elif name in (
        "rise",
        "randomintervalspectralforest",
        # Alias matching the class actually returned; the older
        # "...forest" spelling is kept for backward compatibility.
        "randomintervalspectralensemble",
    ):
        return RandomIntervalSpectralEnsemble(
            random_state=resample_id, n_estimators=500
        )
    elif name in ("tsf", "timeseriesforestclassifier"):
        return TimeSeriesForestClassifier(random_state=resample_id, n_estimators=500)
    elif name in ("cif", "canonicalintervalforest"):
        return CanonicalIntervalForest(random_state=resample_id, n_estimators=500)
    elif name in ("stsf", "supervisedtimeseriesforest"):
        return SupervisedTimeSeriesForest(random_state=resample_id, n_estimators=500)
    elif name == "drcif":
        return DrCIF(
            random_state=resample_id, n_estimators=500, save_transformed_data=train_file
        )
    # Kernel based
    elif name == "rocket":
        return ROCKETClassifier(random_state=resample_id)
    elif name == "arsenal":
        return Arsenal(random_state=resample_id, save_transformed_data=train_file)
    # Shapelet based
    elif name in ("stc", "shapelettransformclassifier"):
        return ShapeletTransformClassifier(
            random_state=resample_id, save_transformed_data=train_file
        )
    elif name in ("mrseql", "mrseqlclassifier"):
        return MrSEQLClassifier(seql_mode="fs", symrep=["sax", "sfa"])
    else:
        raise Exception("UNKNOWN CLASSIFIER")
Exemple #11
0
    GroupKFold(n_splits=2),
    LeavePGroupsOut(n_groups=5),
    GroupShuffleSplit(n_splits=2, test_size=0.25),
    TimeSeriesSplit(n_splits=2),
]
# Parameter search classes, listed uninstantiated.
PARAMETER_TUNING_METHODS = [
    GridSearchCV,
    RandomizedSearchCV,
    HalvingGridSearchCV,
    HalvingRandomSearchCV,
]
# Composite estimators, each wrapping fresh CanonicalIntervalForest test
# instances: a two-step pipeline, a three-member voting ensemble and a
# calibrated classifier using 3-fold cross-validation.
COMPOSITE_ESTIMATORS = [
    Pipeline(
        [
            ("transform", PCATransformer()),
            ("clf", CanonicalIntervalForest.create_test_instance()),
        ]
    ),
    VotingClassifier(
        estimators=[
            (member, CanonicalIntervalForest.create_test_instance())
            for member in ("clf1", "clf2", "clf3")
        ]
    ),
    CalibratedClassifierCV(
        base_estimator=CanonicalIntervalForest.create_test_instance(),
        cv=3,
    ),
]
Exemple #12
0
                 "max_shapelets": 20,
             },
             drcif_params={"n_estimators": 10},
             arsenal_params={"num_kernels": 100, "n_estimators": 5},
             tde_params={
                 "n_parameter_samples": 10,
                 "max_ensemble_size": 5,
                 "randomly_selected_params": 5,
             },
         )
     ),
 )
 # CanonicalIntervalForest (n_estimators=10, random_state=0) passed through
 # the unit-test reproduction helper; the result is handed to _print_array
 # together with its label.
 _print_array(
     "CanonicalIntervalForest - UnitTest",
     _reproduce_classification_unit_test(
         CanonicalIntervalForest(n_estimators=10, random_state=0)
     ),
 )
 # Same CanonicalIntervalForest configuration, basic-motions reproduction helper.
 _print_array(
     "CanonicalIntervalForest - BasicMotions",
     _reproduce_classification_basic_motions(
         CanonicalIntervalForest(n_estimators=10, random_state=0)
     ),
 )
 # DrCIF (n_estimators=10, random_state=0), unit-test reproduction helper.
 _print_array(
     "DrCIF - UnitTest",
     _reproduce_classification_unit_test(DrCIF(n_estimators=10, random_state=0)),
 )
 _print_array(
     "DrCIF - BasicMotions",
     _reproduce_classification_basic_motions(DrCIF(n_estimators=10, random_state=0)),
Exemple #13
0
 # ColumnEnsembleClassifier built from two seeded estimators, passed through
 # the basic-motions reproduction helper. Each entry is a
 # (name, estimator, columns) tuple; the last element is presumably the
 # column selection for that estimator -- verify against
 # ColumnEnsembleClassifier.
 _print_array(
     "ColumnEnsembleClassifier - BasicMotions",
     _reproduce_classification_basic_motions(
         ColumnEnsembleClassifier(estimators=[
             (
                 "cBOSS",
                 ContractableBOSS(n_parameter_samples=4,
                                  max_ensemble_size=2,
                                  random_state=0),
                 [5],
             ),
             (
                 "CIF",
                 CanonicalIntervalForest(
                     n_estimators=2,
                     n_intervals=4,
                     att_subsample_size=4,
                     random_state=0,
                 ),
                 [3, 4],
             ),
         ])),
 )
 # BOSSEnsemble (max_ensemble_size=5, random_state=0), unit-test reproduction
 # helper.
 _print_array(
     "BOSSEnsemble - UnitTest",
     _reproduce_classification_unit_test(
         BOSSEnsemble(max_ensemble_size=5, random_state=0)),
 )
 _print_array(
     "ContractableBOSS - UnitTest",
     _reproduce_classification_unit_test(
         ContractableBOSS(n_parameter_samples=10,
Exemple #14
0
        load_and_run_classification_experiment(
            problem_path=data_dir,
            results_path=results_dir,
            classifier=set_classifier(classifier, resample, tf),
            cls_name=classifier,
            dataset=dataset,
            resample_id=resample,
            build_train=tf,
            predefined_resample=predefined_resample,
        )
    else:  # Local run
        print(" Local Run")
        data_dir = "../datasets/data/"
        results_dir = "C:/Temp/"
        cls_name = "CIF"
        classifier = CanonicalIntervalForest()
        dataset = "UnitTest"
        resample = 0
        tf = False
        predefined_resample = False

        load_and_run_classification_experiment(
            overwrite=True,
            problem_path=data_dir,
            results_path=results_dir,
            cls_name=cls_name,
            classifier=classifier,
            dataset=dataset,
            resample_id=resample,
            build_train=tf,
            predefined_resample=predefined_resample,