Ejemplo n.º 1
0
def test_weasel_on_gunpoint():
    """Check WEASEL with equi-depth binning reaches >= 0.99 on GunPoint."""
    # Fetch the train and test splits of the GunPoint problem.
    train_X, train_y = load_gunpoint(split="train", return_X_y=True)
    test_X, test_y = load_gunpoint(split="test", return_X_y=True)

    # Build the classifier with a fixed seed and fit it on the train split.
    classifier = WEASEL(random_state=1, binning_strategy="equi-depth")
    classifier.fit(train_X, train_y)

    # The value returned by ``score`` on the test split must clear the bar.
    accuracy = classifier.score(test_X, test_y)
    assert accuracy >= 0.99
Ejemplo n.º 2
0
def test_weasel_on_power_demand():
    """Check default WEASEL (seed 47) reaches >= 0.94 on ItalyPowerDemand."""
    # Fetch the train and test splits of the ItalyPowerDemand problem.
    train_X, train_y = load_italy_power_demand(split="train", return_X_y=True)
    test_X, test_y = load_italy_power_demand(split="test", return_X_y=True)

    # Build the classifier with a fixed seed and fit it on the train split.
    classifier = WEASEL(random_state=47)
    classifier.fit(train_X, train_y)

    # Report the test-split score, then enforce the minimum.
    accuracy = classifier.score(test_X, test_y)
    print(accuracy)
    assert accuracy >= 0.94
Ejemplo n.º 3
0
def test_weasel_on_power_demand():
    """Check WEASEL with kmeans binning reaches >= 0.94 on ItalyPowerDemand."""
    # Fetch the train and test splits of the ItalyPowerDemand problem.
    train_X, train_y = load_italy_power_demand(split="train", return_X_y=True)
    test_X, test_y = load_italy_power_demand(split="test", return_X_y=True)

    # Build the classifier with a fixed seed and fit it on the train split.
    classifier = WEASEL(random_state=1, binning_strategy="kmeans")
    classifier.fit(train_X, train_y)

    # The value returned by ``score`` on the test split must clear the bar.
    accuracy = classifier.score(test_X, test_y)
    assert accuracy >= 0.94
Ejemplo n.º 4
0
def test_weasel_on_gunpoint():
    """Check default WEASEL (seed 1379) reaches >= 0.99 on GunPoint."""
    # Fetch the train and test splits of the GunPoint problem.
    train_X, train_y = load_gunpoint(split="train", return_X_y=True)
    test_X, test_y = load_gunpoint(split="test", return_X_y=True)

    # Build the classifier with a fixed seed and fit it on the train split.
    classifier = WEASEL(random_state=1379)
    classifier.fit(train_X, train_y)

    # The value returned by ``score`` on the test split must clear the bar.
    accuracy = classifier.score(test_X, test_y)
    assert accuracy >= 0.99
Ejemplo n.º 5
0
def test_weasel_on_unit_test_data():
    """Compare WEASEL probabilities on unit test data with stored values."""
    # Fetch the train and test splits of the unit test problem.
    train_X, train_y = load_unit_test(split="train", return_X_y=True)
    test_X, test_y = load_unit_test(split="test", return_X_y=True)

    # Draw 10 distinct positions (bounded by the train label count) from a
    # generator seeded with 0; they select the test rows that get compared.
    sampler = np.random.RandomState(0)
    indices = sampler.choice(len(train_y), 10, replace=False)

    # Build the classifier with a fixed seed and fit it on the train split.
    classifier = WEASEL(random_state=0, window_inc=4)
    classifier.fit(train_X, train_y)

    # Predicted probabilities must agree with the reference array to two
    # decimal places.
    predicted = classifier.predict_proba(test_X.iloc[indices])
    expected = weasel_unit_test_probas
    testing.assert_array_almost_equal(predicted, expected, decimal=2)
Ejemplo n.º 6
0
def set_classifier(cls, resampleId=None):
    """
    Basic way of creating the classifier to build using the default settings. This
    set up is to help with batch jobs for multiple problems to facilitate easy
    reproducibility. You can set up bespoke classifier in many other ways.

    The lookup is case-insensitive: ``cls`` is lower-cased before it is compared
    with the known names, so every alias tested below must itself be lower case.

    :param cls: String indicating which classifier you want
    :param resampleId: classifier random seed, passed as ``random_state`` to the
        classifiers that are constructed with one here

    :return: A classifier.

    :raises Exception: if ``cls`` does not match any known classifier name.

    """
    name = cls.lower()
    # Distance based
    if name in ("pf", "proximityforest"):
        return ProximityForest(random_state=resampleId)
    elif name in ("pt", "proximitytree"):
        return ProximityTree(random_state=resampleId)
    # The alias used to be spelled "proximityStump", which could never equal
    # the lower-cased name; it is now spelled in lower case so it is reachable.
    elif name in ("ps", "proximitystump"):
        return ProximityStump(random_state=resampleId)
    elif name in ("dtwcv", "kneighborstimeseriesclassifier"):
        return KNeighborsTimeSeriesClassifier(distance="dtwcv")
    elif name in ("dtw", "1nn-dtw"):
        return KNeighborsTimeSeriesClassifier(distance="dtw")
    elif name in ("msm", "1nn-msm"):
        return KNeighborsTimeSeriesClassifier(distance="msm")
    elif name in ("ee", "elasticensemble"):
        return ElasticEnsemble()
    elif name == "shapedtw":
        return ShapeDTW()
    # Dictionary based
    elif name in ("boss", "bossensemble"):
        return BOSSEnsemble(random_state=resampleId)
    elif name in ("cboss", "contractableboss"):
        return ContractableBOSS(random_state=resampleId)
    elif name in ("tde", "temporaldictionaryensemble"):
        return TemporalDictionaryEnsemble(random_state=resampleId)
    elif name == "weasel":
        return WEASEL(random_state=resampleId)
    elif name == "muse":
        return MUSE(random_state=resampleId)
    # Interval based
    elif name in ("rise", "randomintervalspectralforest"):
        return RandomIntervalSpectralForest(random_state=resampleId)
    elif name in ("tsf", "timeseriesforestclassifier"):
        return TimeSeriesForestClassifier(random_state=resampleId)
    elif name in ("cif", "canonicalintervalforest"):
        return CanonicalIntervalForest(random_state=resampleId)
    elif name == "drcif":
        return DrCIF(random_state=resampleId)
    # Shapelet based
    elif name in ("stc", "shapelettransformclassifier"):
        return ShapeletTransformClassifier(
            random_state=resampleId, time_contract_in_mins=1
        )
    elif name in ("mrseql", "mrseqlclassifier"):
        return MrSEQLClassifier(seql_mode="fs", symrep=["sax", "sfa"])
    elif name == "rocket":
        return ROCKETClassifier(random_state=resampleId)
    elif name == "arsenal":
        return Arsenal(random_state=resampleId)
    # Hybrid
    elif name == "catch22":
        return Catch22ForestClassifier(random_state=resampleId)
    elif name == "hivecotev1":
        return HIVECOTEV1(random_state=resampleId)
    else:
        raise Exception("UNKNOWN CLASSIFIER")
Ejemplo n.º 7
0
def set_classifier(cls, resample_id=None, train_file=False):
    """Construct a classifier.

    Basic way of creating the classifier to build using the default settings. This
    set up is to help with batch jobs for multiple problems to facilitate easy
    reproducibility for use with load_and_run_classification_experiment. You can pass a
    classifier object instead to run_classification_experiment.

    The lookup is case-insensitive: ``cls`` is lower-cased before it is compared
    with the known names, so every alias tested below must itself be lower case.

    Parameters
    ----------
    cls : str
        String indicating which classifier you want.
    resample_id : int or None, default=None
        Classifier random seed, passed as ``random_state`` to the classifiers
        that are constructed with one here.
    train_file : bool, default=False
        Whether a train file is being produced. Forwarded as
        ``save_train_predictions`` or ``save_transformed_data`` to the
        classifiers that are constructed with such an argument here.

    Returns
    -------
    classifier : A BaseClassifier.
        The classifier matching the input classifier name.

    Raises
    ------
    Exception
        If ``cls`` does not match any known classifier name.
    """
    name = cls.lower()
    # Dictionary based
    if name in ("boss", "bossensemble"):
        return BOSSEnsemble(random_state=resample_id)
    elif name in ("cboss", "contractableboss"):
        return ContractableBOSS(random_state=resample_id)
    elif name in ("tde", "temporaldictionaryensemble"):
        return TemporalDictionaryEnsemble(
            random_state=resample_id, save_train_predictions=train_file
        )
    elif name == "weasel":
        return WEASEL(random_state=resample_id)
    elif name == "muse":
        return MUSE(random_state=resample_id)
    # Distance based
    elif name in ("pf", "proximityforest"):
        return ProximityForest(random_state=resample_id)
    elif name in ("pt", "proximitytree"):
        return ProximityTree(random_state=resample_id)
    # The alias used to be spelled "proximityStump", which could never equal
    # the lower-cased name; it is now spelled in lower case so it is reachable.
    elif name in ("ps", "proximitystump"):
        return ProximityStump(random_state=resample_id)
    elif name in ("dtwcv", "kneighborstimeseriesclassifier"):
        return KNeighborsTimeSeriesClassifier(distance="dtwcv")
    elif name in ("dtw", "1nn-dtw"):
        return KNeighborsTimeSeriesClassifier(distance="dtw")
    elif name in ("msm", "1nn-msm"):
        return KNeighborsTimeSeriesClassifier(distance="msm")
    elif name in ("ee", "elasticensemble"):
        return ElasticEnsemble(random_state=resample_id)
    elif name == "shapedtw":
        return ShapeDTW()
    # Feature based
    elif name == "catch22":
        return Catch22Classifier(
            random_state=resample_id, estimator=RandomForestClassifier(n_estimators=500)
        )
    elif name == "matrixprofile":
        return MatrixProfileClassifier(random_state=resample_id)
    elif name == "signature":
        return SignatureClassifier(
            random_state=resample_id,
            estimator=RandomForestClassifier(n_estimators=500),
        )
    elif name == "tsfresh":
        return TSFreshClassifier(
            random_state=resample_id, estimator=RandomForestClassifier(n_estimators=500)
        )
    elif name == "tsfresh-r":
        return TSFreshClassifier(
            random_state=resample_id,
            estimator=RandomForestClassifier(n_estimators=500),
            relevant_feature_extractor=True,
        )
    # Hybrid
    elif name in ("hc1", "hivecotev1"):
        return HIVECOTEV1(random_state=resample_id)
    elif name in ("hc2", "hivecotev2"):
        return HIVECOTEV2(random_state=resample_id)
    # Interval based
    elif name in ("rise", "randomintervalspectralforest"):
        return RandomIntervalSpectralEnsemble(
            random_state=resample_id, n_estimators=500
        )
    elif name in ("tsf", "timeseriesforestclassifier"):
        return TimeSeriesForestClassifier(random_state=resample_id, n_estimators=500)
    elif name in ("cif", "canonicalintervalforest"):
        return CanonicalIntervalForest(random_state=resample_id, n_estimators=500)
    elif name in ("stsf", "supervisedtimeseriesforest"):
        return SupervisedTimeSeriesForest(random_state=resample_id, n_estimators=500)
    elif name == "drcif":
        return DrCIF(
            random_state=resample_id, n_estimators=500, save_transformed_data=train_file
        )
    # Kernel based
    elif name == "rocket":
        return ROCKETClassifier(random_state=resample_id)
    elif name == "arsenal":
        return Arsenal(random_state=resample_id, save_transformed_data=train_file)
    # Shapelet based
    elif name in ("stc", "shapelettransformclassifier"):
        return ShapeletTransformClassifier(
            random_state=resample_id, save_transformed_data=train_file
        )
    elif name in ("mrseql", "mrseqlclassifier"):
        return MrSEQLClassifier(seql_mode="fs", symrep=["sax", "sfa"])
    else:
        raise Exception("UNKNOWN CLASSIFIER")
Ejemplo n.º 8
0
     ),
 )
 # Each call below hands _print_array a label string plus whatever the
 # _reproduce_* helper returns for a freshly constructed classifier.
 # NOTE(review): presumably this emits reference arrays for regression
 # tests -- confirm against the helper definitions, which are not shown here.

 # TemporalDictionaryEnsemble with explicit sizes and a fixed seed, passed to
 # the basic-motions helper.
 _print_array(
     "TemporalDictionaryEnsemble - BasicMotions",
     _reproduce_classification_basic_motions(
         TemporalDictionaryEnsemble(
             n_parameter_samples=10,
             max_ensemble_size=5,
             randomly_selected_params=5,
             random_state=0,
         )
     ),
 )
 # WEASEL with window_inc=4 and a fixed seed, passed to the unit-test helper.
 _print_array(
     "WEASEL - UnitTest",
     _reproduce_classification_unit_test(WEASEL(random_state=0, window_inc=4)),
 )
 # ElasticEnsemble with both proportion arguments set to 0.1 and a fixed
 # seed, passed to the unit-test helper.
 _print_array(
     "ElasticEnsemble - UnitTest",
     _reproduce_classification_unit_test(
         ElasticEnsemble(
             proportion_of_param_options=0.1,
             proportion_train_for_test=0.1,
             random_state=0,
         )
     ),
 )
 _print_array(
     "ProximityForest - UnitTest",
     _reproduce_classification_unit_test(
         ProximityForest(n_estimators=5, random_state=0)
Ejemplo n.º 9
0
         )),
 )
 # Each call below hands _print_array a label string plus whatever the
 # _reproduce_* helper returns for a freshly constructed classifier.
 # NOTE(review): presumably this emits reference arrays for regression
 # tests -- confirm against the helper definitions, which are not shown here.

 # TemporalDictionaryEnsemble with explicit sizes and a fixed seed, passed to
 # the basic-motions helper.
 _print_array(
     "TemporalDictionaryEnsemble - BasicMotions",
     _reproduce_classification_basic_motions(
         TemporalDictionaryEnsemble(
             n_parameter_samples=10,
             max_ensemble_size=5,
             randomly_selected_params=5,
             random_state=0,
         )),
 )
 # WEASEL with window_inc=4 and a fixed seed, passed to the unit-test helper.
 _print_array(
     "WEASEL - UnitTest",
     _reproduce_classification_unit_test(
         WEASEL(window_inc=4, random_state=0)),
 )
 # ElasticEnsemble restricted to three named distance measures, with
 # majority_vote enabled and a fixed seed, passed to the unit-test helper.
 _print_array(
     "ElasticEnsemble - UnitTest",
     _reproduce_classification_unit_test(
         ElasticEnsemble(
             proportion_of_param_options=0.1,
             proportion_train_for_test=0.1,
             majority_vote=True,
             distance_measures=["dtw", "ddtw", "wdtw"],
             random_state=0,
         )),
 )
 _print_array(
     "ProximityForest - UnitTest",
     _reproduce_classification_unit_test(
# Min-max scale along axis=1: each slice x becomes (x - min) / (max - min).
# NOTE(review): a slice whose max equals its min divides by zero here --
# confirm constant series cannot occur in the input.
norm_data = norm_data.apply(lambda x: (x - x.min()) / (x.max() - x.min()),
                            axis=1)
# Plain array of the scaled values.
X_norm = norm_data.values

# Binarize the labels, then flatten the result to one dimension.
lb = LabelBinarizer()
y = lb.fit_transform(label)
y = y.reshape(-1)[:]

# The data must be converted from tabular to nested format before sktime
# algorithms can be applied.
X_nested = from_2d_array_to_nested(X_norm)[:]

# Models and their parameter grids: one entry per model name, holding the
# estimator instance under 'model' and candidate values under 'params'.
model_params = {
    'WEASEL': {
        'model': WEASEL(),
        'params': {
            'window_inc': [2, 3, 4, 5, 6],
            'random_state': [1]
        }
    }
}

# Metrics and their parameters
scoring = {
    'acc': 'accuracy',
    'prec': make_scorer(precision_score, pos_label=pos_label),
    'avg_prec': make_scorer(average_precision_score, pos_label=pos_label),
    'recall': make_scorer(recall_score, pos_label=pos_label),
    'f1': make_scorer(f1_score, pos_label=pos_label),
    'bal_acc': 'balanced_accuracy'
Ejemplo n.º 11
0
    def _fit_estimator(self, X, y, i):
        """Fit the estimator and one-class classifier for threshold ``i``.

        Parameters
        ----------
        X : array-like
            Training data. It is sliced as ``X[:, :, :k]`` and ``X.shape[1]``
            is read, so it is assumed to be a 3D array -- TODO confirm the
            axis meaning against the caller.
        y : array-like
            Training labels, indexed by position and looked up in
            ``self._class_dictionary``.
        i : int
            Index into ``self._classification_points`` selecting how much of
            each series the estimator is fitted on; also feeds the seed.

        Returns
        -------
        tuple
            ``(estimator, one_class_classifier, train_probas, train_preds)``
            where ``one_class_classifier`` is ``None`` when fewer than two
            training cases were predicted correctly.
        """
        # Derive a seed specific to this threshold. A configured seed of 0 is
        # swapped for 255 so the product below is not always 0.
        if self.random_state is None:
            rs = None
        else:
            base_seed = 255 if self.random_state == 0 else self.random_state
            rs = base_seed * 37 * (i + 1)
        rng = check_random_state(rs)

        # Fall back to MUSE when the second axis of X is larger than 1 and to
        # WEASEL otherwise; a user-supplied estimator takes precedence.
        if X.shape[1] > 1:
            fallback = MUSE()
        else:
            fallback = WEASEL()
        template = fallback if self.estimator is None else self.estimator
        estimator = _clone_estimator(template, rng)

        # Only override n_jobs on estimators that expose a non-None value.
        if getattr(estimator, "n_jobs", None) is not None:
            estimator.n_jobs = self._threads_to_use

        # fit estimator for this threshold
        series_end = self._classification_points[i]
        estimator.fit(X[:, :, :series_end], y)

        # get train set probability estimates for this estimator
        # NOTE(review): the estimator is fitted on the truncated series but the
        # train probabilities below are requested for the full X -- confirm
        # this is intended.
        provides_train_probs = callable(getattr(estimator, "_get_train_probs", None))
        if provides_train_probs and (
            getattr(estimator, "_save_transformed_data", False)
            or getattr(estimator, "_save_train_predictions", False)
        ):
            train_probas = estimator._get_train_probs(X, y)
        else:
            # Use at most 5 folds, fewer when the rarest class is smaller.
            _, class_counts = np.unique(y, return_counts=True)
            n_folds = min(5, np.min(class_counts))

            train_probas = cross_val_predict(
                estimator, X, y=y, cv=n_folds, method="predict_proba"
            )

        # Predicted class index per case; ties for the maximum probability
        # are broken by a random draw from rng.
        train_preds = [
            int(rng.choice(np.flatnonzero(case_probs == case_probs.max())))
            for case_probs in train_probas
        ]

        # create train set for the one class classifier using train probas with the
        # minimum difference to the predicted probability
        train_probas = self._generate_one_class_features(X, train_preds, train_probas)
        X_oc = [
            train_probas[idx]
            for idx, predicted in enumerate(train_preds)
            if predicted == self._class_dictionary[y[idx]]
        ]

        # With fewer than two correctly predicted cases there is nothing to
        # fit the one class classifier on.
        if len(X_oc) <= 1:
            return estimator, None, train_probas, train_preds

        # fit one class classifier and grid search parameters if a grid is provided
        if self.one_class_classifier is None:
            base_one_class = OneClassSVM(tol=self._svm_tol, nu=self._svm_nu)
        else:
            base_one_class = _clone_estimator(
                self.one_class_classifier, random_state=rs
            )

        # The default gamma grid applies only when neither a custom classifier
        # nor a custom grid was supplied.
        if self.one_class_classifier is None and self.one_class_param_grid is None:
            search_grid = {"gamma": self._svm_gammas}
        else:
            search_grid = self.one_class_param_grid

        search = GridSearchCV(
            estimator=base_one_class,
            param_grid=search_grid,
            scoring="accuracy",
            cv=min(len(X_oc), 10),
        )
        # Every retained case is labelled 1.
        search.fit(X_oc, np.ones(len(X_oc)))

        return estimator, search.best_estimator_, train_probas, train_preds
Ejemplo n.º 12
0
         )),
 )
 # Each call below hands _print_array a label string plus whatever the
 # _reproduce_* helper returns for a freshly constructed classifier.
 # NOTE(review): presumably this emits reference arrays for regression
 # tests -- confirm against the helper definitions, which are not shown here.

 # TemporalDictionaryEnsemble with explicit sizes and a fixed seed, passed to
 # the basic-motions helper.
 _print_array(
     "TemporalDictionaryEnsemble - BasicMotions",
     _reproduce_classification_basic_motions(
         TemporalDictionaryEnsemble(
             n_parameter_samples=10,
             max_ensemble_size=5,
             randomly_selected_params=5,
             random_state=0,
         )),
 )
 # WEASEL with window_inc=4 and a fixed seed, passed to the unit-test helper.
 _print_array(
     "WEASEL - UnitTest",
     _reproduce_classification_unit_test(
         WEASEL(random_state=0, window_inc=4)),
 )
 # ElasticEnsemble with both proportion arguments set to 0.1 and a fixed
 # seed, passed to the unit-test helper.
 _print_array(
     "ElasticEnsemble - UnitTest",
     _reproduce_classification_unit_test(
         ElasticEnsemble(
             proportion_of_param_options=0.1,
             proportion_train_for_test=0.1,
             random_state=0,
         )),
 )
 # ProximityForest with five estimators and a fixed seed, passed to the
 # unit-test helper.
 _print_array(
     "ProximityForest - UnitTest",
     _reproduce_classification_unit_test(
         ProximityForest(n_estimators=5, random_state=0)),
 )