def test_col_ens_on_unit_test_data():
    """Check ColumnEnsembleClassifier probabilities on the unit test data.

    A column ensemble holding a single FreshPRINCE estimator on column 0 is
    fitted on the train split. Its predicted probabilities for ten sampled
    test cases must match ``col_ens_unit_test_probas`` to two decimals.
    """
    X_train, y_train = load_unit_test(split="train")
    X_test, _ = load_unit_test(split="test")

    # Ten distinct, seeded row positions. The range comes from the length of
    # the training labels and the positions are applied to the test split.
    sample_idx = np.random.RandomState(0).choice(len(y_train), 10, replace=False)

    # Column ensemble made of one estimator restricted to the first column.
    fresh_prince = FreshPRINCE(
        random_state=0,
        default_fc_parameters="minimal",
        n_estimators=10,
    )
    ensemble = ColumnEnsembleClassifier(
        estimators=[("FreshPrince", fresh_prince, [0])]
    )
    ensemble.fit(X_train, y_train)

    # Compare against the stored reference probabilities.
    predicted = ensemble.predict_proba(X_test.iloc[sample_idx])
    testing.assert_array_almost_equal(
        predicted, col_ens_unit_test_probas, decimal=2
    )
def test_col_ens_on_basic_motions():
    """Check ColumnEnsembleClassifier probabilities on basic motions data.

    Three estimators are assigned to disjoint column groups: FreshPRINCE on
    columns 0-2, TemporalDictionaryEnsemble on columns 3-4 and DrCIF on
    column 5. Predicted probabilities for ten sampled test cases must match
    ``col_ens_basic_motions_probas`` to two decimals.
    """
    X_train, y_train = load_basic_motions(split="train")
    X_test, _ = load_basic_motions(split="test")

    # Ten distinct, seeded row positions. The range comes from the length of
    # the training labels and the positions are applied to the test split.
    sample_idx = np.random.RandomState(4).choice(len(y_train), 10, replace=False)

    fresh_prince = FreshPRINCE(
        random_state=0,
        default_fc_parameters="minimal",
        n_estimators=10,
    )
    dictionary_ensemble = TemporalDictionaryEnsemble(
        n_parameter_samples=10,
        max_ensemble_size=5,
        randomly_selected_params=5,
        random_state=0,
    )
    interval_forest = DrCIF(
        n_estimators=10, random_state=0, save_transformed_data=True
    )

    # Fit the column ensemble with one estimator per column group.
    ensemble = ColumnEnsembleClassifier(
        estimators=[
            ("FreshPrince", fresh_prince, [0, 1, 2]),
            ("TDE", dictionary_ensemble, [3, 4]),
            ("DrCIF", interval_forest, [5]),
        ]
    )
    ensemble.fit(X_train, y_train)

    # Compare against the stored reference probabilities.
    predicted = ensemble.predict_proba(X_test.iloc[sample_idx])
    testing.assert_array_almost_equal(
        predicted, col_ens_basic_motions_probas, decimal=2
    )
def test_fresh_prince_on_unit_test_data():
    """Check FreshPRINCE accuracy on a sample of the unit test data.

    The classifier is fitted on the train split and must score at least 0.8
    on ten sampled cases from the test split.
    """
    X_train, y_train = load_unit_test(split="train")
    X_test, y_test = load_unit_test(split="test")

    # Ten distinct, seeded row positions. The range comes from the length of
    # the training labels and the positions are applied to the test split.
    sample_idx = np.random.RandomState(0).choice(len(y_train), 10, replace=False)

    classifier = FreshPRINCE(
        random_state=0,
        default_fc_parameters="minimal",
        n_estimators=10,
        save_transformed_data=True,
    )
    classifier.fit(X_train, y_train)

    assert classifier.score(X_test.iloc[sample_idx], y_test[sample_idx]) >= 0.8
def test_fresh_prince_train_estimate():
    """Check the FreshPRINCE train estimate on the unit test data.

    The train probabilities must have shape ``(20, 2)`` and the labels
    derived from them must reach an accuracy of at least 0.6 on the train
    split.
    """
    X_train, y_train = load_unit_test(split="train")

    classifier = FreshPRINCE(
        n_estimators=2,
        default_fc_parameters="minimal",
        random_state=0,
        save_transformed_data=True,
    )
    classifier.fit(X_train, y_train)

    # Train estimate: one probability row per training case.
    estimated_probas = classifier._get_train_probs(X_train, y_train)
    assert estimated_probas.shape == (20, 2)

    # Map the most probable column of each row back to its class label.
    best_column = np.argmax(estimated_probas, axis=1)
    estimated_labels = classifier.classes_[best_column]
    assert accuracy_score(y_train, estimated_labels) >= 0.6
def test_fresh_prince_on_unit_test_data():
    """Check FreshPRINCE probabilities and train estimate on unit test data.

    Predicted probabilities for ten sampled test cases must match
    ``fp_classifier_unit_test_probas`` to two decimals, and the labels
    derived from the train estimate must reach an accuracy of at least 0.75.
    """
    # NOTE(review): another function with this exact name appears in the
    # visible source; if both live in one module the later definition
    # replaces the earlier one. Confirm which is intended.
    X_train, y_train = load_unit_test(split="train")
    X_test, _ = load_unit_test(split="test")

    # Ten distinct, seeded row positions. The range comes from the length of
    # the training labels and the positions are applied to the test split.
    sample_idx = np.random.RandomState(0).choice(len(y_train), 10, replace=False)

    classifier = FreshPRINCE(
        random_state=0,
        default_fc_parameters="minimal",
        n_estimators=10,
        save_transformed_data=True,
    )
    classifier.fit(X_train, y_train)

    # Compare against the stored reference probabilities.
    predicted = classifier.predict_proba(X_test.iloc[sample_idx])
    testing.assert_array_almost_equal(
        predicted, fp_classifier_unit_test_probas, decimal=2
    )

    # Train estimate: map the most probable column back to its class label.
    estimated_probas = classifier._get_train_probs(X_train, y_train)
    best_column = np.argmax(estimated_probas, axis=1)
    estimated_labels = classifier.classes_[best_column]
    assert accuracy_score(y_train, estimated_labels) >= 0.75
) _print_array( "Catch22Classifier - BasicMotions", _reproduce_classification_basic_motions( Catch22Classifier( random_state=0, estimator=RandomForestClassifier(n_estimators=10), ) ), ) _print_array( "FreshPRINCE - UnitTest", _reproduce_classification_unit_test( FreshPRINCE( random_state=0, default_fc_parameters="minimal", n_estimators=10, ) ), ) _print_array( "MatrixProfileClassifier - UnitTest", _reproduce_classification_unit_test(MatrixProfileClassifier(random_state=0)), ) _print_array( "RandomIntervalClassifier - UnitTest", _reproduce_classification_unit_test( RandomIntervalClassifier( random_state=0, n_intervals=5, interval_transformers=SummaryTransformer(
def set_classifier(cls, resample_id=None, train_file=False):
    """Construct a classifier, possibly seeded.

    Basic way of creating the classifier to build using the default settings.
    This set up is to help with batch jobs for multiple problems to facilitate
    easy reproducibility for use with load_and_run_classification_experiment.
    You can pass a classifier object instead to run_classification_experiment.

    Parameters
    ----------
    cls : str
        String indicating which classifier you want. Matching is
        case-insensitive: the name is lower-cased before comparison.
    resample_id : int or None, default=None
        Classifier random seed, passed as ``random_state`` to the classifiers
        that are constructed with one here. The KNeighborsTimeSeriesClassifier
        variants and ShapeDTW are built without it.
    train_file : bool, default=False
        Whether a train file is being produced. Forwarded as
        ``save_train_predictions`` (TDE) or ``save_transformed_data``
        (FreshPRINCE, DrCIF, Arsenal variants, STC).

    Returns
    -------
    classifier : A BaseClassifier.
        The classifier matching the input classifier name.

    Raises
    ------
    Exception
        If ``cls`` does not match any known classifier name.
    """
    name = cls.lower()

    # Dictionary based
    if name in ("boss", "bossensemble"):
        return BOSSEnsemble(random_state=resample_id)
    if name in ("cboss", "contractableboss"):
        return ContractableBOSS(random_state=resample_id)
    if name in ("tde", "temporaldictionaryensemble"):
        return TemporalDictionaryEnsemble(
            random_state=resample_id, save_train_predictions=train_file
        )
    if name == "weasel":
        return WEASEL(random_state=resample_id)
    if name == "muse":
        return MUSE(random_state=resample_id)

    # Distance based
    if name in ("pf", "proximityforest"):
        return ProximityForest(random_state=resample_id)
    if name in ("pt", "proximitytree"):
        return ProximityTree(random_state=resample_id)
    # The alias must be all lower case: ``name`` has already been lower-cased,
    # so a mixed-case literal such as "proximityStump" could never match.
    if name in ("ps", "proximitystump"):
        return ProximityStump(random_state=resample_id)
    if name in ("dtwcv", "kneighborstimeseriesclassifier"):
        return KNeighborsTimeSeriesClassifier(distance="dtwcv")
    if name in ("dtw", "1nn-dtw"):
        return KNeighborsTimeSeriesClassifier(distance="dtw")
    if name in ("msm", "1nn-msm"):
        return KNeighborsTimeSeriesClassifier(distance="msm")
    if name in ("ee", "elasticensemble"):
        return ElasticEnsemble(random_state=resample_id)
    if name == "shapedtw":
        return ShapeDTW()

    # Feature based
    if name == "summary":
        return SummaryClassifier(
            random_state=resample_id,
            estimator=RandomForestClassifier(n_estimators=500),
        )
    if name == "summary-intervals":
        return RandomIntervalClassifier(
            random_state=resample_id,
            interval_transformers=SummaryTransformer(
                summary_function=("mean", "std", "min", "max"),
                quantiles=(0.25, 0.5, 0.75),
            ),
            estimator=RandomForestClassifier(n_estimators=500),
        )
    if name == "summary-catch22":
        return RandomIntervalClassifier(
            random_state=resample_id,
            estimator=RandomForestClassifier(n_estimators=500),
        )
    if name == "catch22":
        return Catch22Classifier(
            random_state=resample_id,
            estimator=RandomForestClassifier(n_estimators=500),
        )
    if name == "matrixprofile":
        return MatrixProfileClassifier(random_state=resample_id)
    if name == "signature":
        return SignatureClassifier(
            random_state=resample_id,
            estimator=RandomForestClassifier(n_estimators=500),
        )
    if name == "tsfresh":
        return TSFreshClassifier(
            random_state=resample_id,
            estimator=RandomForestClassifier(n_estimators=500),
        )
    if name == "tsfresh-r":
        return TSFreshClassifier(
            random_state=resample_id,
            estimator=RandomForestClassifier(n_estimators=500),
            relevant_feature_extractor=True,
        )
    if name == "freshprince":
        return FreshPRINCE(
            random_state=resample_id, save_transformed_data=train_file
        )

    # Hybrid
    if name in ("hc1", "hivecotev1"):
        return HIVECOTEV1(random_state=resample_id)
    if name in ("hc2", "hivecotev2"):
        return HIVECOTEV2(random_state=resample_id)

    # Interval based
    if name in ("rise", "randomintervalspectralforest"):
        return RandomIntervalSpectralForest(
            random_state=resample_id, n_estimators=500
        )
    if name in ("tsf", "timeseriesforestclassifier"):
        return TimeSeriesForestClassifier(
            random_state=resample_id, n_estimators=500
        )
    if name in ("cif", "canonicalintervalforest"):
        return CanonicalIntervalForest(
            random_state=resample_id, n_estimators=500
        )
    if name in ("stsf", "supervisedtimeseriesforest"):
        return SupervisedTimeSeriesForest(
            random_state=resample_id, n_estimators=500
        )
    if name == "drcif":
        return DrCIF(
            random_state=resample_id,
            n_estimators=500,
            save_transformed_data=train_file,
        )

    # Kernel based
    if name == "rocket":
        return RocketClassifier(random_state=resample_id)
    if name == "mini-rocket":
        return RocketClassifier(
            random_state=resample_id, rocket_transform="minirocket"
        )
    if name == "multi-rocket":
        return RocketClassifier(
            random_state=resample_id, rocket_transform="multirocket"
        )
    if name == "arsenal":
        return Arsenal(
            random_state=resample_id, save_transformed_data=train_file
        )
    if name == "mini-arsenal":
        return Arsenal(
            random_state=resample_id,
            save_transformed_data=train_file,
            rocket_transform="minirocket",
        )
    if name == "multi-arsenal":
        return Arsenal(
            random_state=resample_id,
            save_transformed_data=train_file,
            rocket_transform="multirocket",
        )

    # Shapelet based
    if name in ("stc", "shapelettransformclassifier"):
        return ShapeletTransformClassifier(
            transform_limit_in_minutes=120,
            random_state=resample_id,
            save_transformed_data=train_file,
        )

    raise Exception("UNKNOWN CLASSIFIER")