def test_prob_threshold_on_unit_test_data():
    """Check ProbabilityThresholdEarlyClassifier output on the unit test data.

    The classifier is fitted on the training split. Ten sampled test cases
    are then truncated to each classification point in turn, and for every
    case the probabilities from the first point at which
    ``decide_prediction_safety`` flags it are recorded. The recorded
    probabilities must equal the stored expected values.
    """
    n_cases = 10

    train_X, train_y = load_unit_test(split="train", return_X_y=True)
    test_X, test_y = load_unit_test(split="test", return_X_y=True)
    sample_idx = np.random.RandomState(0).choice(
        len(train_y), n_cases, replace=False
    )

    early_clf = ProbabilityThresholdEarlyClassifier(
        random_state=0,
        classification_points=[6, 16, 24],
        probability_threshold=1,
        estimator=TimeSeriesForestClassifier(n_estimators=10, random_state=0),
    )
    early_clf.fit(train_X, train_y)

    final_probas = np.zeros((n_cases, 2))
    # 0 means "no decision recorded yet"; otherwise the deciding point
    final_decisions = np.zeros(n_cases)

    test_X = from_nested_to_3d_numpy(test_X)
    states = None
    for point in early_clf.classification_points:
        truncated = test_X[sample_idx, :, :point]
        probas = early_clf.predict_proba(truncated)
        decisions, states = early_clf.decide_prediction_safety(
            truncated, probas, states
        )

        for case in range(n_cases):
            # skip cases not flagged at this point or already decided earlier
            if not decisions[case] or final_decisions[case] != 0:
                continue
            final_probas[case] = probas[case]
            final_decisions[case] = point

    testing.assert_array_equal(final_probas, pt_unit_test_probas)
def test_run_clustering_experiment():
    """Test running and saving results for clustering.

    Runs a k-means clustering experiment on the unit test data and checks
    that the train and test result files have been created.

    Results are written into a temporary directory, so the outcome does not
    depend on the current working directory and no files are left behind
    when an assertion fails.
    """
    import tempfile

    dataset = "UnitTest"
    train_X, train_Y = load_unit_test("TRAIN", return_X_y=True)
    test_X, test_Y = load_unit_test("TEST", return_X_y=True)

    with tempfile.TemporaryDirectory() as tmp_dir:
        # pass the directory with a trailing separator (shape "dir/")
        results_path = os.path.join(tmp_dir, "")
        run_clustering_experiment(
            train_X,
            TimeSeriesKMeans(n_clusters=2),
            results_path=results_path,
            trainY=train_Y,
            testX=test_X,
            testY=test_Y,
            cls_name="kmeans",
            dataset_name=dataset,
            resample_id=0,
        )

        predictions_dir = os.path.join(tmp_dir, "kmeans", "Predictions", dataset)
        test_path = os.path.join(predictions_dir, "testResample0.csv")
        train_path = os.path.join(predictions_dir, "trainResample0.csv")
        assert os.path.isfile(test_path)
        assert os.path.isfile(train_path)
    # leaving the ``with`` block removes the directory and the result files
def test_tsf_predictions(n_estimators, n_intervals):
    """Compare an explicitly composed forest with the default forest.

    A ComposableTimeSeriesForestClassifier built around an explicit
    interval-feature pipeline must give exactly the same probabilities as
    one built with default settings, given the same random state and number
    of estimators.

    ``n_intervals`` is accepted but not used in the body.
    """
    seed = 1234
    train_X, train_y = load_unit_test(split="train")
    test_X, test_y = load_unit_test(split="test")

    # fully modular variant: explicit transformer + decision tree pipeline
    interval_features = RandomIntervalFeatureExtractor(
        random_state=seed, features=[np.mean, np.std, _slope]
    )
    pipeline = Pipeline(
        [("transform", interval_features), ("clf", DecisionTreeClassifier())]
    )
    explicit_forest = ComposableTimeSeriesForestClassifier(
        estimator=pipeline, random_state=seed, n_estimators=n_estimators
    )
    explicit_forest.fit(train_X, train_y)
    explicit_probas = explicit_forest.predict_proba(test_X)

    # default variant: no estimator passed to the forest
    default_forest = ComposableTimeSeriesForestClassifier(
        random_state=seed, n_estimators=n_estimators
    )
    default_forest.fit(train_X, train_y)
    default_probas = default_forest.predict_proba(test_X)

    np.testing.assert_array_equal(explicit_probas, default_probas)
def test_stc_on_unit_test_data():
    """Check ShapeletTransformClassifier on the unit test data.

    Verifies that the probabilities for ten sampled test cases equal the
    stored expected values, and that the train estimate reaches an accuracy
    of at least 0.75.
    """
    train_X, train_y = load_unit_test(split="train", return_X_y=True)
    test_X, test_y = load_unit_test(split="test", return_X_y=True)
    sample_idx = np.random.RandomState(0).choice(len(train_y), 10, replace=False)

    classifier = ShapeletTransformClassifier(
        estimator=RotationForest(n_estimators=3),
        max_shapelets=20,
        n_shapelet_samples=500,
        batch_size=100,
        random_state=0,
        save_transformed_data=True,
    )
    classifier.fit(train_X, train_y)

    # predicted probabilities must match the stored reference exactly
    sampled_test_X = test_X.iloc[sample_idx]
    testing.assert_array_equal(
        classifier.predict_proba(sampled_test_X), stc_unit_test_probas
    )

    # train estimate: map the most probable column back to a class label
    train_probas = classifier._get_train_probs(train_X, train_y)
    best_column = np.argmax(train_probas, axis=1)
    train_preds = classifier.classes_[best_column]
    assert accuracy_score(train_y, train_preds) >= 0.75
def test_hivecote_v1_on_unit_test_data():
    """Check HIVECOTEV1 probabilities on a sample of the unit test data.

    The same ten randomly drawn indices select the cases used for fitting
    (from the train split) and for prediction (from the test split). The
    probabilities must match the stored values to two decimals.
    """
    train_X, train_y = load_unit_test(split="train", return_X_y=True)
    test_X, test_y = load_unit_test(split="test", return_X_y=True)
    sample_idx = np.random.RandomState(0).choice(len(train_y), 10, replace=False)

    # parameter sets for the individual components
    stc_settings = {
        "estimator": RotationForest(n_estimators=3),
        "n_shapelet_samples": 500,
        "max_shapelets": 20,
        "batch_size": 100,
    }
    cboss_settings = {"n_parameter_samples": 25, "max_ensemble_size": 5}

    hc1 = HIVECOTEV1(
        random_state=0,
        stc_params=stc_settings,
        tsf_params={"n_estimators": 10},
        rise_params={"n_estimators": 10},
        cboss_params=cboss_settings,
    )
    hc1.fit(train_X.iloc[sample_idx], train_y[sample_idx])

    predicted = hc1.predict_proba(test_X.iloc[sample_idx])
    testing.assert_array_almost_equal(
        predicted, hivecote_v1_unit_test_probas, decimal=2
    )
def _reproduce_classification_unit_test(estimator):
    """Fit ``estimator`` on the unit test data and return sample probabilities.

    Parameters
    ----------
    estimator : object
        Object exposing ``fit(X, y)`` and ``predict_proba(X)``.

    Returns
    -------
    Output of ``estimator.predict_proba`` for ten test cases whose indices
    are drawn with ``RandomState(0)``.
    """
    train_X, train_y = load_unit_test(split="train", return_X_y=True)
    test_X, test_y = load_unit_test(split="test", return_X_y=True)
    sample_idx = np.random.RandomState(0).choice(len(train_y), 10, replace=False)

    estimator.fit(train_X, train_y)
    sampled_test_X = test_X.iloc[sample_idx]
    return estimator.predict_proba(sampled_test_X)
def test_classifier_on_unit_test_data(self, estimator_class):
    """Compare classifier probabilities on unit test data to stored values.

    Returns ``None`` without checking anything when no expected
    probabilities are registered in ``unit_test_proba`` under the class
    name. Otherwise a test instance is created with the
    "results_comparison" parameter set, seeded if it has a ``random_state``
    parameter, fitted on the train split, and its probabilities for ten
    sampled test cases are compared to two decimals.
    """
    class_name = estimator_class.__name__

    # nothing to compare against: leave early
    if class_name not in unit_test_proba.keys():
        return None
    expected = unit_test_proba[class_name]

    # work on a clone of the instance created for results comparison
    classifier = clone(
        estimator_class.create_test_instance(parameter_set="results_comparison")
    )

    # fix the seed when the estimator exposes one
    if "random_state" in classifier.get_params().keys():
        classifier.set_params(random_state=0)

    train_X, train_y = load_unit_test(split="train")
    test_X, _ = load_unit_test(split="test")
    sample_idx = np.random.RandomState(0).choice(len(train_y), 10, replace=False)

    classifier.fit(train_X, train_y)
    predicted = classifier.predict_proba(test_X.iloc[sample_idx])

    _assert_array_almost_equal(predicted, expected, decimal=2)
def test_col_ens_on_unit_test_data():
    """Check ColumnEnsembleClassifier wrapping FreshPRINCE on unit test data.

    The ensemble holds a single FreshPRINCE estimator assigned to column 0.
    Its probabilities for ten sampled test cases must match the stored
    values to two decimals.
    """
    train_X, train_y = load_unit_test(split="train")
    test_X, test_y = load_unit_test(split="test")
    sample_idx = np.random.RandomState(0).choice(len(train_y), 10, replace=False)

    # column ensemble with a single estimator on the first column
    fresh_prince = FreshPRINCE(
        random_state=0,
        default_fc_parameters="minimal",
        n_estimators=10,
    )
    ensemble = ColumnEnsembleClassifier(
        estimators=[("FreshPrince", fresh_prince, [0])]
    )
    ensemble.fit(train_X, train_y)

    predicted = ensemble.predict_proba(test_X.iloc[sample_idx])
    testing.assert_array_almost_equal(predicted, col_ens_unit_test_probas, decimal=2)
def test_run_clustering_experiment(tmp_path):
    """Test running and saving results for clustering.

    Runs a k-means experiment writing into ``tmp_path``, checks that the
    test and train result files exist, and then removes them.
    """
    dataset = "UnitTest"
    train_X, train_Y = load_unit_test("TRAIN")
    test_X, test_Y = load_unit_test("TEST")

    clusterer = TimeSeriesKMeans(n_clusters=2)
    run_clustering_experiment(
        train_X,
        clusterer,
        results_path=tmp_path,
        trainY=train_Y,
        testX=test_X,
        testY=test_Y,
        cls_name="kmeans",
        dataset_name=dataset,
        resample_id=0,
    )

    result_files = (
        tmp_path / f"kmeans/Predictions/{dataset}/testResample0.csv",
        tmp_path / f"kmeans/Predictions/{dataset}/trainResample0.csv",
    )
    for result_file in result_files:
        assert result_file.is_file()

    # clean up only after both files have been confirmed
    for result_file in result_files:
        result_file.unlink()
def test_run_classification_experiment(tmp_path):
    """Test running and saving results for classifiers.

    Runs a TimeSeriesForestClassifier experiment writing into ``tmp_path``
    with train file output enabled, checks that the test and train result
    files exist, and then removes them.
    """
    dataset = "UnitTest"
    train_X, train_Y = load_unit_test("TRAIN")
    test_X, test_Y = load_unit_test("TEST")

    classifier = TimeSeriesForestClassifier(n_estimators=10)
    run_classification_experiment(
        train_X,
        train_Y,
        test_X,
        test_Y,
        classifier,
        str(tmp_path),
        cls_name="TSF",
        dataset=dataset,
        resample_id=0,
        train_file=True,
    )

    result_files = (
        tmp_path / f"TSF/Predictions/{dataset}/testResample0.csv",
        tmp_path / f"TSF/Predictions/{dataset}/trainResample0.csv",
    )
    for result_file in result_files:
        assert result_file.is_file()

    # clean up only after both files have been confirmed
    for result_file in result_files:
        result_file.unlink()
def test_cboss_on_unit_test_data():
    """Check ContractableBOSS on the unit test data.

    Verifies that the probabilities for ten sampled test cases match the
    stored values to two decimals, and that the train estimate reaches an
    accuracy of at least 0.75.
    """
    train_X, train_y = load_unit_test(split="train", return_X_y=True)
    test_X, test_y = load_unit_test(split="test", return_X_y=True)
    sample_idx = np.random.RandomState(0).choice(len(train_y), 10, replace=False)

    classifier = ContractableBOSS(
        n_parameter_samples=25,
        max_ensemble_size=5,
        random_state=0,
        save_train_predictions=True,
    )
    classifier.fit(train_X, train_y)

    predicted = classifier.predict_proba(test_X.iloc[sample_idx])
    testing.assert_array_almost_equal(predicted, cboss_unit_test_probas, decimal=2)

    # train estimate: map the most probable column back to a class label
    train_probas = classifier._get_train_probs(train_X, train_y)
    best_column = np.argmax(train_probas, axis=1)
    train_preds = classifier.classes_[best_column]
    assert accuracy_score(train_y, train_preds) >= 0.75
def test_hivecote_v2_on_unit_test():
    """Check HIVECOTEV2 probabilities on a sample of the unit test data.

    The same ten randomly drawn indices select the cases used for fitting
    (from the train split) and for prediction (from the test split). The
    probabilities must equal the stored values exactly.
    """
    train_X, train_y = load_unit_test(split="train", return_X_y=True)
    test_X, test_y = load_unit_test(split="test", return_X_y=True)
    sample_idx = np.random.RandomState(0).choice(len(train_y), 10, replace=False)

    # parameter sets for the individual components
    stc_settings = {
        "estimator": RotationForest(n_estimators=3),
        "n_shapelet_samples": 500,
        "max_shapelets": 20,
        "batch_size": 100,
    }
    arsenal_settings = {"num_kernels": 100, "n_estimators": 5}
    tde_settings = {
        "n_parameter_samples": 10,
        "max_ensemble_size": 5,
        "randomly_selected_params": 5,
    }

    hc2 = HIVECOTEV2(
        random_state=0,
        stc_params=stc_settings,
        drcif_params={"n_estimators": 10},
        arsenal_params=arsenal_settings,
        tde_params=tde_settings,
    )
    hc2.fit(train_X.iloc[sample_idx], train_y[sample_idx])

    predicted = hc2.predict_proba(test_X.iloc[sample_idx])
    testing.assert_array_equal(predicted, hivecote_v2_unit_test_probas)
def test_tde_on_unit_test_data():
    """Check TemporalDictionaryEnsemble on the unit test data.

    Verifies that the probabilities for ten sampled test cases, rounded to
    six decimals, equal the stored values, and that both train estimates
    reach their accuracy thresholds (0.85 for the default method, 0.8 for
    "oob").
    """
    train_X, train_y = load_unit_test(split="train", return_X_y=True)
    test_X, test_y = load_unit_test(split="test", return_X_y=True)
    sample_idx = np.random.RandomState(0).choice(len(train_y), 10, replace=False)

    classifier = TemporalDictionaryEnsemble(
        n_parameter_samples=10,
        max_ensemble_size=5,
        randomly_selected_params=5,
        random_state=0,
    )
    classifier.fit(train_X, train_y)

    predicted = classifier.predict_proba(test_X.iloc[sample_idx])
    testing.assert_array_equal(predicted.round(6), tde_unit_test_probas)

    # train estimate with the default method
    default_probas = classifier._get_train_probs(train_X, train_y)
    default_preds = classifier.classes_[np.argmax(default_probas, axis=1)]
    assert accuracy_score(train_y, default_preds) >= 0.85

    # train estimate with the out-of-bag method
    oob_probas = classifier._get_train_probs(
        train_X, train_y, train_estimate_method="oob"
    )
    oob_preds = classifier.classes_[np.argmax(oob_probas, axis=1)]
    assert accuracy_score(train_y, oob_preds) >= 0.8
def test_fresh_prince_on_unit_test_data():
    """Check FreshPRINCE on the unit test data.

    Verifies that the probabilities for ten sampled test cases match the
    stored values to two decimals, and that the train estimate reaches an
    accuracy of at least 0.75.
    """
    train_X, train_y = load_unit_test(split="train")
    test_X, test_y = load_unit_test(split="test")
    sample_idx = np.random.RandomState(0).choice(len(train_y), 10, replace=False)

    classifier = FreshPRINCE(
        random_state=0,
        default_fc_parameters="minimal",
        n_estimators=10,
        save_transformed_data=True,
    )
    classifier.fit(train_X, train_y)

    predicted = classifier.predict_proba(test_X.iloc[sample_idx])
    testing.assert_array_almost_equal(
        predicted, fp_classifier_unit_test_probas, decimal=2
    )

    # train estimate: map the most probable column back to a class label
    train_probas = classifier._get_train_probs(train_X, train_y)
    best_column = np.argmax(train_probas, axis=1)
    train_preds = classifier.classes_[best_column]
    assert accuracy_score(train_y, train_preds) >= 0.75
def test_run_classification_experiment():
    """Test running and saving results for classifiers.

    Runs a TimeSeriesForestClassifier experiment on the unit test data with
    train file output enabled and checks that the train and test result
    files have been created.

    Results are written into a temporary directory, so the outcome does not
    depend on the current working directory and no files are left behind
    when an assertion fails.
    """
    import tempfile

    dataset = "UnitTest"
    train_X, train_Y = load_unit_test("TRAIN", return_X_y=True)
    test_X, test_Y = load_unit_test("TEST", return_X_y=True)

    with tempfile.TemporaryDirectory() as tmp_dir:
        # pass the directory with a trailing separator (shape "dir/")
        results_path = os.path.join(tmp_dir, "")
        run_classification_experiment(
            train_X,
            train_Y,
            test_X,
            test_Y,
            TimeSeriesForestClassifier(n_estimators=10),
            results_path,
            cls_name="TSF",
            dataset="UnitTest",
            resample_id=0,
            train_file=True,
        )

        predictions_dir = os.path.join(tmp_dir, "TSF", "Predictions", dataset)
        test_path = os.path.join(predictions_dir, "testResample0.csv")
        train_path = os.path.join(predictions_dir, "trainResample0.csv")
        assert os.path.isfile(test_path)
        assert os.path.isfile(train_path)
    # leaving the ``with`` block removes the directory and the result files
def compare_classifiers():
    """Load the unit test data and print the training labels and their types.

    Prints, in order: the training labels, the type of the label container,
    and the type of the first label.
    """
    train_X, train_y = load_unit_test(split="train", return_X_y=True)
    test_X, test_y = load_unit_test(split="test", return_X_y=True)

    print(train_y)
    container_type = type(train_y)
    print(container_type)
    first_label = train_y[0]
    print(type(first_label))
def test_contracted_arsenal_on_unit_test_data():
    """Check a time-limited Arsenal on the unit test data.

    With ``time_limit_in_minutes=0.025`` the fitted ensemble must contain
    more than one estimator and reach a test accuracy of at least 0.8.
    """
    train_X, train_y = load_unit_test(split="train", return_X_y=True)
    test_X, test_y = load_unit_test(split="test", return_X_y=True)

    # train contracted Arsenal
    contracted = Arsenal(time_limit_in_minutes=0.025, random_state=0)
    contracted.fit(train_X, train_y)

    assert len(contracted.estimators_) > 1
    test_preds = contracted.predict(test_X)
    assert accuracy_score(test_y, test_preds) >= 0.8
def test_contracted_drcif_on_unit_test_data():
    """Check a time-limited DrCIF on the unit test data.

    With ``time_limit_in_minutes=0.025`` the fitted ensemble must contain
    more than one estimator and reach a test accuracy of at least 0.8.
    """
    train_X, train_y = load_unit_test(split="train", return_X_y=True)
    test_X, test_y = load_unit_test(split="test", return_X_y=True)

    # train contracted DrCIF
    contracted = DrCIF(time_limit_in_minutes=0.025, random_state=0)
    contracted.fit(train_X, train_y)

    assert len(contracted.estimators_) > 1
    test_preds = contracted.predict(test_X)
    assert accuracy_score(test_y, test_preds) >= 0.8
def test_tsf_on_unit_test_data():
    """Check TimeSeriesForestClassifier probabilities on unit test data.

    The probabilities for ten sampled test cases must equal the stored
    values exactly.
    """
    train_X, train_y = load_unit_test(split="train")
    test_X, test_y = load_unit_test(split="test")
    sample_idx = np.random.RandomState(0).choice(len(train_y), 10, replace=False)

    forest = TimeSeriesForestClassifier(n_estimators=10, random_state=0)
    forest.fit(train_X, train_y)

    predicted = forest.predict_proba(test_X.iloc[sample_idx])
    testing.assert_array_equal(predicted, tsf_unit_test_probas)
def test_stsf_on_unit_test_data():
    """Check SupervisedTimeSeriesForest probabilities on unit test data.

    The probabilities for ten sampled test cases must equal the stored
    values exactly.
    """
    train_X, train_y = load_unit_test(split="train", return_X_y=True)
    test_X, test_y = load_unit_test(split="test", return_X_y=True)
    sample_idx = np.random.RandomState(0).choice(len(train_y), 10, replace=False)

    forest = SupervisedTimeSeriesForest(n_estimators=10, random_state=0)
    forest.fit(train_X, train_y)

    predicted = forest.predict_proba(test_X.iloc[sample_idx])
    testing.assert_array_equal(predicted, stsf_unit_test_probas)
def test_risf_on_unit_test_data():
    """Check RandomIntervalSpectralForest probabilities on unit test data.

    The probabilities for ten sampled test cases must equal the stored
    values exactly.
    """
    train_X, train_y = load_unit_test(split="train")
    test_X, test_y = load_unit_test(split="test")
    sample_idx = np.random.RandomState(0).choice(len(train_y), 10, replace=False)

    forest = RandomIntervalSpectralForest(n_estimators=10, random_state=0)
    forest.fit(train_X, train_y)

    predicted = forest.predict_proba(test_X.iloc[sample_idx])
    testing.assert_array_equal(predicted, rise_unit_test_probas)
def test_dtc_on_unit_test_data():
    """Smoke-test CanonicalIntervalForest with ``base_estimator="dtc"``.

    Fits on the train split and calls ``predict_proba`` on ten sampled test
    cases. The result is not compared with anything; the test only checks
    that both calls complete.
    """
    train_X, train_y = load_unit_test(split="train")
    test_X, test_y = load_unit_test(split="test")
    sample_idx = np.random.RandomState(0).choice(len(train_y), 10, replace=False)

    forest = CanonicalIntervalForest(
        n_estimators=10, base_estimator="dtc", random_state=0
    )
    forest.fit(train_X, train_y)

    sampled_test_X = test_X.iloc[sample_idx]
    forest.predict_proba(sampled_test_X)
def test_knn_on_unit_test():
    """Test function for elastic knn, to be reinstated soon.

    For every entry of ``distance_functions`` a
    KNeighborsTimeSeriesClassifier is fitted on the unit test train split,
    and the number of correct predictions on the test split is compared
    with the count stored in ``expected_correct`` for that distance.
    """
    # load unit test data
    X_train, y_train = load_unit_test(split="train", return_X_y=True)
    X_test, y_test = load_unit_test(split="test", return_X_y=True)

    for distance in distance_functions:
        knn = KNeighborsTimeSeriesClassifier(distance=distance)
        knn.fit(X_train, y_train)
        pred = knn.predict(X_test)

        # count predictions that agree with the true labels
        correct = sum(
            1 for predicted, actual in zip(pred, y_test) if predicted == actual
        )
        assert correct == expected_correct[distance]
def test_pf_on_unit_test_data():
    """Check ProximityForest probabilities on the unit test data.

    The probabilities for ten sampled test cases must match the stored
    values to two decimals.
    """
    train_X, train_y = load_unit_test(split="train")
    test_X, test_y = load_unit_test(split="test")
    sample_idx = np.random.RandomState(0).choice(len(train_y), 10, replace=False)

    forest = ProximityForest(n_estimators=5, random_state=0)
    forest.fit(train_X, train_y)

    predicted = forest.predict_proba(test_X.iloc[sample_idx])
    testing.assert_array_almost_equal(predicted, pf_unit_test_probas, decimal=2)
def test_cif_on_unit_test_data():
    """Check CanonicalIntervalForest probabilities on the unit test data.

    The probabilities for ten sampled test cases must equal the stored
    values exactly.
    """
    train_X, train_y = load_unit_test(split="train", return_X_y=True)
    test_X, test_y = load_unit_test(split="test", return_X_y=True)
    sample_idx = np.random.RandomState(0).choice(len(train_y), 10, replace=False)

    forest = CanonicalIntervalForest(n_estimators=10, random_state=0)
    forest.fit(train_X, train_y)

    predicted = forest.predict_proba(test_X.iloc[sample_idx])
    testing.assert_array_equal(predicted, cif_unit_test_probas)
def test_shapedtw_on_unit_test_data():
    """Check ShapeDTW probabilities on the unit test data.

    A ShapeDTW classifier with default parameters is fitted on the train
    split. Its probabilities for ten sampled test cases must match the
    stored values to two decimals.
    """
    train_X, train_y = load_unit_test(split="train")
    test_X, test_y = load_unit_test(split="test")
    sample_idx = np.random.RandomState(0).choice(len(train_y), 10, replace=False)

    classifier = ShapeDTW()
    classifier.fit(train_X, train_y)

    predicted = classifier.predict_proba(test_X.iloc[sample_idx])
    testing.assert_array_almost_equal(predicted, shapedtw_unit_test_probas, decimal=2)
def test_knn_bounding_matrix():
    """Test knn with custom bounding parameters.

    For every entry of ``distance_functions`` a
    KNeighborsTimeSeriesClassifier is built with
    ``distance_params={"window": 0.5}`` and fitted on the unit test train
    split. The number of correct predictions on the test split is compared
    with the count stored in ``expected_correct`` for that distance.
    """
    X_train, y_train = load_unit_test(split="train", return_X_y=True)
    X_test, y_test = load_unit_test(split="test", return_X_y=True)

    for distance in distance_functions:
        knn = KNeighborsTimeSeriesClassifier(
            distance=distance, distance_params={"window": 0.5}
        )
        knn.fit(X_train, y_train)
        pred = knn.predict(X_test)

        # count predictions that agree with the true labels
        correct = sum(
            1 for predicted, actual in zip(pred, y_test) if predicted == actual
        )
        assert correct == expected_correct[distance]
def test_col_ens_on_unit_test_data():
    """Check ColumnEnsembleClassifier wrapping DrCIF on the unit test data.

    The ensemble holds a single DrCIF estimator assigned to column 0. Its
    probabilities for ten sampled test cases must match the stored values
    to two decimals.
    """
    train_X, train_y = load_unit_test(split="train")
    test_X, test_y = load_unit_test(split="test")
    sample_idx = np.random.RandomState(0).choice(len(train_y), 10, replace=False)

    # column ensemble with a single estimator on the first column
    interval_forest = DrCIF(n_estimators=10, random_state=0)
    ensemble = ColumnEnsembleClassifier(
        estimators=[("DrCIF", interval_forest, [0])]
    )
    ensemble.fit(train_X, train_y)

    predicted = ensemble.predict_proba(test_X.iloc[sample_idx])
    testing.assert_array_almost_equal(predicted, col_ens_unit_test_probas, decimal=2)
def test_individual_boss_on_unit_test():
    """Check IndividualBOSS probabilities on the unit test data.

    The probabilities for ten sampled test cases must match the stored
    values to two decimals.
    """
    train_X, train_y = load_unit_test(split="train")
    test_X, test_y = load_unit_test(split="test")
    sample_idx = np.random.RandomState(0).choice(len(train_y), 10, replace=False)

    classifier = IndividualBOSS(random_state=0)
    classifier.fit(train_X, train_y)

    predicted = classifier.predict_proba(test_X.iloc[sample_idx])
    testing.assert_array_almost_equal(
        predicted, individual_boss_unit_test_probas, decimal=2
    )
def test_weasel_on_unit_test_data():
    """Check WEASEL probabilities on the unit test data.

    A WEASEL classifier with ``window_inc=4`` is fitted on the train split.
    Its probabilities for ten sampled test cases must match the stored
    values to two decimals.
    """
    train_X, train_y = load_unit_test(split="train", return_X_y=True)
    test_X, test_y = load_unit_test(split="test", return_X_y=True)
    sample_idx = np.random.RandomState(0).choice(len(train_y), 10, replace=False)

    classifier = WEASEL(random_state=0, window_inc=4)
    classifier.fit(train_X, train_y)

    predicted = classifier.predict_proba(test_X.iloc[sample_idx])
    testing.assert_array_almost_equal(predicted, weasel_unit_test_probas, decimal=2)