def test_knn_on_arrowhead():
    # load the ArrowHead data
    X_train, y_train = load_arrow_head(split="train", return_X_y=True)
    X_test, y_test = load_arrow_head(split="test", return_X_y=True)

    for distance in distance_functions:
        knn = KNeighborsTimeSeriesClassifier(distance=distance)
        knn.fit(X_train, y_train)
        pred = knn.predict(X_test)
        # count correct test predictions and compare against the expected count
        correct = sum(p == actual for p, actual in zip(pred, y_test))
        assert correct == expected_correct[distance]
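# A minimal sketch of the module-level fixtures the test above assumes:
# ``distance_functions`` (distance names accepted by
# KNeighborsTimeSeriesClassifier) and ``expected_correct`` (expected number of
# correct test predictions per distance). The names and counts below are
# hypothetical placeholders, not real benchmark results.
distance_functions = ["euclidean", "dtw", "wdtw", "msm", "lcss", "erp"]
expected_correct = {name: None for name in distance_functions}  # fill in real counts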
def time_clusterers():
    """Time tests for clusterers."""
    k_means = TimeSeriesKMeans(
        n_clusters=5,  # number of desired centers
        init_algorithm="forgy",  # center initialisation technique
        max_iter=10,  # maximum number of refinement iterations on the training set
        metric="dtw",  # distance metric to use
        averaging_method="mean",  # averaging technique to use
        random_state=1,
    )
    X_train, y_train = load_arrow_head(split="train")
    X_test, y_test = load_arrow_head(split="test")
    k_means.fit(X_train)
    plot_cluster_algorithm(k_means, X_test, k_means.n_clusters)
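# The function above does not measure time itself (presumably a profiling
# harness does). A minimal sketch of measuring fit time directly with the
# standard library, reusing the same TimeSeriesKMeans configuration
# (wall-clock time via ``time.perf_counter``); illustrative only:
import time

from sktime.clustering.k_means import TimeSeriesKMeans
from sktime.datasets import load_arrow_head


def time_k_means_fit():
    """Report wall-clock time of fitting TimeSeriesKMeans on ArrowHead."""
    X_train, _ = load_arrow_head(split="train")
    k_means = TimeSeriesKMeans(
        n_clusters=5,
        init_algorithm="forgy",
        max_iter=10,
        metric="dtw",
        averaging_method="mean",
        random_state=1,
    )
    start = time.perf_counter()
    k_means.fit(X_train)
    print(f"TimeSeriesKMeans.fit took {time.perf_counter() - start:.2f}s")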
def test_load_UCR_UEA_dataset_download(tmpdir):
    # tmpdir is a pytest fixture
    extract_path = tmpdir.mkdtemp()
    name = "ArrowHead"
    actual_X, actual_y = load_UCR_UEA_dataset(
        name, return_X_y=True, extract_path=extract_path
    )
    data_path = os.path.join(extract_path, name)
    assert os.path.exists(data_path)

    # check files
    files = [
        f"{name}.txt",
        f"{name}_TEST.arff",
        f"{name}_TEST.ts",
        f"{name}_TEST.txt",
        f"{name}_TRAIN.arff",
        f"{name}_TRAIN.ts",
        f"{name}_TRAIN.txt",
        # "README.md",
    ]
    for file in os.listdir(data_path):
        assert file in files
        files.remove(file)
    assert len(files) == 0

    # check data
    expected_X, expected_y = load_arrow_head(return_X_y=True)
    _assert_array_almost_equal(actual_X, expected_X, decimal=4)
    np.testing.assert_array_equal(expected_y, actual_y)
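# A minimal usage sketch of the loader exercised above, assuming network
# access. Without ``extract_path`` the archive is downloaded into sktime's
# local data directory and reused on subsequent calls; illustrative only:
from sktime.datasets import load_UCR_UEA_dataset

X, y = load_UCR_UEA_dataset("ArrowHead", return_X_y=True)
print(len(X), len(y))  # one label per series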
def test_center_init(
    center_init_callable: Callable[[np.ndarray, int, np.random.RandomState], np.ndarray]
):
    """Test center initialisation algorithms."""
    k = 5
    X, y = load_arrow_head(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y)
    X_train = convert_to(X_train, "numpy3D")
    random_state = check_random_state(1)
    test_centers = center_init_callable(X_train, k, random_state)
    assert len(test_centers) == k
    # all k initial centers should be distinct series
    assert len(np.unique(test_centers, axis=0)) == k
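# A minimal illustrative sketch (not sktime's implementation) of a callable
# satisfying the contract tested above: given a numpy3D panel of shape
# (n_instances, n_dimensions, n_timepoints), a cluster count k and a
# RandomState, return k distinct series to use as initial centers.
import numpy as np


def random_center_init(
    X: np.ndarray, k: int, random_state: np.random.RandomState
) -> np.ndarray:
    """Pick k distinct training series uniformly at random as initial centers."""
    indexes = random_state.choice(X.shape[0], size=k, replace=False)
    return X[indexes]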
def plot_dba_example():
    """Plot dba."""
    import matplotlib.pyplot as plt

    X_train, y_train = load_arrow_head(split="train")
    X_train = convert_to(X_train, "numpy3D")

    def plot_helper(barycenter):
        # plot all series in light grey and the barycenter on top in red
        for series in X_train:
            plt.plot(series.ravel(), "k-", alpha=0.2)
        plt.plot(barycenter.ravel(), "r-", linewidth=2)

    ax1 = plt.subplot()

    plt.subplot(4, 1, 1, sharex=ax1)
    plt.title("Sktime DBA (using dtw)")
    plot_helper(dba(X_train, distance_metric="dtw", medoids_distance_metric="dtw"))

    plt.subplot(4, 1, 2, sharex=ax1)
    plt.title("Sktime DBA (using wdtw)")
    plot_helper(dba(X_train, distance_metric="wdtw", medoids_distance_metric="wdtw"))

    plt.subplot(4, 1, 3, sharex=ax1)
    plt.title("Sktime DBA (using lcss)")
    plot_helper(dba(X_train, distance_metric="lcss", medoids_distance_metric="lcss"))

    plt.subplot(4, 1, 4, sharex=ax1)
    plt.title("Sktime DBA (using msm)")
    plot_helper(dba(X_train, distance_metric="msm"))

    ax1.set_xlim([0, X_train.shape[2]])

    # show the plot(s)
    plt.tight_layout()
    plt.show()
    cv = SingleSplit(random_state=1)
    train_idx, test_idx = next(cv.split(data))
    train = data.iloc[train_idx, :]
    test = data.iloc[test_idx, :]
    strategy.fit(task, train)
    expected = strategy.predict(test)

    # compare results
    np.testing.assert_array_equal(actual, expected)


# extensive tests of orchestration and metric evaluation against sklearn
@pytest.mark.parametrize(
    "dataset",
    [
        RAMDataset(dataset=load_arrow_head(return_X_y=False), name="ArrowHead"),
        UEADataset(path=DATAPATH, name="GunPoint", target_name="class_val"),
    ],
)
@pytest.mark.parametrize(
    "cv",
    [SingleSplit(random_state=1), StratifiedKFold(random_state=1, shuffle=True)],
)
@pytest.mark.parametrize(
    "metric_func", [accuracy_score, f1_score]  # pairwise metric  # composite metric
)
@pytest.mark.parametrize("results_cls", [RAMResults, HDDResults])
@pytest.mark.parametrize(
    "estimator",
# -*- coding: utf-8 -*-
"""Clustering usage tests and examples."""
import numpy as np

from sktime.clustering.k_means import TimeSeriesKMeans
from sktime.clustering.k_medoids import TimeSeriesKMedoids
from sktime.datasets import load_arrow_head


def form_cluster_list(clusters, n) -> np.ndarray:
    """Form a flat array of cluster labels from a list of clusters of instance indexes."""
    preds = np.zeros(n)
    for i in range(len(clusters)):
        for j in range(len(clusters[i])):
            preds[clusters[i][j]] = i
    return preds


if __name__ == "__main__":
    clusterer1 = TimeSeriesKMeans(
        n_clusters=5, max_iter=50, averaging_algorithm="mean"
    )
    clusterer2 = TimeSeriesKMedoids()
    X, y = load_arrow_head(return_X_y=True)
    clusterer1.fit(X)
    c = clusterer1.predict(X)
    x = form_cluster_list(c, len(y))
    for i in range(len(x)):
        print(i, " is in cluster ", x[i])
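# Follow-up usage sketch: if ``predict`` returns one flat cluster label per
# instance (as in more recent sktime versions) rather than a list of clusters
# of instance indexes, the ``form_cluster_list`` mapping above is unnecessary
# and labels can be printed directly. Self-contained and illustrative only.
from sktime.clustering.k_means import TimeSeriesKMeans
from sktime.datasets import load_arrow_head


def print_cluster_labels():
    """Fit k-means on ArrowHead and print one flat cluster label per series."""
    X, _ = load_arrow_head(return_X_y=True)
    clusterer = TimeSeriesKMeans(n_clusters=5, max_iter=50)
    clusterer.fit(X)
    labels = clusterer.predict(X)  # assumed: array of shape (n_instances,)
    for i, label in enumerate(labels):
        print(i, " is in cluster ", label)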
    cv = SingleSplit(random_state=1)
    train_idx, test_idx = next(cv.split(data))
    train = data.iloc[train_idx, :]
    test = data.iloc[test_idx, :]
    strategy.fit(task, train)
    expected = strategy.predict(test)

    # compare results
    np.testing.assert_array_equal(actual, expected)


# extensive tests of orchestration and metric evaluation against sklearn
@pytest.mark.parametrize(
    "dataset",
    [
        RAMDataset(dataset=load_arrow_head(), name="ArrowHead"),
        UEADataset(path=DATAPATH, name="GunPoint", target_name="class_val"),
    ],
)
@pytest.mark.parametrize(
    "cv",
    [SingleSplit(random_state=1), StratifiedKFold(random_state=1, shuffle=True)],
)
@pytest.mark.parametrize(
    "metric_func", [accuracy_score, f1_score]  # pairwise metric  # composite metric
)
@pytest.mark.parametrize("results_cls", [RAMResults, HDDResults])
@pytest.mark.parametrize(
    "estimator",
    [