예제 #1
0
def test_knn_on_arrowhead():
    """Check k-NN hit counts on ArrowHead for every configured distance.

    For each entry of ``distance_functions`` a classifier is fitted on the
    train split, the test split is predicted, and the number of correctly
    predicted labels is compared with the matching ``expected_correct`` entry.
    """
    # load the ArrowHead train and test splits
    X_train, y_train = load_arrow_head(split="train", return_X_y=True)
    X_test, y_test = load_arrow_head(split="test", return_X_y=True)
    for distance in distance_functions:
        classifier = KNeighborsTimeSeriesClassifier(distance=distance)
        classifier.fit(X_train, y_train)
        predictions = classifier.predict(X_test)
        # count the predictions that agree with the true test labels
        n_hits = 0
        for position, predicted in enumerate(predictions):
            if predicted == y_test[position]:
                n_hits += 1
        assert n_hits == expected_correct[distance]
예제 #2
0
def time_clusterers():
    """Fit a k-means clusterer on ArrowHead and plot it against the test split.

    The clusterer is fitted on the train split only; the fitted model, the
    test split and the number of clusters are then handed to
    ``plot_cluster_algorithm``.
    """
    k_means_params = {
        # Number of desired centers
        "n_clusters": 5,
        # Center initialisation technique
        "init_algorithm": "forgy",
        # Maximum number of iterations for refinement on training set
        "max_iter": 10,
        # Distance metric to use
        "metric": "dtw",
        # Averaging technique to use
        "averaging_method": "mean",
        "random_state": 1,
    }
    k_means = TimeSeriesKMeans(**k_means_params)

    # The labels are not needed for clustering, so they are discarded.
    X_train, _ = load_arrow_head(split="train")
    X_test, _ = load_arrow_head(split="test")

    k_means.fit(X_train)
    plot_cluster_algorithm(k_means, X_test, k_means.n_clusters)
예제 #3
0
def test_load_UCR_UEA_dataset_download(tmpdir):
    """Check that loading ArrowHead into a fresh directory extracts the data.

    Verifies that the dataset folder exists under the extract path, that its
    directory listing matches the expected file names exactly, and that the
    loaded data agrees with what ``load_arrow_head`` returns.
    """
    # tmpdir is a pytest fixture
    name = "ArrowHead"
    extract_path = tmpdir.mkdtemp()
    actual_X, actual_y = load_UCR_UEA_dataset(
        name, return_X_y=True, extract_path=extract_path
    )
    data_path = os.path.join(extract_path, name)
    assert os.path.exists(data_path)

    # check files: every directory entry must be expected, and none missing
    # NOTE: "README.md" is not part of the expected listing.
    suffixes = (
        ".txt",
        "_TEST.arff",
        "_TEST.ts",
        "_TEST.txt",
        "_TRAIN.arff",
        "_TRAIN.ts",
        "_TRAIN.txt",
    )
    pending = [f"{name}{suffix}" for suffix in suffixes]
    for entry in os.listdir(data_path):
        assert entry in pending
        pending.remove(entry)
    assert not pending

    # check data
    expected_X, expected_y = load_arrow_head(return_X_y=True)
    _assert_array_almost_equal(actual_X, expected_X, decimal=4)
    np.testing.assert_array_equal(expected_y, actual_y)
예제 #4
0
def test_center_init(
    center_init_callable: Callable[
        [np.ndarray, int, np.random.RandomState], np.ndarray
    ]
):
    """Test center initialisation algorithms.

    Parameters
    ----------
    center_init_callable : callable
        Initialisation routine under test. It is called as
        ``center_init_callable(X_train, k, random_state)`` and must return
        ``k`` centers stacked along the first axis.
    """
    k = 5
    X, y = load_arrow_head(return_X_y=True)
    # Only the training panel is used; the other split outputs are discarded.
    X_train, _, _, _ = train_test_split(X, y)
    X_train = convert_to(X_train, "numpy3D")
    random_state = check_random_state(1)
    test_centers = center_init_callable(X_train, k, random_state)
    assert len(test_centers) == k
    # Uniqueness has to be checked along axis 0, the axis that indexes the
    # centers. np.unique with axis=1 leaves the first axis untouched, so its
    # length would equal k regardless of duplicate centers.
    assert len(np.unique(test_centers, axis=0)) == k
예제 #5
0
def plot_dba_example():
    """Plot DBA barycenters of the ArrowHead train split for four metrics.

    One subplot per metric is stacked vertically; each shows every training
    series in faint black with the barycenter returned by ``dba`` in red.
    """
    import matplotlib.pyplot as plt

    X_train, _ = load_arrow_head(split="train")
    X_train = convert_to(X_train, "numpy3D")

    def draw_panel(center):
        # All training series as a faint backdrop, the barycenter on top.
        for instance in X_train:
            plt.plot(instance.ravel(), "k-", alpha=0.2)
        plt.plot(center.ravel(), "r-", linewidth=2)

    ax1 = plt.subplot()

    # (title, distance metric, extra keyword arguments for dba) per panel.
    # The msm panel passes no medoids_distance_metric.
    panels = (
        ("Sktime DBA (using dtw)", "dtw", {"medoids_distance_metric": "dtw"}),
        ("Sktime DBA (using wdtw)", "wdtw", {"medoids_distance_metric": "wdtw"}),
        ("Sktime DBA (using lcss)", "lcss", {"medoids_distance_metric": "lcss"}),
        ("Sktime DBA (using msm)", "msm", {}),
    )
    for row, (title, metric, extra) in enumerate(panels, start=1):
        plt.subplot(4, 1, row, sharex=ax1)
        plt.title(title)
        draw_panel(dba(X_train, distance_metric=metric, **extra))

    ax1.set_xlim([0, X_train.shape[2]])

    # show the plot(s)
    plt.tight_layout()
    plt.show()
예제 #6
0
    cv = SingleSplit(random_state=1)
    train_idx, test_idx = next(cv.split(data))
    train = data.iloc[train_idx, :]
    test = data.iloc[test_idx, :]
    strategy.fit(task, train)
    expected = strategy.predict(test)

    # compare results
    np.testing.assert_array_equal(actual, expected)


# extensive tests of orchestration and metric evaluation against sklearn
@pytest.mark.parametrize(
    "dataset",
    [
        RAMDataset(dataset=load_arrow_head(return_X_y=False),
                   name="ArrowHead"),
        UEADataset(path=DATAPATH, name="GunPoint", target_name="class_val"),
    ],
)
@pytest.mark.parametrize("cv", [
    SingleSplit(random_state=1),
    StratifiedKFold(random_state=1, shuffle=True)
])
@pytest.mark.parametrize(
    "metric_func",
    [accuracy_score, f1_score]  # pairwise metric  # composite metric
)
@pytest.mark.parametrize("results_cls", [RAMResults, HDDResults])
@pytest.mark.parametrize(
    "estimator",
예제 #7
0
# -*- coding: utf-8 -*-
"""Clustering usage tests and examples."""
import numpy as np

from sktime.clustering.k_means import TimeSeriesKMeans
from sktime.clustering.k_medoids import TimeSeriesKMedoids
from sktime.datasets import load_arrow_head


def form_cluster_list(clusters, n) -> np.ndarray:
    """Form a cluster list.

    Converts a per-cluster grouping of indexes into a flat array that gives
    the cluster number for each index.

    Parameters
    ----------
    clusters : iterable of iterables of int
        The i-th element holds the indexes that belong to cluster ``i``.
    n : int
        Length of the returned array.

    Returns
    -------
    np.ndarray
        Float array of length ``n`` whose entry ``j`` is the number of the
        cluster containing index ``j``. Indexes that appear in no cluster
        keep the initial value 0; if an index appears in several clusters
        the last one wins.
    """
    preds = np.zeros(n)
    for label, members in enumerate(clusters):
        for member in members:
            preds[member] = label
    return preds


if __name__ == "__main__":
    # Usage example: cluster ArrowHead with k-means and print, for every
    # index, the cluster number produced by form_cluster_list.
    kmeans_params = {"n_clusters": 5, "max_iter": 50, "averaging_algorithm": "mean"}
    clusterer1 = TimeSeriesKMeans(**kmeans_params)
    clusterer2 = TimeSeriesKMedoids()  # constructed but not used below
    X, y = load_arrow_head(return_X_y=True)
    clusterer1.fit(X)
    assignments = form_cluster_list(clusterer1.predict(X), len(y))
    for index, cluster in enumerate(assignments):
        print(index, " is in cluster ", cluster)
예제 #8
0
    cv = SingleSplit(random_state=1)
    train_idx, test_idx = next(cv.split(data))
    train = data.iloc[train_idx, :]
    test = data.iloc[test_idx, :]
    strategy.fit(task, train)
    expected = strategy.predict(test)

    # compare results
    np.testing.assert_array_equal(actual, expected)


# extensive tests of orchestration and metric evaluation against sklearn
@pytest.mark.parametrize(
    "dataset",
    [
        RAMDataset(dataset=load_arrow_head(), name="ArrowHead"),
        UEADataset(path=DATAPATH, name="GunPoint", target_name="class_val"),
    ],
)
@pytest.mark.parametrize("cv", [
    SingleSplit(random_state=1),
    StratifiedKFold(random_state=1, shuffle=True)
])
@pytest.mark.parametrize(
    "metric_func",
    [accuracy_score, f1_score]  # pairwise metric  # composite metric
)
@pytest.mark.parametrize("results_cls", [RAMResults, HDDResults])
@pytest.mark.parametrize(
    "estimator",
    [