def test_mean_averaging():
    """Check MeanAveraging.average() against fixed values for seeded data."""
    rng = np.random.RandomState(1)
    # Use the builtin ``int``: the ``np.int`` alias was deprecated in
    # NumPy 1.20 and removed in NumPy 1.24, where it raises AttributeError.
    X = generate_univaritate_series(n=100, size=5, rng=rng, dtype=int)

    mean = MeanAveraging(X)
    average = mean.average()
    assert np.array_equal(np.array([-0.02, 0.01, 0.05, 0.11, 0.03]), average)
def test_barycenter_averaging():
    """Smoke-test BarycenterAveraging.average() on seeded data.

    The result is not checked; the test only requires that the call
    completes without raising.
    """
    seeded_rng = np.random.RandomState(0)
    series = generate_univaritate_series(
        n=100, size=5, rng=seeded_rng, dtype=int
    )

    averager = BarycenterAveraging(series)
    averager.average()
# Example no. 3
# 0
def test_random_cluster_center_initializer():
    """Check RandomCenterInitializer centers for three estimator setups.

    A single seeded RandomState generates the data and is then handed to
    every initializer in turn, so the cases are kept in a fixed order.
    """
    n_clusters = 3
    seeded_rng = np.random.RandomState(0)
    series = generate_univaritate_series(
        n=20, size=n_clusters, rng=seeded_rng, dtype=np.int64
    )

    # (estimator class, extra constructor kwargs, expected centers)
    cases = (
        (
            TimeSeriesKMedoids,
            {},
            [[472, 600, 396], [544, 543, 714], [684, 559, 629]],
        ),
        (
            TimeSeriesKMeans,
            {},
            [[520, 521, 692], [491, 581, 409], [695, 492, 403]],
        ),
        (
            TimeSeriesKMeans,
            {"metric": "dtw"},
            [[516, 564, 670], [564, 546, 480], [632, 438, 527]],
        ),
    )

    for estimator_cls, extra_kwargs, expected in cases:
        estimator = estimator_cls(n_clusters=n_clusters, **extra_kwargs)
        initializer = RandomCenterInitializer(
            series, n_clusters, estimator.calculate_new_centers, seeded_rng
        )
        centers = initializer.initialize_centers()

        assert np.array_equal(np.array(expected), centers)
# Example no. 4
# 0
def make_clustering_problem(n_instances=20,
                            series_size=20,
                            return_numpy=True,
                            random_state=None):
    """Generate a univariate series dataset for clustering tests.

    Parameters
    ----------
    n_instances : default=20
        Passed as the first argument to ``generate_univaritate_series``.
    series_size : default=20
        Passed as the second argument to ``generate_univaritate_series``.
    return_numpy : default=True
        When truthy, the generated data is returned unchanged; otherwise
        it is wrapped in a ``pd.Series``.
    random_state : default=None
        Passed as the third argument to ``generate_univaritate_series``.

    Returns
    -------
    The generated data, or a ``pd.Series`` built from it.
    """
    # Only univariate data is supported for now, so the problem is always
    # produced by the univariate generator.
    generated = generate_univaritate_series(
        n_instances, series_size, random_state
    )
    # NOTE(review): pd.Series expects 1-D data -- confirm the non-numpy path
    # works with the array shape the generator returns.
    return generated if return_numpy else pd.Series(generated)
def test_k_medoids():
    """Check TimeSeriesKMedoids cluster assignments for three setups.

    The RandomState that generates the training data is also passed as
    ``random_state`` to every estimator, so the cases run in a fixed order
    and each estimator is built immediately before it is used.
    """
    shared_rng = np.random.RandomState(1)
    X_train = generate_univaritate_series(
        n=100, size=5, rng=shared_rng, dtype=np.double
    )
    X_test = generate_univaritate_series(
        n=10, size=5, rng=np.random.RandomState(2), dtype=np.double
    )

    # (extra constructor kwargs, expected cluster assignments for X_test)
    cases = (
        ({}, [1, 1, 0, 0, 0, 0, 1, 1, 1, 0]),
        ({"metric": "euclidean"}, [1, 3, 3, 4, 4, 3, 4, 3, 2, 4]),
        (
            {"init_algorithm": "random", "metric": "euclidean"},
            [2, 1, 1, 3, 3, 0, 1, 0, 1, 3],
        ),
    )

    for extra_kwargs, expected in cases:
        estimator = TimeSeriesKMedoids(
            n_clusters=5, max_iter=50, random_state=shared_rng, **extra_kwargs
        )
        clusters, _ = run_clustering_experiment(estimator, X_train, X_test)

        assert np.array_equal(np.array(expected), clusters)
# Example no. 6
# 0
def test_forgy_cluster_center_initializer():
    """Check ForgyCenterInitializer centers for seeded float32 data.

    The same RandomState generates the data and is passed to the
    initializer as ``random_state``.
    """
    seeded_rng = np.random.RandomState(0)
    series = generate_univaritate_series(
        n=20, size=1, rng=seeded_rng, dtype=np.float32
    )

    initializer = ForgyCenterInitializer(series, 5, random_state=seeded_rng)
    centers = initializer.initialize_centers()

    expected = np.array(
        [[0.33367434], [1.4940791], [0.95008844], [0.4001572], [-0.10321885]],
        dtype=np.float32,
    )
    assert np.array_equal(expected, centers)
def test_k_means():
    """Check TimeSeriesKMeans cluster assignments for four setups.

    The RandomState that generates the training data is also passed as
    ``random_state`` to every estimator, so the experiments below must run
    in this order.
    """
    rng = np.random.RandomState(1)
    X_train = generate_univaritate_series(n=100,
                                          size=5,
                                          rng=rng,
                                          dtype=np.double)
    X_test = generate_univaritate_series(n=10,
                                         size=5,
                                         rng=np.random.RandomState(2),
                                         dtype=np.double)

    # Euclidean metric, mean averaging, forgy initialisation.
    clusters, _ = run_clustering_experiment(
        TimeSeriesKMeans(
            n_clusters=5,
            max_iter=50,
            metric="euclidean",
            averaging_algorithm="mean",
            init_algorithm="forgy",
            random_state=rng,
        ),
        X_train,
        X_test,
    )
    assert np.array_equal(np.array([3, 1, 0, 2, 0, 1, 1, 1, 1, 0]), clusters)

    # Known bug: the dtw metric only works when the array dtype is double.
    clusters, _ = run_clustering_experiment(
        TimeSeriesKMeans(
            n_clusters=5,
            max_iter=50,
            metric="dtw",
            averaging_algorithm="mean",
            random_state=rng,
        ),
        X_train,
        X_test,
    )
    assert np.array_equal(np.array([2, 3, 3, 4, 4, 3, 3, 3, 2, 3]), clusters)

    clusters, _ = run_clustering_experiment(
        TimeSeriesKMeans(
            n_clusters=5,
            max_iter=5,
            averaging_algorithm="dba",
            averaging_algorithm_iterations=2,
            random_state=rng,
        ),
        X_train,
        X_test,
    )

    # dba is not seeded yet, so exact assignments cannot be pinned here; only
    # check that a non-empty result comes back. A bare ``assert clusters``
    # would raise ValueError for a multi-element ndarray (ambiguous truth
    # value), so test the element count explicitly.
    assert np.size(clusters) > 0

    clusters, _ = run_clustering_experiment(
        TimeSeriesKMeans(
            n_clusters=5,
            max_iter=5,
            init_algorithm="random",
            averaging_algorithm="dba",
            averaging_algorithm_iterations=2,
            random_state=rng,
        ),
        X_train,
        X_test,
    )

    assert np.array_equal(np.array([1, 4, 3, 3, 4, 4, 1, 1, 1, 4]), clusters)