def test_mean_averaging():
    """Check MeanAveraging against known averages of a seeded integer series."""
    rng = np.random.RandomState(1)
    # ``np.int`` was only an alias of the builtin ``int``; the alias was
    # deprecated in NumPy 1.20 and removed in 1.24, so use the builtin.
    X = generate_univaritate_series(n=100, size=5, rng=rng, dtype=int)
    mean = MeanAveraging(X)
    average = mean.average()
    assert np.array_equal(np.array([-0.02, 0.01, 0.05, 0.11, 0.03]), average)
def test_barycenter_averaging():
    """Smoke-test barycenter averaging on a seeded integer series.

    No assertion is made on the result; the test passes as long as
    ``average()`` completes without raising.
    """
    seeded_rng = np.random.RandomState(0)
    series = generate_univaritate_series(
        n=100, size=5, rng=seeded_rng, dtype=int
    )
    averager = BarycenterAveraging(series)
    averager.average()
def test_random_cluster_center_initializer():
    """Check RandomCenterInitializer with three center-update callables.

    The same ``rng`` instance is handed to all three initializers, so the
    three cases are exercised in a fixed order.
    """
    n_clusters = 3
    medoids_estimator = TimeSeriesKMedoids(n_clusters=n_clusters)
    rng = np.random.RandomState(0)
    X = generate_univaritate_series(
        n=20, size=n_clusters, rng=rng, dtype=np.int64
    )

    # Case 1: center update taken from the k-medoids estimator.
    medoids_initializer = RandomCenterInitializer(
        X, n_clusters, medoids_estimator.calculate_new_centers, rng
    )
    medoids_centers = medoids_initializer.initialize_centers()
    expected_medoids = np.array(
        [[472, 600, 396], [544, 543, 714], [684, 559, 629]]
    )
    assert np.array_equal(expected_medoids, medoids_centers)

    # Case 2: center update taken from k-means with its default metric.
    means_estimator = TimeSeriesKMeans(n_clusters=n_clusters)
    means_initializer = RandomCenterInitializer(
        X, n_clusters, means_estimator.calculate_new_centers, rng
    )
    means_centers = means_initializer.initialize_centers()
    expected_means = np.array(
        [[520, 521, 692], [491, 581, 409], [695, 492, 403]]
    )
    assert np.array_equal(expected_means, means_centers)

    # Case 3: center update taken from k-means configured with metric="dtw".
    dtw_estimator = TimeSeriesKMeans(n_clusters=n_clusters, metric="dtw")
    dtw_initializer = RandomCenterInitializer(
        X, n_clusters, dtw_estimator.calculate_new_centers, rng
    )
    dtw_centers = dtw_initializer.initialize_centers()
    expected_dtw = np.array(
        [[516, 564, 670], [564, 546, 480], [632, 438, 527]]
    )
    assert np.array_equal(expected_dtw, dtw_centers)
def make_clustering_problem(
    n_instances=20, series_size=20, return_numpy=True, random_state=None
):
    """Generate univariate data for clustering tests.

    ``n_instances``, ``series_size`` and ``random_state`` are forwarded
    positionally, in that order, to ``generate_univaritate_series``. The
    generated data is returned unchanged when ``return_numpy`` is truthy and
    wrapped in a ``pd.Series`` otherwise.
    """
    # Only univariate data is currently supported, so univariate series are
    # generated for the time being.
    # NOTE(review): other call sites in this file pass an
    # ``np.random.RandomState`` as the ``rng`` argument; confirm the helper
    # accepts the default ``random_state=None``.
    generated = generate_univaritate_series(
        n_instances, series_size, random_state
    )
    return generated if return_numpy else pd.Series(generated)
def test_k_medoids():
    """Check TimeSeriesKMedoids cluster assignments for three configurations.

    The same ``rng`` instance is passed as ``random_state`` to every
    estimator, so the three runs are kept in a fixed order.
    """
    rng = np.random.RandomState(1)
    X_train = generate_univaritate_series(
        n=100, size=5, rng=rng, dtype=np.double
    )
    X_test = generate_univaritate_series(
        n=10, size=5, rng=np.random.RandomState(2), dtype=np.double
    )

    # Run 1: default metric and default initialisation.
    default_model = TimeSeriesKMedoids(
        n_clusters=5, max_iter=50, random_state=rng
    )
    labels, _ = run_clustering_experiment(default_model, X_train, X_test)
    expected_default = np.array([1, 1, 0, 0, 0, 0, 1, 1, 1, 0])
    assert np.array_equal(expected_default, labels)

    # Run 2: euclidean metric.
    euclidean_model = TimeSeriesKMedoids(
        n_clusters=5, max_iter=50, metric="euclidean", random_state=rng
    )
    labels, _ = run_clustering_experiment(euclidean_model, X_train, X_test)
    expected_euclidean = np.array([1, 3, 3, 4, 4, 3, 4, 3, 2, 4])
    assert np.array_equal(expected_euclidean, labels)

    # Run 3: euclidean metric with the "random" initialisation algorithm.
    random_init_model = TimeSeriesKMedoids(
        n_clusters=5,
        max_iter=50,
        init_algorithm="random",
        metric="euclidean",
        random_state=rng,
    )
    labels, _ = run_clustering_experiment(random_init_model, X_train, X_test)
    expected_random_init = np.array([2, 1, 1, 3, 3, 0, 1, 0, 1, 3])
    assert np.array_equal(expected_random_init, labels)
def test_forgy_cluster_center_initializer():
    """Check the centers produced by ForgyCenterInitializer on seeded data."""
    rng = np.random.RandomState(0)
    X = generate_univaritate_series(n=20, size=1, rng=rng, dtype=np.float32)

    # Five centers are requested; the initializer is seeded with the same
    # ``rng`` instance that generated the data.
    initializer = ForgyCenterInitializer(X, 5, random_state=rng)
    actual_centers = initializer.initialize_centers()

    expected_centers = np.array(
        [[0.33367434], [1.4940791], [0.95008844], [0.4001572], [-0.10321885]],
        dtype=np.float32,
    )
    assert np.array_equal(expected_centers, actual_centers)
def test_k_means():
    """Check TimeSeriesKMeans cluster assignments for several configurations.

    The same ``rng`` instance is passed as ``random_state`` to every
    estimator, so the runs are kept in a fixed order.
    """
    rng = np.random.RandomState(1)
    X_train = generate_univaritate_series(
        n=100, size=5, rng=rng, dtype=np.double
    )
    X_test = generate_univaritate_series(
        n=10, size=5, rng=np.random.RandomState(2), dtype=np.double
    )

    clusters, _ = run_clustering_experiment(
        TimeSeriesKMeans(
            n_clusters=5,
            max_iter=50,
            metric="euclidean",
            averaging_algorithm="mean",
            init_algorithm="forgy",
            random_state=rng,
        ),
        X_train,
        X_test,
    )
    assert np.array_equal(np.array([3, 1, 0, 2, 0, 1, 1, 1, 1, 0]), clusters)

    # Known bug: dtw as metric only works if the array is of type double.
    clusters, _ = run_clustering_experiment(
        TimeSeriesKMeans(
            n_clusters=5,
            max_iter=50,
            metric="dtw",
            averaging_algorithm="mean",
            random_state=rng,
        ),
        X_train,
        X_test,
    )
    assert np.array_equal(np.array([2, 3, 3, 4, 4, 3, 3, 3, 2, 3]), clusters)

    clusters, _ = run_clustering_experiment(
        TimeSeriesKMeans(
            n_clusters=5,
            max_iter=5,
            averaging_algorithm="dba",
            averaging_algorithm_iterations=2,
            random_state=rng,
        ),
        X_train,
        X_test,
    )
    # Seeding still needs to be added to dba before exact labels can be
    # asserted here, so only check that a non-empty result came back.
    # A bare ``assert clusters`` raises ValueError (ambiguous truth value)
    # when ``clusters`` is a NumPy array with more than one element, so the
    # length is tested explicitly instead.
    assert len(clusters) > 0

    clusters, _ = run_clustering_experiment(
        TimeSeriesKMeans(
            n_clusters=5,
            max_iter=5,
            init_algorithm="random",
            averaging_algorithm="dba",
            averaging_algorithm_iterations=2,
            random_state=rng,
        ),
        X_train,
        X_test,
    )
    assert np.array_equal(np.array([1, 4, 3, 3, 4, 4, 1, 1, 1, 4]), clusters)