Exemple #1
0
def test_KMeansMachine():
    """Exercise KMeansMachine on hand-set centroids with each array backend.

    For every converter in ``(to_numpy, to_dask_array)`` the test checks the
    values returned by ``transform``, the indices returned by ``predict``,
    and the behaviour of ``==`` / ``!=`` / ``is_similar_to``.
    """
    dtype = "float64"
    centers = np.array([[3, 70, 0], [4, 72, 0]], dtype)
    sample = np.array([3, 70, 1], dtype)
    batch = np.array([[3, 70, 1], [5, 72, 0]], dtype)

    for convert in (to_numpy, to_dask_array):
        # NOTE: the names are rebound here, so the second converter is fed
        # the output of the first one rather than the arrays built above.
        centers, sample, batch = convert(centers, sample, batch)

        # Two-cluster machine whose centroids are assigned by hand
        machine = KMeansMachine(2)
        machine.centroids_ = centers

        # transform() output: entry 0 must equal 1 and entry 1 must equal 6,
        # which matches the squared Euclidean distances from the sample to
        # the two centroids (0 + 0 + 1 and 1 + 4 + 1).
        for position, expected in enumerate((1, 6)):
            np.testing.assert_equal(
                machine.transform(sample)[position], np.array([expected])
            )

        # Closest centroid for a single sample, then for a batch of samples
        assert machine.predict(sample) == 0
        np.testing.assert_equal(machine.predict(batch), np.array([0, 1]))

        # A fresh machine without these centroids must compare as different
        other = KMeansMachine(2)
        assert machine != other
        assert not machine.is_similar_to(other)

        # A deep copy must compare as equal and similar
        other = copy.deepcopy(machine)
        assert machine == other
        assert machine.is_similar_to(other)

        # Perturbing one centroid coordinate of the copy must break both
        other.centroids_[0, 0] += 1
        assert machine != other
        assert not machine.is_similar_to(other)
Exemple #2
0
from sklearn.datasets import load_iris

from bob.learn.em import KMeansMachine

# Load the iris measurements and split the rows by target label (0, 1, 2)
iris_data = load_iris()
data = iris_data.data
setosa, versicolor, virginica = (
    data[iris_data.target == label] for label in range(3)
)

# Training KMeans
# 3 clusters, fitted on all feature columns of `data` (the scatter plots
# further down only draw columns 0 and 1)
machine = KMeansMachine(n_clusters=3, init_method="k-means++")
machine = machine.fit(data)

# Output of predict() for the same samples the machine was fitted on
predictions = machine.predict(data)

# Plotting: one scatter series per species, drawn from feature columns 0 and 1
figure, ax = plt.subplots()
for _points, _colour, _label in (
    (setosa, "darkcyan", "setosa"),
    (versicolor, "goldenrod", "versicolor"),
    (virginica, "dimgrey", "virginica"),
):
    plt.scatter(_points[:, 0], _points[:, 1], c=_colour, label=_label)
plt.scatter(
    machine.centroids_[:, 0],
    machine.centroids_[:, 1],
    c="blue",
    marker="x",
    label="centroids",