Example #1
0
def test_KMeansMachine():
    # Test a KMeansMachine

    means = np.array([[3, 70, 0], [4, 72, 0]], "float64")
    test_val = np.array([3, 70, 1], "float64")
    test_arr = np.array([[3, 70, 1], [5, 72, 0]], "float64")

    for transform in (to_numpy, to_dask_array):
        means, test_val, test_arr = transform(means, test_val, test_arr)

        # Initializes a KMeansMachine
        km = KMeansMachine(2)
        km.centroids_ = means

        # Distance and closest mean
        np.testing.assert_equal(km.transform(test_val)[0], np.array([1]))
        np.testing.assert_equal(km.transform(test_val)[1], np.array([6]))

        index = km.predict(test_val)
        assert index == 0

        indices = km.predict(test_arr)
        np.testing.assert_equal(indices, np.array([0, 1]))

        # Check __eq__ and is_similar_to
        km2 = KMeansMachine(2)
        assert km != km2
        assert not km.is_similar_to(km2)
        km2 = copy.deepcopy(km)
        assert km == km2
        assert km.is_similar_to(km2)
        km2.centroids_[0, 0] += 1
        assert km != km2
        assert not km.is_similar_to(km2)
Example #2
0
def test_KMeansMachine_var_and_weight():
    for transform in (to_numpy, to_dask_array):
        kmeans = KMeansMachine(2)
        kmeans.centroids_ = transform(np.array([[1.2, 1.3], [0.2, -0.3]]))

        data = np.array([[1.0, 1], [1.2, 3], [0, 0], [0.3, 0.2], [0.2, 0]])
        data = transform(data)
        variances, weights = kmeans.get_variances_and_weights_for_each_cluster(
            data)

        variances_result = np.array([[0.01, 1.0], [0.01555556, 0.00888889]])
        weights_result = np.array([0.4, 0.6])

        np.testing.assert_almost_equal(variances, variances_result, decimal=7)
        np.testing.assert_equal(weights, weights_result)