def test_KMeansMachine():
    """Check KMeansMachine distances, predictions and equality semantics.

    The centroids are assigned directly to ``centroids_`` (no fitting is
    performed), and the same assertions run once per array transform:
    ``to_numpy`` first, then ``to_dask_array``.
    """
    centroids = np.array([[3, 70, 0], [4, 72, 0]], dtype="float64")
    sample = np.array([3, 70, 1], dtype="float64")
    batch = np.array([[3, 70, 1], [5, 72, 0]], dtype="float64")

    for transform in (to_numpy, to_dask_array):
        # Rebinding the inputs means each pass converts the arrays that the
        # previous pass returned.
        centroids, sample, batch = transform(centroids, sample, batch)

        machine = KMeansMachine(2)
        machine.centroids_ = centroids

        # Distance to each centroid: the expected values 1 and 6 equal the
        # squared Euclidean distance from the sample to centroid 0 and 1.
        for centroid_index, expected_distance in ((0, 1), (1, 6)):
            np.testing.assert_equal(
                machine.transform(sample)[centroid_index],
                np.array([expected_distance]),
            )

        # Closest centroid for a single sample, then for a batch of samples.
        assert machine.predict(sample) == 0
        np.testing.assert_equal(machine.predict(batch), np.array([0, 1]))

        # __eq__ / is_similar_to: a freshly constructed machine must differ
        # from the configured one.
        other = KMeansMachine(2)
        assert machine != other
        assert not machine.is_similar_to(other)

        # A deep copy must compare equal and similar.
        other = copy.deepcopy(machine)
        assert machine == other
        assert machine.is_similar_to(other)

        # Perturbing one centroid coordinate of the copy must break both.
        other.centroids_[0, 0] += 1
        assert machine != other
        assert not machine.is_similar_to(other)
def test_KMeansMachine_var_and_weight():
    """Check the per-cluster variances and weights reported by KMeansMachine.

    Runs once per array transform (``to_numpy`` then ``to_dask_array``) with
    centroids assigned directly to ``centroids_``.
    """
    for transform in (to_numpy, to_dask_array):
        machine = KMeansMachine(2)
        machine.centroids_ = transform(np.array([[1.2, 1.3], [0.2, -0.3]]))

        samples = transform(
            np.array([[1.0, 1], [1.2, 3], [0, 0], [0.3, 0.2], [0.2, 0]])
        )

        # Reference values: two of the five samples lie nearest the first
        # centroid and three nearest the second, giving weights 0.4 and 0.6;
        # the variances match the per-dimension population variance of each
        # of those groups.
        expected_variances = np.array(
            [[0.01, 1.0], [0.01555556, 0.00888889]]
        )
        expected_weights = np.array([0.4, 0.6])

        result = machine.get_variances_and_weights_for_each_cluster(samples)
        variances, weights = result

        # Variances are compared to 7 decimals; weights are compared exactly.
        np.testing.assert_almost_equal(
            variances, expected_variances, decimal=7
        )
        np.testing.assert_equal(weights, expected_weights)