def test_compute_MSE():
    """Check KMeans.compute_MSE with non-overlapping and overlapping masks.

    Two scenarios are exercised on the same pair of vectors:

    * the masks share no position where both are 1, and the test expects
      compute_MSE to return None;
    * the masks share positions 1, 2 and 4, and the test expects the mean
      of the squared differences over those three positions.

    NOTE(review): this file contains a second definition of
    test_compute_MSE further down; at import time the later one replaces
    this one, so only one of them is collected. Consider removing a copy.
    """
    # Test case: no overlap
    # X and M are only needed to construct the KMeans object.
    X = numpy.ones((1, 5))
    M = numpy.ones((1, 5))
    K = 1
    x1 = [1.0, 2.0, 3.0, 4.0, 5.0]
    x2 = [5.0, 4.5, 3.0, 2.5, 1.0]
    mask1 = [0, 1, 1, 0, 0]
    mask2 = [1, 0, 0, 0, 1]
    kmeans = KMeans(X, M, K)
    output = kmeans.compute_MSE(x1, x2, mask1, mask2)
    # Identity check: None is a singleton, and "== None" can be hijacked
    # by an overloaded __eq__ (e.g. numpy broadcasting).
    assert output is None

    # Overlap
    mask1 = [1, 1, 1, 0, 1]
    mask2 = [0, 1, 1, 1, 1]
    # Positions 1, 2 and 4 are set in both masks; the differences there
    # are 2.5, 0.0 and 4.0.
    expected_output = (2.5**2 + 4.0**2) / 3.0
    output = kmeans.compute_MSE(x1, x2, mask1, mask2)
    assert expected_output == output
def test_compute_MSE():
    """Verify compute_MSE for masks without and with shared entries.

    First the two masks have no index where both are 1, and the result is
    expected to be None. Then the masks agree on indices 1, 2 and 4, and
    the result is expected to equal the sum of the squared differences at
    those indices divided by 3.

    NOTE(review): an earlier definition of test_compute_MSE exists in this
    file and is shadowed by this one. Consider removing one of the copies.
    """
    # Test case: no overlap
    # The all-ones data and mask matrices only serve to build the object.
    X = numpy.ones((1, 5))
    M = numpy.ones((1, 5))
    K = 1
    x1 = [1.0, 2.0, 3.0, 4.0, 5.0]
    x2 = [5.0, 4.5, 3.0, 2.5, 1.0]
    mask1 = [0, 1, 1, 0, 0]
    mask2 = [1, 0, 0, 0, 1]
    kmeans = KMeans(X, M, K)
    output = kmeans.compute_MSE(x1, x2, mask1, mask2)
    # Compare to None by identity rather than equality.
    assert output is None

    # Overlap
    mask1 = [1, 1, 1, 0, 1]
    mask2 = [0, 1, 1, 1, 1]
    # Shared indices: 1 (|2.0-4.5| = 2.5), 2 (0.0) and 4 (|5.0-1.0| = 4.0).
    expected_output = (2.5**2 + 4.0**2) / 3.0
    output = kmeans.compute_MSE(x1, x2, mask1, mask2)
    assert expected_output == output
def test_update():
    """Check KMeans.update in the normal, 'random' and 'singleton' cases.

    Each scenario builds a KMeans object on a 3x3 data matrix X with mask
    M, overwrites its assignment/centroid state by hand, calls update()
    and compares the resulting state with hand-computed values.

    The per-cluster assignment lists have different lengths, so the arrays
    holding them are created with dtype=object. Without it, recent NumPy
    versions refuse to build an array from ragged nested lists.

    NOTE(review): this file contains a second definition of test_update
    further down; the later one replaces this one at import time.
    """
    # Normal case
    X = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    M = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
    K = 2

    kmeans = KMeans(X, M, K)
    # points 0,1 to cluster 0, point 2 to cluster 1
    kmeans.data_point_assignments = numpy.array([[0, 1], [2]], dtype=object)
    kmeans.centroids = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
    kmeans.mask_centroids = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]

    new_centroids = [[1.0, 3.5, 0], [7.0, 8.0, 9.0]]
    new_mask_centroids = [[1, 1, 0], [1, 1, 1]]
    kmeans.update()
    assert numpy.array_equal(new_centroids, kmeans.centroids)
    assert numpy.array_equal(new_mask_centroids, kmeans.mask_centroids)

    # Case when one cluster has no points assigned to it - we then randomly
    # re-initialise that cluster
    X = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    M = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
    K = 2

    kmeans = KMeans(X, M, K, 'random')
    # points 0,1,2 to cluster 0, none to cluster 1
    kmeans.data_point_assignments = numpy.array([[0, 1, 2], []], dtype=object)
    kmeans.centroids = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
    kmeans.mask_centroids = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
    kmeans.mins = [1.0, 2.0, 9.0]
    kmeans.maxs = [7.0, 8.0, 9.0]

    new_centroids = [[4.0, 5.0, 9.0], [6.066531109150288, 6.547726417641815, 9.0]]
    new_mask_centroids = [[1, 1, 1], [1, 1, 1]]
    # Fix the seed so the expected values for cluster 1 are reproducible.
    random.seed(0)
    kmeans.update()
    assert numpy.array_equal(new_centroids, kmeans.centroids)
    assert numpy.array_equal(new_mask_centroids, kmeans.mask_centroids)

    # Case when we use the 'singleton' option for empty clusters - reassign
    # point furthest away to the cluster
    # Points 0 and 1 go to cluster 0, 2 to cluster 1 and none to cluster 2.
    # Point 2 is furthest away, so gets reassigned to cluster 2 - making
    # cluster 1 empty. Then point 1 is furthest away and gets reassigned to
    # cluster 1
    X = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    M = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
    K = 3

    kmeans = KMeans(X, M, K, resolve_empty='singleton')
    # points 0,1 to cluster 0, 2 to cluster 1, none to cluster 2
    kmeans.data_point_assignments = numpy.array([[0, 1], [2], []], dtype=object)
    kmeans.cluster_assignments = [0, 0, 1]
    kmeans.centroids = [[1.0, 2.0, 3.0], [15.0, 16.0, 17.0], [500.0, 500.0, 500.0]]
    kmeans.mask_centroids = [[1, 1, 0], [1, 1, 1], [1, 1, 1]]
    # Distance of every point to the centroid it is currently assigned to.
    kmeans.distances = numpy.array([
        kmeans.compute_MSE(kmeans.X[0], kmeans.centroids[0], M[0], kmeans.mask_centroids[0]),
        kmeans.compute_MSE(kmeans.X[1], kmeans.centroids[0], M[1], kmeans.mask_centroids[0]),
        kmeans.compute_MSE(kmeans.X[2], kmeans.centroids[1], M[2], kmeans.mask_centroids[1]),
    ])
    kmeans.mins = [1.0, 2.0, 9.0]
    kmeans.maxs = [7.0, 8.0, 9.0]

    new_centroids = [[1.0, 2.0, 0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]
    new_data_point_assignments = [[0], [1], [2]]
    new_distances = [0, 0, 0]
    kmeans.update()
    assert new_data_point_assignments == list(kmeans.data_point_assignments)
    assert numpy.array_equal(new_distances, kmeans.distances)
    assert numpy.array_equal(new_centroids, kmeans.centroids)
def test_update():
    """Exercise KMeans.update for three empty-cluster situations.

    1. Normal case: every cluster has at least one point.
    2. 'random': one cluster is empty and the test expects it to be
       re-initialised from values drawn after random.seed(0).
    3. 'singleton': an empty cluster is expected to receive the point
       furthest away from its current centroid.

    The hand-written assignment arrays are ragged (clusters hold different
    numbers of points), so they are built with dtype=object; NumPy no
    longer infers an object array from ragged nested lists on its own.

    NOTE(review): an earlier definition of test_update exists in this file
    and is shadowed by this one. Consider removing one of the copies.
    """
    # Normal case
    X = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    M = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
    K = 2

    kmeans = KMeans(X, M, K)
    kmeans.data_point_assignments = numpy.array(
        [[0, 1], [2]],
        dtype=object)  #points 0,1 to cluster 0, point 2 to cluster 1
    kmeans.centroids = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
    kmeans.mask_centroids = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]

    new_centroids = [[1.0, 3.5, 0], [7.0, 8.0, 9.0]]
    new_mask_centroids = [[1, 1, 0], [1, 1, 1]]
    kmeans.update()
    assert numpy.array_equal(new_centroids, kmeans.centroids)
    assert numpy.array_equal(new_mask_centroids, kmeans.mask_centroids)

    # Case when one cluster has no points assigned to it - we then randomly
    # re-initialise that cluster
    X = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    M = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
    K = 2

    kmeans = KMeans(X, M, K, 'random')
    kmeans.data_point_assignments = numpy.array(
        [[0, 1, 2], []],
        dtype=object)  #points 0,1,2 to cluster 0, none to cluster 1
    kmeans.centroids = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
    kmeans.mask_centroids = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
    kmeans.mins = [1.0, 2.0, 9.0]
    kmeans.maxs = [7.0, 8.0, 9.0]

    new_centroids = [[4.0, 5.0, 9.0],
                     [6.066531109150288, 6.547726417641815, 9.0]]
    new_mask_centroids = [[1, 1, 1], [1, 1, 1]]
    # Seed the generator so the expected values above are reproducible.
    random.seed(0)
    kmeans.update()
    assert numpy.array_equal(new_centroids, kmeans.centroids)
    assert numpy.array_equal(new_mask_centroids, kmeans.mask_centroids)

    # Case when we use the 'singleton' option for empty clusters - reassign
    # point furthest away to the cluster
    # Points 0 and 1 go to cluster 0, 2 to cluster 1 and none to cluster 2.
    # Point 2 is furthest away, so gets reassigned to cluster 2 - making
    # cluster 1 empty. Then point 1 is furthest away and gets reassigned to
    # cluster 1
    X = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    M = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
    K = 3

    kmeans = KMeans(X, M, K, resolve_empty='singleton')
    kmeans.data_point_assignments = numpy.array(
        [[0, 1], [2], []],
        dtype=object
    )  #points 0,1 to cluster 0, 2 to cluster 1, none to cluster 2
    kmeans.cluster_assignments = [0, 0, 1]
    kmeans.centroids = [[1.0, 2.0, 3.0], [15.0, 16.0, 17.0],
                        [500.0, 500.0, 500.0]]
    kmeans.mask_centroids = [[1, 1, 0], [1, 1, 1], [1, 1, 1]]
    # Current distance of each point to the centroid of its own cluster.
    kmeans.distances = numpy.array([
        kmeans.compute_MSE(kmeans.X[0], kmeans.centroids[0], M[0],
                           kmeans.mask_centroids[0]),
        kmeans.compute_MSE(kmeans.X[1], kmeans.centroids[0], M[1],
                           kmeans.mask_centroids[0]),
        kmeans.compute_MSE(kmeans.X[2], kmeans.centroids[1], M[2],
                           kmeans.mask_centroids[1])
    ])
    kmeans.mins = [1.0, 2.0, 9.0]
    kmeans.maxs = [7.0, 8.0, 9.0]

    new_centroids = [[1.0, 2.0, 0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]
    new_data_point_assignments = [[0], [1], [2]]
    new_distances = [0, 0, 0]
    kmeans.update()
    assert new_data_point_assignments == list(kmeans.data_point_assignments)
    assert numpy.array_equal(new_distances, kmeans.distances)
    assert numpy.array_equal(new_centroids, kmeans.centroids)