예제 #1
0
def test_find_known_coordinate_values():
    """Check find_known_coordinate_values for a populated and an empty cluster.

    Scenario 1 assigns points 0 and 1 to cluster 0 and point 2 to cluster 1,
    then checks the known values gathered per coordinate for both clusters.
    Scenario 2 assigns every point to cluster 0; for the empty cluster 1 the
    method is expected to return None.
    """
    def as_nested_lists(values):
        # Normalise to plain nested lists so ragged rows can be compared with
        # ==. numpy.array_equal cannot be used here: on NumPy >= 1.24 the
        # conversion of ragged input fails and array_equal just returns False.
        return None if values is None else [list(row) for row in values]

    # Normal test case
    X = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    M = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
    K = 2
    kmeans = KMeans(X, M, K)
    # Points 0,1 to cluster 0, point 2 to cluster 1. The rows differ in
    # length, so dtype=object is required (NumPy >= 1.24 raises otherwise).
    kmeans.data_point_assignments = numpy.array([[0, 1], [2]], dtype=object)

    expected_lists_known_coordinate_values_0 = [[1.0], [2.0, 5.0], []]
    expected_lists_known_coordinate_values_1 = [[7.0], [8.0], [9.0]]
    lists_known_coordinate_values_0 = kmeans.find_known_coordinate_values(0)
    lists_known_coordinate_values_1 = kmeans.find_known_coordinate_values(1)

    assert (expected_lists_known_coordinate_values_0
            == as_nested_lists(lists_known_coordinate_values_0))
    assert (expected_lists_known_coordinate_values_1
            == as_nested_lists(lists_known_coordinate_values_1))

    # Cluster without any points
    X = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    M = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
    K = 2
    kmeans = KMeans(X, M, K)
    # Points 0,1,2 to cluster 0, none to cluster 1 (ragged -> dtype=object).
    kmeans.data_point_assignments = numpy.array([[0, 1, 2], []], dtype=object)

    expected_lists_known_coordinate_values_0 = [[1.0, 7.0], [2.0, 5.0, 8.0], [9.0]]
    lists_known_coordinate_values_0 = kmeans.find_known_coordinate_values(0)
    lists_known_coordinate_values_1 = kmeans.find_known_coordinate_values(1)

    assert (expected_lists_known_coordinate_values_0
            == as_nested_lists(lists_known_coordinate_values_0))
    # The empty cluster has no known values at all.
    assert lists_known_coordinate_values_1 is None
예제 #2
0
def test_find_known_coordinate_values():
    """Test the per-coordinate known values that KMeans collects for a cluster.

    Two scenarios are covered: a normal assignment (points 0 and 1 in
    cluster 0, point 2 in cluster 1) and an assignment that leaves cluster 1
    empty, in which case None is the expected result for that cluster.
    """
    def assert_same_known_values(expected, actual):
        # Compare column by column with plain list equality. The columns have
        # different lengths, and numpy.array_equal returns False for such
        # ragged input on NumPy >= 1.24 regardless of the contents.
        assert actual is not None
        assert len(expected) == len(actual)
        for expected_column, actual_column in zip(expected, actual):
            assert expected_column == list(actual_column)

    # Normal test case
    X = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    M = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
    K = 2
    kmeans = KMeans(X, M, K)
    # dtype=object keeps the ragged per-cluster index lists as list elements;
    # without it NumPy >= 1.24 raises ValueError.
    kmeans.data_point_assignments = numpy.array(
        [[0, 1], [2]],
        dtype=object)  # points 0,1 to cluster 0, point 2 to cluster 1

    expected_lists_known_coordinate_values_0 = [[1.0], [2.0, 5.0], []]
    expected_lists_known_coordinate_values_1 = [[7.0], [8.0], [9.0]]
    lists_known_coordinate_values_0 = kmeans.find_known_coordinate_values(0)
    lists_known_coordinate_values_1 = kmeans.find_known_coordinate_values(1)

    assert_same_known_values(expected_lists_known_coordinate_values_0,
                             lists_known_coordinate_values_0)
    assert_same_known_values(expected_lists_known_coordinate_values_1,
                             lists_known_coordinate_values_1)

    # Cluster without any points
    X = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    M = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
    K = 2
    kmeans = KMeans(X, M, K)
    kmeans.data_point_assignments = numpy.array(
        [[0, 1, 2], []],
        dtype=object)  # points 0,1,2 to cluster 0, none to cluster 1

    expected_lists_known_coordinate_values_0 = [[1.0, 7.0], [2.0, 5.0, 8.0],
                                                [9.0]]
    lists_known_coordinate_values_0 = kmeans.find_known_coordinate_values(0)
    lists_known_coordinate_values_1 = kmeans.find_known_coordinate_values(1)

    assert_same_known_values(expected_lists_known_coordinate_values_0,
                             lists_known_coordinate_values_0)
    # No points in cluster 1, so there is nothing to collect.
    assert lists_known_coordinate_values_1 is None
예제 #3
0
def test_update():
    """Check KMeans.update() for three cluster-assignment situations.

    1. Normal case: every cluster has points; centroids and centroid masks
       are recomputed from the assigned points.
    2. Empty cluster with the 'random' option: the empty cluster is
       re-initialised randomly (the generator is seeded for a fixed result).
    3. Empty cluster with resolve_empty='singleton': the point furthest from
       its centroid is moved into the empty cluster, repeatedly, until no
       cluster is empty.
    """
    # Normal case
    X = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    M = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
    K = 2
    kmeans = KMeans(X, M, K)
    # Points 0,1 to cluster 0, point 2 to cluster 1. The index lists are
    # ragged, so dtype=object is required (NumPy >= 1.24 raises otherwise).
    kmeans.data_point_assignments = numpy.array([[0, 1], [2]], dtype=object)
    kmeans.centroids = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
    kmeans.mask_centroids = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]

    new_centroids = [[1.0, 3.5, 0], [7.0, 8.0, 9.0]]
    new_mask_centroids = [[1, 1, 0], [1, 1, 1]]
    kmeans.update()
    assert numpy.array_equal(new_centroids, kmeans.centroids)
    assert numpy.array_equal(new_mask_centroids, kmeans.mask_centroids)

    # Case when one cluster has no points assigned to it - we then randomly
    # re-initialise that cluster
    X = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    M = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
    K = 2
    kmeans = KMeans(X, M, K, 'random')
    # Points 0,1,2 to cluster 0, none to cluster 1 (ragged -> dtype=object).
    kmeans.data_point_assignments = numpy.array([[0, 1, 2], []], dtype=object)
    kmeans.centroids = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
    kmeans.mask_centroids = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
    kmeans.mins = [1.0, 2.0, 9.0]
    kmeans.maxs = [7.0, 8.0, 9.0]

    new_centroids = [[4.0, 5.0, 9.0], [6.066531109150288, 6.547726417641815, 9.0]]
    new_mask_centroids = [[1, 1, 1], [1, 1, 1]]

    # Seed so the re-initialised centroid matches the hard-coded expectation
    # (presumably update() draws from the `random` module - verify in KMeans).
    random.seed(0)
    kmeans.update()
    assert numpy.array_equal(new_centroids, kmeans.centroids)
    assert numpy.array_equal(new_mask_centroids, kmeans.mask_centroids)

    # Case when we use the 'singleton' option for empty clusters - reassign
    # point furthest away to the cluster.
    # Points 0 and 1 go to cluster 0, 2 to cluster 1 and none to cluster 2.
    # Point 2 is furthest away, so gets reassigned to cluster 2 - making
    # cluster 1 empty. Then point 1 is furthest away and gets reassigned to
    # cluster 1.
    X = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    M = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
    K = 3
    kmeans = KMeans(X, M, K, resolve_empty='singleton')
    # Points 0,1 to cluster 0, 2 to cluster 1, none to cluster 2.
    kmeans.data_point_assignments = numpy.array([[0, 1], [2], []], dtype=object)
    kmeans.cluster_assignments = [0, 0, 1]
    kmeans.centroids = [[1.0, 2.0, 3.0], [15.0, 16.0, 17.0], [500.0, 500.0, 500.0]]
    kmeans.mask_centroids = [[1, 1, 0], [1, 1, 1], [1, 1, 1]]
    # Distance of every point to the centroid of its currently assigned cluster.
    kmeans.distances = numpy.array([
        kmeans.compute_MSE(kmeans.X[0], kmeans.centroids[0], M[0], kmeans.mask_centroids[0]),
        kmeans.compute_MSE(kmeans.X[1], kmeans.centroids[0], M[1], kmeans.mask_centroids[0]),
        kmeans.compute_MSE(kmeans.X[2], kmeans.centroids[1], M[2], kmeans.mask_centroids[1]),
    ])
    kmeans.mins = [1.0, 2.0, 9.0]
    kmeans.maxs = [7.0, 8.0, 9.0]

    new_centroids = [[1.0, 2.0, 0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]
    new_data_point_assignments = [[0], [1], [2]]
    new_distances = [0, 0, 0]

    kmeans.update()

    assert new_data_point_assignments == list(kmeans.data_point_assignments)
    assert numpy.array_equal(new_distances, kmeans.distances)
    assert numpy.array_equal(new_centroids, kmeans.centroids)
예제 #4
0
def test_update():
    """Test KMeans.update() with full clusters and with an empty cluster.

    Covers the normal centroid update, the 'random' strategy for an empty
    cluster (seeded so the outcome is fixed), and the 'singleton' strategy,
    where the point furthest from its centroid is moved into the empty
    cluster until every cluster owns a point.
    """
    # Normal case
    X = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    M = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
    K = 2
    kmeans = KMeans(X, M, K)
    # The per-cluster index lists have different lengths; NumPy >= 1.24
    # raises ValueError for such ragged input unless dtype=object is given.
    kmeans.data_point_assignments = numpy.array(
        [[0, 1], [2]],
        dtype=object)  # points 0,1 to cluster 0, point 2 to cluster 1
    kmeans.centroids = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
    kmeans.mask_centroids = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]

    new_centroids = [[1.0, 3.5, 0], [7.0, 8.0, 9.0]]
    new_mask_centroids = [[1, 1, 0], [1, 1, 1]]
    kmeans.update()
    assert numpy.array_equal(new_centroids, kmeans.centroids)
    assert numpy.array_equal(new_mask_centroids, kmeans.mask_centroids)

    # Case when one cluster has no points assigned to it - we then randomly
    # re-initialise that cluster
    X = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    M = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
    K = 2
    kmeans = KMeans(X, M, K, 'random')
    kmeans.data_point_assignments = numpy.array(
        [[0, 1, 2], []],
        dtype=object)  # points 0,1,2 to cluster 0, none to cluster 1
    kmeans.centroids = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
    kmeans.mask_centroids = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
    kmeans.mins = [1.0, 2.0, 9.0]
    kmeans.maxs = [7.0, 8.0, 9.0]

    new_centroids = [[4.0, 5.0, 9.0],
                     [6.066531109150288, 6.547726417641815, 9.0]]
    new_mask_centroids = [[1, 1, 1], [1, 1, 1]]

    # Fixed seed so the randomly re-initialised centroid is reproducible
    # (assumes update() uses the `random` module - confirm against KMeans).
    random.seed(0)
    kmeans.update()
    assert numpy.array_equal(new_centroids, kmeans.centroids)
    assert numpy.array_equal(new_mask_centroids, kmeans.mask_centroids)

    # Case when we use the 'singleton' option for empty clusters - reassign
    # point furthest away to the cluster.
    # Points 0 and 1 go to cluster 0, 2 to cluster 1 and none to cluster 2.
    # Point 2 is furthest away, so gets reassigned to cluster 2 - making
    # cluster 1 empty. Then point 1 is furthest away and gets reassigned to
    # cluster 1.
    X = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    M = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
    K = 3
    kmeans = KMeans(X, M, K, resolve_empty='singleton')
    kmeans.data_point_assignments = numpy.array(
        [[0, 1], [2], []],
        dtype=object
    )  # points 0,1 to cluster 0, 2 to cluster 1, none to cluster 2
    kmeans.cluster_assignments = [0, 0, 1]
    kmeans.centroids = [[1.0, 2.0, 3.0], [15.0, 16.0, 17.0],
                        [500.0, 500.0, 500.0]]
    kmeans.mask_centroids = [[1, 1, 0], [1, 1, 1], [1, 1, 1]]
    # One distance per point, measured against its current cluster centroid.
    kmeans.distances = numpy.array([
        kmeans.compute_MSE(kmeans.X[0], kmeans.centroids[0], M[0],
                           kmeans.mask_centroids[0]),
        kmeans.compute_MSE(kmeans.X[1], kmeans.centroids[0], M[1],
                           kmeans.mask_centroids[0]),
        kmeans.compute_MSE(kmeans.X[2], kmeans.centroids[1], M[2],
                           kmeans.mask_centroids[1])
    ])
    kmeans.mins = [1.0, 2.0, 9.0]
    kmeans.maxs = [7.0, 8.0, 9.0]

    new_centroids = [[1.0, 2.0, 0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]
    new_data_point_assignments = [[0], [1], [2]]
    new_distances = [0, 0, 0]

    kmeans.update()

    assert new_data_point_assignments == list(kmeans.data_point_assignments)
    assert numpy.array_equal(new_distances, kmeans.distances)
    assert numpy.array_equal(new_centroids, kmeans.centroids)