예제 #1
0
def test_compute_MSE():
    """Check KMeans.compute_MSE with disjoint and with overlapping masks.

    When the two masks share no observed entry the method is expected to
    return None. When they overlap, the expected value is the mean of the
    squared differences taken over the commonly observed entries only.
    """
    # Test case: no overlap
    X = numpy.ones((1, 5))
    M = numpy.ones((1, 5))
    K = 1

    x1 = [1.0, 2.0, 3.0, 4.0, 5.0]
    x2 = [5.0, 4.5, 3.0, 2.5, 1.0]
    mask1 = [0, 1, 1, 0, 0]
    mask2 = [1, 0, 0, 0, 1]
    kmeans = KMeans(X, M, K)

    output = kmeans.compute_MSE(x1, x2, mask1, mask2)
    # Identity check: `== None` would be evaluated elementwise if an array
    # were returned, which could mask a wrong return type.
    assert output is None

    # Overlap
    mask1 = [1, 1, 1, 0, 1]
    mask2 = [0, 1, 1, 1, 1]

    # Entries 1, 2 and 4 are observed in both masks; their differences are
    # -2.5, 0.0 and 4.0, hence three terms in the mean.
    expected_output = (2.5**2 + 4.0**2) / 3.0
    output = kmeans.compute_MSE(x1, x2, mask1, mask2)
    # Tolerant comparison so the test does not depend on the exact order of
    # floating-point operations inside compute_MSE.
    assert numpy.isclose(expected_output, output)
예제 #2
0
def test_compute_MSE():
    """Exercise KMeans.compute_MSE on two mask configurations.

    1. Masks without any commonly observed entry: the result must be None.
    2. Masks that overlap on three entries: the result must equal the mean
       squared difference over exactly those entries.
    """
    # Test case: no overlap
    X = numpy.ones((1, 5))
    M = numpy.ones((1, 5))
    K = 1

    x1 = [1.0, 2.0, 3.0, 4.0, 5.0]
    x2 = [5.0, 4.5, 3.0, 2.5, 1.0]
    mask1 = [0, 1, 1, 0, 0]
    mask2 = [1, 0, 0, 0, 1]
    kmeans = KMeans(X, M, K)

    output = kmeans.compute_MSE(x1, x2, mask1, mask2)
    # None is a singleton; compare by identity rather than equality.
    assert output is None

    # Overlap
    mask1 = [1, 1, 1, 0, 1]
    mask2 = [0, 1, 1, 1, 1]

    # Shared entries are indices 1, 2, 4 with differences -2.5, 0.0, 4.0.
    expected_output = (2.5**2 + 4.0**2) / 3.0
    output = kmeans.compute_MSE(x1, x2, mask1, mask2)
    # Compare floats with a tolerance instead of bit-for-bit equality.
    assert numpy.isclose(expected_output, output)
예제 #3
0
def test_update():
    """Check KMeans.update for three scenarios.

    1. Every cluster has points: centroids and centroid masks are recomputed
       from the observed entries of the assigned points.
    2. An empty cluster with the 'random' option: the expected centroid is
       pinned by seeding `random` with 0 before the update.
    3. An empty cluster with resolve_empty='singleton': the furthest points
       are expected to be reassigned until no cluster is empty.

    The per-cluster index lists have different lengths, so they are built
    with dtype=object; NumPy >= 1.24 raises ValueError for ragged nested
    sequences otherwise.
    """
    # Normal case
    X = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    M = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
    K = 2
    kmeans = KMeans(X, M, K)
    # points 0,1 to cluster 0, point 2 to cluster 1
    kmeans.data_point_assignments = numpy.array([[0, 1], [2]], dtype=object)
    kmeans.centroids = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
    kmeans.mask_centroids = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]

    # Cluster 0: column 0 only observed in point 0 (1.0), column 1 observed in
    # both points ((2.0 + 5.0) / 2), column 2 never observed (0, mask 0).
    new_centroids = [[1.0, 3.5, 0], [7.0, 8.0, 9.0]]
    new_mask_centroids = [[1, 1, 0], [1, 1, 1]]
    kmeans.update()
    assert numpy.array_equal(new_centroids, kmeans.centroids)
    assert numpy.array_equal(new_mask_centroids, kmeans.mask_centroids)

    # Case when one cluster has no points assigned to it - we then randomly re-initialise that cluster
    X = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    M = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
    K = 2
    kmeans = KMeans(X, M, K, 'random')
    # points 0,1,2 to cluster 0, none to cluster 1
    kmeans.data_point_assignments = numpy.array([[0, 1, 2], []], dtype=object)
    kmeans.centroids = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
    kmeans.mask_centroids = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
    kmeans.mins = [1.0, 2.0, 9.0]
    kmeans.maxs = [7.0, 8.0, 9.0]

    new_centroids = [[4.0, 5.0, 9.0], [6.066531109150288, 6.547726417641815, 9.0]]
    new_mask_centroids = [[1, 1, 1], [1, 1, 1]]

    # Fix the seed so the re-initialised centroid is reproducible.
    random.seed(0)
    kmeans.update()
    assert numpy.array_equal(new_centroids, kmeans.centroids)
    assert numpy.array_equal(new_mask_centroids, kmeans.mask_centroids)

    # Case when we use the 'singleton' option for empty clusters - reassign point furthest away to the cluster
    # Points 0 and 1 go to cluster 0, 2 to cluster 1 and none to cluster 2.
    # Point 2 is furthest away, so gets reassigned to cluster 2 - making
    # cluster 1 empty. Then point 1 is furthest away and gets reassigned to cluster 1
    X = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    M = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
    K = 3
    kmeans = KMeans(X, M, K, resolve_empty='singleton')
    # points 0,1 to cluster 0, 2 to cluster 1, none to cluster 2
    kmeans.data_point_assignments = numpy.array([[0, 1], [2], []], dtype=object)
    kmeans.cluster_assignments = [0, 0, 1]
    kmeans.centroids = [[1.0, 2.0, 3.0], [15.0, 16.0, 17.0], [500.0, 500.0, 500.0]]
    kmeans.mask_centroids = [[1, 1, 0], [1, 1, 1], [1, 1, 1]]
    # Distance of each point to the centroid of the cluster it is assigned to.
    kmeans.distances = numpy.array([
        kmeans.compute_MSE(kmeans.X[0], kmeans.centroids[0], M[0], kmeans.mask_centroids[0]),
        kmeans.compute_MSE(kmeans.X[1], kmeans.centroids[0], M[1], kmeans.mask_centroids[0]),
        kmeans.compute_MSE(kmeans.X[2], kmeans.centroids[1], M[2], kmeans.mask_centroids[1]),
    ])
    kmeans.mins = [1.0, 2.0, 9.0]
    kmeans.maxs = [7.0, 8.0, 9.0]

    new_centroids = [[1.0, 2.0, 0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]
    new_data_point_assignments = [[0], [1], [2]]
    new_distances = [0, 0, 0]

    kmeans.update()

    assert new_data_point_assignments == list(kmeans.data_point_assignments)
    assert numpy.array_equal(new_distances, kmeans.distances)
    assert numpy.array_equal(new_centroids, kmeans.centroids)
예제 #4
0
def test_update():
    """Verify KMeans.update in the normal, 'random' and 'singleton' cases.

    Each scenario sets the assignments, centroids and centroid masks by hand,
    calls update() once and compares the resulting state against hand-derived
    expectations.

    The cluster-to-points lists are ragged (clusters hold different numbers
    of points), so they are created as object arrays. Without dtype=object,
    NumPy >= 1.24 refuses to build an array from ragged nested sequences.
    """
    # Normal case
    X = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    M = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
    K = 2
    kmeans = KMeans(X, M, K)
    kmeans.data_point_assignments = numpy.array(
        [[0, 1], [2]],
        dtype=object)  #points 0,1 to cluster 0, point 2 to cluster 1
    kmeans.centroids = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
    kmeans.mask_centroids = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]

    # Expected values average only the entries that M marks as observed.
    new_centroids = [[1.0, 3.5, 0], [7.0, 8.0, 9.0]]
    new_mask_centroids = [[1, 1, 0], [1, 1, 1]]
    kmeans.update()
    assert numpy.array_equal(new_centroids, kmeans.centroids)
    assert numpy.array_equal(new_mask_centroids, kmeans.mask_centroids)

    # Case when one cluster has no points assigned to it - we then randomly re-initialise that cluster
    X = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    M = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
    K = 2
    kmeans = KMeans(X, M, K, 'random')
    kmeans.data_point_assignments = numpy.array(
        [[0, 1, 2], []],
        dtype=object)  #points 0,1,2 to cluster 0, none to cluster 1
    kmeans.centroids = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
    kmeans.mask_centroids = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
    kmeans.mins = [1.0, 2.0, 9.0]
    kmeans.maxs = [7.0, 8.0, 9.0]

    new_centroids = [[4.0, 5.0, 9.0],
                     [6.066531109150288, 6.547726417641815, 9.0]]
    new_mask_centroids = [[1, 1, 1], [1, 1, 1]]

    # Seeding makes the randomly drawn centroid deterministic for the assert.
    random.seed(0)
    kmeans.update()
    assert numpy.array_equal(new_centroids, kmeans.centroids)
    assert numpy.array_equal(new_mask_centroids, kmeans.mask_centroids)

    # Case when we use the 'singleton' option for empty clusters - reassign point furthest away to the cluster
    # Points 0 and 1 go to cluster 0, 2 to cluster 1 and none to cluster 2.
    # Point 2 is furthest away, so gets reassigned to cluster 2 - making
    # cluster 1 empty. Then point 1 is furthest away and gets reassigned to cluster 1
    X = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    M = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
    K = 3
    kmeans = KMeans(X, M, K, resolve_empty='singleton')
    kmeans.data_point_assignments = numpy.array(
        [[0, 1], [2], []],
        dtype=object
    )  #points 0,1 to cluster 0, 2 to cluster 1, none to cluster 2
    kmeans.cluster_assignments = [0, 0, 1]
    kmeans.centroids = [[1.0, 2.0, 3.0], [15.0, 16.0, 17.0],
                        [500.0, 500.0, 500.0]]
    kmeans.mask_centroids = [[1, 1, 0], [1, 1, 1], [1, 1, 1]]
    # One distance per point, measured against its currently assigned centroid.
    kmeans.distances = numpy.array([
        kmeans.compute_MSE(kmeans.X[0], kmeans.centroids[0], M[0],
                           kmeans.mask_centroids[0]),
        kmeans.compute_MSE(kmeans.X[1], kmeans.centroids[0], M[1],
                           kmeans.mask_centroids[0]),
        kmeans.compute_MSE(kmeans.X[2], kmeans.centroids[1], M[2],
                           kmeans.mask_centroids[1])
    ])
    kmeans.mins = [1.0, 2.0, 9.0]
    kmeans.maxs = [7.0, 8.0, 9.0]

    new_centroids = [[1.0, 2.0, 0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]
    new_data_point_assignments = [[0], [1], [2]]
    new_distances = [0, 0, 0]

    kmeans.update()

    assert new_data_point_assignments == list(kmeans.data_point_assignments)
    assert numpy.array_equal(new_distances, kmeans.distances)
    assert numpy.array_equal(new_centroids, kmeans.centroids)