Python KMeans.KMeansの例、kmeans_missing.code.kmeans.KMeans.KMeans Pythonの例

コード例 #1

0

ファイルを表示

ファイル: bnmtf_gibbs_optimised.py プロジェクト: MXDC/BNMTF

    def initialise(self,init_S='random',init_FG='random'):
        """Set the starting values of S, F, G and tau.

        init_S  -- 'exp' leaves S at 1/lambdaS; 'random' overwrites every
                   entry with exponential_draw(lambdaS[k,l]).
        init_FG -- 'exp' leaves F and G at 1/lambdaF and 1/lambdaG; 'random'
                   overwrites every entry with exponential_draw of the
                   matching lambda; 'kmeans' clusters R (for F) and R.T
                   (for G) with KMeans and uses clustering_results + 0.2.

        Raises AssertionError when either option is not recognised.
        """
        assert init_S in ['random','exp'], "Unknown initialisation option for S: %s. Should be 'random' or 'exp'." % init_S
        assert init_FG in ['random','exp','kmeans'], "Unknown initialisation option for F and G: %s. Should be 'random', 'exp', or 'kmeans'." % init_FG

        # Start from 1/lambda and only overwrite it when random draws are requested.
        self.S = 1./self.lambdaS
        if init_S == 'random':
            for k,l in itertools.product(range(self.K),range(self.L)):
                self.S[k,l] = exponential_draw(self.lambdaS[k,l])

        self.F, self.G = 1./self.lambdaF, 1./self.lambdaG
        if init_FG == 'random':
            for i,k in itertools.product(range(self.I),range(self.K)):
                self.F[i,k] = exponential_draw(self.lambdaF[i,k])
            for j,l in itertools.product(range(self.J),range(self.L)):
                self.G[j,l] = exponential_draw(self.lambdaG[j,l])
        elif init_FG == 'kmeans':
            # NOTE(review): the 0.2 offset presumably keeps the cluster
            # indicator entries away from exact zero - confirm with the author.
            print("Initialising F using KMeans.")
            kmeans_F = KMeans(self.R,self.M,self.K)
            kmeans_F.initialise()
            kmeans_F.cluster()
            self.F = kmeans_F.clustering_results + 0.2

            # G is obtained by clustering the transposed data and mask.
            print("Initialising G using KMeans.")
            kmeans_G = KMeans(self.R.T,self.M.T,self.L)
            kmeans_G.initialise()
            kmeans_G.cluster()
            self.G = kmeans_G.clustering_results + 0.2

        self.tau = self.alpha_s() / self.beta_s()

コード例 #2

0

ファイルを表示

    def initialise(self, init_S='random', init_FG='random', tauFSG=None):
        """Set the starting values of the variational parameters.

        init_S  -- 'exp' leaves muS at 1/lambdaS; 'random' overwrites every
                   entry with exponential_draw(lambdaS[k, l]).
        init_FG -- 'exp' leaves muF and muG at 1/lambdaF and 1/lambdaG;
                   'random' overwrites every entry with exponential_draw of
                   the matching lambda; 'kmeans' clusters R (for muF) and
                   R.T (for muG) with KMeans and uses clustering_results.
        tauFSG  -- optional mapping that may hold 'tauF', 'tauS' and 'tauG';
                   any missing entry defaults to a matrix of ones. None is
                   treated as an empty mapping.

        Raises AssertionError when init_S or init_FG is not recognised; the
        options are checked before any attribute is written.
        """
        assert init_S in ['exp', 'random'
                          ], "Unrecognised init option for S: %s." % init_S
        assert init_FG in ['exp', 'random', 'kmeans'
                           ], "Unrecognised init option for F,G: %s." % init_FG

        # A None default avoids sharing one dict object between calls.
        if tauFSG is None:
            tauFSG = {}
        self.tauF = tauFSG['tauF'] if 'tauF' in tauFSG else numpy.ones(
            (self.I, self.K))
        self.tauS = tauFSG['tauS'] if 'tauS' in tauFSG else numpy.ones(
            (self.K, self.L))
        self.tauG = tauFSG['tauG'] if 'tauG' in tauFSG else numpy.ones(
            (self.J, self.L))

        self.muS = 1. / self.lambdaS
        if init_S == 'random':
            for k, l in itertools.product(range(self.K), range(self.L)):
                self.muS[k, l] = exponential_draw(self.lambdaS[k, l])

        self.muF, self.muG = 1. / self.lambdaF, 1. / self.lambdaG
        if init_FG == 'random':
            for i, k in itertools.product(range(self.I), range(self.K)):
                self.muF[i, k] = exponential_draw(self.lambdaF[i, k])
            for j, l in itertools.product(range(self.J), range(self.L)):
                self.muG[j, l] = exponential_draw(self.lambdaG[j, l])
        elif init_FG == 'kmeans':
            # The clustering results are used as they are (no offset added).
            print("Initialising F using KMeans.")
            kmeans_F = KMeans(self.R, self.M, self.K)
            kmeans_F.initialise()
            kmeans_F.cluster()
            self.muF = kmeans_F.clustering_results

            # muG is obtained by clustering the transposed data and mask.
            print("Initialising G using KMeans.")
            kmeans_G = KMeans(self.R.T, self.M.T, self.L)
            kmeans_G.initialise()
            kmeans_G.cluster()
            self.muG = kmeans_G.clustering_results

        # Initialise the expectations and variances
        self.expF, self.varF = numpy.zeros((self.I, self.K)), numpy.zeros(
            (self.I, self.K))
        self.expS, self.varS = numpy.zeros((self.K, self.L)), numpy.zeros(
            (self.K, self.L))
        self.expG, self.varG = numpy.zeros((self.J, self.L)), numpy.zeros(
            (self.J, self.L))

        # Fill them in through the regular update routines: F per column k,
        # S per entry (k, l), G per column l.
        for k in range(self.K):
            self.update_exp_F(k)
        for k, l in itertools.product(range(self.K), range(self.L)):
            self.update_exp_S(k, l)
        for l in range(self.L):
            self.update_exp_G(l)

        # Initialise tau using the updates
        self.update_tau()
        self.update_exp_tau()

コード例 #3

0

ファイルを表示

ファイル: test_kmeans.py プロジェクト: ibrahim85/kmeans_missing

def test_initialise():
    """Check every attribute that KMeans.initialise(seed) is expected to set."""
    data = [[1, 2, 3], [4, 5, 6]]
    observed = [[0, 1, 1], [1, 0, 1]]

    kmeans = KMeans(data, observed, 2)
    kmeans.initialise(0)  # seed 0 makes the random centroids reproducible

    # (attribute name, expected value), checked in this order.
    expectations = [
        ('mins', [4.0, 2.0, 3.0]),
        ('maxs', [4.0, 2.0, 6.0]),
        ('mask_centroids', [[1, 1, 1], [1, 1, 1]]),
        ('cluster_assignments', [-1, -1]),
        ('centroids', [[4.0, 2.0, 4.2617147424925346],
                       [4.0, 2.0, 4.2148024123512426]]),
        ('distances', [0, 0]),
    ]
    for attribute, wanted in expectations:
        assert numpy.array_equal(wanted, getattr(kmeans, attribute))

コード例 #4

0

ファイルを表示

ファイル: test_kmeans.py プロジェクト: ibrahim85/kmeans_missing

def test_assignment():
    """Check KMeans.assignment() both when assignments change and when they do not."""
    X = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    M = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
    kmeans = KMeans(X, M, 2)

    # New closest clusters are [0,0,1] in both scenarios - see
    # test_closest_cluster. Only the starting assignment differs, and with
    # it whether assignment() should report a change.
    scenarios = [
        ([0, 1, 1], True),   # Test change
        ([0, 0, 1], False),  # Test no change
    ]
    for starting_assignments, expected_change in scenarios:
        kmeans.centroids = [[1.0, 3.0, 1.0], [2.0, 1.0, 3.0]]
        kmeans.mask_centroids = [[0, 1, 1], [1, 1, 0]]
        kmeans.cluster_assignments = starting_assignments

        change = kmeans.assignment()
        assert change == expected_change
        assert numpy.array_equal([0, 0, 1], kmeans.cluster_assignments)
        assert numpy.array_equal([[0, 1], [2]], kmeans.data_point_assignments)

コード例 #5

0

ファイルを表示

    def initialise(self, init_S='random', init_FG='random', expo_prior=1.):
        """Give S, F and G their starting values.

        init_S  -- 'ones', 'random' (numpy.random.rand) or 'exponential'
                   (one exponential_draw(expo_prior) per entry).
        init_FG -- the same three options applied to F and G, or 'kmeans',
                   which clusters R (for F) and R.T (for G) with KMeans and
                   uses clustering_results + 0.2.

        Raises AssertionError when an option is not recognised.
        """
        assert init_S in ['ones', 'random', 'exponential'
                          ], "Unrecognised init option for S: %s." % init_S
        assert init_FG in ['ones', 'random', 'exponential', 'kmeans'
                           ], "Unrecognised init option for F,G: %s." % init_FG

        shape_S = (self.K, self.L)
        shape_F = (self.I, self.K)
        shape_G = (self.J, self.L)

        # --- S ---
        if init_S == 'ones':
            self.S = numpy.ones(shape_S)
        elif init_S == 'random':
            self.S = numpy.random.rand(*shape_S)
        elif init_S == 'exponential':
            self.S = numpy.empty(shape_S)
            for position in itertools.product(xrange(self.K), xrange(self.L)):
                self.S[position] = exponential_draw(expo_prior)

        # --- F and G ---
        if init_FG == 'ones':
            self.F = numpy.ones(shape_F)
            self.G = numpy.ones(shape_G)
        elif init_FG == 'random':
            self.F = numpy.random.rand(*shape_F)
            self.G = numpy.random.rand(*shape_G)
        elif init_FG == 'exponential':
            self.F = numpy.empty(shape_F)
            self.G = numpy.empty(shape_G)
            # All of F is drawn before any of G.
            for position in itertools.product(xrange(self.I), xrange(self.K)):
                self.F[position] = exponential_draw(expo_prior)
            for position in itertools.product(xrange(self.J), xrange(self.L)):
                self.G[position] = exponential_draw(expo_prior)
        elif init_FG == 'kmeans':
            print("Initialising F using KMeans.")
            row_clusterer = KMeans(self.R, self.M, self.K)
            row_clusterer.initialise()
            row_clusterer.cluster()
            self.F = row_clusterer.clustering_results + 0.2

            # The transposed data and mask are clustered for G.
            print("Initialising G using KMeans.")
            column_clusterer = KMeans(self.R.T, self.M.T, self.L)
            column_clusterer.initialise()
            column_clusterer.cluster()
            self.G = column_clusterer.clustering_results + 0.2

コード例 #6

0

ファイルを表示

ファイル: test_kmeans.py プロジェクト: ibrahim85/kmeans_missing

def test_create_matrix():
    """Check that create_matrix() turns cluster assignments into a 0/1 matrix."""
    data = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    mask = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
    kmeans = KMeans(data, mask, 2)

    # Points 0 and 2 sit in cluster 1, point 1 in cluster 0.
    kmeans.cluster_assignments = numpy.array([1, 0, 1])
    kmeans.create_matrix()

    # Row i of the result has a 1 in the column of point i's cluster.
    assert numpy.array_equal([[0, 1], [1, 0], [0, 1]],
                             kmeans.clustering_results)

コード例 #7

0

ファイルを表示

ファイル: test_kmeans.py プロジェクト: ibrahim85/kmeans_missing

def test_random_cluster_centroid():
    """Check random_cluster_centroid() against a fixed seed and fixed bounds."""
    kmeans = KMeans([[1, 2, 3], [4, 5, 6]], [[0, 1, 1], [1, 0, 1]], 2)
    kmeans.mins = [4.0, 2.0, 3.0]
    kmeans.maxs = [4.0, 2.0, 6.0]

    # Seeding the stdlib generator pins the drawn third coordinate.
    random.seed(0)
    drawn = kmeans.random_cluster_centroid()

    assert numpy.array_equal([4.0, 2.0, 4.2617147424925346], drawn)

コード例 #8

0

ファイルを表示

ファイル: test_kmeans.py プロジェクト: ibrahim85/kmeans_missing

def test_find_known_coordinate_values():
    """Check find_known_coordinate_values() for a populated and an empty cluster.

    The per-cluster point lists have different lengths, so they are stored in
    an explicit object array: NumPy >= 1.24 refuses to build a ragged array
    unless dtype=object is given.
    """
    # Normal test case
    X = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    M = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
    K = 2
    kmeans = KMeans(X, M, K)
    kmeans.data_point_assignments = numpy.array(
        [[0, 1], [2]],
        dtype=object)  #points 0,1 to cluster 0, point 2 to cluster 1

    expected_lists_known_coordinate_values_0 = [[1.0], [2.0, 5.0], []]
    expected_lists_known_coordinate_values_1 = [[7.0], [8.0], [9.0]]
    lists_known_coordinate_values_0 = kmeans.find_known_coordinate_values(0)
    lists_known_coordinate_values_1 = kmeans.find_known_coordinate_values(1)

    assert numpy.array_equal(expected_lists_known_coordinate_values_0,
                             lists_known_coordinate_values_0)
    assert numpy.array_equal(expected_lists_known_coordinate_values_1,
                             lists_known_coordinate_values_1)

    # Cluster without any points
    X = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    M = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
    K = 2
    kmeans = KMeans(X, M, K)
    kmeans.data_point_assignments = numpy.array(
        [[0, 1, 2], []],
        dtype=object)  #points 0,1,2 to cluster 0, none to cluster 1

    expected_lists_known_coordinate_values_0 = [[1.0, 7.0], [2.0, 5.0, 8.0],
                                                [9.0]]
    # The test expects None for a cluster that has no points.
    expected_lists_known_coordinate_values_1 = None
    lists_known_coordinate_values_0 = kmeans.find_known_coordinate_values(0)
    lists_known_coordinate_values_1 = kmeans.find_known_coordinate_values(1)

    assert numpy.array_equal(expected_lists_known_coordinate_values_0,
                             lists_known_coordinate_values_0)
    assert numpy.array_equal(expected_lists_known_coordinate_values_1,
                             lists_known_coordinate_values_1)

コード例 #9

0

ファイルを表示

ファイル: test_kmeans.py プロジェクト: ibrahim85/kmeans_missing

def test_find_point_furthest_away():
    """Check that the point with the largest recorded distance is returned."""
    X = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    M = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
    kmeans = KMeans(X, M, 2)

    # Equal distance for point 0
    kmeans.centroids = [[1.0, 3.0, 1.0], [2.0, 1.0, 3.0]]
    kmeans.mask_centroids = [[0, 1, 1], [1, 1, 0]]

    # closest_cluster is called once per point, in order, before
    # find_point_furthest_away. MSE per point against the two centroids:
    #   point 0: 1.0 vs 1.0, point 1: 4.0 vs 16.0, point 2: 44.5 vs 37.0
    for index, (point, point_mask) in enumerate(zip(X, M)):
        kmeans.closest_cluster(point, index, point_mask)

    assert 2 == kmeans.find_point_furthest_away()

コード例 #10

0

ファイルを表示

ファイル: test_kmeans.py プロジェクト: ibrahim85/kmeans_missing

def test_compute_MSE():
    """Check compute_MSE() with and without overlapping observed coordinates."""
    # Test case: no overlap
    X = numpy.ones((1, 5))
    M = numpy.ones((1, 5))
    K = 1

    x1 = [1.0, 2.0, 3.0, 4.0, 5.0]
    x2 = [5.0, 4.5, 3.0, 2.5, 1.0]
    mask1 = [0, 1, 1, 0, 0]
    mask2 = [1, 0, 0, 0, 1]
    kmeans = KMeans(X, M, K)

    # No coordinate is observed in both masks, so None is expected.
    # Identity check: only the None singleton itself may pass.
    output = kmeans.compute_MSE(x1, x2, mask1, mask2)
    assert output is None

    # Overlap
    mask1 = [1, 1, 1, 0, 1]
    mask2 = [0, 1, 1, 1, 1]

    # Coordinates 1, 2 and 4 are observed in both masks; their differences
    # are 2.5, 0.0 and 4.0, averaged over the three shared coordinates.
    expected_output = (2.5**2 + 4.0**2) / 3.0
    output = kmeans.compute_MSE(x1, x2, mask1, mask2)
    assert expected_output == output

コード例 #11

0

ファイルを表示

ファイル: test_kmeans.py プロジェクト: ibrahim85/kmeans_missing

def test_closest_cluster():
    """Check closest_cluster() return values and the distances it records."""
    X = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    M = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
    kmeans = KMeans(X, M, 2)

    # Equal distance for point 0
    kmeans.centroids = [[1.0, 3.0, 1.0], [2.0, 1.0, 3.0]]
    kmeans.mask_centroids = [[0, 1, 1], [1, 1, 0]]

    # Expected cluster per point, with the MSE against the two centroids:
    #   point 0 -> 0 (1.0 vs 1.0)
    #   point 1 -> 0 (4.0 vs 16.0)
    #   point 2 -> 1 (44.5 vs 37.0)
    wanted_clusters = [0, 0, 1]
    # All three lookups run before anything is asserted.
    found_clusters = [
        kmeans.closest_cluster(X[index], index, M[index])
        for index in (0, 1, 2)
    ]
    for wanted, found in zip(wanted_clusters, found_clusters):
        assert wanted == found

    # Also test whether the distances are set correctly
    assert numpy.array_equal([1.0, 4.0, 37.0], kmeans.distances)

    # Test when all MSEs return None (impossible but still testing behaviour)
    kmeans.centroids = numpy.ones((2, 3))
    kmeans.mask_centroids = [[0, 0, 1], [0, 0, 0]]

    assert 1 == kmeans.closest_cluster(X[0], 0, M[0])

コード例 #12

0

ファイルを表示

ファイル: test_kmeans.py プロジェクト: ibrahim85/kmeans_missing

def test_update():
    """Check update() for the normal case and both empty-cluster strategies.

    The per-cluster point lists have different lengths, so they are stored in
    explicit object arrays: NumPy >= 1.24 refuses to build a ragged array
    unless dtype=object is given.
    """
    # Normal case
    X = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    M = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
    K = 2
    kmeans = KMeans(X, M, K)
    kmeans.data_point_assignments = numpy.array(
        [[0, 1], [2]],
        dtype=object)  #points 0,1 to cluster 0, point 2 to cluster 1
    kmeans.centroids = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
    kmeans.mask_centroids = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]

    new_centroids = [[1.0, 3.5, 0], [7.0, 8.0, 9.0]]
    new_mask_centroids = [[1, 1, 0], [1, 1, 1]]
    kmeans.update()
    assert numpy.array_equal(new_centroids, kmeans.centroids)
    assert numpy.array_equal(new_mask_centroids, kmeans.mask_centroids)

    # Case when one cluster has no points assigned to it - we then randomly re-initialise that cluster
    X = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    M = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
    K = 2
    kmeans = KMeans(X, M, K, 'random')
    kmeans.data_point_assignments = numpy.array(
        [[0, 1, 2], []],
        dtype=object)  #points 0,1,2 to cluster 0, none to cluster 1
    kmeans.centroids = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
    kmeans.mask_centroids = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
    kmeans.mins = [1.0, 2.0, 9.0]
    kmeans.maxs = [7.0, 8.0, 9.0]

    new_centroids = [[4.0, 5.0, 9.0],
                     [6.066531109150288, 6.547726417641815, 9.0]]
    new_mask_centroids = [[1, 1, 1], [1, 1, 1]]

    # Seed the stdlib generator so the re-initialised centroid is reproducible.
    random.seed(0)
    kmeans.update()
    assert numpy.array_equal(new_centroids, kmeans.centroids)
    assert numpy.array_equal(new_mask_centroids, kmeans.mask_centroids)

    # Case when we use the 'singleton' option for empty clusters - reassign point furthest away to the cluster
    # Points 0 and 1 go to cluster 0, 2 to cluster 1 and none to cluster 2.
    # Point 2 is furthest away, so gets reassigned to cluster 2 - making
    # cluster 1 empty. Then point 1 is furthest away and gets reassigned to cluster 1
    X = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    M = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
    K = 3
    kmeans = KMeans(X, M, K, resolve_empty='singleton')
    kmeans.data_point_assignments = numpy.array(
        [[0, 1], [2], []],
        dtype=object
    )  #points 0,1 to cluster 0, 2 to cluster 1, none to cluster 2
    kmeans.cluster_assignments = [0, 0, 1]
    kmeans.centroids = [[1.0, 2.0, 3.0], [15.0, 16.0, 17.0],
                        [500.0, 500.0, 500.0]]
    kmeans.mask_centroids = [[1, 1, 0], [1, 1, 1], [1, 1, 1]]
    # Distance of every point to the centroid of its currently assigned cluster.
    kmeans.distances = numpy.array([
        kmeans.compute_MSE(kmeans.X[0], kmeans.centroids[0], M[0],
                           kmeans.mask_centroids[0]),
        kmeans.compute_MSE(kmeans.X[1], kmeans.centroids[0], M[1],
                           kmeans.mask_centroids[0]),
        kmeans.compute_MSE(kmeans.X[2], kmeans.centroids[1], M[2],
                           kmeans.mask_centroids[1])
    ])
    kmeans.mins = [1.0, 2.0, 9.0]
    kmeans.maxs = [7.0, 8.0, 9.0]

    new_centroids = [[1.0, 2.0, 0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]
    new_data_point_assignments = [[0], [1], [2]]
    new_distances = [0, 0, 0]

    kmeans.update()

    assert new_data_point_assignments == list(kmeans.data_point_assignments)
    assert numpy.array_equal(new_distances, kmeans.distances)
    assert numpy.array_equal(new_centroids, kmeans.centroids)

コード例 #13

0

ファイルを表示

ファイル: test_kmeans.py プロジェクト: ibrahim85/kmeans_missing

def test_cluster():
    """Run cluster() from fixed starting centroids in three scenarios."""

    def run_and_check(X, M, start_centroids, want_centroids, want_clusters,
                      want_points, want_matrix):
        # Every scenario uses K = 2, fully observed starting centroids and
        # all points unassigned (-1) before clustering.
        kmeans = KMeans(X, M, 2)
        kmeans.centroids = start_centroids
        kmeans.mask_centroids = numpy.ones((2, 2))
        kmeans.cluster_assignments = [-1] * len(X)

        kmeans.cluster()
        assert numpy.array_equal(want_centroids, kmeans.centroids)
        assert numpy.array_equal(want_clusters, kmeans.cluster_assignments)
        assert numpy.array_equal(want_points, kmeans.data_point_assignments)
        assert numpy.array_equal(want_matrix, kmeans.clustering_results)

    ### No missing values case.
    # Points 1,2 will first go to cluster 2, and point 3 to cluster 1.
    # Then point 1 will switch to cluster 1.
    run_and_check(
        X=[[2, 5], [7, 5], [2, 3]],
        M=numpy.ones((3, 2)),
        start_centroids=[[2.0, 2.0], [4.0, 5.0]],
        want_centroids=[[2.0, 4.0], [7.0, 5.0]],
        want_clusters=[0, 1, 0],
        want_points=[[0, 2], [1]],
        want_matrix=[[1, 0], [0, 1], [1, 0]],
    )

    ### Missing values case.
    # Points 2,3,4 will first go to cluster 2, and point 1 to cluster 1.
    # Then point 2 will switch to cluster 1.
    run_and_check(
        X=[[2, 5], [3, -1], [10, 1], [-1, 2]],
        M=[[1, 1], [1, 0], [1, 1], [0, 1]],
        start_centroids=[[2.0, 7.0], [3.0, 2.0]],
        want_centroids=[[2.5, 5.0], [10.0, 1.5]],
        want_clusters=[0, 0, 1, 1],
        want_points=[[0, 1], [2, 3]],
        want_matrix=[[1, 0], [1, 0], [0, 1], [0, 1]],
    )

    ### Cluster with 0 coordinate.
    # Cluster 1 gets points 1 and 2, cluster 2 gets 3 and 4.
    run_and_check(
        X=[[2, 5], [3, -1], [-1, 1], [-1, 2]],
        M=[[1, 1], [1, 0], [0, 1], [0, 1]],
        start_centroids=[[2.0, 7.0], [4.0, 4.0]],
        want_centroids=[[2.5, 5.0], [0, 1.5]],
        want_clusters=[0, 0, 1, 1],
        want_points=[[0, 1], [2, 3]],
        want_matrix=[[1, 0], [1, 0], [0, 1], [0, 1]],
    )

コード例 #14

0

ファイルを表示

ファイル: test_kmeans.py プロジェクト: ibrahim85/kmeans_missing

def test_init():
    """Check the constructor's input validation and the index sets it builds."""
    # Test getting an exception when X and M are different sizes, X is not a 2D array, and K <= 0
    M = numpy.ones((2, 3))
    rejected_inputs = [
        (numpy.ones(3),
         "Input matrix X is not a two-dimensional array, but instead 1-dimensional."),
        (numpy.ones((4, 3, 2)),
         "Input matrix X is not a two-dimensional array, but instead 3-dimensional."),
        (numpy.ones((3, 2)),
         "Input matrix X is not of the same size as the indicator matrix M: (3, 2) and (2, 3) respectively."),
        # Only K = 0 is wrong with this last, correctly shaped X.
        (numpy.ones((2, 3)), "K should be greater than 0."),
    ]
    for bad_X, message in rejected_inputs:
        with pytest.raises(AssertionError) as error:
            KMeans(bad_X, M, 0)
        assert str(error.value) == message

    # Test getting an exception if a row or column is entirely unknown
    X = numpy.ones((2, 3))
    K = 1
    rejected_masks = [
        ([[1, 1, 1], [0, 0, 0]], "Fully unobserved row in X, row 1."),
        ([[1, 1, 0], [1, 0, 0]], "Fully unobserved column in X, column 2."),
    ]
    for bad_M, message in rejected_masks:
        with pytest.raises(AssertionError) as error:
            KMeans(X, bad_M, K)
        assert str(error.value) == message

    # Test completely observed case
    X = numpy.ones((2, 3))
    kmeans = KMeans(X, numpy.ones((2, 3)), K)
    assert numpy.array_equal([[0, 1, 2], [0, 1, 2]], kmeans.omega_rows)
    assert numpy.array_equal([[0, 1], [0, 1], [0, 1]], kmeans.omega_columns)
    assert kmeans.no_points == 2
    assert kmeans.no_coordinates == 3

    # Test partially observed case
    kmeans = KMeans(X, [[1, 0, 1], [0, 1, 1]], K)
    assert numpy.array_equal([[0, 2], [1, 2]], kmeans.omega_rows)
    assert numpy.array_equal([[0], [1], [0, 1]], kmeans.omega_columns)