Python KMeans 예제들, kmeans_missing.code.kmeans.KMeans Python 예제들

예제 #1

0

파일 보기

파일: test_kmeans.py 프로젝트: ibrahim85/kmeans_missing

def test_initialise():
    """Check the attributes KMeans.initialise() populates for a fixed seed."""
    data = [[1, 2, 3], [4, 5, 6]]
    observed = [[0, 1, 1], [1, 0, 1]]

    model = KMeans(data, observed, 2)
    model.initialise(0)

    # Per-coordinate bounds expected for this fixture.
    assert numpy.array_equal([4.0, 2.0, 3.0], model.mins)
    assert numpy.array_equal([4.0, 2.0, 6.0], model.maxs)

    # Every centroid coordinate is flagged in the mask and no point has
    # been given a cluster yet.
    assert numpy.array_equal([[1, 1, 1], [1, 1, 1]], model.mask_centroids)
    assert numpy.array_equal([-1, -1], model.cluster_assignments)

    # Centroid values expected with seed 0.
    seeded_centroids = [
        [4.0, 2.0, 4.2617147424925346],
        [4.0, 2.0, 4.2148024123512426],
    ]
    assert numpy.array_equal(seeded_centroids, model.centroids)

    assert numpy.array_equal([0, 0], model.distances)

예제 #2

0

파일 보기

파일: test_kmeans.py 프로젝트: ibrahim85/kmeans_missing

def test_assignment():
    """Check that assignment() updates the assignments and reports changes.

    The expected data_point_assignments value is ragged, so it is compared
    row by row: numpy.array_equal() cannot build an array from ragged
    nested lists on recent NumPy releases and would just return False.
    """
    X = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    M = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
    K = 2
    kmeans = KMeans(X, M, K)

    expected_cluster_assignments = [0, 0, 1]
    expected_data_point_assignments = [[0, 1], [2]]

    # (initial cluster assignments, expected return value of assignment())
    scenarios = [
        # Test change - new closest clusters are [0,0,1] - see test_closest_cluster
        ([0, 1, 1], True),
        # Test no change
        ([0, 0, 1], False),
    ]
    for initial_assignments, expected_change in scenarios:
        kmeans.centroids = [[1.0, 3.0, 1.0], [2.0, 1.0, 3.0]]
        kmeans.mask_centroids = [[0, 1, 1], [1, 1, 0]]
        kmeans.cluster_assignments = initial_assignments

        change = kmeans.assignment()
        assert change == expected_change
        assert numpy.array_equal(expected_cluster_assignments,
                                 kmeans.cluster_assignments)
        assert (len(expected_data_point_assignments) ==
                len(kmeans.data_point_assignments))
        for expected_row, row in zip(expected_data_point_assignments,
                                     kmeans.data_point_assignments):
            assert numpy.array_equal(expected_row, row)

예제 #3

0

파일 보기

파일: test_kmeans.py 프로젝트: ThomasBrouwer/kmeans_missing

def test_initialise():
    """Verify each attribute that initialise(seed) is expected to set."""
    points = [[1,2,3],[4,5,6]]
    mask = [[0,1,1],[1,0,1]]
    n_clusters, seed = 2, 0

    kmeans = KMeans(points,mask,n_clusters)
    kmeans.initialise(seed)

    # (attribute name, expected value) pairs, checked in this order.
    expectations = [
        ('mins', [4.0,2.0,3.0]),
        ('maxs', [4.0,2.0,6.0]),
        ('mask_centroids', [[1,1,1],[1,1,1]]),
        ('cluster_assignments', [-1,-1]),
        ('centroids', [[4.0,2.0,4.2617147424925346],[4.0,2.0,4.2148024123512426]]),
        ('distances', [0,0]),
    ]
    for attribute, expected in expectations:
        assert numpy.array_equal(expected,getattr(kmeans,attribute))

예제 #4

0

파일 보기

파일: nmtf_icm.py 프로젝트: lizhangzhan/BNMTF

    def initialise(self,init_S='random',init_FG='random'):
        """Set the starting values of S, F, G and tau.

        init_S:  'exp' leaves S at 1/lambdaS; 'random' then overwrites each
                 entry with exponential_draw(lambdaS[k,l]).
        init_FG: 'exp' leaves F and G at 1/lambdaF and 1/lambdaG; 'random'
                 overwrites each entry with an exponential draw; 'kmeans'
                 replaces them by the KMeans clustering results (+0.2),
                 computed on R for F and on the transpose of R for G.

        Raises AssertionError when either option is not recognised.
        """
        assert init_S in ['random','exp'], "Unknown initialisation option for S: %s. Should be 'random' or 'exp'." % init_S
        assert init_FG in ['random','exp','kmeans'], "Unknown initialisation option for F,G: %s. Should be 'random', 'exp', or 'kmeans'." % init_FG

        self.S = 1./self.lambdaS
        if init_S == 'random':
            for k,l in itertools.product(range(self.K),range(self.L)):
                self.S[k,l] = exponential_draw(self.lambdaS[k,l])

        self.F, self.G = 1./self.lambdaF, 1./self.lambdaG
        if init_FG == 'random':
            for i,k in itertools.product(range(self.I),range(self.K)):
                self.F[i,k] = exponential_draw(self.lambdaF[i,k])
            for j,l in itertools.product(range(self.J),range(self.L)):
                self.G[j,l] = exponential_draw(self.lambdaG[j,l])
        elif init_FG == 'kmeans':
            # print(...) with a single argument behaves the same on Python 2 and 3.
            print("Initialising F using KMeans.")
            kmeans_F = KMeans(self.R,self.M,self.K)
            kmeans_F.initialise()
            kmeans_F.cluster()
            self.F = kmeans_F.clustering_results + 0.2

            print("Initialising G using KMeans.")
            kmeans_G = KMeans(self.R.T,self.M.T,self.L)
            kmeans_G.initialise()
            kmeans_G.cluster()
            self.G = kmeans_G.clustering_results + 0.2

        self.tau = gamma_mode(self.alpha_s(), self.beta_s())

예제 #5

0

파일 보기

파일: test_kmeans.py 프로젝트: ThomasBrouwer/kmeans_missing

def test_assignment():
    """Check assignment() for a run that changes assignments and one that does not.

    data_point_assignments is compared row by row because the expected
    value is ragged: numpy.array_equal() cannot turn ragged nested lists
    into an array on recent NumPy releases and would just return False.
    """
    X = numpy.array([[1.0,2.0,3.0],[4.0,5.0,6.0],[7.0,8.0,9.0]])
    M = numpy.array([[1,1,0],[0,1,0],[1,1,1]])
    K = 2
    kmeans = KMeans(X,M,K)

    def check_assignment(cluster_assignments,expected_change):
        # Reset the state, run one assignment step and check the outcome.
        kmeans.centroids = [[1.0,3.0,1.0],[2.0,1.0,3.0]]
        kmeans.mask_centroids = [[0,1,1],[1,1,0]]
        kmeans.cluster_assignments = cluster_assignments

        change = kmeans.assignment()
        assert change == expected_change
        assert numpy.array_equal([0,0,1],kmeans.cluster_assignments)

        expected_data_point_assignments = [[0,1],[2]]
        assert len(expected_data_point_assignments) == len(kmeans.data_point_assignments)
        for expected_row,row in zip(expected_data_point_assignments,kmeans.data_point_assignments):
            assert numpy.array_equal(expected_row,row)

    # Test change - new closest clusters are [0,0,1] - see test_closest_cluster
    check_assignment([0,1,1],True)

    # Test no change
    check_assignment([0,0,1],False)

예제 #6

0

파일 보기

파일: test_kmeans.py 프로젝트: ibrahim85/kmeans_missing

def test_create_matrix():
    """Check the matrix create_matrix() builds from the cluster assignments."""
    data = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    observed = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])

    model = KMeans(data, observed, 2)
    model.cluster_assignments = numpy.array([1, 0, 1])
    model.create_matrix()

    # Row i is expected to hold a 1 in the column of point i's cluster.
    indicator_rows = [[0, 1], [1, 0], [0, 1]]
    assert numpy.array_equal(indicator_rows, model.clustering_results)

예제 #7

0

파일 보기

파일: test_kmeans.py 프로젝트: ThomasBrouwer/kmeans_missing

def test_create_matrix():
    """create_matrix() must turn cluster_assignments into clustering_results."""
    points = numpy.array([[1.0,2.0,3.0],[4.0,5.0,6.0],[7.0,8.0,9.0]])
    mask = numpy.array([[1,1,0],[0,1,0],[1,1,1]])
    n_clusters = 2

    kmeans = KMeans(points,mask,n_clusters)
    assignments = numpy.array([1,0,1])
    kmeans.cluster_assignments = assignments
    kmeans.create_matrix()

    # Expected: one row per point, with a 1 in the assigned cluster's column.
    produced = kmeans.clustering_results
    wanted = [[0,1],[1,0],[0,1]]
    assert numpy.array_equal(wanted,produced)

예제 #8

0

파일 보기

파일: test_kmeans.py 프로젝트: ThomasBrouwer/kmeans_missing

def test_random_cluster_centroid():
    """Check the centroid drawn by random_cluster_centroid() after random.seed(0)."""
    points = [[1,2,3],[4,5,6]]
    mask = [[0,1,1],[1,0,1]]

    kmeans = KMeans(points,mask,2)
    # Set the bounds by hand instead of running initialise().
    kmeans.mins, kmeans.maxs = [4.0,2.0,3.0], [4.0,2.0,6.0]

    # Seed right before the draw so the expected value is reproducible.
    random.seed(0)
    drawn = kmeans.random_cluster_centroid()
    assert numpy.array_equal([4.0,2.0,4.2617147424925346],drawn)

예제 #9

0

파일 보기

파일: test_kmeans.py 프로젝트: ibrahim85/kmeans_missing

def test_random_cluster_centroid():
    """random_cluster_centroid() must return the known value for seed 0."""
    data = [[1, 2, 3], [4, 5, 6]]
    observed = [[0, 1, 1], [1, 0, 1]]
    n_clusters = 2

    model = KMeans(data, observed, n_clusters)
    # The bounds are assigned directly; initialise() is not called here.
    model.mins = [4.0, 2.0, 3.0]
    model.maxs = [4.0, 2.0, 6.0]

    wanted = [4.0, 2.0, 4.2617147424925346]

    # Fix the random state immediately before drawing.
    random.seed(0)
    assert numpy.array_equal(wanted, model.random_cluster_centroid())

예제 #10

0

파일 보기

파일: bnmtf_vb_optimised.py 프로젝트: ThomasBrouwer/BNMTF

 def initialise(self,init_S='random',init_FG='random',tauFSG=None):
     """Set the starting values of the tau and mu parameters, then run the updates.

     init_S:  'exp' leaves muS at 1/lambdaS; 'random' then overwrites each
              entry with exponential_draw(lambdaS[k,l]).
     init_FG: 'exp' leaves muF and muG at 1/lambdaF and 1/lambdaG; 'random'
              overwrites each entry with an exponential draw; 'kmeans'
              replaces them by the KMeans clustering results, computed on R
              for muF and on the transpose of R for muG.
     tauFSG:  optional dict with any of the keys 'tauF', 'tauS', 'tauG';
              a missing key (or tauFSG=None) gives an all-ones matrix.

     Raises AssertionError when init_S or init_FG is not recognised.
     """
     # None replaces the former mutable {} default; passing a dict still works.
     tauFSG = {} if tauFSG is None else tauFSG
     self.tauF = tauFSG['tauF'] if 'tauF' in tauFSG else numpy.ones((self.I,self.K))
     self.tauS = tauFSG['tauS'] if 'tauS' in tauFSG else numpy.ones((self.K,self.L))
     self.tauG = tauFSG['tauG'] if 'tauG' in tauFSG else numpy.ones((self.J,self.L))

     assert init_S in ['exp','random'], "Unrecognised init option for S: %s." % init_S
     self.muS = 1./self.lambdaS
     if init_S == 'random':
         for k,l in itertools.product(range(self.K),range(self.L)):
             self.muS[k,l] = exponential_draw(self.lambdaS[k,l])

     assert init_FG in ['exp','random','kmeans'], "Unrecognised init option for F,G: %s." % init_FG
     self.muF, self.muG = 1./self.lambdaF, 1./self.lambdaG
     if init_FG == 'random':
         for i,k in itertools.product(range(self.I),range(self.K)):
             self.muF[i,k] = exponential_draw(self.lambdaF[i,k])
         for j,l in itertools.product(range(self.J),range(self.L)):
             self.muG[j,l] = exponential_draw(self.lambdaG[j,l])
     elif init_FG == 'kmeans':
         # print(...) with a single argument behaves the same on Python 2 and 3.
         print("Initialising F using KMeans.")
         kmeans_F = KMeans(self.R,self.M,self.K)
         kmeans_F.initialise()
         kmeans_F.cluster()
         self.muF = kmeans_F.clustering_results

         print("Initialising G using KMeans.")
         kmeans_G = KMeans(self.R.T,self.M.T,self.L)
         kmeans_G.initialise()
         kmeans_G.cluster()
         self.muG = kmeans_G.clustering_results

     # Initialise the expectations and variances
     self.expF, self.varF = numpy.zeros((self.I,self.K)), numpy.zeros((self.I,self.K))
     self.expS, self.varS = numpy.zeros((self.K,self.L)), numpy.zeros((self.K,self.L))
     self.expG, self.varG = numpy.zeros((self.J,self.L)), numpy.zeros((self.J,self.L))

     for k in range(self.K):
         self.update_exp_F(k)
     for k,l in itertools.product(range(self.K),range(self.L)):
         self.update_exp_S(k,l)
     for l in range(self.L):
         self.update_exp_G(l)

     # Initialise tau using the updates
     self.update_tau()
     self.update_exp_tau()

예제 #11

0

파일 보기

파일: test_kmeans.py 프로젝트: ThomasBrouwer/kmeans_missing

def test_find_known_coordinate_values():
    """Check the per-coordinate value lists find_known_coordinate_values() returns.

    The assignments and the expected lists are ragged. Recent NumPy releases
    refuse to build an array from ragged nested lists unless dtype=object is
    given, and numpy.array_equal() returns False when it cannot build the
    arrays, so the fixture uses dtype=object and results are compared row
    by row.
    """
    def rows_equal(expected,actual):
        # Same number of rows, and every row equal element-wise.
        return len(expected) == len(actual) and all(
            numpy.array_equal(expected_row,row) for expected_row,row in zip(expected,actual))

    # Normal test case
    X = numpy.array([[1.0,2.0,3.0],[4.0,5.0,6.0],[7.0,8.0,9.0]])
    M = numpy.array([[1,1,0],[0,1,0],[1,1,1]])
    K = 2
    kmeans = KMeans(X,M,K)
    kmeans.data_point_assignments = numpy.array([[0,1],[2]],dtype=object) #points 0,1 to cluster 0, point 2 to cluster 1

    expected_lists_known_coordinate_values_0 = [[1.0],[2.0,5.0],[]]
    expected_lists_known_coordinate_values_1 = [[7.0],[8.0],[9.0]]
    lists_known_coordinate_values_0 = kmeans.find_known_coordinate_values(0)
    lists_known_coordinate_values_1 = kmeans.find_known_coordinate_values(1)

    assert rows_equal(expected_lists_known_coordinate_values_0,lists_known_coordinate_values_0)
    assert rows_equal(expected_lists_known_coordinate_values_1,lists_known_coordinate_values_1)

    # Cluster without any points
    X = numpy.array([[1.0,2.0,3.0],[4.0,5.0,6.0],[7.0,8.0,9.0]])
    M = numpy.array([[1,1,0],[0,1,0],[1,1,1]])
    K = 2
    kmeans = KMeans(X,M,K)
    kmeans.data_point_assignments = numpy.array([[0,1,2],[]],dtype=object) #points 0,1,2 to cluster 0, none to cluster 1

    expected_lists_known_coordinate_values_0 = [[1.0,7.0],[2.0,5.0,8.0],[9.0]]
    lists_known_coordinate_values_0 = kmeans.find_known_coordinate_values(0)
    lists_known_coordinate_values_1 = kmeans.find_known_coordinate_values(1)

    assert rows_equal(expected_lists_known_coordinate_values_0,lists_known_coordinate_values_0)
    # The empty cluster is expected to give None rather than a list.
    assert lists_known_coordinate_values_1 is None

예제 #12

0

파일 보기

파일: test_kmeans.py 프로젝트: ibrahim85/kmeans_missing

def test_compute_MSE():
    """Check compute_MSE() with and without overlapping observed coordinates."""
    # Test case: no overlap
    X = numpy.ones((1, 5))
    M = numpy.ones((1, 5))
    K = 1

    x1 = [1.0, 2.0, 3.0, 4.0, 5.0]
    x2 = [5.0, 4.5, 3.0, 2.5, 1.0]
    mask1 = [0, 1, 1, 0, 0]
    mask2 = [1, 0, 0, 0, 1]
    kmeans = KMeans(X, M, K)

    # No coordinate is set in both masks, so no MSE can be computed.
    output = kmeans.compute_MSE(x1, x2, mask1, mask2)
    assert output is None

    # Overlap
    mask1 = [1, 1, 1, 0, 1]
    mask2 = [0, 1, 1, 1, 1]

    # Coordinates 1, 2 and 4 are set in both masks; the differences there
    # are -2.5, 0.0 and 4.0.
    expected_output = (2.5**2 + 4.0**2) / 3.0
    output = kmeans.compute_MSE(x1, x2, mask1, mask2)
    assert expected_output == output

예제 #13

0

파일 보기

파일: test_kmeans.py 프로젝트: ThomasBrouwer/kmeans_missing

def test_compute_MSE():
    """compute_MSE() must give None without overlap and the mean squared difference otherwise."""
    # Test case: no overlap
    X = numpy.ones((1,5))
    M = numpy.ones((1,5))
    K = 1

    x1 = [1.0,2.0,3.0,4.0,5.0]
    x2 = [5.0,4.5,3.0,2.5,1.0]
    mask1 = [0,1,1,0,0]
    mask2 = [1,0,0,0,1]
    kmeans = KMeans(X,M,K)

    # The two masks share no set coordinate, so None is expected.
    output = kmeans.compute_MSE(x1,x2,mask1,mask2)
    assert output is None

    # Overlap
    mask1 = [1,1,1,0,1]
    mask2 = [0,1,1,1,1]

    # Both masks are set at coordinates 1, 2 and 4, where x1 - x2 is
    # -2.5, 0.0 and 4.0.
    expected_output = ( 2.5**2 + 4.0**2 ) / 3.0
    output = kmeans.compute_MSE(x1,x2,mask1,mask2)
    assert expected_output == output

예제 #14

0

파일 보기

파일: test_kmeans.py 프로젝트: ibrahim85/kmeans_missing

def test_find_point_furthest_away():
    """After closest_cluster() ran for every point, point 2 must be the furthest away."""
    X = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    M = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
    model = KMeans(X, M, 2)

    model.centroids = [[1.0, 3.0, 1.0], [2.0, 1.0, 3.0]]
    model.mask_centroids = [[0, 1, 1], [1, 1, 0]]

    # MSE to cluster 0 vs cluster 1, per point:
    #   point 0: 1.0 vs 1.0 (equal distance)
    #   point 1: 4.0 vs 16.0
    #   point 2: 44.5 vs 37.0
    for index in range(3):
        model.closest_cluster(X[index], index, M[index])

    assert 2 == model.find_point_furthest_away()

예제 #15

0

파일 보기

파일: nmtf_np.py 프로젝트: lizhangzhan/BNMTF

 def initialise(self,init_S='random',init_FG='random',expo_prior=1.):
     """Set the starting values of S, F and G.

     init_S:  'ones' (all ones), 'random' (numpy.random.rand) or
              'exponential' (one exponential_draw(expo_prior) per entry).
     init_FG: the same three options applied to F and G, or 'kmeans', which
              sets F and G to the KMeans clustering results (+0.2) computed
              on R and on the transpose of R respectively.
     expo_prior: argument passed to exponential_draw for the 'exponential'
              options.

     Raises AssertionError when init_S or init_FG is not recognised.
     """
     assert init_S in ['ones','random','exponential'], "Unrecognised init option for S: %s." % init_S
     assert init_FG in ['ones','random','exponential','kmeans'], "Unrecognised init option for F,G: %s." % init_FG

     if init_S == 'ones':
         self.S = numpy.ones((self.K,self.L))
     elif init_S == 'random':
         self.S = numpy.random.rand(self.K,self.L)
     elif init_S == 'exponential':
         self.S = numpy.empty((self.K,self.L))
         for k,l in itertools.product(range(self.K),range(self.L)):
             self.S[k,l] = exponential_draw(expo_prior)

     if init_FG == 'ones':
         self.F = numpy.ones((self.I,self.K))
         self.G = numpy.ones((self.J,self.L))
     elif init_FG == 'random':
         self.F = numpy.random.rand(self.I,self.K)
         self.G = numpy.random.rand(self.J,self.L)
     elif init_FG == 'exponential':
         self.F = numpy.empty((self.I,self.K))
         self.G = numpy.empty((self.J,self.L))
         for i,k in itertools.product(range(self.I),range(self.K)):
             self.F[i,k] = exponential_draw(expo_prior)
         for j,l in itertools.product(range(self.J),range(self.L)):
             self.G[j,l] = exponential_draw(expo_prior)
     elif init_FG == 'kmeans':
         # print(...) with a single argument behaves the same on Python 2 and 3.
         print("Initialising F using KMeans.")
         kmeans_F = KMeans(self.R,self.M,self.K)
         kmeans_F.initialise()
         kmeans_F.cluster()
         self.F = kmeans_F.clustering_results + 0.2

         print("Initialising G using KMeans.")
         kmeans_G = KMeans(self.R.T,self.M.T,self.L)
         kmeans_G.initialise()
         kmeans_G.cluster()
         self.G = kmeans_G.clustering_results + 0.2

예제 #16

0

파일 보기

파일: test_kmeans.py 프로젝트: ThomasBrouwer/kmeans_missing

def test_find_point_furthest_away():
    """find_point_furthest_away() must return 2 once the distances are recorded."""
    X = numpy.array([[1.0,2.0,3.0],[4.0,5.0,6.0],[7.0,8.0,9.0]])
    M = numpy.array([[1,1,0],[0,1,0],[1,1,1]])
    n_clusters = 2
    kmeans = KMeans(X,M,n_clusters)

    kmeans.centroids, kmeans.mask_centroids = [[1.0,3.0,1.0],[2.0,1.0,3.0]], [[0,1,1],[1,1,0]]

    # Run closest_cluster() for each point in turn. MSE for cluster 0 vs 1:
    # point 0 -> 1.0 vs 1.0 (equal), point 1 -> 4.0 vs 16.0, point 2 -> 44.5 vs 37.0
    for point_index,(row,mask_row) in enumerate(zip(X,M)):
        kmeans.closest_cluster(row,point_index,mask_row)

    reported = kmeans.find_point_furthest_away()
    assert 2 == reported

예제 #17

0

파일 보기

파일: test_kmeans.py 프로젝트: ibrahim85/kmeans_missing

def test_closest_cluster():
    """Check the cluster chosen by closest_cluster() and the distances it records."""
    X = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    M = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
    model = KMeans(X, M, 2)

    model.centroids = [[1.0, 3.0, 1.0], [2.0, 1.0, 3.0]]
    model.mask_centroids = [[0, 1, 1], [1, 1, 0]]

    # Expected cluster per point, with the MSE to cluster 0 vs cluster 1:
    #   point 0 -> 0 (1.0 vs 1.0, equal distance)
    #   point 1 -> 0 (4.0 vs 16.0)
    #   point 2 -> 1 (44.5 vs 37.0)
    wanted_clusters = [0, 0, 1]
    found_clusters = [
        model.closest_cluster(X[index], index, M[index]) for index in range(3)
    ]
    for wanted, found in zip(wanted_clusters, found_clusters):
        assert wanted == found

    # Also test whether the distances are set correctly
    assert numpy.array_equal([1.0, 4.0, 37.0], model.distances)

    # Test when all MSEs return None (impossible but still testing behaviour)
    model.centroids = numpy.ones((2, 3))
    model.mask_centroids = [[0, 0, 1], [0, 0, 0]]

    assert 1 == model.closest_cluster(X[0], 0, M[0])

예제 #18

0

파일 보기

파일: test_kmeans.py 프로젝트: ThomasBrouwer/kmeans_missing

def test_closest_cluster():
    """closest_cluster() must pick the expected cluster and store each point's distance."""
    X = numpy.array([[1.0,2.0,3.0],[4.0,5.0,6.0],[7.0,8.0,9.0]])
    M = numpy.array([[1,1,0],[0,1,0],[1,1,1]])
    n_clusters = 2
    kmeans = KMeans(X,M,n_clusters)

    kmeans.centroids, kmeans.mask_centroids = [[1.0,3.0,1.0],[2.0,1.0,3.0]], [[0,1,1],[1,1,0]]

    # Call closest_cluster() for all three points before checking anything.
    results = []
    for point_index,(row,mask_row) in enumerate(zip(X,M)):
        results.append(kmeans.closest_cluster(row,point_index,mask_row))

    assert 0 == results[0] # MSE = 1.0 vs 1.0, equal distance
    assert 0 == results[1] # MSE = 4.0 vs 16.0
    assert 1 == results[2] # MSE = 44.5 vs 37.0

    # Also test whether the distances are set correctly
    recorded = kmeans.distances
    assert numpy.array_equal([1.0,4.0,37.0],recorded)

    # Test when all MSEs return None (impossible but still testing behaviour)
    kmeans.centroids, kmeans.mask_centroids = numpy.ones((2,3)), [[0,0,1],[0,0,0]]

    fallback = kmeans.closest_cluster(X[0],0,M[0])
    assert 1 == fallback

예제 #19

0

파일 보기

파일: test_kmeans.py 프로젝트: ibrahim85/kmeans_missing

def test_find_known_coordinate_values():
    """Check find_known_coordinate_values() for a normal and an empty cluster.

    Both the assignments and the expected value lists are ragged. Recent
    NumPy releases only build an array from ragged nested lists when
    dtype=object is given, and numpy.array_equal() returns False when it
    cannot build the arrays, so results are compared one row at a time.
    """
    def rows_equal(expected, actual):
        # Same number of rows, and every row equal element-wise.
        return len(expected) == len(actual) and all(
            numpy.array_equal(expected_row, row)
            for expected_row, row in zip(expected, actual))

    # Normal test case
    X = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    M = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
    K = 2
    kmeans = KMeans(X, M, K)
    kmeans.data_point_assignments = numpy.array(
        [[0, 1], [2]],
        dtype=object)  #points 0,1 to cluster 0, point 2 to cluster 1

    expected_lists_known_coordinate_values_0 = [[1.0], [2.0, 5.0], []]
    expected_lists_known_coordinate_values_1 = [[7.0], [8.0], [9.0]]
    lists_known_coordinate_values_0 = kmeans.find_known_coordinate_values(0)
    lists_known_coordinate_values_1 = kmeans.find_known_coordinate_values(1)

    assert rows_equal(expected_lists_known_coordinate_values_0,
                      lists_known_coordinate_values_0)
    assert rows_equal(expected_lists_known_coordinate_values_1,
                      lists_known_coordinate_values_1)

    # Cluster without any points
    X = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    M = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
    K = 2
    kmeans = KMeans(X, M, K)
    kmeans.data_point_assignments = numpy.array(
        [[0, 1, 2], []],
        dtype=object)  #points 0,1,2 to cluster 0, none to cluster 1

    expected_lists_known_coordinate_values_0 = [[1.0, 7.0], [2.0, 5.0, 8.0],
                                                [9.0]]
    lists_known_coordinate_values_0 = kmeans.find_known_coordinate_values(0)
    lists_known_coordinate_values_1 = kmeans.find_known_coordinate_values(1)

    assert rows_equal(expected_lists_known_coordinate_values_0,
                      lists_known_coordinate_values_0)
    # The empty cluster is expected to give None rather than a list.
    assert lists_known_coordinate_values_1 is None

예제 #20

0

파일 보기

    def initialise(self, init_S='random', init_FG='random', tauFSG=None):
        """Set the starting values of the tau and mu parameters, then run the updates.

        init_S:  'exp' leaves muS at 1/lambdaS; 'random' then overwrites
                 each entry with exponential_draw(lambdaS[k, l]).
        init_FG: 'exp' leaves muF and muG at 1/lambdaF and 1/lambdaG;
                 'random' overwrites each entry with an exponential draw;
                 'kmeans' replaces them by the KMeans clustering results,
                 computed on R for muF and on the transpose of R for muG.
        tauFSG:  optional dict with any of the keys 'tauF', 'tauS', 'tauG';
                 a missing key (or tauFSG=None) gives an all-ones matrix.

        Raises AssertionError when init_S or init_FG is not recognised.
        """
        # None replaces the former mutable {} default; passing a dict still
        # works.
        if tauFSG is None:
            tauFSG = {}
        self.tauF = tauFSG['tauF'] if 'tauF' in tauFSG else numpy.ones(
            (self.I, self.K))
        self.tauS = tauFSG['tauS'] if 'tauS' in tauFSG else numpy.ones(
            (self.K, self.L))
        self.tauG = tauFSG['tauG'] if 'tauG' in tauFSG else numpy.ones(
            (self.J, self.L))

        assert init_S in ['exp', 'random'
                          ], "Unrecognised init option for S: %s." % init_S
        self.muS = 1. / self.lambdaS
        if init_S == 'random':
            for k, l in itertools.product(range(self.K), range(self.L)):
                self.muS[k, l] = exponential_draw(self.lambdaS[k, l])

        assert init_FG in ['exp', 'random', 'kmeans'
                           ], "Unrecognised init option for F,G: %s." % init_FG
        self.muF, self.muG = 1. / self.lambdaF, 1. / self.lambdaG
        if init_FG == 'random':
            for i, k in itertools.product(range(self.I), range(self.K)):
                self.muF[i, k] = exponential_draw(self.lambdaF[i, k])
            for j, l in itertools.product(range(self.J), range(self.L)):
                self.muG[j, l] = exponential_draw(self.lambdaG[j, l])
        elif init_FG == 'kmeans':
            # print(...) with a single argument behaves the same on
            # Python 2 and 3.
            print("Initialising F using KMeans.")
            kmeans_F = KMeans(self.R, self.M, self.K)
            kmeans_F.initialise()
            kmeans_F.cluster()
            self.muF = kmeans_F.clustering_results

            print("Initialising G using KMeans.")
            kmeans_G = KMeans(self.R.T, self.M.T, self.L)
            kmeans_G.initialise()
            kmeans_G.cluster()
            self.muG = kmeans_G.clustering_results

        # Initialise the expectations and variances
        self.expF, self.varF = numpy.zeros((self.I, self.K)), numpy.zeros(
            (self.I, self.K))
        self.expS, self.varS = numpy.zeros((self.K, self.L)), numpy.zeros(
            (self.K, self.L))
        self.expG, self.varG = numpy.zeros((self.J, self.L)), numpy.zeros(
            (self.J, self.L))

        for k in range(self.K):
            self.update_exp_F(k)
        for k, l in itertools.product(range(self.K), range(self.L)):
            self.update_exp_S(k, l)
        for l in range(self.L):
            self.update_exp_G(l)

        # Initialise tau using the updates
        self.update_tau()
        self.update_exp_tau()

예제 #21

0

파일 보기

    def initialise(self, init_S='random', init_FG='random'):
        """Set the starting values of S, F, G and tau.

        init_S:  'exp' leaves S at 1/lambdaS; 'random' then overwrites each
                 entry with exponential_draw(lambdaS[k, l]).
        init_FG: 'exp' leaves F and G at 1/lambdaF and 1/lambdaG; 'random'
                 overwrites each entry with an exponential draw; 'kmeans'
                 replaces them by the KMeans clustering results (+0.2),
                 computed on R for F and on the transpose of R for G.

        Raises AssertionError when either option is not recognised.
        """
        assert init_S in [
            'random', 'exp'
        ], "Unknown initialisation option for S: %s. Should be 'random' or 'exp'." % init_S
        assert init_FG in [
            'random', 'exp', 'kmeans'
        ], "Unknown initialisation option for F,G: %s. Should be 'random', 'exp', or 'kmeans'." % init_FG

        self.S = 1. / self.lambdaS
        if init_S == 'random':
            for k, l in itertools.product(range(self.K), range(self.L)):
                self.S[k, l] = exponential_draw(self.lambdaS[k, l])

        self.F, self.G = 1. / self.lambdaF, 1. / self.lambdaG
        if init_FG == 'random':
            for i, k in itertools.product(range(self.I), range(self.K)):
                self.F[i, k] = exponential_draw(self.lambdaF[i, k])
            for j, l in itertools.product(range(self.J), range(self.L)):
                self.G[j, l] = exponential_draw(self.lambdaG[j, l])
        elif init_FG == 'kmeans':
            # print(...) with a single argument behaves the same on
            # Python 2 and 3.
            print("Initialising F using KMeans.")
            kmeans_F = KMeans(self.R, self.M, self.K)
            kmeans_F.initialise()
            kmeans_F.cluster()
            self.F = kmeans_F.clustering_results + 0.2

            print("Initialising G using KMeans.")
            kmeans_G = KMeans(self.R.T, self.M.T, self.L)
            kmeans_G.initialise()
            kmeans_G.cluster()
            self.G = kmeans_G.clustering_results + 0.2

        self.tau = gamma_mode(self.alpha_s(), self.beta_s())

예제 #22

0

파일 보기

파일: cluster_rows_columns.py 프로젝트: mikimaus78/NMTF_drug_sensitivity_prediction

def cluster(R,M,K):
    """Run KMeans on R with mask M and K clusters and return its clustering_results."""
    model = KMeans(R,M,K)
    # initialise() has to run before cluster(), as in the other call sites.
    model.initialise()
    model.cluster()
    results = model.clustering_results
    return results

예제 #23

0

파일 보기

파일: test_kmeans.py 프로젝트: ibrahim85/kmeans_missing

def test_init():
    """Check the KMeans constructor's input validation and the index lists it builds.

    The index lists are compared row by row because omega_columns is ragged
    in the partially observed case: numpy.array_equal() cannot build an
    array from ragged nested lists on recent NumPy releases and would just
    return False.
    """
    def assert_rejected(X, M, K, message):
        # Constructing KMeans must fail with exactly this assertion message.
        with pytest.raises(AssertionError) as error:
            KMeans(X, M, K)
        assert str(error.value) == message

    def rows_equal(expected, actual):
        # Same number of rows, and every row equal element-wise.
        return len(expected) == len(actual) and all(
            numpy.array_equal(expected_row, row)
            for expected_row, row in zip(expected, actual))

    # Test getting an exception when X and M are different sizes, X is not a 2D array, and K <= 0
    M = numpy.ones((2, 3))
    K = 0
    assert_rejected(
        numpy.ones(3), M, K,
        "Input matrix X is not a two-dimensional array, but instead 1-dimensional."
    )
    assert_rejected(
        numpy.ones((4, 3, 2)), M, K,
        "Input matrix X is not a two-dimensional array, but instead 3-dimensional."
    )
    assert_rejected(
        numpy.ones((3, 2)), M, K,
        "Input matrix X is not of the same size as the indicator matrix M: (3, 2) and (2, 3) respectively."
    )
    assert_rejected(numpy.ones((2, 3)), M, 0, "K should be greater than 0.")

    # Test getting an exception if a row or column is entirely unknown
    X = numpy.ones((2, 3))
    M1 = [[1, 1, 1], [0, 0, 0]]
    M2 = [[1, 1, 0], [1, 0, 0]]
    K = 1

    assert_rejected(X, M1, K, "Fully unobserved row in X, row 1.")
    assert_rejected(X, M2, K, "Fully unobserved column in X, column 2.")

    # Test completely observed case
    X = numpy.ones((2, 3))
    M = numpy.ones((2, 3))

    omega_rows = [[0, 1, 2], [0, 1, 2]]
    omega_columns = [[0, 1], [0, 1], [0, 1]]

    kmeans = KMeans(X, M, K)
    assert rows_equal(omega_rows, kmeans.omega_rows)
    assert rows_equal(omega_columns, kmeans.omega_columns)
    assert kmeans.no_points == 2
    assert kmeans.no_coordinates == 3

    # Test partially observed case
    M = [[1, 0, 1], [0, 1, 1]]

    omega_rows = [[0, 2], [1, 2]]
    omega_columns = [[0], [1], [0, 1]]  # ragged

    kmeans = KMeans(X, M, K)
    assert rows_equal(omega_rows, kmeans.omega_rows)
    assert rows_equal(omega_columns, kmeans.omega_columns)

예제 #24

0

파일 보기

파일: test_kmeans.py 프로젝트: ibrahim85/kmeans_missing

def test_cluster():
    """Run cluster() from fixed starting centroids and check the final state.

    Three scenarios are covered: no missing values, missing values, and a
    cluster with a 0 coordinate. data_point_assignments is compared row by
    row because it is ragged in the first scenario: numpy.array_equal()
    cannot build an array from ragged nested lists on recent NumPy
    releases and would just return False.
    """
    def rows_equal(expected, actual):
        # Same number of rows, and every row equal element-wise.
        return len(expected) == len(actual) and all(
            numpy.array_equal(expected_row, row)
            for expected_row, row in zip(expected, actual))

    def run_scenario(X, M, start_centroids, expected_centroids,
                     expected_cluster_assignments,
                     expected_data_point_assignments,
                     expected_clustering_results):
        # Every scenario uses K = 2 and starts with all points unassigned.
        K = 2
        kmeans = KMeans(X, M, K)

        kmeans.centroids = start_centroids
        kmeans.mask_centroids = numpy.ones((2, 2))
        kmeans.cluster_assignments = [-1] * len(X)

        kmeans.cluster()
        assert numpy.array_equal(expected_centroids, kmeans.centroids)
        assert numpy.array_equal(expected_cluster_assignments,
                                 kmeans.cluster_assignments)
        assert rows_equal(expected_data_point_assignments,
                          kmeans.data_point_assignments)
        assert numpy.array_equal(expected_clustering_results,
                                 kmeans.clustering_results)

    ### No missing values case.
    # Points 1,2 will first go to cluster 2, and point 3 to cluster 1.
    # Then point 1 will switch to cluster 1.
    run_scenario(
        X=[[2, 5], [7, 5], [2, 3]],
        M=numpy.ones((3, 2)),
        start_centroids=[[2.0, 2.0], [4.0, 5.0]],
        expected_centroids=[[2.0, 4.0], [7.0, 5.0]],
        expected_cluster_assignments=[0, 1, 0],
        expected_data_point_assignments=[[0, 2], [1]],
        expected_clustering_results=[[1, 0], [0, 1], [1, 0]])

    ### Missing values case.
    # Points 2,3,4 will first go to cluster 2, and point 1 to cluster 1.
    # Then point 2 will switch to cluster 1.
    run_scenario(
        X=[[2, 5], [3, -1], [10, 1], [-1, 2]],
        M=[[1, 1], [1, 0], [1, 1], [0, 1]],
        start_centroids=[[2.0, 7.0], [3.0, 2.0]],
        expected_centroids=[[2.5, 5.0], [10.0, 1.5]],
        expected_cluster_assignments=[0, 0, 1, 1],
        expected_data_point_assignments=[[0, 1], [2, 3]],
        expected_clustering_results=[[1, 0], [1, 0], [0, 1], [0, 1]])

    ### Cluster with 0 coordinate.
    # Cluster 1 gets points 1 and 2, cluster 2 gets 3 and 4.
    run_scenario(
        X=[[2, 5], [3, -1], [-1, 1], [-1, 2]],
        M=[[1, 1], [1, 0], [0, 1], [0, 1]],
        start_centroids=[[2.0, 7.0], [4.0, 4.0]],
        expected_centroids=[[2.5, 5.0], [0, 1.5]],
        expected_cluster_assignments=[0, 0, 1, 1],
        expected_data_point_assignments=[[0, 1], [2, 3]],
        expected_clustering_results=[[1, 0], [1, 0], [0, 1], [0, 1]])

예제 #25

0

파일 보기

파일: test_kmeans.py 프로젝트: ibrahim85/kmeans_missing

def test_update():
    """Exercise KMeans.update() for the three empty-cluster strategies.

    Three scenarios are covered:

    1. Normal case: every cluster owns at least one point, so each centroid
       coordinate is recomputed from the points assigned to the cluster.
    2. 'random': a cluster without points gets a fresh centroid; the draw is
       made reproducible with ``random.seed(0)`` and bounded by the
       ``mins``/``maxs`` set on the instance.
    3. 'singleton': a cluster without points takes over the point that is
       currently furthest from its own centroid.

    The per-cluster index lists have different lengths, so they are stored
    in a 1-D object array. ``dtype=object`` is passed explicitly because
    recent NumPy releases refuse to build an array from ragged nested
    sequences without it (older releases produced the same object array
    implicitly).
    """
    # Normal case
    X = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    M = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
    K = 2
    kmeans = KMeans(X, M, K)
    # points 0,1 to cluster 0, point 2 to cluster 1
    kmeans.data_point_assignments = numpy.array([[0, 1], [2]], dtype=object)
    kmeans.centroids = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
    kmeans.mask_centroids = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]

    # Cluster 0 has no value flagged 1 in M for the third coordinate, so
    # that coordinate is expected to stay 0 with a centroid mask of 0.
    new_centroids = [[1.0, 3.5, 0], [7.0, 8.0, 9.0]]
    new_mask_centroids = [[1, 1, 0], [1, 1, 1]]
    kmeans.update()
    assert numpy.array_equal(new_centroids, kmeans.centroids)
    assert numpy.array_equal(new_mask_centroids, kmeans.mask_centroids)

    # Case when one cluster has no points assigned to it - we then randomly
    # re-initialise that cluster
    X = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    M = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
    K = 2
    kmeans = KMeans(X, M, K, 'random')
    # points 0,1,2 to cluster 0, none to cluster 1
    kmeans.data_point_assignments = numpy.array([[0, 1, 2], []], dtype=object)
    kmeans.centroids = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
    kmeans.mask_centroids = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
    kmeans.mins = [1.0, 2.0, 9.0]
    kmeans.maxs = [7.0, 8.0, 9.0]

    new_centroids = [[4.0, 5.0, 9.0],
                     [6.066531109150288, 6.547726417641815, 9.0]]
    new_mask_centroids = [[1, 1, 1], [1, 1, 1]]

    # Fix the seed so the randomly drawn centroid matches the literal above.
    random.seed(0)
    kmeans.update()
    assert numpy.array_equal(new_centroids, kmeans.centroids)
    assert numpy.array_equal(new_mask_centroids, kmeans.mask_centroids)

    # Case when we use the 'singleton' option for empty clusters - reassign
    # point furthest away to the cluster.
    # Points 0 and 1 go to cluster 0, 2 to cluster 1 and none to cluster 2.
    # Point 2 is furthest away, so gets reassigned to cluster 2 - making
    # cluster 1 empty. Then point 1 is furthest away and gets reassigned to
    # cluster 1
    X = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    M = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
    K = 3
    kmeans = KMeans(X, M, K, resolve_empty='singleton')
    # points 0,1 to cluster 0, 2 to cluster 1, none to cluster 2
    kmeans.data_point_assignments = numpy.array([[0, 1], [2], []],
                                                dtype=object)
    kmeans.cluster_assignments = [0, 0, 1]
    kmeans.centroids = [[1.0, 2.0, 3.0], [15.0, 16.0, 17.0],
                        [500.0, 500.0, 500.0]]
    kmeans.mask_centroids = [[1, 1, 0], [1, 1, 1], [1, 1, 1]]
    # Distance of every point to the centroid of the cluster it is
    # currently assigned to (see cluster_assignments above).
    kmeans.distances = numpy.array([
        kmeans.compute_MSE(kmeans.X[0], kmeans.centroids[0], M[0],
                           kmeans.mask_centroids[0]),
        kmeans.compute_MSE(kmeans.X[1], kmeans.centroids[0], M[1],
                           kmeans.mask_centroids[0]),
        kmeans.compute_MSE(kmeans.X[2], kmeans.centroids[1], M[2],
                           kmeans.mask_centroids[1])
    ])
    kmeans.mins = [1.0, 2.0, 9.0]
    kmeans.maxs = [7.0, 8.0, 9.0]

    new_centroids = [[1.0, 2.0, 0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]
    new_data_point_assignments = [[0], [1], [2]]
    new_distances = [0, 0, 0]

    kmeans.update()

    assert new_data_point_assignments == list(kmeans.data_point_assignments)
    assert numpy.array_equal(new_distances, kmeans.distances)
    assert numpy.array_equal(new_centroids, kmeans.centroids)

예제 #26

0

파일 보기

파일: test_kmeans.py 프로젝트: ThomasBrouwer/kmeans_missing

def test_update():
    """Exercise KMeans.update() for the three empty-cluster strategies.

    Covers the normal case (every cluster owns points), the 'random' option
    (an empty cluster gets a freshly drawn centroid, made reproducible with
    random.seed(0)) and the 'singleton' option (an empty cluster takes over
    the point furthest from its current centroid).

    The per-cluster index lists are ragged, so they are built with
    dtype=object: recent NumPy releases raise ValueError for ragged nested
    sequences otherwise, while older releases created the same 1-D object
    array implicitly.
    """
    # Normal case
    X = numpy.array([[1.0,2.0,3.0],[4.0,5.0,6.0],[7.0,8.0,9.0]])
    M = numpy.array([[1,1,0],[0,1,0],[1,1,1]])
    K = 2
    kmeans = KMeans(X,M,K)
    kmeans.data_point_assignments = numpy.array([[0,1],[2]],dtype=object) #points 0,1 to cluster 0, point 2 to cluster 1
    kmeans.centroids = [[0.0,0.0,0.0],[0.0,0.0,0.0]]
    kmeans.mask_centroids = [[0.0,0.0,0.0],[0.0,0.0,0.0]]

    # Third coordinate of cluster 0 has no entry flagged 1 in M, so it is
    # expected to stay 0 with a centroid mask of 0.
    new_centroids = [[1.0,3.5,0],[7.0,8.0,9.0]]
    new_mask_centroids = [[1,1,0],[1,1,1]]
    kmeans.update()
    assert numpy.array_equal(new_centroids,kmeans.centroids)
    assert numpy.array_equal(new_mask_centroids,kmeans.mask_centroids)

    # Case when one cluster has no points assigned to it - we then randomly re-initialise that cluster
    X = numpy.array([[1.0,2.0,3.0],[4.0,5.0,6.0],[7.0,8.0,9.0]])
    M = numpy.array([[1,1,0],[0,1,0],[1,1,1]])
    K = 2
    kmeans = KMeans(X,M,K,'random')
    kmeans.data_point_assignments = numpy.array([[0,1,2],[]],dtype=object) #points 0,1,2 to cluster 0, none to cluster 1
    kmeans.centroids = [[0.0,0.0,0.0],[0.0,0.0,0.0]]
    kmeans.mask_centroids = [[0.0,0.0,0.0],[0.0,0.0,0.0]]
    kmeans.mins = [1.0,2.0,9.0]
    kmeans.maxs = [7.0,8.0,9.0]

    new_centroids = [[4.0,5.0,9.0],[6.066531109150288,6.547726417641815,9.0]]
    new_mask_centroids = [[1,1,1],[1,1,1]]

    # Fixed seed so the randomly drawn centroid matches the literal above.
    random.seed(0)
    kmeans.update()
    assert numpy.array_equal(new_centroids,kmeans.centroids)
    assert numpy.array_equal(new_mask_centroids,kmeans.mask_centroids)

    # Case when we use the 'singleton' option for empty clusters - reassign point furthest away to the cluster
    # Points 0 and 1 go to cluster 0, 2 to cluster 1 and none to cluster 2.
    # Point 2 is furthest away, so gets reassigned to cluster 2 - making
    # cluster 1 empty. Then point 1 is furthest away and gets reassigned to cluster 1
    X = numpy.array([[1.0,2.0,3.0],[4.0,5.0,6.0],[7.0,8.0,9.0]])
    M = numpy.array([[1,1,0],[0,1,0],[1,1,1]])
    K = 3
    kmeans = KMeans(X,M,K,resolve_empty='singleton')
    kmeans.data_point_assignments = numpy.array([[0,1],[2],[]],dtype=object) #points 0,1 to cluster 0, 2 to cluster 1, none to cluster 2
    kmeans.cluster_assignments = [0,0,1]
    kmeans.centroids = [[1.0,2.0,3.0],[15.0,16.0,17.0],[500.0,500.0,500.0]]
    kmeans.mask_centroids = [[1,1,0],[1,1,1],[1,1,1]]
    # Distance of each point to the centroid of its currently assigned cluster.
    kmeans.distances = numpy.array([
        kmeans.compute_MSE(kmeans.X[0],kmeans.centroids[0],M[0],kmeans.mask_centroids[0]),
        kmeans.compute_MSE(kmeans.X[1],kmeans.centroids[0],M[1],kmeans.mask_centroids[0]),
        kmeans.compute_MSE(kmeans.X[2],kmeans.centroids[1],M[2],kmeans.mask_centroids[1])
    ])
    kmeans.mins = [1.0,2.0,9.0]
    kmeans.maxs = [7.0,8.0,9.0]

    new_centroids = [[1.0,2.0,0],[4.0,5.0,6.0],[7.0,8.0,9.0]]
    new_data_point_assignments = [[0],[1],[2]]
    new_distances = [0,0,0]

    kmeans.update()

    assert new_data_point_assignments == list(kmeans.data_point_assignments)
    assert numpy.array_equal(new_distances,kmeans.distances)
    assert numpy.array_equal(new_centroids,kmeans.centroids)

예제 #27

0

파일 보기

파일: test_kmeans.py 프로젝트: ThomasBrouwer/kmeans_missing

def test_cluster():
    """Run KMeans.cluster() end to end from hand-picked starting centroids.

    Three scenarios are checked: fully observed data, data with missing
    values (mask entries of 0 in M), and a cluster for which one coordinate
    is expected to come out as 0.

    The per-cluster point lists are compared as plain Python lists rather
    than with numpy.array_equal: the lists can have different lengths, and
    on recent NumPy releases array_equal cannot convert such a ragged
    nested list to an array and therefore reports False even when the
    contents match.
    """
    ### No missing values case.
    # Points 1,2 will first go to cluster 2, and point 3 to cluster 1.
    # Then point 1 will switch to cluster 1.
    X = [[2,5],[7,5],[2,3]]
    M = numpy.ones((3,2))
    K = 2
    kmeans = KMeans(X,M,K)

    kmeans.centroids = [[2.0,2.0],[4.0,5.0]]
    kmeans.mask_centroids = numpy.ones((2,2))
    kmeans.cluster_assignments = [-1,-1,-1]

    expected_centroids = [[2.0,4.0],[7.0,5.0]]
    expected_cluster_assignments = [0,1,0]
    expected_data_point_assignments = [[0,2],[1]]
    expected_clustering_results = [[1,0],[0,1],[1,0]]

    kmeans.cluster()
    assert numpy.array_equal(expected_centroids,kmeans.centroids)
    assert numpy.array_equal(expected_cluster_assignments,kmeans.cluster_assignments)
    # Ragged expectation ([[0,2],[1]]): compare list by list.
    assert expected_data_point_assignments == [list(points) for points in kmeans.data_point_assignments]
    assert numpy.array_equal(expected_clustering_results,kmeans.clustering_results)

    ### Missing values case.
    # Points 2,3,4 will first go to cluster 2, and point 1 to cluster 1.
    # Then point 2 will switch to cluster 1.
    X = [[2,5],[3,-1],[10,1],[-1,2]]
    M = [[1,1],[1,0],[1,1],[0,1]]
    K = 2
    kmeans = KMeans(X,M,K)

    kmeans.centroids = [[2.0,7.0],[3.0,2.0]]
    kmeans.mask_centroids = numpy.ones((2,2))
    kmeans.cluster_assignments = [-1,-1,-1,-1]

    expected_centroids = [[2.5,5.0],[10.0,1.5]]
    expected_cluster_assignments = [0,0,1,1]
    expected_data_point_assignments = [[0,1],[2,3]]
    expected_clustering_results = [[1,0],[1,0],[0,1],[0,1]]

    kmeans.cluster()
    assert numpy.array_equal(expected_centroids,kmeans.centroids)
    assert numpy.array_equal(expected_cluster_assignments,kmeans.cluster_assignments)
    assert expected_data_point_assignments == [list(points) for points in kmeans.data_point_assignments]
    assert numpy.array_equal(expected_clustering_results,kmeans.clustering_results)

    ### Cluster with 0 coordinate.
    # Cluster 1 gets points 1 and 2, cluster 2 gets 3 and 4.
    X = [[2,5],[3,-1],[-1,1],[-1,2]]
    M = [[1,1],[1,0],[0,1],[0,1]]
    K = 2
    kmeans = KMeans(X,M,K)

    kmeans.centroids = [[2.0,7.0],[4.0,4.0]]
    kmeans.mask_centroids = numpy.ones((2,2))
    kmeans.cluster_assignments = [-1,-1,-1,-1]

    # Points 3 and 4 both have a 0 mask on the first coordinate, so the
    # first coordinate of the second centroid is expected to be 0.
    expected_centroids = [[2.5,5.0],[0,1.5]]
    expected_cluster_assignments = [0,0,1,1]
    expected_data_point_assignments = [[0,1],[2,3]]
    expected_clustering_results = [[1,0],[1,0],[0,1],[0,1]]

    kmeans.cluster()
    assert numpy.array_equal(expected_centroids,kmeans.centroids)
    assert numpy.array_equal(expected_cluster_assignments,kmeans.cluster_assignments)
    assert expected_data_point_assignments == [list(points) for points in kmeans.data_point_assignments]
    assert numpy.array_equal(expected_clustering_results,kmeans.clustering_results)

예제 #28

0

파일 보기

    def initialise(self, init_S='random', init_FG='random', expo_prior=1.):
        """Set the starting values of the factor matrices S, F and G.

        Arguments:
        init_S -- how to fill self.S, of shape (self.K, self.L):
            'ones'        - all entries 1
            'random'      - entries from numpy.random.rand
            'exponential' - each entry from exponential_draw(expo_prior)
        init_FG -- how to fill self.F, of shape (self.I, self.K), and
            self.G, of shape (self.J, self.L). Accepts the same three
            options as init_S, plus:
            'kmeans'      - run KMeans on self.R (for F) and on its
                            transpose (for G), using self.M as the mask,
                            and use the clustering results shifted by 0.2
        expo_prior -- argument handed to exponential_draw for the
            'exponential' options (presumably the rate of the exponential
            distribution - confirm against exponential_draw).

        An unrecognised option trips an assert (AssertionError).
        """
        assert init_S in ['ones', 'random', 'exponential'
                          ], "Unrecognised init option for S: %s." % init_S
        assert init_FG in ['ones', 'random', 'exponential', 'kmeans'
                           ], "Unrecognised init option for F,G: %s." % init_FG

        # --- S ---
        if init_S == 'ones':
            self.S = numpy.ones((self.K, self.L))
        elif init_S == 'random':
            self.S = numpy.random.rand(self.K, self.L)
        elif init_S == 'exponential':
            self.S = numpy.empty((self.K, self.L))
            # Fill entry by entry: one independent draw per (k, l).
            for k, l in itertools.product(xrange(0, self.K), xrange(0,
                                                                    self.L)):
                self.S[k, l] = exponential_draw(expo_prior)

        # --- F and G ---
        if init_FG == 'ones':
            self.F = numpy.ones((self.I, self.K))
            self.G = numpy.ones((self.J, self.L))
        elif init_FG == 'random':
            self.F = numpy.random.rand(self.I, self.K)
            self.G = numpy.random.rand(self.J, self.L)
        elif init_FG == 'exponential':
            self.F = numpy.empty((self.I, self.K))
            self.G = numpy.empty((self.J, self.L))
            for i, k in itertools.product(xrange(0, self.I), xrange(0,
                                                                    self.K)):
                self.F[i, k] = exponential_draw(expo_prior)
            for j, l in itertools.product(xrange(0, self.J), xrange(0,
                                                                    self.L)):
                self.G[j, l] = exponential_draw(expo_prior)
        elif init_FG == 'kmeans':
            # Cluster the rows of R into K groups; the clustering results
            # plus an offset of 0.2 become F.
            # NOTE(review): the offset presumably keeps every entry of F
            # strictly positive - confirm against the update rules.
            print "Initialising F using KMeans."
            kmeans_F = KMeans(self.R, self.M, self.K)
            kmeans_F.initialise()
            kmeans_F.cluster()
            self.F = kmeans_F.clustering_results + 0.2

            # Same procedure on the transposed data, with L groups, for G.
            print "Initialising G using KMeans."
            kmeans_G = KMeans(self.R.T, self.M.T, self.L)
            kmeans_G.initialise()
            kmeans_G.cluster()
            self.G = kmeans_G.clustering_results + 0.2