예제 #1
0
 def __init__(self,
              n_cluster: int,
              data: np.ndarray,
              use_kmeans: bool = False,
              w: float = 0.9,
              c1: float = 0.5,
              c2: float = 0.3,
              flag: int = 1,
              weights: list = None):
     """Set up the centroids, personal bests and velocity of this object.

     Args:
         n_cluster: Number of centroids to hold.
         data: Samples; rows of ``data`` are used to seed the centroids.
         use_kmeans: When true, the random seed centroids are replaced by
             the centroids of a ``KMeans`` fit (``init_pp=False``) on
             ``data``.
         w: Stored as ``self._w``. Presumably the PSO inertia weight;
             confirm against the velocity update.
         c1: Stored as ``self._c1``. Presumably the cognitive coefficient;
             confirm against the velocity update.
         c2: Stored as ``self._c2``. Presumably the social coefficient;
             confirm against the velocity update.
         flag: Selects the SSE function: odd values use ``calc_sse``, even
             values use ``calc_sse2`` together with ``weights``.
         weights: Forwarded to ``calc_sse2``; only read when ``flag`` is
             even.
     """
     # Seed with distinct rows whenever possible: sampling with replacement
     # can pick the same row twice, giving duplicate centroids and therefore
     # a cluster that starts out empty.  Fall back to replacement only when
     # more centroids are requested than there are samples.
     n_samples = len(data)
     index = np.random.choice(n_samples, n_cluster,
                              replace=bool(n_cluster > n_samples))
     self.centroids = data[index].copy()
     if use_kmeans:
         kmeans = KMeans(n_cluster=n_cluster, init_pp=False)
         kmeans.fit(data)
         self.centroids = kmeans.centroid.copy()
     self.best_position = self.centroids.copy()

     # The centroids do not change between the two metrics below, so the
     # prediction is computed once and shared (assumes ``_predict`` has no
     # side effects -- it is not visible from this block).
     assignment = self._predict(data)
     self.best_score = quantization_error(self.centroids, assignment, data)
     self.flag = flag
     if self.flag % 2 == 1:
         self.best_sse = calc_sse(self.centroids, assignment, data)
     else:
         # NOTE(review): ``weights`` defaults to None; confirm calc_sse2
         # accepts that when an even flag is passed without weights.
         self.best_sse = calc_sse2(self.centroids, assignment, data, weights)
     self.velocity = np.zeros_like(self.centroids)
     self._w = w
     self._c1 = c1
     self._c2 = c2
예제 #2
0
 def _update_centroids(self, data: np.ndarray) -> None:
     """Move the centroids by the current velocity and refresh the bests.

     After the move, the quantization error and the SSE of the new
     centroids are evaluated on ``data``.  ``best_sse`` keeps the smaller
     of its previous value and the new SSE; ``best_score`` and
     ``best_position`` are replaced only when the new quantization error
     is strictly lower than the stored one.

     Args:
         data: Samples the centroids are evaluated against.
     """
     # Deliberately not ``+=``: the sum builds a new array, so dtype
     # promotion applies and no other reference to the old array is touched.
     self.centroids = self.centroids + self.velocity

     # Both metrics are evaluated for the same centroids, so the prediction
     # is computed once and shared (assumes ``_predict`` has no side
     # effects -- it is not visible from this block).
     assignment = self._predict(data)
     new_score = quantization_error(self.centroids, assignment, data)
     sse = calc_sse(self.centroids, assignment, data)

     self.best_sse = min(sse, self.best_sse)
     if new_score < self.best_score:
         self.best_score = new_score
         self.best_position = self.centroids.copy()
예제 #3
0
 def __init__(self,
              n_cluster: int,
              data: np.ndarray,
              use_kmeans: bool = False,
              w: int = 9,
              c1: int = 5,
              c2: int = 3):
     """Set up the centroids, personal bests and velocity of this object.

     Args:
         n_cluster: Number of centroids to hold.
         data: Samples; rows of ``data`` are used to seed the centroids.
         use_kmeans: When true, the random seed centroids are replaced by
             the centroids of a ``KMeans`` fit (``init_pp=False``) on
             ``data``.
         w: Stored as ``self._w``.
         c1: Stored as ``self._c1``.
         c2: Stored as ``self._c2``.

     NOTE(review): the integer defaults 9 / 5 / 3 are kept for
     compatibility.  If these are the usual PSO inertia and acceleration
     coefficients, fractional values would be expected; confirm how the
     velocity update scales them.
     """
     # Seed with distinct rows whenever possible: sampling with replacement
     # can pick the same row twice, giving duplicate centroids and therefore
     # a cluster that starts out empty.  Fall back to replacement only when
     # more centroids are requested than there are samples.
     n_samples = len(data)
     index = np.random.choice(n_samples, n_cluster,
                              replace=bool(n_cluster > n_samples))
     self.centroids = data[index].copy()
     if use_kmeans:
         kmeans = KMeans(n_cluster=n_cluster, init_pp=False)
         kmeans.fit(data)
         self.centroids = kmeans.centroid.copy()
     self.best_position = self.centroids.copy()

     # The centroids do not change between the two metrics below, so the
     # prediction is computed once and shared (assumes ``_predict`` has no
     # side effects -- it is not visible from this block).
     assignment = self._predict(data)
     self.best_score = quantization_error(self.centroids, assignment, data)
     self.best_sse = calc_sse(self.centroids, assignment, data)
     self.velocity = np.zeros_like(self.centroids)
     self._w = w
     self._c1 = c1
     self._c2 = c2
예제 #4
0
def kmeans_custom(k, data, stdev_mean):
    """Run one k-means pass from custom initial clusters and print the result.

    The clusters produced by ``init_clusters_custom(k, data)`` are refined
    by alternating ``kmeans.calc_cluster_centroids`` and
    ``kmeans.reassign_clusters`` for at most 50 iterations, stopping early
    as soon as ``reassign_clusters`` returns ``False``.  The value of ``k``
    and the centroids are then printed, followed by an empty line.

    Args:
        k: Number of clusters; passed to ``init_clusters_custom`` and echoed
            in the output.
        data: Input examples, passed to ``init_clusters_custom``.
        stdev_mean: Passed through unchanged to
            ``kmeans.print_cluster_centroids``.
    """
    # The original loop was ``range(0, 49)``, i.e. 49 passes, although it
    # was documented as 50; the bound now matches the documented intent.
    max_iterations = 50

    clusters = init_clusters_custom(k, data)
    for _ in range(max_iterations):
        kmeans.calc_cluster_centroids(clusters)
        # Explicit comparison kept on purpose: the return contract of
        # reassign_clusters is not visible here, and ``not value`` would
        # also stop on None.
        if kmeans.reassign_clusters(clusters) == False:
            break

    # Only a finite SSE yields a snapshot to print; otherwise an empty list
    # is handed to the printer, exactly as before.
    sse = kmeans.calc_sse(clusters)
    best_clusters = copy.deepcopy(clusters) if sse < float("inf") else []

    # Single-argument print calls behave the same on Python 2 and Python 3.
    print("k = " + str(k))
    kmeans.print_cluster_centroids(best_clusters, stdev_mean)
    print("")