def cluster(self, method='ward'):
        """Seed ``self.clustering_model`` with labels from the agglomerative L-method.

        Runs ``agglomerative_l_method`` on ``self.X`` with the given linkage
        ``method``, stores the number of cluster centers it reports in
        ``self.sub_clusters_cnt``, and fits a new ``DividableClustering`` on
        ``self.X`` with the labels it produced. Returns None.
        """
        seeding = agglomerative_l_method(self.X, method=method)

        # One sub-cluster per center reported by the L-method.
        self.sub_clusters_cnt = len(seeding.cluster_centers_)

        # The model is attached to the instance before it is fitted.
        model = DividableClustering()
        self.clustering_model = model
        model.fit(self.X, seeding.labels_)
Esempio n. 2
0
    def fn(inst):
        """Cluster ``inst['x']`` and return ``inst`` extended with the results.

        Raises ``Exception('no x')`` when ``inst`` has no ``'x'`` entry.
        Otherwise returns the value of chaining ``inst.set`` twice: first
        storing the model's labels under ``'prediction'``, then its cluster
        centers under ``'centroids'``.
        """
        if 'x' not in inst:
            raise Exception('no x')

        model = l_method.agglomerative_l_method(inst['x'])

        # Each ``set`` call is made on the result of the previous one.
        with_prediction = inst.set('prediction', model.labels_)
        return with_prediction.set('centroids', model.cluster_centers_)
def l_method(ax, X, method):
    """Draw the samples of ``X`` on ``ax``, one ``plot`` call per L-method cluster.

    ``agglomerative_l_method`` is run on ``X`` with the given linkage
    ``method``. For every label in ``range(number of cluster centers)`` the
    samples carrying that label are passed to ``plot`` with the colour that
    ``get_cmap`` assigns to the label. Returns None.
    """
    result = agglomerative_l_method(X, method=method)
    suggest_n = len(result.cluster_centers_)

    # The colour map is requested with one more entry than there are clusters.
    colour_of = get_cmap(suggest_n + 1)

    for label in range(suggest_n):
        members = [
            sample
            for sample, assigned in zip(X, result.labels_)
            if assigned == label
        ]
        plot(ax, members, c=colour_of(label), edgecolors='none')
    def cluster(self):
        """Fit ``self.clustering_model`` on ``self.X`` using L-method labels.

        Runs ``agglomerative_l_method`` on ``self.X`` with its default
        arguments and uses the resulting labels to fit a fresh
        ``DividableClustering``. Returns None.
        """
        seeding = agglomerative_l_method(self.X)

        # An earlier experiment compared these labels with those of
        # AgglomerativeClustering(len(cluster_centers_)); only the L-method
        # labels are used now.

        # First-tier clustering: the model is attached before it is fitted.
        model = DividableClustering()
        self.clustering_model = model
        model.fit(self.X, seeding.labels_)
    def cluster(self, method='ward'):
        """Cluster ``self.X`` in two tiers, splitting clusters that have collisions.

        First tier: the labels produced by ``agglomerative_l_method`` (with
        the given linkage ``method``) are used to fit a new
        ``DividableClustering`` stored in ``self.clustering_model``.

        Second tier: for every first-tier label, the member samples and their
        entries in ``self.y_seed`` are collected. When ``self.has_collision``
        reports a collision for that group, KMeans is run with varying
        cluster counts (a binary search biased towards the low end) to find a
        small count whose partition is collision-free; the group is then
        split with those labels. The count that produced the applied labels
        is added to ``self.splitting_score``. Finally the model is relabeled.

        Returns None.

        Raises:
            AssertionError: if ``self.X`` and ``self.y_seed`` differ in length.
        """
        assert len(self.X) == len(self.y_seed)

        l_method = agglomerative_l_method(self.X, method=method)

        # first tier clustering, using agglomerative clustering
        self.clustering_model = DividableClustering()
        self.clustering_model.fit(self.X, l_method.labels_)

        # second tier, using kmeans
        # The range is fixed before any split happens, so only first-tier
        # labels are visited.
        for suspect_label in range(self.clustering_model.latest_label):
            ind_X = self.clustering_model.get_X_with_idx(suspect_label)
            y_seed = []
            X = []
            for x, idx in ind_X:
                X.append(x)
                y_seed.append(self.y_seed[idx])

            # no collision in this sub-group
            if not self.has_collision(X, y_seed):
                continue

            # there are collisions in this sub-group
            # NOTE(review): the search assumes that once a count is
            # collision-free, larger counts are too -- confirm.
            low_cnt = 2
            high_cnt = len(X)
            last_possible_labels = None
            accepted_cnt = None
            while low_cnt <= high_cnt:
                # 1/4 biased binary search: probe a quarter of the way up
                cluster_cnt = low_cnt + (high_cnt - low_cnt) // 4
                kmeans = KMeans(cluster_cnt)
                kmeans.fit(X)

                if not self.has_collision(X, y_seed, kmeans):
                    # Remember the labels together with the count that
                    # produced them; later probes overwrite cluster_cnt.
                    last_possible_labels = kmeans.labels_
                    accepted_cnt = cluster_cnt
                    high_cnt = cluster_cnt - 1
                else:
                    low_cnt = cluster_cnt + 1

            if accepted_cnt is None:
                # NOTE(review): no probe was collision-free, so
                # last_possible_labels is still None. As before, the last
                # probed count is reported and None is handed to split() --
                # confirm how split() is meant to handle that.
                accepted_cnt = cluster_cnt

            # Report the count that matches the labels being applied; the
            # last probed count can be a smaller, rejected one.
            self.splitting_score += accepted_cnt
            print('split sub_clusters_cnt:', accepted_cnt, 'cnt:', len(X), 'main cnt:', self.cnt)
            self.clustering_model.split(suspect_label, last_possible_labels)

        self.clustering_model.relabel()
from dividable_clustering import DividableClustering
from agglomerative_clustering import AgglomerativeClustering
from sklearn.cluster import KMeans
from dataset import *
from sklearn.neighbors import BallTree
from l_method import agglomerative_l_method

# Fit a DividableClustering on the dataset returned by get_iris(), seeded
# with the labels from the agglomerative L-method, then print both the seed
# labels and the model's predictions for the same samples.
dataset = get_iris()
l_method = agglomerative_l_method(dataset.X)

model = DividableClustering()
model.fit(dataset.X, l_method.labels_)

print('labels:', l_method.labels_)

predictions = model.predict(dataset.X)
print('predicts:', predictions)
 def fit(self, x):
     """Store ``x`` and the labels computed for it.

     Sets ``self.x`` to the input, then sets ``self.labels_`` to the result
     of ``agglomerative_l_method(x)`` converted with ``np.array``.
     Returns None.
     """
     self.x = x
     self.labels_ = np.array(agglomerative_l_method(x))
    def static(index):
        """Return the character of ``'rgb'`` at position ``index``."""
        palette = 'rgb'
        return palette[index]

    return map_index_to_rgb_color

dataset = get_iris()

# Reduce the samples with PCA(2) before clustering.
# NOTE(review): presumably scikit-learn's PCA with two components -- confirm
# against the file's imports.
pca = PCA(2)
pca.fit(dataset.X)
X = pca.transform(dataset.X)
# Alternatives tried earlier: the raw dataset.X, or only the first two
# values of every sample.

print('X:', X)

l_method = agglomerative_l_method(X)
clusters_cnt = len(l_method.cluster_centers_)

# The labels come straight from the L-method result; fitting
# AgglomerativeClustering(clusters_cnt) on X was an earlier alternative.
labels = l_method.labels_

print('X:', X)

# Group the samples by their assigned label, keeping encounter order.
X_by_label = {}
for x, label in zip(X, labels):
    X_by_label.setdefault(label, []).append(x)