Esempio n. 1
0
def kkmeans():
    """Cluster a fixed sample data set into three groups and print them.

    The clustering is repeated from scratch until each of the three
    clusters holds at least four rows, then every cluster (and their
    combination) is passed through ``kmeans_helper`` and pretty-printed.
    """
    import ast

    # 20 children, 10 days of data per child.
    raw_data = '[[1, 8, 20, 19, 4, 8, 3, 0, 0, 1], [18, 18, 1, 19, 4, 13, 6, 19, 4, 1], [4, 7, 10, 3, 6, 4, 10, 20, 11, 10], [12, 5, 4, 14, 7, 10, 16, 2, 9, 17], [7, 19, 14, 17, 11, 15, 19, 6, 8, 6], [17, 7, 3, 5, 7, 20, 1, 16, 13, 3], [19, 11, 10, 0, 17, 2, 14, 15, 5, 6], [4, 14, 18, 9, 19, 19, 1, 18, 20, 7], [20, 15, 8, 3, 12, 1, 12, 6, 0, 10], [18, 16, 17, 6, 0, 9, 9, 11, 2, 8], [2, 2, 9, 3, 19, 18, 1, 16, 9, 20], [15, 15, 13, 19, 11, 7, 20, 8, 14, 6], [1, 20, 1, 17, 4, 3, 13, 4, 2, 18], [0, 16, 18, 20, 16, 14, 8, 20, 5, 14], [11, 1, 7, 17, 17, 11, 10, 14, 6, 16], [8, 12, 15, 8, 5, 18, 19, 1, 13, 4], [17, 20, 13, 9, 11, 0, 16, 8, 16, 15], [3, 2, 12, 8, 8, 5, 7, 8, 20, 3], [20, 2, 2, 13, 4, 20, 0, 4, 14, 11], [20, 3, 12, 9, 14, 18, 17, 7, 5, 7]]'
    # literal_eval only accepts Python literals, so it cannot execute code
    # the way eval() can.
    data = ast.literal_eval(raw_data)

    def cluster_once():
        # One full clustering pass, in the same call order as before.
        kmeans = Kmeans(data, 3)
        kmeans.get_k_rand()
        kmeans.compare_to_k()
        kmeans.get_k_avarage()
        return kmeans.compare_to_k2()

    cl = cluster_once()

    # Retry while any cluster is too small. The previous ``while True`` had
    # no exit, so the code below the loop was unreachable.
    while any(len(cl[i]["data"]["rows"]) < 4 for i in range(3)):
        cl = cluster_once()

    data1 = kmeans_helper(cl[0])
    data2 = kmeans_helper(cl[1])
    data3 = kmeans_helper(cl[2])

    # NOTE(review): relies on the cluster objects supporting ``+``; confirm
    # against what compare_to_k2() returns.
    data_all = kmeans_helper(cl[0] + cl[1] + cl[2])

    pprint.pprint(data1)
    pprint.pprint(data2)
    pprint.pprint(data3)
    pprint.pprint(data_all)
Esempio n. 2
0
    def initialization(self):
        """Seed the W, H and G matrices from a k-means run on ``self.data``.

        H and G start as cluster-membership indicators built from the k-means
        assignment, each with a small constant added; G is additionally
        divided row-wise by the size of the cluster the sample belongs to.
        W is then computed as ``data . G``.
        """
        n_bases = self._num_bases
        n_samples = self._num_samples

        # All three factors start out as zeros.
        self.W = np.zeros((self._data_dimension, n_bases))
        self.H = np.zeros((n_bases, n_samples))
        self.G = np.zeros((n_samples, n_bases))

        # Cluster the data to obtain one label per sample.
        clusterer = Kmeans(self.data[:, :],
                           num_bases=n_bases,
                           show_progress=self._show_progress)
        clusterer.initialization()
        clusterer.factorize()
        labels = clusterer.assigned
        sample_idx = range(len(labels))

        # Number of samples assigned to each cluster.
        cluster_sizes = np.zeros(n_bases)
        for base in range(n_bases):
            cluster_sizes[base] = np.count_nonzero(labels == base)

        # G: indicator + 0.01, each row scaled by 1 / size of its own cluster.
        self.G[sample_idx, labels] = 1.0
        self.G += 0.01
        self.G /= cluster_sizes[labels].reshape(-1, 1)

        # H: transposed indicator + 0.2.
        self.H[labels, sample_idx] = 1.0
        self.H += 0.2

        self.W = np.dot(self.data[:, :], self.G)
Esempio n. 3
0
    def init_h(self):
        """Create whichever of ``H``, ``G`` and ``W`` the instance lacks.

        ``H`` and ``G`` are both derived from a k-means assignment of
        ``self.data``; ``W`` is ``data . G``. Attributes that already exist
        are left untouched.

        The k-means run is performed whenever ``H`` *or* ``G`` is missing.
        Previously it only ran inside the ``H`` branch, so an instance that
        already had ``H`` but no ``G`` failed with a NameError on ``assign``.
        """
        need_h = not hasattr(self, 'H')
        need_g = not hasattr(self, 'G')

        if need_h or need_g:
            # initialize using k-means
            km = Kmeans(self.data[:, :],
                        num_bases=self._num_bases,
                        seed=self.seed)
            km.factorize(niter=10)
            assign = km.assigned
            rows = range(len(assign))

        if need_h:
            # Cluster indicator (bases x samples) plus a constant 0.2.
            self.H = np.zeros((self._num_bases, self._num_samples))
            self.H.T[rows, assign] = 1.0
            self.H += 0.2 * np.ones((self._num_bases, self._num_samples))

        if need_g:
            # Number of samples assigned to each cluster.
            num_i = np.zeros(self._num_bases)
            for i in range(self._num_bases):
                num_i[i] = len(np.where(assign == i)[0])

            # Cluster indicator (samples x bases) plus 0.01, each row divided
            # by the size of the cluster that sample was assigned to.
            self.G = np.zeros((self._num_samples, self._num_bases))
            self.G[rows, assign] = 1.0
            self.G += 0.01
            self.G /= np.tile(np.reshape(num_i[assign], (-1, 1)),
                              self.G.shape[1])

        if not hasattr(self, 'W'):
            self.W = np.dot(self.data[:, :], self.G)
Esempio n. 4
0
    def create_splits(self, X):
        """Store, per feature column of ``X``, the best k-means centres found.

        Each column is clustered 50 times and the means of the lowest-error
        fit are appended to ``self.thresholds``, one entry per column.
        """
        _, n_features = X.shape

        # One entry per feature: the means of that feature's best fit.
        self.thresholds = []

        for col in range(n_features):
            # Turn the (n,) column into an (n, 1) matrix.
            column = X[:, col]
            column = np.reshape(column, [column.size, 1])

            # NOTE(review): ``k`` is not defined in this method; presumably a
            # module-level name - confirm.
            clusterer = Kmeans(k=k)
            best_error, best_means = np.inf, None

            # Repeated fits; keep the means of the lowest-error one.
            for _ in range(50):
                clusterer.fit(column)
                current_error = clusterer.error(column)
                if current_error < best_error:
                    best_error, best_means = current_error, clusterer.means

            self.thresholds.append(best_means)
Esempio n. 5
0
        def select_next(iterval):
            """Pick the index of the next data sample.

            Without the robust map this is the arg-max of ``iterval``. With
            it, the top-scoring candidates are clustered and the candidate
            that ``pdist`` ranks nearest to the centre of the most populated
            cluster is chosen.
            """
            if not self._robust_map:
                return np.argmax(iterval)

            # Candidate indices, highest iterval first.
            order = np.argsort(iterval)[::-1]
            candidates = order[:self._robust_nselect]
            d_sub = self.data[:, candidates]
            self.sub.extend(candidates)

            # Cluster the candidate samples.
            kmeans_mdl = Kmeans(d_sub, num_bases=self._robust_cluster)
            kmeans_mdl.factorize(niter=10)

            # Find the cluster with the most members.
            bin_edges = range(self._robust_cluster + 1)
            counts = np.histogram(kmeans_mdl.assigned, bin_edges)[0]
            biggest = np.argmax(counts)

            # Candidate with the smallest pdist value to that centre.
            centre = kmeans_mdl.W[:, biggest:biggest + 1]
            distances = pdist(centre, d_sub)
            return order[np.argmin(distances)]
Esempio n. 6
0
def main():
    """Load the data frame and run the K search over the range 2..12."""
    # 1. Read the data.
    frame = getDF()

    # 2. Probe for the best K: the first clearly visible elbow marks it.
    #    Inspect the saved image to choose the value.
    searcher = Kmeans()
    searcher.searchK(SAVAPATH, frame, 2, 12)
    def quantize(self, img):
        """
        Cluster the pixels of an image into 2^b groups.

        Parameters
        ----------
        img : a (H,W,3) numpy array

        Returns
        -------
        quantized_img : a (H,W,1) numpy array of per-pixel cluster indices

        Stores
        ------
        colours : the fitted model's ``means`` (documented upstream as a
            (2^b, 3) array, one colour per row)

        """
        height, width, _channels = img.shape

        # Flatten to one row per pixel before clustering.
        flat_pixels = img.reshape(-1, 3)

        clusterer = Kmeans(2**self.b)
        clusterer.fit(flat_pixels)

        labels = clusterer.predict(flat_pixels)
        quantized_img = labels.reshape((height, width, 1))
        self.colours = clusterer.means

        return quantized_img
Esempio n. 8
0
def main():
    """Run k-means several times, keep the best run, then fit the EM models.

    K-means is run ``NB_INITIALIZATION_RETRIES`` times; the run with the
    lowest distortion is plotted and passed to ``run_em_model`` twice, once
    with ``sigma_prop_identity=True`` and once with ``False``.
    """
    # Reading the training data
    path_train = './data/EMGaussian.data'
    path_test = './data/EMGaussian.test'
    data = dp.parse_data_wo_labels(path_train, 2, delimiter=' ')
    data_test = dp.parse_data_wo_labels(path_test, 2, delimiter=' ')

    # Initialization with K-means
    best_kmean_model = None
    min_distortion = float("inf")
    distortions = []
    # ``range`` instead of the Python-2-only ``xrange``: it works on both
    # major versions, and the loop index was never used.
    for _ in range(NB_INITIALIZATION_RETRIES):
        kmean_model = Kmeans(data, NB_CLUSTERS, MAX_K_MEAN_ITER)
        kmean_model.run()
        distortions.append(kmean_model.distortion)

        if kmean_model.distortion < min_distortion:
            best_kmean_model = kmean_model
            min_distortion = kmean_model.distortion

    # Showing the distortions
    plt.plot(range(1, NB_INITIALIZATION_RETRIES + 1), distortions)
    plt.xlabel("Initialization number")
    plt.ylabel("Distortion")
    plt.title("Running few Kmeans and measuring the distortion for each")
    plt.show()

    # Plotting the result
    best_kmean_model.plot()

    # Case where the covariance matrix is proportional to identity
    run_em_model(data, data_test, best_kmean_model, sigma_prop_identity=True)

    # General Case
    run_em_model(data, data_test, best_kmean_model, sigma_prop_identity=False)
Esempio n. 9
0
    def kmeansselect(self):
        """Cluster ``self.data`` into ``self._nsub`` groups and return the
        result of ``dist.vq`` on the model's data and centres ``W``."""
        model = Kmeans(self.data, num_bases=self._nsub)
        model.initialization()
        model.factorize()

        # Select the data samples that lie closest to the centres.
        return dist.vq(model.data, model.W)
 def quantize(self, img):
     """Fit a 2**b-cluster model to the pixels of ``img``.

     Stores the image as ``self.img`` and the fitted model as
     ``self.model``; returns the model's ``means``.
     """
     bits = self.b
     n_rows, n_cols, depth = img.shape
     self.img = img

     # One row per pixel.
     flat = np.reshape(img, (n_rows * n_cols, depth))

     clusterer = Kmeans(k=2 ** bits)
     clusterer.fit(flat)
     self.model = clusterer
     return clusterer.means
def main():
    """Cluster two tab-separated data sets, report indices and plot them.

    Column 1 of each file is passed to ``Kmeans`` as its second argument
    (the code later reads ``ground_truth_clusters`` from the model) and
    columns 2 onward are the features. For each data set the Rand index and
    Jaccard coefficient are printed and a plot is written under ``../log``.
    """
    dataset1 = np.genfromtxt(r'../data/new_dataset_1.txt',
                             dtype=float,
                             delimiter='\t')
    dataset2 = np.genfromtxt(r'../data/cho.txt', dtype=float, delimiter='\t')

    km1 = Kmeans(dataset1[:, 2:], dataset1[:, 1], 3)
    km2 = Kmeans(dataset2[:, 2:], dataset2[:, 1], 10)

    # The return values were never used; the calls are kept because they
    # presumably set the starting centroids on the models.
    km1.initial_centroids(3, 5, 9)
    km2.initial_random_centroids(5)

    # specify iteration as parameter here
    km1.kmeans_algorithm()
    km2.kmeans_algorithm()

    extr_index_validation1 = ExternalIndex(km1.ground_truth_clusters,
                                           km1.clusters)
    extr_index_validation2 = ExternalIndex(km2.ground_truth_clusters,
                                           km2.clusters)

    print('Rand Index of dataset1 clusters :',
          extr_index_validation1.rand_index())
    print('Jaccard Coefficient of dataset1 clusters :',
          extr_index_validation1.jaccard_coefficient())

    print('Rand Index of dataset2 clusters :',
          extr_index_validation2.rand_index())
    print('Jaccard Coefficient of dataset2 dataset clusters :',
          extr_index_validation2.jaccard_coefficient())

    # Slice the arrays themselves: ``ndarray.data`` is the raw buffer
    # (a memoryview), which does not support 2-D slicing like ``[:, 2:]``.
    plot1 = Visualization(dataset1[:, 2:], km1.clusters, dataset1[:, 1])
    plot2 = Visualization(dataset2[:, 2:], km2.clusters, dataset2[:, 1])
    plot1.plot(r'../log/td1.jpg')
    plot2.plot(r'../log/cho2.jpg')
Esempio n. 12
0
def main():
    """Generate a Gaussian point board, cluster it, save and print the model."""
    # Synthetic input produced by the board generator.
    board = tc.init_board_gauss(nb_points, nb_classe, mini, maxi,
                                ecart_min, ecart_max)

    settings = dict(nb_cluster=nb_cluster,
                    cpu=cpu,
                    methode_dist=methode_dist,
                    adr=img_dir)
    km = Kmeans(board, **settings)

    km.run_global(choose_nb_graph=True, grphq=True)
    km.save(km_path)
    print("\n{}".format(km))
    return None
Esempio n. 13
0
    def _init_kmeans(self):
        """
        Build initial mixture estimates from a k-means model.

        Returns the transposed ``cluster`` attribute and the ``label``
        attribute of a ``Kmeans`` built on ``self.data`` with ``self.k``
        clusters. The original author notes that the k-means itself is
        randomly initialised and calls that a really bad idea.
        """
        # Component means are estimated via k-means.
        model = Kmeans(self.data, self.k)
        centres = model.cluster.T
        labels = model.label
        return centres, labels
Esempio n. 14
0
    def quantize(self, X):
        """Cluster the entries of a 3-D array into 2**b groups.

        ``X`` is unpacked as ``(N, D, C)`` and flattened to ``(N * D, C)``
        rows before fitting. Nothing is returned; the results are stored on
        the instance:

        * ``self.means`` - the fitted model's ``means``
        * ``self.y``     - predicted cluster index per entry, shaped (N, D)
        * ``self.X``     - the input array
        """
        N, D, C = X.shape

        X_reshaped = np.reshape(X, (N * D, C))
        # NOTE(review): looks like leftover debug output; kept so the
        # method's visible output is unchanged.
        print(X_reshaped)

        model = Kmeans(np.power(2, self.b))
        model.fit(X_reshaped)
        # Predict once; the earlier duplicate call discarded its result.
        y = np.reshape(model.predict(X_reshaped), (N, D))
        self.means = model.means
        self.y = y
        self.X = X
Esempio n. 15
0
def main(_argv):
    """Load the spiral samples, cluster them and show the animation.

    ``progress`` and ``chart`` are not defined here; they are resolved from
    the enclosing scope.
    """
    loaded = wczytaj_baze_probek_z_tekstem('spirala.txt', 'spirala-type.txt')
    samples_text, attr_names, attr_is_symbolic = loaded
    samples = probki_str_na_liczby(samples_text, (0, 1))

    # Kmeans is called with ``progress`` and returns two values.
    groups, centres = Kmeans(samples, FLAGS.groups, FLAGS.iterations,
                             progress)

    figure = plt.figure(1)
    # Keep a reference to the animation object for the duration of show().
    anim = animation.FuncAnimation(
        figure,
        Animate,
        frames=len(progress),
        repeat=False,
        interval=500,
    )
    chart.show()
Esempio n. 16
0
    def create_splits(self, X):
        """Compute split thresholds from per-feature k-means centres.

        Each column of ``X`` is clustered with a 3-cluster model and the
        resulting centres are collected; ``self.thresholds`` is set to the
        sorted unique values over all columns.

        Fixes over the previous version:
        * every centre gets its own slot (``splits[d]`` used to be
          overwritten for each cluster of column ``d``);
        * thresholds come from the collected ``splits`` rather than from
          ``self.means``, which this method never sets;
        * the column is passed to ``fit`` as an (N, 1) matrix, as the other
          ``create_splits`` variants in this file do.
        """
        model = Kmeans(3)
        N, D = X.shape
        k = model.k

        splits = np.empty((D * k, ))

        for d in range(D):
            column = np.reshape(X[:, d], (N, 1))
            model.fit(column)
            # assumes model.means holds k one-dimensional centres - TODO confirm
            splits[d * k:(d + 1) * k] = np.ravel(model.means)

        self.thresholds = np.unique(splits)
Esempio n. 17
0
def main():
    """Load the data directory, cluster the matrix, save and print the model."""
    frame = bdd.date_dir(path)
    idx, mtx = bdd.df2np(frame)
    # Drop the frame reference once it has been converted.
    del frame

    settings = dict(nb_cluster=nb_cluster,
                    cpu=cpu,
                    methode_dist=methode_dist,
                    adr=img_dir,
                    index=idx)
    km = Kmeans(mtx, **settings)
    km.run_global(grphq=True, choose_nb_graph=True)

    km.save(km_path)
    print("\n{}".format(km))
    return None
    def quantize(self, img):
        """
        Quantizes an image into 2^b clusters

        Parameters
        ----------
        img : a (H,W,3) numpy array

        Returns
        -------
        quantized_img : a (H,W) uint8 numpy array containing cluster indices

        Stores
        ------
        colours : a (2^b, 3) uint8 numpy array, each row is a colour

        """
        H, W, _ = img.shape
        n_colours = 2**self.b

        model = Kmeans(k=n_colours)
        X = np.reshape(img, (H * W, 3))
        model.fit(X)
        y = model.predict(X)
        print(y.shape)

        # Whole-array assignment into uint8 buffers replaces the former
        # per-row and per-pixel Python loops.
        self.colours = np.zeros((n_colours, 3), dtype='uint8')
        self.colours[:] = model.means[:n_colours, :]

        # NOTE(review): uint8 can only represent indices 0..255, so b > 8
        # would not fit - confirm the intended range of b.
        flat_labels = np.zeros(H * W, dtype='uint8')
        flat_labels[:] = y[:H * W]

        # Back from a flat label vector to image layout.
        quantized_img = np.reshape(flat_labels, (H, W))

        return quantized_img
Esempio n. 19
0
    def create_splits(self, X):
        """Set ``self.thresholds`` to a list holding, for each column of
        ``X``, the squeezed means of a 10-cluster model fitted to it."""
        n_rows, n_cols = X.shape
        per_feature = []

        for col in range(n_cols):
            # k = 10 was chosen with the elbow method (per the original note).
            clusterer = Kmeans(k=10)

            # Every example's value for this feature, as an (N, 1) matrix.
            column = X[:, col].reshape(n_rows, 1)
            clusterer.fit(column)
            per_feature.append(np.squeeze(clusterer.means))

        self.thresholds = per_feature
    def quantize_image(self, img):
        """Cluster the pixels of ``img`` into 2**b groups.

        Stores the fitted model's ``means`` on ``self.means``, prints the
        assignments and returns the per-pixel labels from ``predict``.
        """
        width, height, depth = tuple(img.shape)

        # One row per pixel.
        flat_pixels = np.reshape(img, (width * height, depth))

        clusterer = Kmeans(k=(2**self.b))
        clusterer.fit(flat_pixels)
        assignments = clusterer.predict(flat_pixels)
        self.means = clusterer.means

        print("Cluster Assignments")
        print(assignments)
        return assignments
Esempio n. 21
0
def main():
    """Build the matrix from the directory, cluster it, print the word means,
    then save and print the model."""
    # concat_dir -> drop_profile -> bdd2bow, applied in that order.
    frame = bdd.bdd2bow(bdd.drop_profile(bdd.concat_dir(path), drop_var))
    idx, mtx = bdd.df2np(frame)
    col = frame.columns.values.astype(str)
    # Drop the frame reference once the matrix and column names are extracted.
    del frame

    settings = dict(nb_cluster=nb_cluster,
                    cpu=cpu,
                    methode_dist=methode_dist,
                    adr=img_dir,
                    index=idx)
    km = Kmeans(mtx, **settings)
    km.run_global(choose_nb_graph=True)
    bdd.print_means_words(km, col)
    km.save(km_path)
    print("\n{}".format(km))
    return None
Esempio n. 22
0
def main():
    """Build and save a model from a playlist unless one already exists.

    If ``leerModelo()`` returns ``None``, the playlist data is fetched, a
    ``Kmeans`` is built on it, and - when the data is non-empty and
    ``importarDatos()`` leaves ``red`` set - the model is stored with
    ``guardarModelo``.
    """
    KMEANS = leerModelo()

    # Identity checks instead of ``== None`` / ``!= None``: equality can be
    # overridden by the compared object and need not return a plain bool.
    if KMEANS is not None:
        print('Ya existe un modelo')
        return

    print('NO EXISTE UN MODELO')
    spotify = SpotifyPro()
    df = spotify.iniciar(idPlaylist='1QP6tyANnZZ9bRTfQG4X7a')
    k = Kmeans(df)  # holds the network and the datasets
    if len(df):
        k.importarDatos()
        if k.red is not None:
            guardarModelo(k)
    def dequantize_image(self, img):
        """Return an array shaped like ``img`` where every pixel is replaced
        by the mean of the cluster it is assigned to.

        A 2**b-cluster model is fitted to the pixels of ``img`` here; its
        ``means`` are also stored on ``self.means``.
        """
        width, height, depth = img.shape
        flat_pixels = np.reshape(img, (width * height, depth))
        restored = np.zeros(img.shape)

        clusterer = Kmeans(k=(2**self.b))
        clusterer.fit(flat_pixels)

        assignments = clusterer.predict(flat_pixels)
        self.means = clusterer.means

        # Labels are in row-major pixel order, so pixel (row, col) is at
        # position row * height + col.
        for row in range(width):
            for col in range(height):
                label = assignments[row * height + col]
                restored[row][col] = self.means[label]
        return restored
Esempio n. 24
0
        def closure_1_3_1():
            """Fit 50 four-cluster models to ``X``, plot the lowest-error one
            and save the figure under ``../figs``."""
            n_clusters = 4
            n_restarts = 50

            lowest_error = np.inf
            winner = None
            for _ in range(n_restarts):
                candidate = Kmeans(n_clusters)
                candidate.fit(X)
                candidate_error = candidate.error(X)
                if not candidate_error < lowest_error:
                    continue
                lowest_error = candidate_error
                winner = candidate

            plt.figure()
            utils.plot_2dclustering(X, winner.predict(X))

            fname = os.path.join("..", "figs",
                                 "kmeans_outliers_best_model.png")
            plt.savefig(fname)
            print("\nFigure saved as '%s'" % fname)
Esempio n. 25
0
    def test_kmeans(self):
        """Print the accuracy of the local Kmeans and of sklearn's KMeans.

        Both models use 3 clusters, are fitted on the training split, and
        predict on the test split. Predictions are compared element-wise
        with the integer-encoded species labels.

        NOTE(review): cluster ids are not aligned with class ids here, so the
        printed figures presumably depend on which id each cluster happens
        to get - confirm whether a label mapping is wanted.
        """
        # Species name -> class id; any other value becomes class 2, exactly
        # as the former if/elif/else chain did.
        label_ids = {"Iris-setosa": 0, "Iris-versicolor": 1}
        exact_labels = [label_ids.get(item, 2) for item in self.data["label"]]

        k = 3
        kmeans = Kmeans(k)

        X_train, X_test, y_train, y_test = train_test_split(self.features,
                                                            exact_labels,
                                                            test_size=0.33,
                                                            random_state=42)

        kmeans.load_data(X_train.to_numpy().tolist())
        kmeans.train()
        labels = kmeans.predict(X_test.to_numpy().tolist())

        accurate_sum = sum(
            1 for predicted, actual in zip(labels, y_test)
            if predicted == actual)

        print("Akurasi K-Means: ", accurate_sum / len(labels))

        kmeans_sklearn = KMeans(n_clusters=3)
        kmeans_sklearn.fit(X_train)

        # Score predictions for the test split. ``labels_`` holds the
        # assignments of the *training* samples, so comparing it with
        # ``y_test`` paired up unrelated rows.
        sklearn_labels = kmeans_sklearn.predict(X_test)

        sklearn_accurate_sum = sum(
            1 for predicted, actual in zip(sklearn_labels, y_test)
            if predicted == actual)

        print("Akurasi K-Means sklearn: ",
              sklearn_accurate_sum / len(sklearn_labels))
Esempio n. 26
0
    def _init_parameters(self, X, method='kmeans'):
        """
        Initialise the parameters of the Gaussian components.

        If method == 'kmeans', the samples are grouped with k-means;
        if method == 'random', ``self.m`` samples are drawn as centres and
        every sample joins the nearest one.

        Each component is then initialised from its group's rows of ``X``
        and its weight is set to the group's share of the samples.

        Raises
        ------
        ValueError
            If ``method`` is neither 'kmeans' nor 'random'.
        """
        n = X.shape[0]
        self.Guass = [Guass_distribution(dim=self.dim) for _ in range(self.m)]

        # ``==`` rather than ``is``: identity of equal strings is not
        # guaranteed, so ``is`` could reject a valid method name.
        if method == 'kmeans':
            try:
                kmeans = Kmeans()
                labels, centroids = kmeans.main(X,
                                                k=self.m,
                                                t=100,
                                                c_strategy='kmeans')
            except Exception:
                # Fall back to ``vq.kmeans2`` if the local implementation
                # fails. Catching Exception (not a bare ``except``) lets
                # KeyboardInterrupt / SystemExit propagate.
                centroids, labels = vq.kmeans2(X,
                                               self.m,
                                               minit='points',
                                               iter=1000)
            clusters = [[j for j in range(n) if labels[j] == i]
                        for i in range(self.m)]

        elif method == 'random':
            time_seed = int(time.time())
            # NOTE(review): this seeds NumPy's generator, while the centres
            # below are drawn with ``random.sample`` - confirm intent.
            np.random.seed(time_seed)
            clusters = [[] for _ in range(self.m)]
            # Draw m sample indices at random to act as centres.
            centroids = random.sample(list(range(n)), self.m)

            for i in range(n):
                ci = np.argmin([la.norm(X[i] - X[c]) for c in centroids])
                clusters[ci].append(i)

        else:
            raise ValueError("Unknown method type!")

        for i in range(self.m):
            guass = self.Guass[i]
            data = X[clusters[i]]
            guass.init(data)
            guass.weight = len(clusters[i]) / n
Esempio n. 27
0
def sliding_window_three_months(df, date):
    """Cluster the three-monthly candlestick data for ``date`` and wrap the
    four per-cluster components in a ``candlestickState``.

    ``date`` must parse as ``%Y-%m-%d``.
    """
    # Parsed only for validation; the result itself is not used.
    datetime.datetime.strptime(date, "%Y-%m-%d")

    monthly_data = preprocess().get_three_monthly_candlestick_data(df, date)
    clusters = Kmeans(monthly_data).get_clusters()
    print('original', clusters)

    # Split each cluster entry into its four positional components.
    positions = range(0, len(clusters))
    ctut = [clusters[i][0] for i in positions]
    ctlt = [clusters[i][1] for i in positions]
    ctbl = [clusters[i][2] for i in positions]
    ctc = [clusters[i][3] for i in positions]

    return candlestickState(ctut, ctlt, ctbl, ctc)
Esempio n. 28
0
def main():
    """Build the [x, y, value] customer data set and run Kmeans on it.

    A per-customer value is accumulated from the three purchase files
    (integer part of every price), then combined with each customer's
    position from the lunch movement file.

    Files are now opened with ``with`` so they are always closed, and only
    the files that are actually read are opened. Previously seven files
    were opened up front and never closed, three of them
    (points/breakfast/dinner movement) without ever being read.
    """
    # In our dataset, our customers are id'd 0 to 999. Adjust this if the
    # dataset changes its number of clients.
    customerValue = [0] * 1000

    purchase_paths = (
        '../data/breakfastbuy.json',
        '../data/lunchbuy.json',
        '../data/dinnerbuy.json',
    )
    for purchase_path in purchase_paths:
        with open(purchase_path) as purchase_file:
            purchases = json.load(purchase_file)["data"]
        for purchase in purchases:
            customerValue[int(purchase["Client ID"])] += int(
                float(purchase["Price"]))

    print("Loading dataset...")
    # Change the path below to use a different dataset
    # ('../data/breakfast.json' or '../data/dinner.json').
    with open('../data/lunch.json') as movement_file:
        movements = json.load(movement_file)["data"]

    # Data is in x,y. We want row,col
    dataset = [
        [
            int(customer["X"]), -int(customer["Y"]) + 55,
            customerValue[int(customer["ID"])]
        ]
        for customer in movements
    ]
    print("Done!")

    Kmeans(4, dataset)
Esempio n. 29
0
def clusterpixels(infile, k, steps):
    """Cluster an image's tiles by mean colour and return a label image.

    The image is cut into ``steps`` x ``steps`` tiles, each described by the
    mean of its three colour channels. The tiles are clustered into ``k``
    groups and the tile labels are resized to the image's height and width.
    """
    im = np.array(Image.open(infile))
    tile_h = int(im.shape[0] / steps)
    tile_w = int(im.shape[1] / steps)

    row_spans = [slice(r * tile_h, (r + 1) * tile_h) for r in range(steps)]
    col_spans = [slice(c * tile_w, (c + 1) * tile_w) for c in range(steps)]

    # Per-tile feature: mean of each colour channel (rows, cols, channel).
    features = [
        [np.mean(im[rows, cols, channel]) for channel in range(3)]
        for rows in row_spans
        for cols in col_spans
    ]
    # Convert the feature list into an array.
    features = np.array(features, 'f')

    # Clustering; k is the number of clusters.
    centroids, variance, iternum = Kmeans(features, k)

    # Vector-quantise the features against the centroids to get the labels.
    code, distance = vq(features, centroids)

    # Lay the labels out as a steps x steps grid (data unchanged).
    label_grid = code.reshape(steps, steps)

    # Resize the grid to the image size with nearest-neighbour sampling.
    return imresize(label_grid, im.shape[:2], 'nearest')
Esempio n. 30
0
def kmeans(imagens, segmentadas,   path):

    k = Kmeans(imagens)
    qtd  = 0
    
    for i in imagens: 
        # Leitura Imagem
        img = imread(imagens[qtd][0]) 
        #img = cv2.resize(img, (segmentadas[qtd][2], segmentadas[qtd][1]))   

        res2 = k.kmeans_seg(img, 2) / 255
        res3 = k.kmeans_seg(img, 3) / 255
        res9 = k.kmeans_seg(img, 5) / 255
        cv2.imwrite("res2.png", res2)
        cv2.imwrite("res3.png", res3)
        cv2.imwrite("res9.png", res9)

        fig = plt.figure(figsize=(9,3), dpi=200)
        k.add_image(fig, img, 1, 4, 1, 'original')
        k.add_image(fig, res2, 1, 4, 2, 'k=2')
        k.add_image(fig, res3, 1, 4, 3, 'k=3')
        k.add_image(fig, res9, 1, 4, 4, 'k=5')