Example #1
0
	def init_with_kmeans(self):
		'''Initialise BGD_GMM and FGD_GMM, the background and foreground models, using the kmeans algorithm.

		Pixels whose mask value is background / probable background (respectively
		foreground / probable foreground) are collected from self.img, split into
		self.k groups by kmeans, and every group is handed to the matching GMM under
		its group index before learning() is called on both models.
		'''
		# Diagnostics: total pixel count, then the number of mask entries equal to 0
		# (indexing [1] assumes the mask has at least two dimensions).
		print(self.cols*self.rows)
		print(len(np.where(self._mask == 0)[1]))
		max_iter = 2 # Max-iteration count for Kmeans
		# np.logical_or is needed in place of `or` to combine the two mask comparisons.
		maybe_bgd = np.logical_or(self._mask == self._GC_BGD, self._mask == self._GC_PR_BGD)
		maybe_fgd = np.logical_or(self._mask == self._GC_FGD, self._mask == self._GC_PR_FGD)
		self._bgd = np.where(maybe_bgd) # Places where pixels in the mask MAY belong to BGD.
		self._fgd = np.where(maybe_fgd) # Places where pixels in the mask MAY belong to FGD.
		self._BGDpixels = self.img[self._bgd]
		self._FGDpixels = self.img[self._fgd]
		KMB = kmeans(self._BGDpixels, dim = 3, n = self.k, max_iter = max_iter) # The Background Model by kmeans
		KMF = kmeans(self._FGDpixels, dim = 3, n = self.k, max_iter = max_iter) # The Foreground Model by kmeans
		KMB.run()
		KMF.run()
		self._BGD_by_components = KMB.output()
		self._FGD_by_components = KMF.output()
		self.BGD_GMM = GMM() # The GMM Model for BGD
		self.FGD_GMM = GMM() # The GMM Model for FGD
		# Add the pixels to the GMMs component by component.
		for ci in range(self.k):
			for pixel in self._BGD_by_components[ci]:
				self.BGD_GMM.add_pixel(pixel, ci)
			for pixel in self._FGD_by_components[ci]:
				self.FGD_GMM.add_pixel(pixel, ci)
		self.BGD_GMM.learning()
		self.FGD_GMM.learning()
            n = 100 * (index - 2)

        n = index

        print "K-means for " + str(n) + " centroids."

        center = None
        label = None
        ret = 99999999999

        for _ in range(1):
            centroids = features[np.random.choice(range(len(features)),
                                                  n,
                                                  replace=False)]

            c_ret, c_label, c_center = kmeans(features, k = n, \
                                              centroids = centroids, steps = 100)

            if c_ret < ret:
                ret = c_ret
                label = c_label
                center = c_center

            print ret

        rets.append(ret)

    save_clustering("features/hog_cluster.bin", c_label, c_center)
    # plt.plot(rets)
    # plt.show()
Example #3
0
        pid_list, command_list, param_list = [], [], []
        index_0, index_1 = [], []
        print('node: ', nodes[i][0])
        for param in params[i]:
            if param[2] is not None:
                pid_list.append(param[0])
                command_list.append(param[1])
                param_list.append(param[2])
        dataset = np.vstack((param_list, param_list))
        dataset = np.transpose(dataset)

        # kmeans clustering
        k = 2
        dataset = np.mat(dataset)
        if len(dataset) > 0:
            centroids, cluster_assment = km.kmeans(dataset, k)
            for index, value in enumerate(cluster_assment[:, 0]):
                if value == 0:
                    index_0.append(index)
                if value == 1:
                    index_1.append(index)
            print('kmeans: ', param_i)
            if len(index_0) <= len(index_1):
                print('command: ', [command_list[j] for j in index_0])
                #print('pid: ', [pid_list[j] for j in index_0])
            else:
                print('command: ', [command_list[j] for j in index_1])
                #print('pid: ', [pid_list[j] for j in index_0])
            print()

            # 2d plot
Example #4
0
def k_means_RBFS(training_samples, N):
    """Derive RBF centres and a shared width from a k-means run.

    The centres are whatever ``kmeans(training_samples, N)`` returns; the
    width is one tenth of the centres' overall value range divided by
    ``sqrt(2 * N)``.

    Returns:
        A ``(centres, width)`` tuple.
    """
    centres = kmeans(training_samples, N)
    # Overall range of the centre values (max and min taken over all entries)
    value_range = centres.max() - centres.min()
    width = 0.1 * value_range / np.sqrt(2 * N)
    return centres, width
Example #5
0
# coding=utf-8
import sys
import os

root_path = os.getcwd()  # 获得当前py的根目录
sys.path.append(root_path + '/Other')  # 导入Other文件夹
from OnePassCluster import *
import k_means
from sklearn import preprocessing

if __name__ == '__main__':
    # Load the sample vectors, cluster them with k-means (timed), print the
    # members of each cluster, then run the one-pass clustering on the same data.
    vectors = np.loadtxt('Other/dataSet/dim1024.txt')  # k=16 threshold=0.1927
    # vectors = np.loadtxt('Other/dataSet/g2-256-100.txt')  # 0.192717136469125
    # Normalization
    vectors = preprocessing.normalize(vectors)
    k = 16
    t1 = time.time()
    cluster_result = np.array(k_means.kmeans(k=k, vectors=vectors))
    t2 = time.time()
    # time.time() returns seconds, so the difference is already in the unit
    # the message claims; it must not be divided by 1000.
    print("k-means spend time %.9fs" % (t2 - t1))
    print(cluster_result)
    for i in range(k):
        # Indices of the vectors assigned to cluster i
        print("%s %s" % (i, np.where(cluster_result == i)[0]))
        print('-------')

    o_p_c = OnePassCluster(t=0.1927, vector_list=vectors)
    o_p_c.print_result()
Example #6
0
    def _parse_address(self):
        '''
        Convert address to binary arrays.

        Steps:
          1. Strip a leading integer token (house number) from every address in
             column 6 of self.data and write the cleaned value back.
          2. Average columns 7 and 8 of self.data per distinct address.
          3. Cluster those per-address means with kmeans, keeping the labels of
             the run with the lowest returned score.
          4. Replace every address by "A" + its cluster label and pass the
             result through self._binarize_feature.

        Returns:
            (labels, data): the unique relabelled values and whatever
            self._binarize_feature(data, labels) returns.

        Raises:
            IndexError: if no per-address mean falls into one of the three seed
                bands on its second component (> 85, 68..80, 50..70).
        '''
        print("Parsing address.")
        print(len(np.unique(self.data[:, 6])))

        data = []
        for index, address in enumerate(self.data[:, 6]):
            parts = address.split(' ', 1)
            # Drop the leading number only when something follows it; an
            # address that is just a number is kept unchanged instead of
            # raising IndexError.
            if len(parts) == 2 and self._is_int(parts[0]):
                address = parts[1]
            data.append(address)
            self.data[index][6] = address

        labels = np.unique(data)
        data = np.array(data)

        # One [mean of column 7, mean of column 8] pair per distinct address
        means_address = []
        for address_label in labels:
            address_data = self.data[np.where(data == address_label)]
            means_address.append([np.mean(address_data[:, 7]), np.mean(address_data[:, 8])])
        means_address = np.float32(np.array(means_address))

        # Fixed seeds: the first mean found in each band of the second component
        second = means_address[:, 1]
        centroid1 = means_address[np.where(second > 85)][0]
        centroid2 = means_address[np.where(np.logical_and(second > 68, second < 80))][0]
        centroid3 = means_address[np.where(np.logical_and(second > 50, second < 70))][0]
        total = len(means_address)

        for n in range(100, 101):
            label = None
            ret = 9999999

            for _ in range(5):
                if n == 2:
                    centroids = np.float32([centroid2,
                                            means_address[np.random.randint(total)]])
                elif n == 3:
                    centroids = np.float32([centroid1, centroid2,
                                            means_address[np.random.randint(total)]])
                elif n == 4:
                    # Converted to float32 like every other branch
                    centroids = np.float32([centroid1, centroid2, centroid3,
                                            means_address[np.random.randint(total)]])
                else:
                    seeds = np.float32([centroid1, centroid2, centroid3])
                    extra = means_address[np.random.choice(range(len(means_address)), n - 3)]
                    centroids = np.vstack((seeds, extra))

                    # The random picks may repeat. Deduplicate by ROW (axis=0):
                    # without axis, np.unique flattens the array, which breaks
                    # the column indexing below. Pad with random points until
                    # there are n distinct centroids.
                    while len(np.unique(centroids, axis=0)) < n:
                        centroids = np.unique(centroids, axis=0)
                        x = np.random.rand() * np.mean(centroids[:, 0])
                        y = np.random.rand() * np.mean(centroids[:, 1])
                        centroids = np.vstack((centroids, np.float32([[x, y]])))

                c_ret, c_label, c_center = kmeans(means_address, k = n, \
                                                  centroids = centroids, steps = 1000)

                # Keep the labels of the best (lowest-score) run so far
                if c_ret < ret:
                    ret = c_ret
                    label = c_label

                print(ret)

        # Replace each address by the name of the cluster its mean fell into
        for index_label, address_label in enumerate(labels):
            data[np.where(data == address_label)] = "A" + str(label[index_label])

        labels = np.unique(data)
        data = self._binarize_feature(data, labels)

        return labels, data
from numpy import genfromtxt
from sklearn import svm, metrics
from sklearn.neighbors import KNeighborsClassifier
import scipy.io
from k_means import kmeans

# Load the table stored under "dataimage_plus" in the MATLAB file
data = scipy.io.loadmat('dataku.mat')["dataimage_plus"]

#data = genfromtxt('dataimage.csv', delimiter=',')
n_samples = len(data)

# Input features: columns 0:2 are the GLCM entropy and energy
# (columns 2:4 would be the FFT mean and standard deviation)
data_i = data[:, :2]

# Cluster the inputs; 3 is passed as kmeans' second argument
data_o, c = kmeans(data_i, 3)
print(len(data_o))

# Expected outputs: column 4 shifted down by one
data_expected = data[:, 4] - 1
print(len(data_expected))

#split 50 % data (data training)
# data_i_train = data_i[0:][::2]
# data_o_train = data_o[0:][::2]

# #split 50 % data (data testing)
# data_i_test = data_i[1:][::2]
# data_o_test = data_o[1:][::2]

# #create knn classifier
# neigh = KNeighborsClassifier(n_neighbors=3)
Example #8
0
raw_data = mat73.loadmat('./data/kmeans_pts.mat')

gr_list = [t[0] for t in raw_data['Data']['gr_pts']]

n_itrs = 20

#finish this and run by tomorrow morning.
cluster_purities = pd.DataFrame(
    columns=['Algorithm', 'NumberClusters', 'ClusterPurity'])
for k in range(5, 30, 5):
    print('.')
    print('.')
    print('.')
    for exp in range(10):
        # for exp in range(2):
        centers = km.kmeans(gr_list, k, n_itrs, 'flag')
        cluster_purity = km.clusterPurity(labels_true, gr_list, centers,
                                          'flag')
        cluster_purities = cluster_purities.append(
            {
                'Algorithm': 'Flag Mean',
                'NumberClusters': k,
                'ClusterPurity': cluster_purity
            },
            ignore_index=True)
        print("Flag trial" + str(exp + 1) + " finished")
        print('.')

        centers = km.kmeans(gr_list, k, n_itrs, 'sine')
        cluster_purity = km.clusterPurity(labels_true, gr_list, centers,
                                          'sine')
Example #9
0
#pl.hist(useful_values,50, normed=1, facecolor='green', alpha=0.75)
#pl.show()

# Three independent copies of time_mat; only time_mat_PE2 is modified in this section.
time_mat_PE1 = deepcopy(time_mat)
time_mat_PE2 = deepcopy(time_mat)
time_mat_PE3 = deepcopy(time_mat)
dim_max = np.shape(time_mat)
# Walk every cell and clamp the PE2 copy: cells whose value/500 exceeds 1 become 500,
# cells whose value/500 is negative become 0; everything else keeps its copied value.
for i1 in range(0, dim_max[0]):
    for j1 in range(0, dim_max[1]):
        temp = double(time_mat[i1, j1]/500)
        if temp > 1:
            time_mat_PE2[i1, j1] = 500
        if temp <0:
            time_mat_PE2[i1, j1] = 0

# NOTE(review): the clustering runs on the original time_mat, not on the clamped
# time_mat_PE2 copy — confirm this is intended. 5 is passed as kmeans' second argument.
centroids, clusterAssment = kmeans(time_mat, 5) 
print (clusterAssment)
print (np.shape(clusterAssment))
#showCluster(time_mat, 5, centroids, clusterAssment)  

########################################  POINT  #####
# Header row: point id, three coordinate columns, then thirteen value columns.
first_line = ['pointId', 'lon', 'lat', 'alt', 'valueOfTime1', 'valueOfTime2', 'valueOfTime3', 'valueOfTime4', 'valueOfTime5', \
'valueOfTime6', 'valueOfTime7', 'valueOfTime8', 'valueOfTime9', 'valueOfTime10', 'valueOfTime11', 'valueOfTime12', 'valueOfTime13']


# Write the same header to each of the five writers (presumably one per cluster — confirm).
outfp_o_cl1.writerow(first_line)
outfp_o_cl2.writerow(first_line)
outfp_o_cl3.writerow(first_line)
outfp_o_cl4.writerow(first_line)
outfp_o_cl5.writerow(first_line)
Example #10
0
    print("=================================================================")
    '''
    K-Means Algorithm
    '''

    print("\n\nK-means Clustering")
    k = input("Enter K: ")
    max_iterations = input("Enter Maximum iterations: ")
    min_df = input("Enter Minimum document Frequency: ")
    print("=================================================================")
    k = int(k)
    max_iterations = int(max_iterations)
    min_df = int(min_df)

    start = time.time()
    Kmeans = k_means.kmeans()

    # Min Document Frequency is used as Feature Selcection Parameter
    y_pred, labels = Kmeans.clustering(X, k, max_iterations, min_df)

    # Vectors and Features of X in K-means Clustering
    '''
    k_means_vector = Kmeans.vectors
    k_means_features = Kmeans.features
    '''

    contingency_matrix = metrics.cluster.contingency_matrix(y, y_pred)
    score = purity_score(y, y_pred)
    print("Purity: " + str(score))
    end = time.time()
    tot = end - start
Example #11
0
from numpy import genfromtxt
from sklearn import svm, metrics
from sklearn.neighbors import KNeighborsClassifier
import scipy.io
from k_means import kmeans

# Load the table stored under "dataimage_plus" in the MATLAB file
data = scipy.io.loadmat('dataku.mat')["dataimage_plus"]

#data = genfromtxt('dataimage.csv', delimiter=',')
n_samples = len(data)

# Input features: columns 0:2 are the GLCM entropy and energy
# (columns 2:4 would be the FFT mean and standard deviation)
data_i = data[:, :2]

# Cluster the inputs; 3 is passed as kmeans' second argument
data_o, c = kmeans(data_i, 3)

# Expected outputs are taken from column 4
data_exp = data[:, 4]

# Training half: every second row starting at row 0
data_i_train = data_i[::2]
data_o_train = data_o[::2]

# Testing half: every second row starting at row 1; expected values shifted down by one
data_i_test = data_i[1::2]
data_o_test = data_exp[1::2] - 1

# k-nearest-neighbours classifier with 3 neighbours
neigh = KNeighborsClassifier(n_neighbors=3)

#We learn the digits on the first half of the digits
Example #12
0
# coding=utf-8
'''
Author: ripples
Email: [email protected]

date: 2020/3/11 15:34
desc:
'''
# Idea: add a comparison of results on a shuffled vs. an unshuffled dataset

import sys
import copy
sys.path.append('../')
import numpy as np
import matplotlib.pyplot as plt
from k_means import kmeans

# Data file handed to kmeans, relative to the current working directory
path = '../iris/iris.data'
# Build the clusterer from the data path; 3 is presumably the number of clusters — confirm in k_means
x = kmeans(path, 3)
# Presumably runs the clustering computation — confirm in k_means
x.cal()
# x.plot_label_true()
Example #13
0
import scipy.io as sio
import numpy as np
import mat73
import center_algorithms as ca
import matplotlib.pyplot as plt
import k_means as km
import seaborn as sns
import pandas as pd

# Load the 'kmeans_action_labels' entry of the labels file
labels_raw = sio.loadmat('./data/kmeans_action_labels.mat')['kmeans_action_labels']

# Unwrap each entry of the nested 'labels' field into a flat Python list
labels_true = [entry[0][0] for entry in labels_raw['labels'][0][0]]
# labelidxs =labels_raw['labelidxs'][0][0][0]

raw_data = mat73.loadmat('./data//kmeans_pts.mat')

# First element of every item stored under Data -> gr_pts
gr_list = [item[0] for item in raw_data['Data']['gr_pts']]

n_itrs = 20
k = 15

# Cluster with the 'flag' variant, then score the centers against the true labels
centers = km.kmeans(gr_list, k, n_itrs, 'flag')
cluster_purity = km.clusterPurity(labels_true, gr_list, centers, 'flag')
Example #14
0
import numpy as np
import matplotlib.pyplot as plt
from k_means import kmeans

# исходные данные
# Input data: ten 2-D points
X = np.array([
  (4, 4), (3, 3), (5, 3), (2, 3), (5, 5),
  (3, 2), (2, 4), (4, 5), (5, 4), (2, 2),
])

# Run the clustering
ans = kmeans(2, X)

# Show the results: input points as blue crosses, returned points as red stars
print(ans)
plt.plot(X[:, 0], X[:, 1], 'bx',
         ans[:, 0], ans[:, 1], 'r*',
         markersize=20)
plt.grid()
plt.show()
Example #15
0
        output = lda.final_output
        with open("cluster_images/top10topicswith10wordswithoutprobs.csv",
                  "a") as top:
            top.write("num_topics=" + str(num_topics) + "_no_above=" +
                      str(no_above).replace(".", "") + ",")
            topics = lda.get_topics(num_words=10, probs=False)
            for topic_words in topics:
                top.write(str(topic_words) + ",")
            top.write("\n")
        with open("centroids.csv", "a") as top:
            top.write(",")
            for i in range(1, 11):
                top.write("cluster" + str(i) + ",")
            top.write("\n")
        for k in [8]:
            k_means = kmeans(output, "histograms")
            labels, score = k_means.cluster(k)
            with open("centroids.csv", "a") as top:
                top.write("num_topics=" + str(num_topics) + "_no_above=" +
                          str(no_above).replace(".", "") + "_k=" + str(k) +
                          ",")
                centroids = k_means.centroids
                for center in centroids:
                    for counter, topics in enumerate(center):
                        top.write(str(counter + 1) + "= " + str(topics) + " ")
                    top.write(",")

                #top.write("\n")

            k_means.plot_histogram2(
                "num_topics=" + str(num_topics) + "_no_above=" +
Example #16
0
"""
Created on Tue Oct 10 11:40:16 2017

@author: xuwh
"""

from numpy import *
import time
import matplotlib.pyplot as plt
import types
import k_means
## step 1: load data
print("step 1: load data...")
dataSet = []
# The context manager closes the file even if a line fails to parse
with open(r'D:\Python\MachineLearningInAction\testSet.txt') as fileIn:
    for line in fileIn:
        lineArr = line.strip().split()
        # Skip blank lines instead of failing on lineArr[0]
        if not lineArr:
            continue
        # Only the first column of each line is used
        dataSet.append([float(lineArr[0])])

print("step 2: clustering...")
# Convert dataSet to a matrix with mat() so linear-algebra operations can be applied
dataSet = mat(dataSet)
k = 4
centroids, clusterAssment = k_means.kmeans(dataSet, k)

# step 3: show the result
print("step 3: show the result...")
#plt.plot()
k_means.showCluster(dataSet, k, centroids, clusterAssment)
Example #17
0
"""
Created on Sat Oct 25 20:34:32 2014

@author: Imane
"""
import numpy as np
import matplotlib.pyplot as plt
#from os import listdir
#from os.path import isfile, join
#from zscoring import zscoringNII
#from masking import maskdata
from sklearn.decomposition import PCA
from k_means import kmeans

#Applying PCA and plotting
# Raw string: "\d" is not a valid escape sequence, so the non-raw literal
# triggers a warning on current Python. The resulting path is unchanged.
fn = r"dataZM\dataMask2.npy"
d=np.load(fn)
# Project the data onto its first two principal components
pca = PCA(n_components=2)
pca.fit(d)
dpca=pca.transform(d)

# All projected points in blue
plt.scatter(dpca[:,0], dpca[:,1], marker='o', color='b')


#Applying kmeans and plotting
idx, ctrs = kmeans(dpca, 2)

# Redraw the points per cluster index (0 in red, 1 in blue), then the centers in black
plt.scatter(dpca[(idx==0),0], dpca[(idx==0),1], marker='o', color='r')
plt.scatter(dpca[(idx==1),0], dpca[(idx==1),1], marker='o', color='b')
plt.scatter(ctrs[:,0], ctrs[:,1], marker='o', color='k', linewidths=5)