def run_and_draw(latitude, longitude, k):
    datList = []
    # Collect the (latitude, longitude) feature pair for every point
    for P in range(len(latitude)):
        datList.append([latitude[P], longitude[P]])
    datMat = mat(datList)  # build the matrix once, after the loop

    myCentroids, clusterAssing = K_means.biKmeans(datMat, k)

    LA = mean(latitude)
    LO = mean(longitude)

    center = []
    other = []
    # Add the cluster centers
    for h, l in zip(myCentroids[:, 0].flatten().A[0],
                    myCentroids[:, 1].flatten().A[0]):  # unpack
        center.append([f(h), f(l)])

    for m in range(k):

        # Add the city points belonging to cluster m
        ptsInCurrCluster = datMat[nonzero(clusterAssing[:, 0].A == m)[0], :]
        for i, j in zip(ptsInCurrCluster[:, 0].flatten().A[0],
                        ptsInCurrCluster[:, 1].flatten().A[0]):  # unpack
            other.append([f(i), f(j), m])
    print('[INFO]: attraction clustering finished')
    return center, other, LO, LA
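
# NOTE: f() above is an external helper this snippet assumes but does not show.
# A minimal sketch, assuming it only rounds a coordinate for display on a map:
def f(coord):
    # Hypothetical formatter: round a latitude/longitude to two decimals.
    return round(float(coord), 2)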
Example #2
def get_sse(data, centroid, q):
    """Per-cluster sum of distances between each point and its assigned centroid."""
    sse = []
    for i in range(len(centroid)):
        total = 0  # avoid shadowing the built-in sum()
        for j in range(len(q)):
            if q[j] == i:
                total += K_means.get_dist(data[j][0], data[j][1], centroid[i][0], centroid[i][1])
        sse.append(total)
    return sse
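
# NOTE: get_sse relies on a K_means.get_dist helper that is not shown here.
# A minimal sketch of that helper, assuming it returns the squared Euclidean
# distance (which makes get_sse a true sum of squared errors):
def get_dist(x1, y1, x2, y2):
    # Squared Euclidean distance between (x1, y1) and (x2, y2).
    return (x1 - x2) ** 2 + (y1 - y2) ** 2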
def draw_Image(points, m, original_data, vertical_digit, horizontal_digit):
    # Plotting starts below
    # plt.figure(figsize=(12, 9))
    # plt.imshow(original_data)   # needs plt.show() to actually display
    # plt.axis('off')
    # plt.show()

    # Create the output folder (requires `import os`)
    if not os.path.exists("../Home1.2_Image"):
        os.mkdir("../Home1.2_Image")
    for i in [2, 3, 5, 10, 20, 50]:
        # Create the per-K output folder
        filePackage = "../Home1.2_Image/K_equals" + str(i) + "/"
        if not os.path.exists(filePackage):
            os.mkdir(filePackage)
        print('Number of clusters:', i)
        starttime = time.time()
        # Use the K-means method
        c_new, labels = K_means(points, i, m)
        # Use the K-medoid method instead:
        # c_new, labels = K_metroid(points, i, m)
        endtime = time.time()
        runningTime = endtime - starttime
        runningTimeAnnounce = "for K= " + str(
            i) + " the running time is " + str(runningTime) + "s"

        new_image = np.zeros((vertical_digit, horizontal_digit, 3))
        labels = np.array(labels)
        j = 0
        # Build the 3-D image array from the cluster labels
        while j < m:
            row = j // horizontal_digit
            column = j % horizontal_digit
            new_image[row, column, :] = c_new[labels[j], :]
            j = j + 1

        j = 0
        # Convert the centroids into a 1-row image and plot them
        new_centroid = np.zeros((1, i, 3))
        while j < i:
            new_centroid[0, j, :] = c_new[j, :]
            j = j + 1

        plt.figure(figsize=(12, 1))
        plt.imshow(new_centroid.astype('uint8'))
        plt.axis('off')
        fileaddressKdemo = "../Home1.2_Image/" + str(i) + "K.jpg"
        plt.savefig(fileaddressKdemo)
        # plt.show()
        plt.figure(figsize=(12, 9))
        plt.imshow(new_image.astype('uint8'))
        plt.axis('off')
        plt.text(12, 4, runningTimeAnnounce)
        fileaddressdemo = filePackage + str(i) + "demo.jpg"
        plt.savefig(fileaddressdemo)
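
# NOTE: draw_Image expects a K_means(points, k, m) callable that returns
# (centroids, labels) for m RGB pixels; that function is not part of this
# snippet. A minimal sketch under those assumptions (plain Lloyd iterations,
# fixed iteration count):
def K_means(points, k, m, n_iter=20):
    rng = np.random.default_rng(0)
    points = np.asarray(points, dtype=float)
    # Start from k distinct pixels chosen at random
    centroids = points[rng.choice(m, size=k, replace=False)]
    labels = np.zeros(m, dtype=int)
    for _ in range(n_iter):
        # Assign each pixel to its nearest centroid
        dists = np.linalg.norm(points[:, None, :] - centroids[None, :, :], axis=2)
        labels = dists.argmin(axis=1)
        # Move each centroid to the mean of its assigned pixels
        for c in range(k):
            members = points[labels == c]
            if len(members) > 0:
                centroids[c] = members.mean(axis=0)
    return centroids, labels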
Example #4
    def run(self):
        db = self.db
        N = db['N']

        if db['H_matrix'] is None:
            db['H_matrix'] = np.eye(N) - np.ones((N, N)) / N

        if db['kernel_type'] == 'Linear Kernel':
            optimize_linear_kernel(db)
        elif db['kernel_type'] == 'Gaussian Kernel':
            output = np.empty((N, 1), dtype=np.float32)

            if self.db['prev_clust'] == 0:
                self.kdac.SetupParams(self.params)
                self.kdac.Fit(db['data'], N, db['d'])
            elif self.db['with_predefined_clustering']:
                self.kdac.SetupParams(self.params)
                self.kdac.Fit(db['data'], N, db['d'], db['Y_matrix'], N,
                              db['C_num'])
            else:
                #import pdb; pdb.set_trace()
                self.kdac.SetupParams(self.params)
                self.kdac.Fit()

            print('got to here')
            self.kdac.Predict(output, N, 1)

            db['allocation'] = output.T[0]
            db['allocation'] = db['allocation'].astype(np.int32)
            db['allocation'] += 1  # labels start from 1 instead of 0

            db['binary_allocation'] = np.zeros((N, db['C_num']))

            # Convert from allocation to binary_allocation
            for m in range(db['allocation'].shape[0]):
                db['binary_allocation'][m, int(db['allocation'][m]) - 1] = 1

            if db['Y_matrix'].shape[0] == 0:
                db['Y_matrix'] = db['binary_allocation']
            else:
                db['Y_matrix'] = np.append(db['Y_matrix'],
                                           db['binary_allocation'],
                                           axis=1)

            self.db['prev_clust'] += 1
            return

        elif self.db['kernel_type'] == 'Polynomial Kernel':
            optimize_polynomial_kernel(self.db)
        else:
            raise ValueError('Error: unknown kernel was used.')

        normalize_each_U_row(self.db)
        K_means(self.db)
        self.db['prev_clust'] += 1
Example #5
def bi_kmeans(data, k):
    q = K_means.kmeans(data, 2)
    # Initialize the centroid list with two clusters
    centroid = K_means.init_centroids(data, 2)
    index = 2
    while index != k:
        datasep = []
        # Pick the cluster with the largest SSE and split it
        a = max_key(get_sse(data, centroid, q))
        print(a)
        for i in range(len(data)):
            if q[i] == a:
                datasep.append(data[i])
        sep_centroid = K_means.init_centroids(datasep, 2)
        index += 1
        print(centroid)
        centroid = copy.deepcopy(numpy.delete(centroid, a, 0))
        print(centroid)
        centroid = numpy.concatenate((centroid, sep_centroid))
        print("======================================")
        print(centroid)
        print("======================================")
        q = K_means.cost_funct(data, centroid)
        centroid = K_means.update_centroid(data, centroid, q)
    # Refine with plain K-means iterations until the centroids stop moving
    pre_centroid = centroid
    q = K_means.cost_funct(data, centroid)
    now_centroid = copy.deepcopy(pre_centroid)
    pre_centroid = K_means.update_centroid(data, pre_centroid, q)
    print(now_centroid - pre_centroid)
    while (now_centroid != pre_centroid).any():
        print(pre_centroid)
        print(now_centroid)
        q = K_means.cost_funct(data, pre_centroid)
        print(q)
        now_centroid = copy.deepcopy(pre_centroid)
        pre_centroid = K_means.update_centroid(data, pre_centroid, q)
    return q
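
# NOTE: max_key is not defined in this snippet. A minimal sketch, assuming it
# returns the index of the cluster with the largest SSE (the one to split next):
def max_key(sse):
    return sse.index(max(sse))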
Example #6
    def run(self):
        db = self.db
        N = self.db['N']

        if db['data_type'] == 'Feature Matrix':
            self.db['data'] = self.center_data(self.db['data'])

        if self.db['kernel_type'] == 'Linear Kernel':
            optimize_linear_kernel(self.db)
        elif self.db['kernel_type'] == 'Gaussian Kernel':
            optimize_gaussian_kernel(self.db)
        elif self.db['kernel_type'] == 'Polynomial Kernel':
            optimize_polynomial_kernel(self.db)
        else:
            raise ValueError('Error: unknown kernel was used.')

        normalize_each_U_row(self.db)
        K_means(self.db)
        self.db['prev_clust'] += 1
Example #7
    j = test_files[i]
    imgs.append(ref_imgs[j])
    input_infos.append(img_infos[j])
    ref_imgs = np.delete(ref_imgs, (j), axis=0)
    img_infos = np.delete(img_infos, (j), axis=0)
    distance_matrix = np.delete(distance_matrix, (j), axis=0)
    distance_matrix = np.delete(distance_matrix, (j), axis=1)

# loading clusters and centroids (allow_pickle=True is needed to load dict objects)
clusters2 = np.load('clusters2.npy', allow_pickle=True).item()
centroids2 = np.genfromtxt('centroids2.csv', delimiter=',').astype(int)
clusters3 = np.load('clusters3.npy', allow_pickle=True).item()
centroids3 = np.genfromtxt('centroids3.csv', delimiter=',').astype(int)
clusters11, centroids11 = K.KMeans_clustering(ref_imgs, 1, img_infos)
# Test the accuracy of each algorithm for different maximum-iteration values
for max_iter in range(1, 200):

    print(max_iter)

    #Testing the ML-ANN algorithm
    NC = 1  # number of clusters
    weights = np.zeros(NC)  # weights for each cluster
    CDistances = np.zeros(NC)  # distances between clusters and the input image
    distances = {}
    for i in range(0, NC):
        distances[str(i)] = []
    clusters, centroids = copy.deepcopy(clusters11), copy.deepcopy(
        centroids11)  # clustering reference images by K-Means algorithm
Example #8
import K_means
import matplotlib.pyplot as plt
from numpy import *


def loadDatas():
    """Read the text file `dataSet` into a matrix."""
    with open('dataSet') as fr:
        arrys = fr.readlines()
    number_lines = len(arrys)
    return_mat = zeros((number_lines, 3))  # note: the shape is a single tuple argument
    label_mat = zeros((1, number_lines))
    index = 0
    for line in arrys:
        line = line.strip()  # drop surrounding whitespace
        list_from_line = line.split(' ')
        return_mat[index, :] = list_from_line[0:3]
        label_mat[0, index] = list_from_line[0]
        index += 1
    return return_mat, label_mat


data_mat, labels = loadDatas()  # renamed from `mat` to avoid shadowing numpy.mat
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(data_mat[:, 0], data_mat[:, 1])
plt.show()
K_means.k_means()  # presumably reads the dataset itself; takes no arguments here
Example #9
    def __init__(self, number_of_neurons_hidden_layer, number_of_neurons_output,
                 is_bias, input_data, expected_outputs, is_aproximation=True):
        # whether bias is enabled; bias is currently not implemented for the radial layer
        self.is_aproximation = is_aproximation
        self.is_bias = is_bias

        # input data
        self.input_data = input_data
        self.expected_outputs = expected_outputs

        if not self.is_aproximation:
            self.amount_of_class = []
            self.num_class = int(max(self.expected_outputs))
            self.correct_list = []
            self.correct_class_vector = []
            for i in range(self.num_class):
                self.correct_class_vector.append([])
                self.amount_of_class.append([0])
            for i in self.expected_outputs:
                self.amount_of_class[int(i - 1)][0] += 1

        # Center positions are drawn at random from the input vectors.
        # Stack the input data and expected outputs so they can be shuffled
        # together while keeping rows aligned.
        input_data_random_order = numpy.vstack((self.input_data.T, self.expected_outputs.T)).T
        numpy.random.shuffle(input_data_random_order)

        # Create the weights for the hidden layer: first build a matrix of the desired size

        self.hidden_layer = numpy.zeros((len(input_data_random_order[0, :-1]), number_of_neurons_hidden_layer)).T

        # Initialize the sigmas to 1
        self.scale_coefficient = numpy.ones(numpy.size(self.hidden_layer, 0))
        self.delta_scale_coefficient = numpy.zeros_like(self.scale_coefficient)

        self.hidden_layer = K_means.kmeans(input_data, number_of_neurons_hidden_layer, 1000)

        # TODO: dirty fix
        self.num_of_neurons_hid_layer = len(self.hidden_layer)

        # Compute the sigmas from the formula
        self.find_sigma()
        # print(self.scale_coefficient)

        # deltas for momentum; the hidden layer is currently not trained by
        # backpropagation, so these are unused
        self.delta_weights_hidden_layer = numpy.zeros((len(input_data_random_order[0][:-1]),
                                                       self.num_of_neurons_hid_layer)).T

        # create the output layer with random weights in [-1, 1], as in task 1
        self.output_layer = 2 * numpy.random.random((self.num_of_neurons_hid_layer, number_of_neurons_output)).T - 1
        # print(self.output_layer)
        self.delta_weights_output_layer = numpy.zeros((self.num_of_neurons_hid_layer, number_of_neurons_output)).T

        # if bias is enabled, create a bias weight vector for each layer
        if is_bias:
            self.bias_hidden_layer = (2 * numpy.random.random(self.num_of_neurons_hid_layer) - 1)
            self.bias_output_layer = (2 * numpy.random.random(number_of_neurons_output) - 1)
        # otherwise create the same vectors filled with zeros; they then have no
        # effect on the computations
        else:
            self.bias_hidden_layer = numpy.zeros(self.num_of_neurons_hid_layer)
            self.bias_output_layer = numpy.zeros(number_of_neurons_output)
        # matching delta vectors for the biases, as for the weight layers
        self.bias_output_layer_delta = numpy.zeros(number_of_neurons_output)
        self.bias_hidden_layer_delta = numpy.zeros(self.num_of_neurons_hid_layer)
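
    # NOTE: find_sigma is not shown in this snippet ("compute the sigmas from
    # the formula"). A sketch assuming the common RBF heuristic
    # sigma = d_max / sqrt(2 * K), where d_max is the largest distance between
    # centers and K is their count; the project's actual formula may differ:
    def find_sigma(self):
        centers = numpy.asarray(self.hidden_layer, dtype=float)
        d_max = max(numpy.linalg.norm(a - b) for a in centers for b in centers)
        k = self.num_of_neurons_hid_layer
        self.scale_coefficient = numpy.full(k, d_max / numpy.sqrt(2 * k))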
Example #10
        if clust is None or len(clust) <= 1:
            count1 += 1
        else:
            count2 += 1
    return (count1 / len(clusters)) * 100


#===========Main========================
_data = Mini_Parse("stars.txt")

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# Create a Kmeans object and load the data into it
kmeans = km.Kmeans(_data)
# The argument sets the number of centroids, which CalculateMeans computes
means = kmeans.CalculateMeans(15)
# FindClusters then builds the clusters around those means/centroids
clusters = kmeans.FindClusters()

hullx = []
hully = []
hullz = []

# Run the Jarvis March algorithm on each cluster
for i in range(len(clusters)):
    if len(clusters[i]) <= 1:
        continue
    temp = d3ch.JarvisMarch(clusters[i])
Example #11
    def myCluster(self, str_clu, str_iter, str_thres):
        try:
            self.right_tableview.itemChanged.disconnect(
                self.item_Changed_Event)  # disconnect the signal, otherwise updates are very slow
        except Exception:
            pass

        try:
            if str_clu != '':
                num_clu = int(str_clu)
            else:
                num_clu = 2
            if str_iter != '':
                num_iter = int(str_iter)
            else:
                num_iter = 500
            if str_thres != '':
                num_thres = float(str_thres)
            else:
                num_thres = 0.0001
        except Exception:
            QtWidgets.QMessageBox.critical(self, "错误", "输入错误")
            return

        kmeans = K_means.Kmeans(num_clu, num_iter, num_thres)

        self.SelectedToDf()
        if self.selectedDf is None:
            return
        if not self.selectedDf.empty:
            if self.selectedDf.T.shape[1] < 2:
                QtWidgets.QMessageBox.critical(self, "错误", "输入向量小于2维,无法聚类")
                return
            try:
                print(self.selectedDf)
                col = self.selectedDf.T.shape[1]
                data_X = self.selectedDf.T.iloc[:, :col].values
                predict_y = kmeans.predict(data_X)
                self.right_tableview.insertColumn(
                    self.right_tableview.columnCount())
                for i in range(len(predict_y)):
                    Input_item = QtWidgets.QTableWidgetItem(str(predict_y[i]))
                    Input_item.setTextAlignment(Qt.AlignHCenter
                                                | Qt.AlignVCenter)
                    self.right_tableview.setItem(
                        i,
                        self.right_tableview.columnCount() - 1, Input_item)
                temp_x = pd.DataFrame(data=data_X)
                temp_y = pd.Series(data=predict_y)
                temp_x["Predict"] = temp_y

                if self.selectedDf.T.shape[1] == 2:
                    ax = self.ClusterWindow.plot_widget.add_subplot(111)
                    for i in range(num_clu):
                        ax.scatter(x=temp_x[temp_x.Predict == i].iloc[:, 0],
                                   y=temp_x[temp_x.Predict == i].iloc[:, 1])

                elif self.selectedDf.T.shape[1] >= 3:
                    ax = Axes3D(self.ClusterWindow.plot_widget)
                    for i in range(num_clu):
                        ax.scatter(temp_x[temp_x.Predict == i].iloc[:, 0],
                                   temp_x[temp_x.Predict == i].iloc[:, 1],
                                   temp_x[temp_x.Predict == i].iloc[:, 2])
            except Exception:
                QtWidgets.QMessageBox.critical(self, "错误", "聚类失败,请检查输入数据的合法性")
                return
            try:
                self.item_Changed_Event()
                self.right_tableview.itemChanged.connect(
                    self.item_Changed_Event)  # reconnect the signal now that updates are done
            except Exception:
                pass
            self.ClusterWindow.plot_canvas.draw()
Example #12
def main():
    # Replace every pixel with the color of its assigned centroid
    # (assumes `from PIL import Image` and a `file_name` defined elsewhere)
    x, miu, c = K_means.kmeans(file_name)
    for i in range(len(x[:, 0])):
        x[i] = miu[int(c[i])]
    im = Image.fromarray(x.reshape(128, 128, 3))
    im.save('bird_new.png')
Example #13
import numpy as np
import copy
import matplotlib.pyplot as plt
import K_means

#data = np.loadtxt('circle_data.csv',delimiter = ',')
data = np.loadtxt('2d_span_data_centered.csv', delimiter=',')
randompoints = K_means.randomPoint(2, 2)
np.random.seed(123)
print(data.shape)
x = data[0]
y = data[1]
K_means.plot_image(x, y)

#label, center_points = K_means.k_means(data,randompoints)
#K_means.plotting(label,data,center_points)
#print(label)
Example #14
    Ant,X,count = nt.main(fname)
    code = cl_ant.ant_label(Ant)
    p, e, c = eva.Evaluate_All(code, X.tolist(), d, fname)
    P = P + p
    E = E + e
    C = C + c

#kmeans.show_plot(code, X)
print("NO-THRESHOLDS Algorithm : %s" % N)
print("P: ", P / float(N))
print("E: ", E / float(N))
print("C: ", C / float(N))
print("count:", count)

#--- K-means ---
P = 0.0
E = 0.0
C = 0.0
for i in range(N):
    #time.sleep(1)
    code,X = kmeans.main(fname,k)
    p, e, c = eva.Evaluate_All(code, X.tolist(), d, fname)
    P = P + p
    E = E + e
    C = C + c

print "K-means Algorithm : %s" %N
print "P: ", P/float(N)
print "E: ", E/float(N)
print "C: ", C/float(N)
Example #15
#---------------------------------------------------------------
# make a codebook

x_train = pickle.load(open('./datasets/train_img.npy', 'rb'))
x_test = pickle.load(open('./datasets/test_img.npy', 'rb'))
y_train = pickle.load(open('./datasets/train_label.txt', 'rb'))
y_test = pickle.load(open('./datasets/test_label.txt', 'rb'))

strong_des = sift.dense_sift_each()  # dense SIFT

# weak_des = sift.weak_des_whole()      # original SIFT

codebook_path = './codebook/km_center_dense_200_caltech'

K_means.clustering(strong_des, codebook_path, n_cluster=200)

#---------------------------------------------------------------
# Save the level 0, 1, 2 PHOW (pyramid histogram of words) for train and test

codebooks = codebook.load_codebook(codebook_path)

tr_sl_0 = single_level(cal_train, 0, codebooks)
tr_sl_1 = single_level(cal_train, 1, codebooks)
tr_sl_2 = single_level(cal_train, 2, codebooks)

ts_sl_0 = single_level(cal_test, 0, codebooks)
ts_sl_1 = single_level(cal_test, 1, codebooks)
ts_sl_2 = single_level(cal_test, 2, codebooks)

tr_pyramid_L0 = tr_sl_0  # add book
Example #16
                                       R_Cramer_y=R_Cramer_y,
                                       R_cont_x=R_cont_x,
                                       R_Cramer_x=R_Cramer_x,
                                       dic=dic,
                                       normalize=True,
                                       verbose=True,
                                       path_rslt=path_rslt)

delta_time = round((datetime.now() - begin).total_seconds(), 1)
print('Data prepared in ' + str(delta_time) + 's')

#============================================================================
#               K-means
begin = datetime.now()
repartition = K_means.K_means(dfX,
                              Y,
                              nb_clusters_init=nb_clusters,
                              methode_prediction=method_prediction)
delta_time = round((datetime.now() - begin).total_seconds(), 1)
print('Clusters computed in ' + str(delta_time) + 's')

#===========================================================================
#              IC
begin = datetime.now()
cluster_stat = IntConf.IntConf(repartition, alpha=0.85, path_rslt=path_rslt)
print(cluster_stat)
delta_time = round((datetime.now() - begin).total_seconds(), 1)
print('Confidence interval computed in ' + str(delta_time) + 's')

#===========================================================================
#              Prediction
index = ~Y.isnull()
Example #17
    def K_means(self):
        return K_means.ClusterPlot(self.file_path, self.clusters).main()
Example #18
import sys, os
import numpy as np
sys.path.append(os.getcwd() + r'\Modules')
import matplotlib.pyplot as plt
from sklearn.datasets import make_circles
import DBSCAN as db
import K_means as km

np.random.seed(0)
X, y = make_circles(n_samples=400, factor=.3, noise=.05)

dbscan = db.DBSCAN(eps=0.5, min_sample=5)
db_assignments = dbscan.fit_transform(X)
kmeans = km.K_Means(n_clusters=2)
km_centers, km_assignments = kmeans.fit_transform(X)

plt.figure(20)
plt.scatter(X[:, 0], X[:, 1], c=db_assignments)
plt.figure(21)
plt.scatter(X[:, 0], X[:, 1], c=km_assignments)
plt.show()
Example #19
def clustering(NC, ref_imgs, img_infos):
    clusters, centroids = K.KMeans_clustering(ref_imgs, NC, img_infos)
    # np.save takes the file name first, then the array
    np.save('clusters' + str(NC) + '.npy', clusters)
    np.save('centroids' + str(NC) + '.npy', centroids)
    for idx in range(nP):
        # Add a random value to each gene.
        for i in range(length):
            random_value = np.random.uniform(0.0, 1.0)
            cluster.__dict__['populasi'][i][idx][3] += random_value

    return cluster


def getCentroidPop(i):
    cluster.centroids = cluster.populasi[i]


k = 3
fitness = []
cluster = K_means.Kmeans('seeds.txt', k)

#------------------------- POPULATION, CHROMOSOME, CLUSTERING -------------------------#
createPopulation()

for i in range(0, 70):
    getCentroidPop(i)  # take the centroid from each population
    cluster.clustering()
    centroid, SSE, acc = cluster.groupData()
    result = centroid, SSE, acc
    fitness.append(result)
    # each chromosome->centroid has its own fitness function
    # print("SSE chromosome: %.2f" % SSE)
    # print("Acc chromosome: %.2f" % acc)

#--------------------------------- SELECTION ----------------------------------#
Example #21
#!/usr/bin/env python
# coding: utf-8

# In[4]:
import K_means
import DB
import numpy as np
import pandas as pd
import sys
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

# In[ ]:

test_file_name = sys.argv[1]
kmeans_labels = K_means.km_clustering(test_file_name)
km = kmeans_labels.reshape((len(kmeans_labels), 1))
dbscan_labels = DB.dbscan_clustering(test_file_name)
dbscan = dbscan_labels.reshape((len(dbscan_labels), 1))

# Shift both label sets so they start at 1 instead of 0
for i in range(len(kmeans_labels)):
    km[i] = km[i] + 1
    dbscan[i] = dbscan[i] + 1
final = np.append(km, dbscan, axis=1)
print('Saved the output labels in a csv file')
result = pd.DataFrame(final, columns=['K-Means', 'DBSCAN'])
result.to_csv('Label_Output.csv')
Example #22
def workflow(fichier, fichier_sortie, path, path_rslt, taille_ech, suffix_table, begin_distributed, fich_vae, fich_deb):
    #=======================================================================
    #                Statistical parameters
    nb_clusters = taille_ech // 1000
    R_cont_y = 0.25
    R_Cramer_y = 0.20
    R_cont_x = 0.85
    R_Cramer_x = 0.80
    method_disc = 'Cramer'
    method_continuous = 'regression'
    method_prediction = 'Ridge'  # 'random_forest'
    dic = {'SGMT_PF_V4': ['SGMT_PF_V4', 1],
           'SGMT_PF_AXE_FIDELITE_V4': ['SGMT_PF_AXE_FIDELITE_V4', 3],
           'REVENU_EST_M': ['REVENU_EST_M', 3]}
    
    #============================================================================
    #                 Data preparation
    begin = datetime.now()
    dfX, Y, R_dico = data_preparation.main(
        fichier=fichier, fichier_sortie=fichier_sortie, method_disc=method_disc,
        method_continuous=method_continuous, taille_ech=taille_ech,
        R_cont_y=R_cont_y, R_Cramer_y=R_Cramer_y, R_cont_x=R_cont_x,
        R_Cramer_x=R_Cramer_x, dic=dic, normalize=True, verbose=True,
        path_rslt=path_rslt, suffix_table=suffix_table)

    delta_time = round((datetime.now() - begin).total_seconds(), 1)
    print('Data prepared in ' + str(delta_time) + 's')

    #============================================================================
    #               K-means
    begin = datetime.now()
    repartition = K_means.K_means(dfX, Y, nb_clusters_init=nb_clusters,
                                  methode_prediction=method_prediction)
    delta_time = round((datetime.now() - begin).total_seconds(), 1)
    print('Clusters computed in ' + str(delta_time) + 's')

    #===========================================================================
    #              IC (confidence intervals)
    begin = datetime.now()
    cluster_stat = IntConf.IntConf(repartition, alpha=0.85,
                                   path_rslt=path_rslt, suffix_table=suffix_table)
    print(cluster_stat)
    delta_time = round((datetime.now() - begin).total_seconds(), 1)
    print('Confidence interval computed in ' + str(delta_time) + 's')

    #===========================================================================
    #              Prediction
    index = ~Y.isnull()
    dfX_train, dfX_test, Y_train, Y_test = train_test_split(
        dfX[index], Y[index], test_size=0.4, random_state=44)

    del dfX_train, dfX_test, Y_train
    result, result_test = prediction.IC_reg(repartition, dfX, Y,
                                            path_rslt=path_rslt, suffix_table=suffix_table)
    result_1 = prediction.get_result(repartition, dfX, Y, Y_test, method=method_prediction)
    #prediction.regression_graph(Y_real=result_1['Y_real'], Y_pred=result_1['Y_pred'], col='black')
    score_abs, score_rel = prediction.get_regression_score(
        Y_real=result_1['Y_real'], Y_pred=result_1['Y_pred'])
    print('Mean prediction error: ' + str(score_abs) + ' (' + str(score_rel) + '%)')
    curve_Recall, AUC_ROC = prediction.classification_curve(
        Y_real=result_1['Y_real'], Y_pred=result_1['Y_pred'],
        threshold_rich=100000, col='black')
    print('AUC_ROC: ' + str(AUC_ROC))
    print('AUC Precision-Recall: ' + str(curve_Recall))
    #prediction.graph_variable_influence(dfX, result_1, col='black')

    #===========================================================================
    #              File of parameters

    if not os.path.exists(path + "Compare.xlsx"):
        S = pd.DataFrame(columns=["reference", "data_x", "data_y", "method disc", "method continuous",
                                  "method prevision", "Date", "Nb ligne", "% NaN", "k cluster",
                                  "R_cont_y", "R_Cramer_y", "R_cont_x", "R_Cramer_x",
                                  "Nb var quali", "Nb var quant", "Nb regroupement", "score",
                                  "score relative", "AUC ROC", "curve Recall", "temps de calcul"])
        S.to_excel(path + "Compare.xlsx", encoding="utf-8", index=False)

    Nb_var_quant = len(R_dico['variables continues gardees'])
    Nb_var_quali = len(R_dico['variables discretes gardees'])
    Nb_regroupement = len(R_dico['groupe variables'])
    date = "%s" % datetime.now()
    NaN = len(Y[Y.isnull()]) / Y.shape[0]
    delta_time = round((datetime.now() - begin_distributed).total_seconds(), 1)

    wb = load_workbook(path + "Compare.xlsx")
    sheet = wb['Sheet1']  # get_sheet_by_name() is deprecated
    reference = sheet.max_row + 1
    ABC = list("ABCDEFGHIJKLMNOPQRSTUV")
    liste = [reference, fich_deb, fich_vae, method_disc, method_continuous, method_prediction,
             date, taille_ech, str(round(NaN, 2)), nb_clusters, R_cont_y, R_Cramer_y,
             R_cont_x, R_Cramer_x, Nb_var_quali, Nb_var_quant, Nb_regroupement, score_abs,
             str(score_rel) + '%', str(AUC_ROC), str(curve_Recall), str(delta_time) + 's']
    for i in range(len(ABC)):
        sheet[ABC[i] + str(reference)].value = liste[i]
        # except ValueError:
        #     print("liste " + liste[i])
        #     print("i " + str(i))
        #     print("ABC " + ABC[i])
        #     print("refer " + str(reference))
        #     print("sheet " + sheet[ABC[i] + str(reference)].value)
        #     raise ValueError("This is where it crashes")
    wb.save(path + "Compare.xlsx")

    mon_fichier = open(path_rslt + "fichier" + suffix_table + ".txt", "w")
    mon_fichier.write('list of qualitative variables:')
    a = R_dico['variables discretes gardees'].sort_values(['R2'], ascending=[False])
    for i in a.values:
        mon_fichier.write('\n        ' + str(i))

    a = R_dico['variables continues gardees'].sort_values(['R2'], ascending=[False])
    mon_fichier.write('\n \nlist of quantitative variables:')
    for i in a.values:
        mon_fichier.write('\n        ' + str(i))

    mon_fichier.write('\n \nlist of variable groups:')
    mon_fichier.write('\n          R²,    group of correlated variables,    representative variable')
    for i in R_dico['groupe variables'].values:
        mon_fichier.write('\n        ' + str(i))

    delta_time = round((datetime.now() - begin_distributed).total_seconds(), 1)
    mon_fichier.write('\n \nComputation time is ' + str(delta_time) + 's')
    mon_fichier.close()
    print('Total computation time for the batch is ' + str(delta_time) + 's')
Example #23
File: LVQ.py Project: jayshonzs/ESL
    
    t = np.zeros(15)
    for i in range(15):
        if i < 5:
            t[i] = 0
        elif i < 10:
            t[i] = 1
        else:
            t[i] = 2
    plt.scatter(model[:, 0], model[:, 1], s=150, c=t, cmap=mycm)
    
    plt.xlim([0, 10])
    plt.ylim([0, 10])
    
    plt.show()

if __name__ == '__main__':
    X = simulate_data.loaddata()
    t = []
    for i in range(300):
        if i < 100:
            t.append(0)
        elif i < 200:
            t.append(1)
        else:
            t.append(2)
    k_means_model = K_means.train(X)
    init_model = extract_centers(k_means_model)[0]
    model = train(X, init_model)
    draw(X, np.array(t), model)
Example #24
    # so we have the spanning matrix
    C_matrix = eigenvectors.T[eigenvalue_order]
    return C_matrix
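
# NOTE: center() is imported from elsewhere in the project; a minimal sketch,
# assuming it zero-centers each coordinate row of the 2 x N data matrix:
def center(data):
    # Subtract each row's mean so x and y are centered at the origin.
    return data - data.mean(axis=1, keepdims=True)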


if __name__ == "__main__":
    dirlist = os.listdir()
    text = None
    for file in dirlist:
        if '2d' in file:
            text = file
    data = np.loadtxt(text, delimiter=',')
    x_origin = data[0]
    y_origin = data[1]
    # plot the original picture
    K_means.plot_image(x_origin, y_origin)
    centered_data = center(data)
    C_matrix = -1 * compute_pca(centered_data, 10**-5)
    print("The spanning matrix:\n", C_matrix)
    # plot the vector arrow
    x_vector = np.zeros(3)
    y_vector = np.zeros(3)
    x_vector[:2] = C_matrix.T[0]
    y_vector[:2] = C_matrix.T[1]
    K_means.plot_image(x_vector, y_vector)
    # Now we transform the original data
    data_transformed = np.dot(C_matrix, data)
    x_transformed = data_transformed[0]
    y_transformed = data_transformed[1]
    K_means.plot_image(x_transformed, y_transformed)