def CMeans_CIQ(S, K, n_iterations=None, init_array=None):
    """
    Color image quantization (CIQ) via fuzzy C-Means.

    The maximum iteration count and the initial membership array are only
    supplied when another optimizer drives the clustering (e.g. when applying
    PSO-CIQ or CQ-ABC); in that case they are forwarded to cmeans.

    :param S: input image, reshaped to (n_pixels, 1, 3)
    :param K: number of palette colors (clusters)
    :param n_iterations: optional maximum number of iterations
    :param init_array: optional initial membership matrix
    :return: the K palette colors as a (K, 3) integer array
    """
    # skfuzzy's cmeans expects data shaped (n_features, n_samples).
    data = np.reshape(S, (-1, 3)).T
    if n_iterations:
        cmeans_output = cmeans(data=data, c=K, m=2.0, error=0.001,
                               maxiter=n_iterations, init=init_array)
    else:
        cmeans_output = cmeans(data=data, c=K, m=2.0, error=0.001, maxiter=300)
    centers = cmeans_output[0]
    # np.int was removed in NumPy 1.24; use the builtin int instead.
    centers = np.reshape(centers, (K, 3)).astype(int)
    return centers
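# A minimal usage sketch for CMeans_CIQ, assuming OpenCV for I/O; the input
# path and the K=16 palette size are illustrative, not from the original code.
import cv2
import numpy as np
from skfuzzy.cluster import cmeans_predict

img = cv2.imread('input.png')                    # hypothetical input file
S = img.reshape((-1, 1, 3)).astype(np.float64)   # (n_pixels, 1, 3), per the docstring
palette = CMeans_CIQ(S, K=16)
# Map every pixel to its closest palette color via the predicted memberships.
u = cmeans_predict(np.reshape(S, (-1, 3)).T, palette.astype(np.float64),
                   m=2.0, error=0.001, maxiter=300)[0]
quantized = palette[np.argmax(u, axis=0)].reshape(img.shape).astype(np.uint8)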
def fit_predict(self, X: np.ndarray) -> np.ndarray:
    '''
    train and predict
    - params:
        - X : np.ndarray (S, N) : where S is the number of features and N is the number of samples
    - return:
        - Y : np.ndarray (N, 1) : hard cluster label per sample
    '''
    cntr, u, u0, d, jm, p, fpc = cmeans(data=X,
                                        c=self.params['n_clusters'],
                                        m=self.params['m'],
                                        error=self.params['tol'],
                                        maxiter=self.params['max_iter'],
                                        init=None,
                                        seed=self.params['random_state'])
    self.params['centers'] = cntr
    self.results['u'] = u        # final membership matrix (n_clusters, N)
    self.results['u0'] = u0      # initial membership matrix
    self.results['d'] = d        # distances to each center
    self.results['jm'] = jm      # objective-function history
    self.results['p'] = p        # number of iterations run
    self.results['fpc'] = fpc    # fuzzy partition coefficient
    y = np.argmax(u, axis=0)     # np.transpose is a no-op on a 1-D array
    return np.expand_dims(y, axis=1)
def fcm(data, c, m=2):
    """Return fuzzy C-Means centers and a membership-weighted radius per cluster."""
    center, u, u0, d, jm, p, fpc = cmeans(data.T, c, m=m, error=1e-6, maxiter=20)
    # The fuzzified weights u ** m were tried and rejected in favor of raw u.
    u_m = u
    # Pairwise Euclidean distances: D[i, j] = ||data[i] - center[j]||.
    D = np.sqrt(np.sum(np.square(data[:, None] - center), axis=2))
    # Radius per cluster: membership-weighted mean distance to its center.
    radius = np.asarray([np.sum(u_m[i] @ D[:, i]) / np.sum(u_m[i]) for i in range(c)])
    return center, radius
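# A quick sanity check for fcm, sketched on synthetic blobs (the make_blobs
# parameters below are illustrative assumptions, not from the original code).
import numpy as np
from sklearn.datasets import make_blobs

data, _ = make_blobs(n_samples=300, centers=3, random_state=0)
center, radius = fcm(data, c=3)
print(center)   # (3, n_features) cluster centers
print(radius)   # membership-weighted mean distance per cluster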
import numpy
from sklearn.decomposition import PCA
from skfuzzy.cluster import cmeans


def dimensionReductionandClustering(word_data_points):
    word_data_array = numpy.array(word_data_points)
    # Project the word vectors onto two principal components.
    pca = PCA(n_components=2)
    result = pca.fit_transform(word_data_array)
    # cmeans expects data shaped (n_features, n_samples).
    result_T = result.transpose()
    k = 6
    cntr, mem_matr, u0, d, jm, p, fpc = cmeans(result_T, k, 2, error=0.005,
                                               maxiter=1000, init=None)
    return mem_matr, k
def iteration(X, U, V, labels, p, logger):
    from skfuzzy.cluster import cmeans
    # Pass the warm-start membership via the init keyword: in recent skfuzzy
    # the sixth positional argument of cmeans is metric, not the initial partition.
    V, U, _, _, _, t, _ = cmeans(X.T, len(V), 1.05, 1e-1, 200, init=U.T)
    metric_now = nmi_acc(U.T, labels)
    return U.T, V.T, t, metric_now
def fcm(n_clusters, vector):
    # Positional arguments: data (features x samples), c, m, error, maxiter.
    cntr, u, u0, d, jm, p, fpc = cmeans(vector.T, n_clusters, 2.5, 0.0001, 1000, seed=0)
    return {"center": cntr, "membership": u, "label": u.argmax(axis=0)}
def fkmeans(data_matrix, number_of_clusters, fuzzy_parameter, error, maximum_iterations):
    # Run fuzzy C-Means first and use its centroids to seed a hard K-Means;
    # fitting K-Means on the c fuzzy centroids effectively adopts them as the
    # initial hard centers. ('full' was renamed 'lloyd' in scikit-learn 1.1+.)
    fmeans = cmeans(data_matrix.T, number_of_clusters, fuzzy_parameter, error,
                    maximum_iterations)
    centroids = fmeans[0]
    kmeans = KMeans(n_clusters=number_of_clusters, algorithm='lloyd').fit(centroids)
    predictions = kmeans.predict(data_matrix)
    return kmeans.cluster_centers_, predictions, davies_bouldin_score(data_matrix, predictions)
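# Usage sketch for fkmeans: the fuzzy centroids seed the hard K-Means pass.
# The data and hyperparameters below are illustrative assumptions.
import numpy as np
from sklearn.datasets import make_blobs

X, _ = make_blobs(n_samples=500, centers=4, random_state=1)
centers, labels, dbi = fkmeans(X, number_of_clusters=4, fuzzy_parameter=2.0,
                               error=0.005, maximum_iterations=1000)
print(dbi)  # Davies-Bouldin index of the final hard partition (lower is better)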
import numpy as np
import pandas as pd
from copy import deepcopy
from sklearn import metrics
from skfuzzy.cluster import cmeans


def CMeans_cluser(useful_feature, n, data_id, data_score, __m=2.0):
    data_columns = useful_feature.columns
    # np.matrix is deprecated; a plain ndarray behaves the same here.
    KFCM_result = np.asarray(useful_feature)
    KFCM_result = KFCM_result.T
    center, u, u0, d, jm, p, fpc = cmeans(KFCM_result, m=__m, c=n,
                                          error=0.00000001, maxiter=100000)
    # print('end KFCM')
    u = u.T
    final_location = normalise_U(u)  # one-hot rows: a 1 marks the assigned cluster
    label = []
    for i in final_location:
        i = list(i)
        temp = i.index(1)
        label.append(temp)
    score_sil = metrics.silhouette_score(useful_feature, label, metric='euclidean')
    print("With %d clusters, the KFCM Silhouette Coefficient is: %f" % (n, score_sil))
    # calinski_harabaz_score was renamed calinski_harabasz_score in scikit-learn 0.23.
    score_cal = metrics.calinski_harabasz_score(useful_feature, label)
    print("With %d clusters, the KFCM Calinski-Harabasz Index is: %f" % (n, score_cal))
    KFCM_result = KFCM_result.T
    KFCM_result = pd.DataFrame(KFCM_result)
    KFCM_result.columns = data_columns
    KFCM_result['label'] = label
    KFCM_result['overall'] = data_score
    # Sum the overall score per cluster, then rank the clusters by that sum.
    # (Keys are plain cluster ids, which avoids the original code's fragile
    # parsing of a character inside a formatted key string.)
    center_overall_sum = {}
    for i in range(n):
        temp = (KFCM_result[KFCM_result.label == i])['overall'].sum()
        center_overall_sum[i] = temp
    center_overall_sum = sorted(center_overall_sum.items(), key=lambda x: x[1], reverse=True)
    # Relabel the clusters 1..n in descending order of their summed score.
    Old_label = []
    New_label = []
    num = 0
    for i in center_overall_sum:
        num += 1
        Old_label.append(int(i[0]))
        New_label.append(num)
    label = list(deepcopy(KFCM_result['label']))
    temp = []
    for i in range(len(label)):
        for j in range(n):
            if label[i] == Old_label[j]:
                temp.append(New_label[j])
    KFCM_result.drop('label', axis=1, inplace=True)
    KFCM_result['label'] = temp
    KFCM_result.insert(0, 'eventid', data_id)
    return KFCM_result
def findClusters_cmeans(data):
    '''
        Cluster data using the fuzzy c-means clustering algorithm
    '''
    # create the classifier object
    return cl.cmeans(
        data,
        c=5,        # number of clusters
        m=2,        # exponentiation (fuzzifier) factor

        # stopping criteria
        error=0.01,
        maxiter=300)
def fit(self, x, n_rules=5):
    """
    Initialize the rule base: each rule is built from a cluster center, the
    per-cluster widths (std), and the data partition produced by fuzzy C-Means.
    :param x: the data the rules are generated from
    :param n_rules: number of rules, i.e. the number of cluster centers
    """
    # skfuzzy expects a NumPy array shaped (n_features, n_samples), so the
    # torch tensor is transposed and converted before clustering.
    center_list, data_partition, _, _, _, _, _ = \
        cmeans(x.t().detach().numpy(), n_rules, 2, error=0.005, maxiter=1000)
    self.n_rules = n_rules
    self.center_list = torch.tensor(center_list)
    self.data_partition = torch.tensor(data_partition).t()
    self.consequent_list = None
    self.widths_list = self.get_widths_list(x)
def fit(self, X, Y=None):
    if self.n_clusters is None:
        # Heuristic from pevec2013: roughly sqrt(N / 2) clusters.
        self.n_clusters = int((len(X) / 2)**0.5)
    self.labels_ = np.arange(self.n_clusters)
    centroids, u, _, dists, _, _, fpc = cmeans(X.T, self.n_clusters, self.exp,
                                               self.error, self.max_iter)
    self.centroids = centroids  # (n_clusters, n_features)
    self.fpc = fpc
    # u: (n_clusters, N) membership grades; dists: (n_clusters, N) distances.
    return self
def FCM(preprocessing='PCA', M=1.5, Error=0.005, Maxiter=1000, pre_kernel='rbf'):
    if preprocessing == 'PCA':
        X, y = use_PCA('iris_data.txt')
    elif preprocessing == 'KPCA':
        X, y = use_KPCA('iris_data.txt', kernel=pre_kernel)
    elif preprocessing == 'LDA':
        X, y = use_LDA('iris_data.txt')
    elif preprocessing == 'None':
        loader = datasets.load_iris()
        X, y = loader['data'], loader['target']
    else:
        print('Please choose a data preprocessing method from the following:\n')
        print('1.PCA, 2.KPCA, 3.LDA, 4.None')
        return
    X = X.T
    center, u, u0, d, jm, p, fpc = cmeans(X, m=M, c=3, error=Error, maxiter=Maxiter)
    # Hard labels: the cluster with the highest membership per sample.
    labels = np.argmax(u, axis=0)

    fig1 = plt.subplot(1, 2, 1)
    fig1.set_title('Data after preprocessing')
    for i, tag in enumerate(y):
        if tag == 0:
            fig1.scatter(X[0][i], X[1][i], c='r')
        elif tag == 1:
            fig1.scatter(X[0][i], X[1][i], c='g')
        elif tag == 2:
            fig1.scatter(X[0][i], X[1][i], c='b')

    fig2 = plt.subplot(1, 2, 2)
    fig2.set_title('Clustering result')
    for i, label in enumerate(labels):
        if label == 0:
            fig2.scatter(X[0][i], X[1][i], c='r')
        elif label == 1:
            fig2.scatter(X[0][i], X[1][i], c='g')
        elif label == 2:
            fig2.scatter(X[0][i], X[1][i], c='b')
    plt.show()
def process_data(data, centers):
    cntr, u, u0, d, jm, p, fpc = cmeans(data, centers, 2, error=0.005,
                                        maxiter=10000, init=None)
    cluster_membership = np.argmax(u, axis=0)
    data = pd.DataFrame(data=data)
    data['target'] = cluster_membership
    # seaborn renamed the FacetGrid size parameter to height in 0.9+.
    g = sns.FacetGrid(data, hue='target', palette='tab20', height=5)
    g.map(plt.scatter, 0, 1, s=100, linewidth=.5, edgecolor='white')
    g.add_legend()
    plt.show()
def classify(schoolList, schoolname, num1, num2):
    schoolArray = np.array(schoolList)
    # Reassigning .dtype reinterprets the raw bytes; astype converts the values.
    schoolArray = schoolArray.astype(np.float64)
    schoolArray = schoolArray.T
    center, u, u0, d, jm, p, fpc = cmeans(schoolArray, m=1.5, c=3,
                                          error=0.005, maxiter=1000)
    print(center)
    print(fpc)
    label = np.argmax(u, axis=0)  # hard label per school
    kind1 = []
    kind2 = []
    kind3 = []
    targetList1 = []
    targetList2 = []
    for row in center:
        targetList1.append(row[num1])
        targetList2.append(row[num2])
    kind_num1 = targetList1.index(max(targetList1))
    kind_num2 = targetList2.index(max(targetList2))
    for i in range(0, len(schoolList)):
        if label[i] == 0:
            kind1.append([schoolname[i], label[i]])
        elif label[i] == 1:
            kind2.append([schoolname[i], label[i]])
        else:
            kind3.append([schoolname[i], label[i]])
    return kind1, kind2, kind3, kind_num1, kind_num2
def fmeans(data_matrix, number_of_clusters, fuzzy_parameter, error, maximum_iterations):
    # Run fuzzy C-Means, then harden the result by assigning every point to
    # its nearest centroid.
    fmeans = cmeans(data_matrix.T, number_of_clusters, fuzzy_parameter, error,
                    maximum_iterations)
    centroids = fmeans[0]
    datapoint_no = 0
    cluster_assignment_list = np.zeros(data_matrix.shape[0]).astype('int32')
    while datapoint_no < data_matrix.shape[0]:
        distance = np.linalg.norm(data_matrix[datapoint_no] - centroids[0])
        assigned_cluster_no = 0
        centroid_no = 1
        while centroid_no < number_of_clusters:
            tmp_distance = np.linalg.norm(data_matrix[datapoint_no] - centroids[centroid_no])
            if tmp_distance < distance:
                distance = tmp_distance
                assigned_cluster_no = centroid_no
            centroid_no = centroid_no + 1
        cluster_assignment_list[datapoint_no] = assigned_cluster_no
        datapoint_no = datapoint_no + 1
    return centroids, cluster_assignment_list
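# The nearest-centroid loop above is equivalent to a vectorized argmin over
# pairwise distances; a small check of that, sketched on assumed random data:
import numpy as np

X = np.random.RandomState(0).rand(100, 2)
centroids, hard_labels = fmeans(X, number_of_clusters=3, fuzzy_parameter=2.0,
                                error=0.005, maximum_iterations=1000)
vec_labels = np.argmin(np.linalg.norm(X[:, None, :] - centroids[None, :, :], axis=2), axis=1)
assert np.array_equal(hard_labels, vec_labels)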
def process(self, img, **kwargs):
    n_clusters = kwargs.get('n_clusters', 3)
    m = kwargs.get('m', 2)
    eps = kwargs.get('eps', 0.01)
    max_it = kwargs.get('max_it', 100)
    numpass = kwargs.get('numpass', 5)
    median_radius = kwargs.get('median_radius', 10)
    if isinstance(img, Dataset):
        img = img.pixel_array
    # Skull-strip / denoise before clustering.
    img, _ = median_otsu(img, numpass=numpass, median_radius=median_radius)
    flat = img.reshape((1, -1))
    c, u, _, _, _, _, _ = cmeans(flat, n_clusters, m, eps, max_it)
    # The cluster whose center has the highest intensity is taken as tumor.
    tumor_index = np.argmax(c, axis=0)
    defuz = np.argmax(u, axis=0)
    mask = np.full(defuz.shape[0], 0, dtype=np.uint16)
    mask[defuz == tumor_index] = 1
    mask = mask.reshape(img.shape)
    # Morphological clean-up of the binary mask.
    k1 = np.ones((3, 3), np.uint16)
    k2 = np.ones((5, 5), np.uint16)
    mask = cv.erode(mask, k2, iterations=1)
    mask = cv.dilate(mask, k1, iterations=1)
    mask = cv.erode(mask, k2, iterations=2)
    mask = cv.dilate(mask, k1, iterations=5)
    return mask
def class3_output(hz_fft3, num_fft3):
    max_sum = 0
    max_index = -1
    min_sum = 0
    min_index = -1
    fft = []
    # Combine the dominant-frequency and peak-count lists into one list.
    for i in range(len(hz_fft3)):
        fft.append([hz_fft3[i], num_fft3[i]])
    fft = np.array(fft)
    # Standardize the features (zscore: zero mean, unit variance).
    normal_fft = scipy.stats.zscore(fft).tolist()
    # From the standardized list, record the points with the largest and
    # smallest coordinate sums.
    for i in range(len(normal_fft)):
        sum_fft = normal_fft[i][0] + normal_fft[i][1]
        if max_sum < sum_fft:
            max_sum = sum_fft
            max_index = [normal_fft[i][0], normal_fft[i][1]]
        if min_sum > sum_fft:
            min_sum = sum_fft
            min_index = [normal_fft[i][0], normal_fft[i][1]]
    # The data to classify; each element is a list of floats.
    vectors = normal_fft
    # Cluster the data into 3 fuzzy clusters (m and fuzzyValue are globals).
    centers = cmeans(np.array(vectors).T, 3, m, 0.003, 10000)
    u = centers[1].T
    class_list = []
    label = []
    for i in u:
        # Points whose best membership falls below fuzzyValue stay unlabeled (-1).
        if np.amax(i) < fuzzyValue:
            class_list.append(-1)
        else:
            class_list.append(np.argmax(i))
        label.append(np.argmax(i))
    plot_label = class_list
    fileName = 'D:/opticalflow/cmeans2/plt/class3/' + videoName[:-4] + '_Cmeans_figure.png'
    plot_data(fft, u, filename=fileName)
    return class_list, u
def class1_output(k_err, zahyou):
    x_err = []
    y_err = []
    err = []
    max_err = 0
    min_err = 0
    max_index = -1
    min_index = -1
    # Collect the x and y absolute errors of every feature point.
    for i in range(len(k_err)):
        for j in range(len(k_err[0])):
            x_err.append(k_err[i][j][0])
            y_err.append(k_err[i][j][1])
    # Standardize (zscore: zero mean, unit variance).
    x_err_normal = scipy.stats.zscore(x_err).tolist()
    y_err_normal = scipy.stats.zscore(y_err).tolist()
    # Combine the standardized x and y errors into one list and track the
    # points with the largest and smallest coordinate sums (the original code
    # never updated max_err/min_err; the running extrema are kept here).
    for i in range(len(x_err)):
        err.append([x_err_normal[i], y_err_normal[i]])
        if max_err < x_err_normal[i] + y_err_normal[i]:
            max_err = x_err_normal[i] + y_err_normal[i]
            max_index = [x_err_normal[i], y_err_normal[i]]
        elif min_err > x_err_normal[i] + y_err_normal[i]:
            min_err = x_err_normal[i] + y_err_normal[i]
            min_index = [x_err_normal[i], y_err_normal[i]]
    # The data to classify; each element is a list of floats.
    vectors = np.array(err)  # a plain list has no .T, so convert first
    # Cluster the data into 3 fuzzy clusters.
    centers = cmeans(vectors.T, 3, 2, 0.003, 10000)
    u = centers[1].T
    # Per feature point: classify every frame and adopt the most frequent label.
    label = []
    k_err = scipy.stats.zscore(k_err).tolist()
    zahyou_ave = []
    for frame in k_err:
        tmp = []
        sum_x = 0
        sum_y = 0
        # Classify frame by frame (k_means.near assigns the nearest center).
        for i in frame:
            tmp.append(k_means.near(i, centers))
            sum_x += i[0]
            sum_y += i[1]
        # Take the most frequent cluster as this point's label.
        label_input = mode(tmp)
        # Average absolute error for this feature point.
        zahyou_ave.append([sum_x / len(frame), sum_y / len(frame)])
        label.append(label_input)
    x0, y0, x1, y1, x2, y2 = [], [], [], [], [], []
    # Plot each feature point's mean error, colored by label.
    for index, zahyou_data in enumerate(zahyou_ave):
        if label[index] == 0:
            x0.append(zahyou_data[0])
            y0.append(zahyou_data[1])
        elif label[index] == 1:
            x1.append(zahyou_data[0])
            y1.append(zahyou_data[1])
        else:
            x2.append(zahyou_data[0])
            y2.append(zahyou_data[1])
    fig = plt.figure(figsize=(14, 10))
    ax = fig.add_subplot(1, 1, 1)
    ax.scatter(x0, y0, color='r')
    ax.scatter(x1, y1, color='b')
    ax.scatter(x2, y2, color='g')
    plt.xlabel('vector in x', fontsize=36)
    plt.ylabel('vector in y', fontsize=36)
    plt.tick_params(labelsize=36)
    # Save the plotted figure.
    plt.savefig('D:/opticalflow/cmeans2/plt/class1/' + videoName[:-4] + '_figure.png')
    return label
import numpy as np
import matplotlib.pyplot as plt
from skfuzzy.cluster import cmeans

a = np.array([[1, 3], [1.5, 3.2], [1.3, 2.8], [3, 1]])
# cmeans expects (n_features, n_samples), hence the transpose.
cntr, u, _, _, _, _, _ = cmeans(a.T, c=2, m=2, error=0.005, maxiter=1000)
print(cntr, '\n-----------------\n', u)
clustering.labels_
# Plot the clustering dendrogram; distance_threshold must be set to do so.
plt.title('Hierarchical Clustering Dendrogram')
# plot the top three levels of the dendrogram
plot_dendrogram(clustering, truncate_mode='level')
plt.xlabel("Number of points in node (or index of point if no parenthesis).")
plt.show()

# 2. Fuzzy clustering
data = pd.read_csv(r"D:\书籍资料整理\多元统计分析\表3-7.csv")
train = data[['人均国内生产总值', '粗死亡率', '粗出生率', '城镇人口比重',
              '平均预期寿命', '65岁及以上人口比重']].apply(
    lambda x: (x - np.mean(x)) / (np.std(x)))
train = np.asarray(train)
# train = preprocessing(train)
train = train.T
center, u, u0, d, jm, p, fpc = cmeans(train, m=2, c=3, error=0.0001, maxiter=1000)
# center: the cluster centers
# u:      the final membership matrix
# u0:     the initial membership matrix
# d:      the Euclidean distance of each point to each center
# jm:     the objective-function optimization history
# p:      the number of iterations run
# fpc:    the fuzzy partition coefficient, a clustering-quality score ranging
#         from 0 to 1 (1 is best); it can be used to choose the number of clusters.
result = u.T
result = pd.DataFrame(result, columns=['[,1]', '[,2]', '[,3]'])
result['country'] = data['国家和地区']
# The book uses R's fanny function; the conclusions here differ somewhat.
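# Since fpc rises toward 1 for better partitions, one common way to pick the
# cluster count is a small sweep; a sketch continuing the script above (the
# candidate range 2..8 is an illustrative assumption, not from the original):
best_c, best_fpc = None, -1.0
for c in range(2, 9):
    fpc_c = cmeans(train, m=2, c=c, error=0.0001, maxiter=1000)[-1]
    if fpc_c > best_fpc:
        best_c, best_fpc = c, fpc_c
print(best_c, best_fpc)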
# For reproducibility
np.random.seed(1000)

if __name__ == '__main__':
    # Load the dataset
    digits = load_digits()
    X = digits['data'] / 255.0  # note: load_digits pixels actually range 0-16
    Y = digits['target']

    # Perform a preliminary analysis over several fuzzifier values
    Ws = []
    pcs = []
    for m in np.linspace(1.05, 1.5, 5):
        fc, W, _, _, _, _, pc = cmeans(X.T, c=10, m=m, error=1e-6,
                                       maxiter=20000, seed=1000)
        Ws.append(W)
        pcs.append(pc)

    # Show the results
    sns.set()
    fig, ax = plt.subplots(1, 5, figsize=(20, 4))
    for i, m in enumerate(np.linspace(1.05, 1.5, 5)):
        ax[i].bar(np.arange(10), -np.log(Ws[i][:, 0]))
        ax[i].set_xticks(np.arange(10))
        ax[i].set_title(r'$m={}, P_C={:.2f}$'.format(m, pcs[i]))
    ax[0].set_ylabel(r'$-\log(w_{0j})$')
# x = agg[['player_assists', 'player_dbno', 'player_dist_ride',
agg_se = agg.loc[(agg["party_size"] == 2)].copy()
x = agg_se[[
    'player_assists', 'player_dbno', 'player_dist_ride', 'player_dist_walk',
    'player_dmg', 'player_kills', 'player_survive_time'
]].copy()
x_correlaton = x.corr()
# x = x.apply(lambda x: (x - x.min()) / (x.max() - x.min()))
x = x.apply(lambda x: (x - x.mean()) / x.std(), axis=0)  # z-score standardization
x_T = x.T
cntr, u, u_0, d, obj_value, num_of_iter, fpc = sc.cmeans(data=x_T, c=3, m=2,
                                                         maxiter=100, error=0.005)
print(u.shape)
print(obj_value[-1])
# plt.plot(obj_value)
# plt.show()
cluster_list = u.argmax(axis=0)
agg_se["class"] = cluster_list
num_class = np.bincount(cluster_list)
print(num_class)
print(cluster_list)
agg1 = agg_se.loc[(agg_se['class'] == 0)]
agg2 = agg_se.loc[(agg_se['class'] == 1)]
import numpy as np
from skfuzzy.cluster import cmeans
from sklearn.cluster import KMeans
from sklearn.preprocessing import scale
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score

iris = load_iris()
data = scale(iris.data)
n_samples, n_features = data.shape
n_iris = len(np.unique(iris.target))
target = iris.target

estimator = KMeans(n_clusters=3)
labels = estimator.fit_predict(data)
# Note: accuracy_score assumes the cluster ids happen to match the class ids;
# a label-alignment step would normally be needed.
print('K-Means Algorithm Accuracy:', accuracy_score(target, labels))

# cmeans expects (n_features, n_samples), hence data.T.
centr, u_origin, _, _, _, _, fpc = cmeans(data.T, c=10, m=2, error=0.005, maxiter=1000)
# fpc is the fuzzy partition coefficient, not an accuracy.
print('Fuzzy C-Means FPC:', fpc)
tsne = TSNE(n_components=n_components, learning_rate=200.0)  # learning_rate must be positive; 0 is invalid
tsne_vectors = tsne.fit_transform(np.asfarray(word_vectors, dtype='float64'))

num_clusters = 2
start = time.time()  # Start time
print('INFO: Clustering: ', num_clusters, ' clusters')
# Hard clustering:
kmeans_clustering = KMeans(n_clusters=num_clusters)
kclusters = kmeans_clustering.fit_predict(word_vectors)
# Soft (fuzzy) clustering:
word_vectors_transpose = word_vectors.transpose()
cntr, u, u0, d, jm, p, fpc = cluster.cmeans(word_vectors_transpose, num_clusters, 2,
                                            error=1e-4, maxiter=300, init=None)
# cclusters = np.argmax(u, axis=0)
# cclusters_fuzzy = get_clusters(u, limit=1/num_clusters)
cclusters_fuzzy = get_clusters(u, limit=0.35)
end = time.time()
elapsed = end - start
print("INFO: Time of clustering: ", elapsed, "seconds")

jet = cm = plt.get_cmap('jet')
plt.imshow(I0, cmap='gray'), plt.title('original image')
plt.show()
# cv2.kmeans requires float32 sample data.
img1 = I0.reshape(I0.size, 1).astype(np.float32)

# K-Means
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
flags = cv2.KMEANS_RANDOM_CENTERS
compactness, labels, centers = cv2.kmeans(img1, 4, None, criteria, 10, flags)
I1 = labels.reshape(I0.shape)
plt.imshow(I1), plt.title('kmeans')
plt.show()

# Fuzzy C-Means
center, u, u0, d, jm, p, fpc = cmeans(img1.T, m=3, c=3, error=0.005, maxiter=1000)
# Defuzzify: each pixel takes the cluster with the highest membership.
Labels = np.argmax(u, axis=0)
I2 = Labels.reshape(I0.shape)
plt.imshow(I2), plt.title('cmeans')
plt.show()

# Binary mask seeded from the cluster of a reference pixel.
WM = np.where(I1 == I1[100, 94], 1.0, 0)
plt.imshow(WM)
plt.show()
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
erosion = cv2.erode(WM, kernel, iterations=1)
dilation = cv2.dilate(WM, kernel, iterations=1)
def perform_fuzzy_clustering(training: np.ndarray, test: np.ndarray,
                             clusters: int, m: int) -> tuple:
    # Fit on the training data, then project the test data onto the trained
    # centers with cmeans_predict.
    center, train_u = cmeans(training.T, clusters, m, 0.005, 1000)[0:2]
    test_u = cmeans_predict(test.T, center, m, 0.005, 1000)[0]
    # Harden both membership matrices into label vectors.
    return tuple(np.argmax(u, axis=0) for u in (train_u, test_u))
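# Usage sketch for perform_fuzzy_clustering on a synthetic train/test split
# (make_blobs and the 70/30 split are illustrative assumptions; cmeans and
# cmeans_predict are assumed imported from skfuzzy.cluster as above).
import numpy as np
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split

X, _ = make_blobs(n_samples=400, centers=3, random_state=0)
X_train, X_test = train_test_split(X, test_size=0.3, random_state=0)
train_labels, test_labels = perform_fuzzy_clustering(X_train, X_test, clusters=3, m=2)
print(train_labels.shape, test_labels.shape)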
train = np.array(tr)
train = train.T
fpc_all = []
# Sweep the cluster count c and record the FPC of each fit:
# for j in range(2, 40):
#     center, u1, u0, d, jm, p, fpc = cmeans(train, m=1.5, c=j, error=0.005, maxiter=1000000)
#     print(j)
#     fpc_all.append(fpc)
# plt.figure()
# plt.plot(range(2, 40), fpc_all)
# plt.grid(True, linestyle="-.", color="b", linewidth="1")
# plt.xlabel('Clustering number')
# plt.ylabel('The fuzzy partition coefficient (FPC)')
# plt.show()

c_num = 3  # number of clusters (3/5/30 were tried)
center, u1, u0, d, jm, p, fpc = cmeans(train, m=1.5, c=c_num, error=0.005,
                                       maxiter=1000000)
label_1 = np.argmax(u1, axis=0)  # hard label per series

# Visualize the series that share the same trend cluster.
for j in range(c_num):
    t1 = np.where(label_1 == j)[0]
    print("cluster: %d, count: %d" % (j, len(t1)))
    plt.figure()
    for i in range(len(t1)):
        plt.plot(range(w_size), tr[t1][i])
    plt.ylim(-1000, 2000)
    plt.show()

# Save the label data.
label_1 = pd.DataFrame(data=label_1)
label_1.to_csv('label_3.csv')
def fuzzy_cmeans(data, n_of_clusters, *, m=1.07):
    # cmeans needs a plain Python int for c; the .cuda() round-trip in the
    # original was redundant, moving the tensor to CPU is enough.
    n_of_clusters = int(n_of_clusters.detach().cpu().item())
    result = cmeans(data.T, n_of_clusters, m, 0.001, 10000, seed=0)
    fuzzy_means = result[1].T  # membership matrix, transposed to (N, c)
    result = torch.FloatTensor(np.array(fuzzy_means)).cuda()
    return result
import numpy as np
import matplotlib.pyplot as plt
from skfuzzy.cluster import cmeans, cmeans_predict
from sklearn.datasets import load_digits

# Set random seed for reproducibility
np.random.seed(1000)

if __name__ == '__main__':
    # Load the dataset
    digits = load_digits()
    X_train = digits['data'] / np.max(digits['data'])

    # Perform Fuzzy C-Means
    fc, W, _, _, _, _, pc = cmeans(X_train.T, c=10, m=1.25, error=1e-6,
                                   maxiter=10000, seed=1000)
    print('Partition coefficient: {}'.format(pc))

    # Plot the centroids
    fig, ax = plt.subplots(1, 10, figsize=(10, 10))
    for i in range(10):
        c = fc[i]
        ax[i].matshow(c.reshape(8, 8) * 255.0, cmap='gray')
        ax[i].set_xticks([])
        ax[i].set_yticks([])
    plt.show()
# End of for
labels = np.asarray(labels)  # Convert back to numpy array
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.4,
                                                    random_state=0)
print(X_train.shape)
print(X_test.shape)
print(y_train)
print(y_test)

n_samples, n_features = data.shape
n_digits = len(np.unique(labels))
print("n_digits: %d, \t n_samples %d, \t n_features %d"
      % (n_digits, n_samples, n_features))

# cmeans expects (n_features, n_samples), hence data.T.
cntr, u, u0, d, jm, p, fpc = cmeans(data.T, 2, 2, error=0.005, maxiter=1000,
                                    init=None, seed=None)
print(cntr.shape)

# Predict
u, u0, d, jm, p, fpc = cmeans_predict(data.T, cntr, 2, error=0.005, maxiter=1000,
                                      init=None, seed=None)
# Note: u is a membership matrix, so comparing it directly to y_train with
# accuracy_score (as the disabled code below tried) would not work without
# defuzzifying and aligning the labels first.
# outputline = ',' + str(accuracy_score(y_train, u)) + ',' + str(precision_score(y_train, u)) \
#     + ',' + str(recall_score(y_train, u)) + ',' + str(f1_score(y_train, u))
# f = open("out.csv", "a+")
# f.write(outputline)
# f.close()
def clustering(self):
    # Note: m=10 is an unusually high fuzzifier, and error=0 disables the
    # convergence test, so the loop always runs the full maxiter iterations.
    cntr, U, U0, d, Jm, p, fpc = cmeans(self.data, self.c, m=10, error=0,
                                        maxiter=1000)
    print(cntr)
    print('=======')
    print(U, p)
# gensim 4 renamed size -> vector_size and iter -> epochs.
model = gensim.models.Word2Vec(copus, vector_size=2, min_count=1, window=5, epochs=1000)
# model = gensim.models.FastText(copus, vector_size=100, min_count=10, window=5, epochs=100)

wordvector = []
# gensim 4 replaced model.wv.vocab with model.wv.key_to_index.
for word in model.wv.key_to_index.keys():
    wordvector.append(model.wv[word])
print(model.wv.vectors)
wordvector = np.array(wordvector)

fuzzy_cmeans = cmeans(wordvector.T, 10, 2.5, 0.0001, 1000)
cntr, u, u0, d, jm, p, fpc = fuzzy_cmeans
print("number of clusters: {}".format(10))
print("cluster centers: {}".format(cntr))
print("cluster memberships")
pprint(u)
print("cluster assignments")
pprint(u.argmax(axis=0))

# kmeans = KMeans(n_clusters=10)
# kmeans.fit(wordvector)
# print("number of clusters: {}".format(kmeans.n_clusters))
# print(kmeans.labels_)