def _prediction(self):
    """Train a SOM on the recorded traffic data, predict the congestion
    level for the newest sample, log it, and act on the result.

    Side effects: reads trainingdata1.txt and pdata1.txt under
    /home/mininet/testmininet/, appends one line to self.details, and
    either calls self._congdelay(pred) (no congestion) or stores the
    prediction in self.prevpred (congestion expected).
    """
    try:
        # Training set: one comma-separated row of metrics per interval.
        data = np.loadtxt('/home/mininet/testmininet/trainingdata1.txt',
                          delimiter=',')
        names = [
            'Interval', 'Throughput(Mbits/0.5sec)', 'Bandwidth(Mbits/sec)',
            'Jitter(ms)', 'Loss', 'Decision'
        ]
        sm = SOMFactory().build(data,
                                normalization='var',
                                initialization='random',
                                component_names=names)
        sm.train(n_job=1,
                 verbose='info',
                 train_rough_len=15,
                 train_finetune_len=15)
        topographic_error = sm.calculate_topographic_error()
        # FIX: use a context manager so the file handle is closed (the old
        # bare open(...).readlines() leaked it); also dropped the unused
        # quantization_error local.
        with open('/home/mininet/testmininet/pdata1.txt') as f:
            line = f.readlines()
        log.debug(line)
        comp = line[0].split(",")
        # Drop the trailing field left by the final comma.
        del comp[-1]
        # FIX: build the query row with a comprehension instead of five
        # hand-written float(comp[i]) calls.
        data2 = np.array([[float(v) for v in comp[:5]]])
        sm.cluster(5)
        pred = np.absolute(sm.predict_by(data2, 5))
        self.details.write(comp[4] + "\t" + comp[1] + "\t" + str(pred[0]) +
                           "\t" + str(topographic_error) + "\n")
        print(pred)
        if pred <= 0.5:
            print("No congestion")
            self._congdelay(pred)
        elif pred > 0.5:
            print("Congestion there for next 5 seconds atleast")
            self.prevpred = pred
    except (IndexError, ValueError):
        # FIX: also catch ValueError so a malformed (non-numeric) field in
        # pdata1.txt is reported like a missing one instead of crashing.
        print("ERROR")
def cluster_category_data(df,
                          scale_data='minmax',
                          dim_red_method='som',
                          use_elbow_method=True,
                          cluster_method='hierarchical',
                          n_clusters=None,
                          verbose=1,
                          perplexity=None):
    """Cluster the rows of *df*, optionally reducing dimensionality first.

    :param df: dataframe containing all the columns belonging to a category
        to be used in clustering
    :param scale_data: scaling applied to the dataset: 'minmax', 'standard',
        or anything else for no scaling
    :param dim_red_method: options are 'som', 'umap', 'tsne', None. If None,
        do clustering directly on the (scaled) data.
    :param use_elbow_method: if truthy, the elbow method finds the optimum
        number of clusters; otherwise n_clusters must be given.
        (FIX: the default used to be the *string* 'True'; both it and the
        boolean True are truthy, so callers see no behavior change.)
    :param cluster_method: options are 'kmeans' and 'hierarchical'. In
        either case k-means is used for the elbow method (because of the
        time required).
    :param n_clusters: number of clusters when use_elbow_method is falsy
    :param verbose: if truthy, output the progress of the clustering process
    :param perplexity: if the method used is TSNE, perplexity needs to be
        specified
    :returns: tuple (cluster labels for each row of df, number of clusters)
    """
    t = time.time()
    if scale_data == 'minmax':
        X = MinMaxScaler().fit_transform(df)
    elif scale_data == 'standard':
        X = StandardScaler().fit_transform(df)
    else:
        X = df.values
    if verbose:
        print(f'number of features = {df.shape[1]}')
    if dim_red_method == 'som':
        if verbose:
            print('Self Organising Maps is being used for dimensionality reduction...')
        opt_k = 2
        max_s = -1
        for mapsize in [(30, 30)]:
            if verbose:
                print(f'map size = {mapsize}')
            sm = SOMFactory().build(X,
                                    normalization='var',
                                    initialization='pca',
                                    mapsize=mapsize)
            sm.train(n_job=1,
                     verbose=False,
                     train_rough_len=100,
                     train_finetune_len=500)
            if use_elbow_method:
                model = KElbowVisualizer(KMeans(), k=20, timings=False)
                elbow = model.fit(sm.codebook.matrix).elbow_value_
                if elbow and verbose:
                    print(f'elbow value = {elbow}')
                if not elbow:
                    # Fall back to a silhouette sweep over k when the elbow
                    # heuristic fails to find a knee.
                    if verbose:
                        print('elbow not found')
                    ms = -1
                    for k in range(2, 20):
                        km_labels = KMeans(k).fit_predict(sm.codebook.matrix)
                        s = silhouette_score(sm.codebook.matrix, km_labels)
                        if s > ms:
                            elbow = k
                            # FIX: the best score was never recorded, so the
                            # sweep always ended with elbow == 19; track it
                            # so the k with the highest silhouette wins.
                            ms = s
            else:
                elbow = n_clusters
            # Map each sample to its BMU, then to that neuron's cluster.
            x = sm.project_data(X)
            labels, _, _ = sm.cluster(opt=elbow, cl_type=cluster_method)
            clabels = [labels[x[i]] for i in range(X.shape[0])]
            s_score = silhouette_score(X, clabels)
            if verbose:
                print(f'silhouette score = {round(s_score, 3)}')
            max_s = max(s_score, max_s)
            if max_s == s_score:
                opt_k = elbow
                opt_labels = clabels
                opt_size = mapsize
            if max_s > s_score:
                # The score got worse: stop trying further map sizes.
                break
        if verbose:
            print(f'optimum mapsize = {opt_size}')
            print(f'optimum number of clusters = {opt_k} & silhouette score = {round(max_s,3)}')
            print(f'time taken = {round(time.time()-t,1)}')
        return opt_labels, opt_k
    elif dim_red_method:
        if dim_red_method == 'umap':
            print('UMAP is being used for dimensionality reduction...')
            embedding = umap.UMAP(n_components=2,
                                  n_neighbors=5,
                                  min_dist=0.0001,
                                  metric='euclidean',
                                  random_state=1,
                                  spread=0.5,
                                  n_epochs=1000).fit_transform(X)
            print('UMAP embedding done...')
        elif dim_red_method == 'tsne':
            print('t-SNE is being used for dimensionality reduction...')
            embedding = TSNE(perplexity=perplexity).fit_transform(X)
            print('t-SNE embedding is done...')
        if use_elbow_method:
            model = KElbowVisualizer(KMeans(), k=20, timings=False)
            elbow = model.fit(embedding).elbow_value_
        else:
            elbow = n_clusters
        if cluster_method == 'kmeans':
            opt_labels = KMeans(elbow).fit_predict(embedding)
        elif cluster_method == 'hierarchical':
            opt_labels = AgglomerativeClustering(elbow).fit_predict(embedding)
        if verbose:
            s_score = silhouette_score(X, opt_labels)
            print(f'number of clusters = {elbow} and silhouette_score = {s_score}')
        return opt_labels, elbow
    else:
        # No dimensionality reduction: cluster the scaled data directly.
        if use_elbow_method:
            model = KElbowVisualizer(KMeans(), k=20, timings=False)
            elbow = model.fit(X).elbow_value_
        else:
            elbow = n_clusters
        if cluster_method == 'kmeans':
            opt_labels = KMeans(elbow).fit_predict(X)
        elif cluster_method == 'hierarchical':
            opt_labels = AgglomerativeClustering(elbow).fit_predict(X)
        print(f'silhouette score = {round(silhouette_score(X,opt_labels),3)}')
        return opt_labels, elbow
# Centroids of the SOM + hierarchical-clustering solution.
centroids_hc_som_cons = som_hc_cons.groupby('Hierarchical Clustering').mean()
centroids_hc_som_cons = centroids_hc_som_cons.drop(columns='Labels')
# 14.1.1.1 Silhouette scores
# Average silhouette score
silhouette_avg_som_hc_cons = silhouette_score(std_cons, som_hc_cons['Hierarchical Clustering'].values)
# Silhouette scores individual to each observation
sample_silhouette_som_hc_cons = pd.DataFrame(
    silhouette_samples(std_cons, som_hc_cons['Hierarchical Clustering'].values),
    columns=['Value'])
# Number of positive silhouette scores
pos_sample_hc_cons = sample_silhouette_som_hc_cons[sample_silhouette_som_hc_cons.Value > 0].count()
# 14.1.2 K-Means Clustering on top of SOM
# Visualize to which of the k clusters from the k-means belongs each neuron
k = 3
som_kmeans_cons = sm_consump.cluster(k)
hits = HitMapView(10, 10, "Clustering", text_size=7)
a = hits.show(sm_consump)
# 'som_kmeans_cons' is a dataframe with a column 'K_means' that specifies to
# which cluster belongs each client
som_kmeans_cons = pd.DataFrame(som_kmeans_cons, columns=['K_means'])
som_kmeans_cons['Labels'] = range(mapsize_consump * mapsize_consump)
# NOTE(review): passing both on='Labels' and right_index=True raises
# ValueError in current pandas (merge accepts 'on' OR the index flags,
# not both) — confirm against the pandas version this runs on.
som_kmeans_cons = final_clusters_consump.merge(som_kmeans_cons,
                                               how='inner',
                                               on='Labels',
                                               right_index=True)
som_kmeans_cons = som_kmeans_cons.sort_index()
# Verify the number of observations associated with each cluster and the
# cluster centroid coordinates
count_obs_som_kmeans_cons = som_kmeans_cons.groupby('K_means').count()
centroids_som_kmeans_cons = som_kmeans_cons.groupby('K_means').mean()
centroids_som_kmeans_cons = centroids_som_kmeans_cons.drop(columns='Labels')
# 14.1.2.1 silhouette scores
# Build and train a SOM on the dataframe: 50x50 planar rectangular grid,
# per-variable variance normalization, PCA-initialised weights.
sm = SOMFactory().build(df.values, [50, 50],
                        mask=None,
                        mapshape='planar',
                        lattice='rect',
                        normalization='var',
                        initialization='pca',
                        component_names=list(df.columns))
sm.train(n_job=2, verbose='info', train_rough_len=30, train_finetune_len=20)
# Persist the trained map so it can be reloaded without retraining.
with open('/content/drive/My Drive/IC_Cristine/SOM/som_segundo.pkl',
          'wb') as arq:
    pickle.dump(sm, arq)
# Map quality metrics: topographic error from the trained map, quantization
# error as the mean BMU distance stored in sm._bmu[1].
topographic_error = sm.calculate_topographic_error()
quantization_error = np.mean(sm._bmu[1])
print("Topographic error = {0}; Quantization error = {1}".format(
    topographic_error, quantization_error))
# Component planes, denormalized back to the original units.
view2D = sompy.mapview.View2D(100, 100, "rand data", text_size=14)
view2D.show(sm, col_sz=5, which_dim="all", denormalize=True)
# k-means with 4 clusters over the neurons, then show the hit map.
cl = sm.cluster(n_clusters=4)
h = sompy.hitmap.HitMapView(10, 10, 'hitmap', text_size=8, show_text=True)
h.show(sm)
# U-matrix (inter-neuron distances). Note the second assignment overwrites
# UMAT with the show() return value.
u = sompy.umatrix.UMatrixView(50, 50, 'umatrix',
                              show_axis=True,
                              text_size=8,
                              show_text=True)
UMAT = u.build_u_matrix(sm, distance=1, row_normalized=False)
UMAT = u.show(sm,
              distance2=1,
              row_normalized=False,
              show_data=True,
              contooor=True,
              blob=False)
# component planes view from sompy.visualization.mapview import View2D view2D = View2D(10, 10, "rand data", text_size=12) view2D.show(sm, col_sz=4, which_dim="all", desnormalize=True) # U-matrix plot from sompy.visualization.umatrix import UMatrixView umat = UMatrixView(width=10, height=10, title='U-matrix') umat.show(sm) # do the K-means clustering on the SOM grid, sweep across k = 2 to 20 from sompy.visualization.hitmap import HitMapView K = 20 # stop at this k for SSE sweep K_opt = 18 # optimal K already found [labels, km, norm_data] = sm.cluster(K, K_opt) hits = HitMapView(20, 20, "Clustering", text_size=12) a = hits.show(sm) import gmplot gmap = gmplot.GoogleMapPlotter(54.2, -124.875224, 6) j = 0 for i in km.cluster_centers_: gmap.marker(i[0], i[1], 'red', title="Centroid " + str(j)) j += 1 gmap.draw("centroids_map.html") from bs4 import BeautifulSoup
initialization='random', component_names=names)
sm.train(n_job=1, verbose='info', train_rough_len=2, train_finetune_len=300)
# Compute the topographic and quantization errors.
topographic_error = sm.calculate_topographic_error()
quantization_error = np.mean(sm._bmu[1])
print("Topographic error = %s; Quantization error = %s" %
      (topographic_error, quantization_error))
# Component-planes view.
from sompy.visualization.mapview import View2D
view2D = View2D(10, 10, "rand data", text_size=10)
view2D.show(sm, col_sz=4, which_dim="all", desnormalize=True)
# BMU hits view (samples per neuron).
from sompy.visualization.bmuhits import BmuHitsView
vhts = BmuHitsView(4, 4, "Hits Map", text_size=12)
vhts.show(sm,
          anotate=True,
          onlyzeros=False,
          labelsize=12,
          cmap="Greys",
          logaritmic=False)
# Hit-map view.
from sompy.visualization.hitmap import HitMapView
sm.cluster(4)  # number of clusters for the grouping
hits = HitMapView(20, 20, "Clustering", text_size=12)
a = hits.show(sm)
# Report map quality. Python 2 print statement — this fragment targets
# Python 2.
print "Topographic error = %s; Quantization error = %s" % (topographic_error, quantization_error)
from sompy.visualization.mapview import View2D
# Component planes.
view2D = View2D(4, 4, "rand data", text_size=16)
view2D.show(som, col_sz=2, which_dim="all", desnormalize=True)
# U-matrix plot
from sompy.visualization.umatrix import UMatrixView
umat = UMatrixView(width=10, height=10, title='U-matrix')
umat.show(som)
from sompy.visualization.hitmap import HitMapView
K = 10
Kluster = som.cluster(K)
hits = HitMapView(20, 20, "K-Means Clustering", text_size=16)
a = hits.show(som)
# som.cluster(n_clusters=K)
# som.cluster() returns the k-means cluster labels for each neuron of the map,
# but it is straightforward to retrieve the cluster labels for the whole
# training set, by assigning them the label of the BMUs (best-matching
# units). You can do for example:
# Make sure indices line up....
map_labels = som.cluster(n_clusters=K)
# som._bmu[0] holds each sample's BMU index.
data_labels = np.array([map_labels[int(k)] for k in som._bmu[0]])
clusters = pd.Series(data_labels)
clusters = clusters.rename('cluster').to_frame()
class MySOM:
    """Convenience wrapper around sompy: builds and trains a SOM from a
    dataframe and exposes error reporting, plotting, clustering and
    per-sample label retrieval."""

    def __init__(self, df, mapsize, initialization='random'):
        """
        :param df: input dataframe
        :param mapsize: dimensions of the output layer, usually 2-D,
            given as a tuple like (20, 20)
        :param initialization: 'PCA' or 'random' — how the weights are
            initialised.
            - PCA seeds the weights with the variables' principal component
              values (see sompy.codebook.pca_linear_initialization)
            - random seeds them with random numbers
        """
        self.data = np.array(df)
        self.sm = SOMFactory().build(self.data,
                                     mapsize=mapsize,
                                     initialization=initialization,
                                     component_names=df.columns)
        self.train()

    def train(self):
        """Train the map with a short schedule: 2 rough + 5 fine epochs."""
        self.sm.train(n_job=1,
                      verbose=False,
                      train_rough_len=2,
                      train_finetune_len=5)

    def print_error(self):
        """Print the map's topographic and quantization errors."""
        topo_err = self.sm.calculate_topographic_error()
        quant_err = np.mean(self.sm._bmu[1])
        print("Topographic error = %s; Quantization error = %s" %
              (topo_err, quant_err))

    def draw_input_weights(self):
        """Show the component planes (one heatmap per input variable)."""
        from sompy.visualization.mapview import View2D
        View2D(10, 10, "rand data", text_size=10).show(self.sm,
                                                       col_sz=4,
                                                       which_dim="all",
                                                       desnormalize=True)
        plt.show()

    def draw_hit_map(self):
        """Show how many samples landed on each neuron."""
        from sompy.visualization.bmuhits import BmuHitsView
        BmuHitsView(4, 4, "Hits Map", text_size=12).show(self.sm,
                                                         anotate=True,
                                                         onlyzeros=False,
                                                         labelsize=12,
                                                         cmap="Greys",
                                                         logaritmic=False)
        plt.show()

    def draw_cluster_map(self):
        """Show the cluster assignment of each neuron."""
        from sompy.visualization.hitmap import HitMapView
        view = HitMapView(20, 20, "Clustering", text_size=12)
        view.show(self.sm)
        plt.show()

    def cluster(self, n):
        """Run k-means with n clusters over the map's neurons."""
        self.sm.cluster(n)

    def get_cluster_label(self):
        # One label per neuron: length == mapsize[0] * mapsize[1].
        return self.sm.cluster_labels

    def get_neurons(self):
        """Return the BMU (best-matching unit) of every training sample.

        sompy does not expose this directly, hence the helper.
        :return: array, length = self.df.shape[0]
        """
        return self.sm._bmu[0]

    def get_label(self):
        """Return the cluster label of every training sample.

        sompy does not expose this directly, hence the helper.
        :return: array, length = self.df.shape[0]
        """
        neuron_to_label = dict(enumerate(self.sm.cluster_labels))
        return np.array([neuron_to_label[bmu] for bmu in self.sm._bmu[0]])

    def predict(self, x):
        """Placeholder: use the cluster labels as y and fit any supervised
        learning algorithm.

        :param x: feature matrix
        :return: not implemented
        """
        pass
# U-matrix plot from sompy.visualization.umatrix import UMatrixView umat = UMatrixView(width=20, height=20, title='U-matrix') umat.show(som) from sompy.visualization.hitmap import HitMapView from sompy.visualization.bmuhits import BmuHitsView bmuhitsview = BmuHitsView(12, 12, 'Data per node', text_size=24) bmuhitsview.show(som, anotate=False, onlyzeros=False, labelsize=7, logaritmic=False) Kluster = som.cluster(5) hits = HitMapView(20, 20, "K-Means Clustering", text_size=16) a = hits.show(som, anotate=False, labelsize=7, cmap='viridis') def HowManyK(k): '''compute SSE for up to k clusters''' SSE = np.empty(0) K = np.arange(2, k) for i in K: totalERROR = 0 map_labels = som.cluster( n_clusters=i) # will eventually return more than labels.... data_labels = np.array([ map_labels[int(x)] for x in som._bmu[0]
# Map quality metrics: topographic error from the trained map, quantization
# error as the mean BMU distance stored in sm._bmu[1].
topographic_error = sm.calculate_topographic_error()
quantization_error = np.mean(sm._bmu[1])
# Python 2 print statement — this fragment targets Python 2.
print "Topographic error = %s; Quantization error = %s" % (topographic_error, quantization_error)
from sompy.visualization.mapview import View2D
# Component planes: one heatmap per input variable.
view2D = View2D(10, 10, "rand data", text_size=10)
view2D.show(sm, col_sz=4, which_dim="all", desnormalize=True, cmap='plasma')
k_val = 4
from sompy.visualization.hitmap import HitMapView
# k-means with k_val clusters over the neurons, then show membership.
sm.cluster(k_val)
hits = HitMapView(7, 7, "Clustering", text_size=9, cmap='Blues')
a = hits.show(sm)
from sompy.visualization.bmuhits import BmuHitsView
# Hit counts: how many samples map to each neuron.
vhts = BmuHitsView(5, 5, "Hits Map", text_size=11)
vhts.show(sm,
          anotate=True,
          onlyzeros=False,
          labelsize=9,
          cmap="plasma",
          logaritmic=False)
# Get the labels for each BMU
# in the SOM (15 * 10 neurons)
# U-matrix plot from sompy.visualization.umatrix import UMatrixView umat = UMatrixView(width=10,height=10,title='U-matrix') umat.show(som) from sompy.visualization.hitmap import HitMapView from sompy.visualization.bmuhits import BmuHitsView bmuhitsview = BmuHitsView(12,12,'Data per node', text_size=24) bmuhitsview.show(som, anotate=False, onlyzeros=False, labelsize=7, logaritmic=False) Kluster = som.cluster(5) hits = HitMapView(20,20,"K-Means Clustering",text_size=16) a=hits.show(som) from ThirdSOM import bootstrap, HowManyK SSE_Matrix = bootstrap(runs=10,k=20) ##################### average columns in SSE_Matrix = np.mean(SSE_Matrix, axis=0) # SSE of K-means plt.plot(np.arange(2,SSE_Matrix.size+2), SSE_Matrix) plt.title('K-Means Optimal k') plt.xlabel('Number of Clusters, k') plt.ylabel('Sum Square Error')