def sm_training(self):
    """Train a SOM on a user-selected CSV file using the GUI parameters.

    Opens a file dialog, reads the CSV into a DataFrame, builds and trains
    a SOM with the hyper-parameters taken from the Tk entry widgets,
    pickles the trained model to ``Models/sm_model`` and prints the
    topographic and quantization errors.
    """
    file = askopenfilename(initialdir=dir_name, title="Select Data",
                           filetypes=[("csv files", "*.csv")])
    # askopenfilename returns an empty string (not None) when the dialog is
    # cancelled; abort instead of falling through to open('').
    if not file:
        tk.messagebox.showerror(
            "Error",
            "your chosen file is not valid. \n Please choose again.")
        return
    # Context manager closes the handle (the original leaked it).
    with open(file, "rb") as content:
        data = pd.read_csv(content)
    # ind=data[data.columns[0]]
    # data = data.set_index(ind)
    comp_names = [name for name in data.columns]
    # test cali housing first
    df = data.fillna(0).values
    # initialize the build
    sm = SOMFactory().build(
        data=df,
        mapsize=(int(self.Mapsize_x.get()), int(self.Mapsize_y.get())),
        mask=None,
        mapshape='planar',
        lattice=self.Lattice_ent.get(),
        normalization=self.Normalization_ent.get(),
        initialization=self.Initialization_ent.get(),
        neighborhood='gaussian',
        training='batch',
        name='sompy',
        # Use the column names read from the chosen file; the original
        # computed `comp_names` and then ignored it in favour of
        # `self.comp_names`.
        component_names=comp_names)
    # start training
    sm.train(n_job=int(self.n_job_ent.get()),
             shared_memory=self.shared_memory_ent.get(),
             verbose=self.verbose_ent.get(),
             train_rough_len=int(self.train_rough_len_ent.get()),
             train_rough_radiusin=int(self.train_rough_rin_ent.get()),
             train_rough_radiusfin=int(self.train_rough_rfin_ent.get()),
             train_finetune_len=int(self.train_ft_len_ent.get()),
             train_finetune_radiusin=int(self.train_ft_rin_ent.get()),
             train_finetune_radiusfin=int(self.train_ft_rfin_ent.get()),
             train_len_factor=int(self.train_len_factor_ent.get()),
             maxtrainlen=np.inf)  # np.Inf was removed in NumPy 2.0
    # errors calculation
    topographic_error = sm.calculate_topographic_error()
    quantitization_error = np.mean(sm._bmu[1])
    # if multiple runs are required
    # joblib.dump(sm, "model_{}.joblib".format(i))
    # Context manager closes the pickle file (the original leaked it).
    with open("Models/sm_model", "wb") as model_file:
        pickle.dump(sm, model_file)
    # print errors on the cmd prompt
    print("the topographic error is %s " % topographic_error)
    print("the quantitization error is %s " % quantitization_error)
def training_batched_som(map_min_size, map_max_size, nb_models, X_train):
    """Train `nb_models` SOMs with random map sizes and plot their errors.

    Each model gets a map whose side lengths are drawn uniformly from
    [map_min_size, map_max_size) and is dumped to
    ``path + "batched_model_{i}.joblib"``.  All dumped models are then
    reloaded and their topographic/quantization errors scatter-plotted so
    the best one can be picked by eye.
    """
    for i in range(nb_models):
        sm = SOMFactory().build(
            X_train,
            # randrange draws from [min, max) directly, without the
            # throwaway list that random.choice(list(range(...))) built.
            mapsize=[random.randrange(map_min_size, map_max_size),
                     random.randrange(map_min_size, map_max_size)],
            normalization='var',
            initialization='random',
            component_names=names,
            lattice="hexa")
        sm.train(n_job=1, verbose=False, train_rough_len=30,
                 train_finetune_len=100)
        joblib.dump(sm, path + "batched_model_{}.joblib".format(i))
        print("end of training model n°" + str(i))
    # Study the models trained and plot the errors obtained in order to
    # select the best one.
    models_pool = glob.glob(path + "batched_model*")
    errors = []
    for model_filepath in models_pool:
        sm = joblib.load(model_filepath)
        errors.append((sm.calculate_topographic_error(),
                       sm.calculate_quantization_error()))
    e_top, e_q = zip(*errors)
    plt.scatter(e_top, e_q)
    plt.xlabel("Topographic error")
    plt.ylabel("Quantization error")
    plt.title("Topographic and quantization errors of the models trained")
    plt.show()
def _prediction(self):
    """SOM function: train a SOM on recorded link statistics and predict
    congestion for the upcoming interval.

    Trains on ``trainingdata1.txt``, reads the latest sample from
    ``pdata1.txt``, logs the prediction to ``self.details`` and either
    calls ``self._congdelay`` (no congestion) or stores the prediction in
    ``self.prevpred`` (congestion expected).
    """
    try:
        data = np.loadtxt('/home/mininet/testmininet/trainingdata1.txt',
                          delimiter=',')
        names = ['Interval', 'Throughput(Mbits/0.5sec)',
                 'Bandwidth(Mbits/sec)', 'Jitter(ms)', 'Loss', 'Decision']
        sm = SOMFactory().build(data, normalization='var',
                                initialization='random',
                                component_names=names)
        sm.train(n_job=1, verbose='info', train_rough_len=15,
                 train_finetune_len=15)
        topographic_error = sm.calculate_topographic_error()
        # NOTE(review): computed but never used downstream in the original;
        # kept for parity with the log line that could report it.
        quantization_error = np.mean(sm._bmu[1])
        # Context manager closes the handle (the original leaked it).
        with open('/home/mininet/testmininet/pdata1.txt') as fh:
            line = fh.readlines()
        log.debug(line)
        comp = line[0].split(",")
        # Drop the trailing field (idiomatic pop() instead of
        # del comp[len(comp) - 1]).
        comp.pop()
        data2 = np.array([[float(comp[0]), float(comp[1]), float(comp[2]),
                           float(comp[3]), float(comp[4])]])
        sm.cluster(5)
        pred = np.absolute(sm.predict_by(data2, 5))
        self.details.write(comp[4] + "\t" + comp[1] + "\t" + str(pred[0]) +
                           "\t" + str(topographic_error) + "\n")
        print(pred)
        if pred <= 0.5:
            print("No congestion")
            self._congdelay(pred)
        elif pred > 0.5:
            print("Congestion there for next 5 seconds atleast")
            self.prevpred = pred
    except IndexError:
        # pdata1.txt was empty or malformed (line[0] / comp[k] missing).
        print("ERROR")
def build_som(self, X):
    """Build and train a 15x15 PCA-initialised SOM on X.

    Prints the topographic and quantization errors of the trained map
    and returns the trained SOM instance.
    """
    print('Building SOM...')
    som = SOMFactory().build(X, normalization='var', mapsize=(15, 15),
                             initialization='pca')
    som.train(n_job=1, verbose='info', train_rough_len=200,
              train_finetune_len=100)
    topo_err = som.calculate_topographic_error()
    quant_err = np.mean(som._bmu[1])
    print("Topographic error = {}; Quantization error = {}".format(
        topo_err, quant_err))
    return som
def training_specific_som(map_x_size, map_y_size, X_train):
    """Train one hexagonal-lattice SOM with the given map dimensions.

    Persists the model via joblib, prints its topographic and
    quantization errors, and returns the trained SOM.
    """
    som = SOMFactory().build(
        X_train,
        mapsize=[map_x_size, map_y_size],
        normalization='var',
        initialization='random',
        component_names=names,
        lattice='hexa')
    som.train(n_job=1, verbose=False, train_rough_len=30,
              train_finetune_len=100)
    joblib.dump(som, path + "batched_model_specific{}.joblib".format(0))
    topo = som.calculate_topographic_error()
    quant = som.calculate_quantization_error()
    print("Topographic error: " + str(topo) +
          ", Quantization error: " + str(quant) + "\n")
    return som
def self_organizing_map(normalized_df, normalization='var', initialization='pca',
                        n_job=1, train_rough_len=2, train_finetune_len=5,
                        verbose=None):
    """Create and train a SOM network on the rows of `normalized_df`.

    You can experiment with different normalizations and initializations.
    Prints two quality measures of the trained map and returns it:
    - quantization error: average distance between each data vector and
      its BMU;
    - topographic error: proportion of data vectors whose first and
      second BMUs are not adjacent units.
    """
    network = SOMFactory().build(normalized_df.values,
                                 normalization=normalization,
                                 initialization=initialization,
                                 component_names=normalized_df.columns)
    network.train(n_job=n_job,
                  train_rough_len=train_rough_len,
                  train_finetune_len=train_finetune_len,
                  verbose=verbose)
    err_topo = network.calculate_topographic_error()
    err_quant = np.mean(network._bmu[1])
    print("Topographic error = %s; Quantization error = %s"
          % (err_topo, err_quant))
    return network
# -*- coding: utf-8 -*-
"""
Created on Sat Oct 7 15:09:18 2017

@author: Ethan

Demo: train a small SOM on random RGB triples and compute its
topographic and quantization errors.
"""
import numpy as np
from matplotlib import pyplot as plt
from sompy.sompy import SOMFactory

# 100 random RGB samples. randint's upper bound is exclusive, so 256 is
# required to cover the full 0-255 channel range (the original's 255
# silently excluded the value 255).
data = np.random.randint(0, 256, (100, 3))

# NOTE(review): these three are unused below — presumably leftovers from a
# hand-rolled SOM; kept in case code beyond this chunk reads them.
dims = np.array([5, 5])
iterations = 2000
learningRate = 0.01

# normalize channel values into [0, 1]
data = data / data.max()

sm = SOMFactory().build(data, normalization='var', initialization='random',
                        component_names=['r', 'g', 'b'])
sm.train(n_job=1, verbose=False, train_rough_len=2, train_finetune_len=5)

topographic_error = sm.calculate_topographic_error()
quantization_error = np.mean(sm._bmu[1])
## Z.append([mags[i] for i in gunn + isubaru]) Z.append([mags[i] for i in gunn]) ## Z = np.array(Z) print(Z) print('\n\n') sm = SOMFactory().build(Z, normalization='var', initialization='random', component_names=gunn) sm.train(n_job=1, verbose=False, train_rough_len=2, train_finetune_len=5) topographic_error = sm.calculate_topographic_error() quantization_error = np.mean(sm._bmu[1]) print("Topographic error = %s; Quantization error = %s" % (topographic_error, quantization_error)) vhts = BmuHitsView(10, 10, 'Hits Map', text_size=7) vhts.show(sm, anotate=True, onlyzeros=False, labelsize=12, cmap='Greys', logaritmic=False) pl.show()
class MySOM:
    """Convenience wrapper around a sompy SOM: build, train, plot and
    extract per-sample neuron/cluster assignments."""

    def __init__(self, df, mapsize, initialization='random'):
        """
        :param df: input data frame
        :param mapsize: output-layer dimensions, normally 2-D, given in the
            form (20, 20)
        :param initialization: "PCA" or "random" — how the weights are
            initialised
            - PCA uses the variables' principal-component values as weights,
              see sompy.codebool.pca_linear_initialization
            - random initialises with random numbers
        """
        self.data = np.array(df)
        self.sm = SOMFactory().build(self.data, mapsize=mapsize,
                                     initialization=initialization,
                                     component_names=df.columns)
        self.train()

    def train(self):
        # Train with fixed short rough/fine-tune phases.
        self.sm.train(n_job=1, verbose=False, train_rough_len=2,
                      train_finetune_len=5)

    def print_error(self):
        # Topographic error: share of samples whose two best units are not
        # adjacent; quantization error: mean sample-to-BMU distance.
        topographic_error = self.sm.calculate_topographic_error()
        quantization_error = np.mean(self.sm._bmu[1])
        print("Topographic error = %s; Quantization error = %s" %
              (topographic_error, quantization_error))

    def draw_input_weights(self):
        # Component planes: one weight map per input variable.
        from sompy.visualization.mapview import View2D
        view2D = View2D(10, 10, "rand data", text_size=10)
        view2D.show(self.sm, col_sz=4, which_dim="all", desnormalize=True)
        plt.show()

    def draw_hit_map(self):
        # Hit map: how many samples land on each output neuron.
        from sompy.visualization.bmuhits import BmuHitsView
        vhts = BmuHitsView(4, 4, "Hits Map", text_size=12)
        vhts.show(self.sm, anotate=True, onlyzeros=False, labelsize=12,
                  cmap="Greys", logaritmic=False)
        plt.show()

    def draw_cluster_map(self):
        # Cluster map: neuron grid coloured by cluster label.
        from sompy.visualization.hitmap import HitMapView
        hits = HitMapView(20, 20, "Clustering", text_size=12)
        hits.show(self.sm)
        plt.show()

    def cluster(self, n):
        # K-means clustering of the neurons into n groups.
        self.sm.cluster(n)

    def get_cluster_label(self):
        # Length equals mapsize[0] * mapsize[1].
        return self.sm.cluster_labels

    def get_neurons(self):
        """
        Return the neuron (BMU) each original sample is mapped to; the
        upstream package does not expose this, hence the helper.
        :return: array, length = self.df.shape[0]
        """
        return self.sm._bmu[0]

    def get_label(self):
        """
        Return the cluster label of each original sample; the upstream
        package does not expose this, hence the helper.
        :return: array, length = self.df.shape[0]
        """
        neurons_label_dict = {
            i: j
            for i, j in enumerate(self.sm.cluster_labels)
        }
        return np.array([neurons_label_dict[i] for i in self.sm._bmu[0]])

    def predict(self, x):
        """
        Use the cluster labels as y and apply a machine-learning
        algorithm of choice.
        :param x:
        :return:
        """
        pass
dfdrop = df.drop_duplicates() dfdrop.info() topo = [] quant = [] array = np.arange(10, 25, 1) for i in array: som = SOMFactory().build(df.values, mapsize=[i,i], normalization = 'var', initialization='pca', component_names=names,\ neighborhood = 'gaussian', lattice='rect') som.train(n_job=1, verbose='info', train_rough_len=50, train_rough_radiusin=4, train_finetune_radiusin=1, train_finetune_len=50) topo.append(som.calculate_topographic_error()) quant.append(np.mean(som._bmu[1])) print i plt.scatter(topo, quant, c=array, s=50) plt.title('Self Organizing Map') plt.xlabel('Topographic Error') plt.ylabel('Quantization Error') plt.colorbar(label='grid size nxn') som = SOMFactory().build(df.values, mapsize=[20,20], normalization = 'var', initialization='pca', component_names=names,\ neighborhood = 'gaussian', lattice='rect') som.train(n_job=1, verbose='info', train_rough_len=100,