예제 #1
0
    def sm_training(self):
        """
        Train a SOM on a user-selected CSV file and pickle the trained model.

        A file dialog (restricted to ``*.csv``) asks for the data file. The
        file is loaded with pandas, missing values are replaced by 0, and a
        SOM is built and trained with the settings read from the entry
        widgets stored on ``self`` (map size, lattice, normalization,
        initialization and the training lengths/radii).

        The trained model is pickled to ``Models/sm_model`` and the
        topographic and quantization errors are printed to stdout.

        If no file is selected, an error box is shown and nothing is trained.
        """
        file = askopenfilename(initialdir=dir_name, title="Select Data",
                               filetypes=[("csv files", "*.csv")])

        # A cancelled dialog yields an empty value rather than None, so test
        # for any falsy result and stop here instead of falling through to
        # open() with an unusable path.
        if not file:
            tk.messagebox.showerror("Error","your chosen file is not valid. \n Please choose again.")
            return

        # Context manager so the handle is closed once pandas has parsed it.
        with open(file, "rb") as content:
            data = pd.read_csv(content)

        # Missing values are replaced by 0 before training.
        df = data.fillna(0).values

        # initialize the build
        # NOTE(review): component names come from self.comp_names, which this
        # method never assigns; the columns of the file just loaded are
        # available as data.columns - confirm which one is intended.
        sm = SOMFactory().build(
            data=df,
            mapsize=(int(self.Mapsize_x.get()), int(self.Mapsize_y.get())),
            mask=None,
            mapshape='planar',
            lattice=self.Lattice_ent.get(),
            normalization=self.Normalization_ent.get(),
            initialization=self.Initialization_ent.get(),
            neighborhood='gaussian',
            training='batch',
            name='sompy',
            component_names=self.comp_names)

        # start training
        # np.inf is the spelling available on every NumPy version; the
        # np.Inf alias was removed in NumPy 2.0.
        sm.train(n_job=int(self.n_job_ent.get()),
                 shared_memory=self.shared_memory_ent.get(),
                 verbose=self.verbose_ent.get(),
                 train_rough_len=int(self.train_rough_len_ent.get()),
                 train_rough_radiusin=int(self.train_rough_rin_ent.get()),
                 train_rough_radiusfin=int(self.train_rough_rfin_ent.get()),
                 train_finetune_len=int(self.train_ft_len_ent.get()),
                 train_finetune_radiusin=int(self.train_ft_rin_ent.get()),
                 train_finetune_radiusfin=int(self.train_ft_rfin_ent.get()),
                 train_len_factor=int(self.train_len_factor_ent.get()),
                 maxtrainlen=np.inf)

        # errors calculation
        topographic_error = sm.calculate_topographic_error()
        quantitization_error = np.mean(sm._bmu[1])

        # Persist the trained model; the with-block guarantees the file is
        # flushed and closed even if pickling fails.
        with open("Models/sm_model", "wb") as model_file:
            pickle.dump(sm, model_file)

        # print errors on the cmd prompt
        print("the topographic error is %s " % topographic_error)
        print("the quantitization error is %s " % quantitization_error)
예제 #2
0
def training_batched_som(map_min_size, map_max_size, nb_models, X_train):
    """Train ``nb_models`` SOMs with random map sizes and plot their errors.

    For every model the two map side lengths are drawn independently from
    ``range(map_min_size, map_max_size)`` (upper bound excluded). Each model
    is trained and saved with joblib as
    ``path + "batched_model_<i>.joblib"``. The topographic and quantization
    errors of the models trained by this call are then shown as a scatter
    plot so the best one can be selected.

    Relies on ``names`` (component names) and ``path`` (output prefix) being
    defined outside this function.

    :param map_min_size: smallest map side length that can be drawn
    :param map_max_size: exclusive upper bound for the map side length
    :param nb_models: number of models to train
    :param X_train: training data handed to ``SOMFactory().build``
    """
    size_choices = range(map_min_size, map_max_size)
    errors = []
    for i in range(nb_models):
        sm = SOMFactory().build(
            X_train,
            mapsize=[
                random.choice(size_choices),
                random.choice(size_choices)
            ],
            normalization='var',
            initialization='random',
            component_names=names,
            lattice="hexa")
        sm.train(n_job=1,
                 verbose=False,
                 train_rough_len=30,
                 train_finetune_len=100)
        joblib.dump(sm, path + "batched_model_{}.joblib".format(i))
        print("end of training model n°" + str(i))

        # Record the errors of the model just trained. Globbing
        # path + "batched_model*" afterwards would also pick up files left
        # over from earlier runs and any other file sharing that prefix.
        errors.append((sm.calculate_topographic_error(),
                       sm.calculate_quantization_error()))

    # Nothing was trained (nb_models <= 0): unpacking zip(*errors) would
    # fail, and there is nothing to plot.
    if not errors:
        return

    # Plot the errors obtained in order to select the best model
    e_top, e_q = zip(*errors)

    plt.scatter(e_top, e_q)
    plt.xlabel("Topographic error")
    plt.ylabel("Quantization error")
    plt.title("Topographic and quantization errors of the models trained")
    plt.show()
예제 #3
0
    def _prediction(self):
        """Train a SOM on the recorded data and predict for the latest sample.

        Loads the comma-separated training data, trains a SOM on it, then
        reads the first line of the prediction file, drops its last field and
        uses the first five fields as the sample to predict. The absolute
        value of the prediction is written to ``self.details`` together with
        two sample fields and the topographic error, printed, and stored in
        ``self.prevpred``. When the prediction is ``<= 0.5``
        ``self._congdelay`` is called with it.

        An ``IndexError`` anywhere in the procedure (for example an empty
        prediction file or a line with too few fields) is reported by
        printing ``ERROR``.
        """
        try:
            data = np.loadtxt('/home/mininet/testmininet/trainingdata1.txt',
                              delimiter=',')
            names = [
                'Interval', 'Throughput(Mbits/0.5sec)', 'Bandwidth(Mbits/sec)',
                'Jitter(ms)', 'Loss', 'Decision'
            ]

            sm = SOMFactory().build(data,
                                    normalization='var',
                                    initialization='random',
                                    component_names=names)

            sm.train(n_job=1,
                     verbose='info',
                     train_rough_len=15,
                     train_finetune_len=15)

            topographic_error = sm.calculate_topographic_error()

            # Read through a context manager so the handle is closed instead
            # of being left to the garbage collector.
            with open('/home/mininet/testmininet/pdata1.txt') as sample_file:
                line = sample_file.readlines()
            log.debug(line)

            comp = line[0].split(",")
            # Drop the trailing field of the line.
            del comp[-1]
            # Explicit indexing keeps the IndexError on short lines, which
            # the handler below reports.
            data2 = np.array([[float(comp[i]) for i in range(5)]])

            sm.cluster(5)
            # presumably 5 is the index of the 'Decision' column - verify
            # against the SOMPY predict_by signature.
            pred = np.absolute(sm.predict_by(data2, 5))

            self.details.write(comp[4] + "\t" + comp[1] + "\t" + str(pred[0]) +
                               "\t" + str(topographic_error) + "\n")
            print(pred)
            if pred <= 0.5:
                print("No congestion")
                self._congdelay(pred)
            elif pred > 0.5:
                # Kept as an explicit comparison: a NaN prediction matches
                # neither branch.
                print("Congestion there for next 5 seconds atleast")

            self.prevpred = pred
        except IndexError:
            print("ERROR")
예제 #4
0
    def build_som(self, X):
        """Build and train a 15x15 SOM on ``X`` and return the trained model.

        The map uses 'var' normalization and 'pca' initialization and is
        trained with a rough phase of length 200 and a fine-tuning phase of
        length 100. The topographic and quantization errors are printed
        once training has finished.

        :param X: training data handed to ``SOMFactory().build``
        :return: the trained SOM object
        """
        print('Building SOM...')
        som_model = SOMFactory().build(
            X,
            normalization='var',
            mapsize=(15, 15),
            initialization='pca',
        )

        training_options = {
            'n_job': 1,
            'verbose': 'info',
            'train_rough_len': 200,
            'train_finetune_len': 100,
        }
        som_model.train(**training_options)

        report = "Topographic error = {}; Quantization error = {}"
        topo_err = som_model.calculate_topographic_error()
        # Mean of the second row of the BMU table, reported as the
        # quantization error.
        quant_err = np.mean(som_model._bmu[1])
        print(report.format(topo_err, quant_err))
        return som_model
예제 #5
0
def training_specific_som(map_x_size, map_y_size, X_train):
    """Train one SOM with an explicit map size, save it and return it.

    The model is built on a hexagonal lattice with 'var' normalization and
    random initialization, trained, dumped with joblib to
    ``path + "batched_model_specific0.joblib"`` and its topographic and
    quantization errors are printed.

    Relies on ``names`` (component names) and ``path`` (output prefix) being
    defined outside this function.

    :param map_x_size: first entry of the map size
    :param map_y_size: second entry of the map size
    :param X_train: training data handed to ``SOMFactory().build``
    :return: the trained SOM object
    """
    build_options = {
        'mapsize': [map_x_size, map_y_size],
        'normalization': 'var',
        'initialization': 'random',
        'component_names': names,
        'lattice': 'hexa',
    }
    model = SOMFactory().build(X_train, **build_options)
    model.train(n_job=1, verbose=False, train_rough_len=30, train_finetune_len=100)

    target = path + "batched_model_specific{}.joblib".format(0)
    joblib.dump(model, target)

    # Evaluate in the same order as reported: topographic, then quantization.
    topo_err = model.calculate_topographic_error()
    quant_err = model.calculate_quantization_error()
    summary = "".join([
        "Topographic error: ", str(topo_err),
        ", Quantization error: ", str(quant_err),
        "\n",
    ])
    print(summary)
    return model
예제 #6
0
def self_organizing_map(normalized_df,
                        normalization='var',
                        initialization='pca',
                        n_job=1,
                        train_rough_len=2,
                        train_finetune_len=5,
                        verbose=None):
    """Build and train a SOM on a data frame, print its errors and return it.

    The values of ``normalized_df`` are the training data and its columns
    are the component names. Different normalizations and initializations
    can be tried through the keyword arguments.

    :param normalized_df: data frame providing ``.values`` and ``.columns``
    :param normalization: normalization passed to ``SOMFactory().build``
    :param initialization: initialization passed to ``SOMFactory().build``
    :param n_job: forwarded to ``train``
    :param train_rough_len: forwarded to ``train``
    :param train_finetune_len: forwarded to ``train``
    :param verbose: forwarded to ``train``
    :return: the trained SOM object
    """
    model = SOMFactory().build(
        normalized_df.values,
        normalization=normalization,
        initialization=initialization,
        component_names=normalized_df.columns,
    )

    train_options = dict(
        n_job=n_job,
        train_rough_len=train_rough_len,
        train_finetune_len=train_finetune_len,
        verbose=verbose,
    )
    model.train(**train_options)

    # Quantization error: average distance between each data vector and its
    # BMU. Topographic error: proportion of data vectors whose first and
    # second BMUs are not adjacent units.
    topo_err = model.calculate_topographic_error()
    quant_err = np.mean(model._bmu[1])
    message = "Topographic error = %s; Quantization error = %s"
    print(message % (topo_err, quant_err))
    return model
예제 #7
0
# -*- coding: utf-8 -*-
"""
Created on Sat Oct  7 15:09:18 2017

@author: Ethan
"""

import numpy as np
from matplotlib import pyplot as plt
from sompy.sompy import SOMFactory

# 100 random samples with 3 integer channels each, drawn from [0, 255).
data = np.random.randint(0, 255, size=(100, 3))

# Experiment settings; the SOMFactory calls below do not read them.
dims = np.array([5, 5])
iterations = 2000
learningRate = 0.01

# Scale the samples by the largest value present.
data = data / data.max()

# Build a SOM over the three colour components and train it briefly.
sm = SOMFactory().build(
    data,
    normalization='var',
    initialization='random',
    component_names=['r', 'g', 'b'],
)
sm.train(
    n_job=1,
    verbose=False,
    train_rough_len=2,
    train_finetune_len=5,
)

# Quality measures of the trained map.
topographic_error = sm.calculate_topographic_error()
quantization_error = np.mean(sm._bmu[1])
예제 #8
0
파일: som.py 프로젝트: michaelJwilson/BEAST
        ##  Z.append([mags[i] for i in gunn + isubaru])
        Z.append([mags[i] for i in gunn])

##
# Convert the accumulated rows to a NumPy array and show them.
Z = np.array(Z)

print(Z)
print('\n\n')

# Build a SOM with one component per entry of `gunn` and train it briefly.
sm = SOMFactory().build(
    Z,
    normalization='var',
    initialization='random',
    component_names=gunn,
)
sm.train(
    n_job=1,
    verbose=False,
    train_rough_len=2,
    train_finetune_len=5,
)

# Report the quality measures of the trained map.
topographic_error = sm.calculate_topographic_error()
quantization_error = np.mean(sm._bmu[1])

print("Topographic error = %s; Quantization error = %s" %
      (topographic_error, quantization_error))

# Draw the BMU hits map for the trained SOM.
vhts = BmuHitsView(10, 10, 'Hits Map', text_size=7)
vhts.show(
    sm,
    anotate=True,
    onlyzeros=False,
    labelsize=12,
    cmap='Greys',
    logaritmic=False,
)

pl.show()
예제 #9
0
class MySOM:
    """Thin wrapper around a SOMPY model that is built and trained on creation."""

    def __init__(self, df, mapsize, initialization='random'):
        """
        Build a SOM for ``df`` and train it immediately.

        :param df:              data frame
        :param mapsize:         dimensions of the output layer, usually 2-D,
                                given in the form (20, 20)
        :param initialization:  "PCA" or "random", the method used to
                                initialize the weights
                - PCA uses the principal-component values of the variables
                  as weights, see sompy.codebook.pca_linear_initialization
                - random initializes the weights with random numbers
        """
        self.data = np.array(df)
        build_options = {
            'mapsize': mapsize,
            'initialization': initialization,
            'component_names': df.columns,
        }
        self.sm = SOMFactory().build(self.data, **build_options)
        self.train()

    def train(self):
        """Train the wrapped SOM with a short rough and fine-tuning phase."""
        options = dict(n_job=1,
                       verbose=False,
                       train_rough_len=2,
                       train_finetune_len=5)
        self.sm.train(**options)

    def print_error(self):
        """Print the topographic and quantization errors of the trained SOM."""
        topo_err = self.sm.calculate_topographic_error()
        quant_err = np.mean(self.sm._bmu[1])
        message = "Topographic error = %s; Quantization error = %s"
        print(message % (topo_err, quant_err))

    def draw_input_weights(self):
        """Show the 2-D view of all input dimensions of the map."""
        from sompy.visualization.mapview import View2D
        weights_view = View2D(10, 10, "rand data", text_size=10)
        weights_view.show(self.sm,
                          col_sz=4,
                          which_dim="all",
                          desnormalize=True)
        plt.show()

    def draw_hit_map(self):
        """Show the BMU hits map."""
        from sompy.visualization.bmuhits import BmuHitsView
        hits_view = BmuHitsView(4, 4, "Hits Map", text_size=12)
        hits_view.show(
            self.sm,
            anotate=True,
            onlyzeros=False,
            labelsize=12,
            cmap="Greys",
            logaritmic=False,
        )
        plt.show()

    def draw_cluster_map(self):
        """Show the cluster hit-map view."""
        from sompy.visualization.hitmap import HitMapView
        cluster_view = HitMapView(20, 20, "Clustering", text_size=12)
        cluster_view.show(self.sm)
        plt.show()

    def cluster(self, n):
        """Cluster the map by delegating to the wrapped SOM's ``cluster(n)``."""
        self.sm.cluster(n)

    def get_cluster_label(self):
        """Return the cluster labels of the wrapped SOM."""
        # Length equals mapsize[0] * mapsize[1]
        return self.sm.cluster_labels

    def get_neurons(self):
        """
        Get the neuron matched to every sample of the original data. The
        original package does not provide this, so it is done by hand here.
        :return: array with one entry per sample of the original data
        """
        return self.sm._bmu[0]

    def get_label(self):
        """
        Get the cluster label of every sample of the original data. The
        original package does not provide this, so it is done by hand here.
        :return: array with one entry per sample of the original data
        """
        # Neuron index -> cluster label lookup table.
        label_of_neuron = dict(enumerate(self.sm.cluster_labels))
        sample_neurons = self.sm._bmu[0]
        return np.array([label_of_neuron[neuron] for neuron in sample_neurons])

    def predict(self, x):
        """
        Placeholder: use the labels as y and apply machine learning
        algorithms of choice.
        :param x:
        :return:
        """
        pass
예제 #10
0
# Drop duplicate rows and print a summary of the de-duplicated frame.
# NOTE(review): the SOMs below are built from df.values, not from dfdrop -
# confirm whether the de-duplicated data was meant to be used.
dfdrop = df.drop_duplicates()
dfdrop.info()

# Train one square SOM per grid size (10x10 up to 24x24) and record its
# topographic and quantization errors.
topo = []
quant = []
array = np.arange(10, 25, 1)
for i in array:
    som = SOMFactory().build(df.values,
                             mapsize=[i, i],
                             normalization='var',
                             initialization='pca',
                             component_names=names,
                             neighborhood='gaussian',
                             lattice='rect')
    som.train(n_job=1,
              verbose='info',
              train_rough_len=50,
              train_rough_radiusin=4,
              train_finetune_radiusin=1,
              train_finetune_len=50)
    topo.append(som.calculate_topographic_error())
    quant.append(np.mean(som._bmu[1]))
    # Function-call form: valid on Python 2 and 3, whereas the bare
    # `print i` statement is a SyntaxError on Python 3.
    print(i)

# One point per grid size, coloured by the size, to compare the two errors.
plt.scatter(topo, quant, c=array, s=50)
plt.title('Self Organizing Map')
plt.xlabel('Topographic Error')
plt.ylabel('Quantization Error')
plt.colorbar(label='grid size nxn')



som = SOMFactory().build(df.values, mapsize=[20,20], normalization = 'var', initialization='pca', component_names=names,\
                    neighborhood = 'gaussian', lattice='rect')
som.train(n_job=1,
          verbose='info',
          train_rough_len=100,