# Beispiel #1 (Example #1)
# 0
    def _prediction(self):
        """Train a SOM on historical link metrics and predict congestion.

        Loads the training set from trainingdata1.txt, trains a SOM, then
        projects the latest sample from pdata1.txt onto the map.  The
        prediction and the topographic error are appended to
        ``self.details``; predictions <= 0.5 are treated as "no congestion"
        and forwarded to ``self._congdelay``.
        """
        try:
            data = np.loadtxt('/home/mininet/testmininet/trainingdata1.txt',
                              delimiter=',')
            names = [
                'Interval', 'Throughput(Mbits/0.5sec)', 'Bandwidth(Mbits/sec)',
                'Jitter(ms)', 'Loss', 'Decision'
            ]

            sm = SOMFactory().build(data,
                                    normalization='var',
                                    initialization='random',
                                    component_names=names)

            sm.train(n_job=1,
                     verbose='info',
                     train_rough_len=15,
                     train_finetune_len=15)

            topographic_error = sm.calculate_topographic_error()
            # Fixed: the file handle was previously leaked
            # (open(...).readlines() with no close); use a context manager.
            with open('/home/mininet/testmininet/pdata1.txt') as f:
                line = f.readlines()
            log.debug(line)
            comp = line[0].split(",")
            comp.pop()  # drop the empty field after the trailing comma
            data2 = np.array([[
                float(comp[0]),
                float(comp[1]),
                float(comp[2]),
                float(comp[3]),
                float(comp[4])
            ]])
            sm.cluster(5)
            pred = np.absolute(sm.predict_by(data2, 5))

            self.details.write(comp[4] + "\t" + comp[1] + "\t" + str(pred[0]) +
                               "\t" + str(topographic_error) + "\n")
            print(pred)
            if pred <= 0.5:
                print("No congestion")
                self._congdelay(pred)
            elif pred > 0.5:
                print("Congestion there for next 5 seconds atleast")

            self.prevpred = pred
        except IndexError:
            # A short/empty pdata1.txt line makes comp too small; treat it
            # as a recoverable read error rather than crashing the monitor.
            print("ERROR")
# Beispiel #2 (Example #2)
# 0
def cluster_category_data(df,
                          scale_data='minmax',
                          dim_red_method='som',
                          use_elbow_method=True,
                          cluster_method='hierarchical',
                          n_clusters=None,
                          verbose=1,
                          perplexity=None):
    """
    Cluster the rows of a category dataframe, optionally after dimensionality
    reduction, and return the labels plus the number of clusters used.

    :param df: dataframe containing all the columns belonging to a category to be used in clustering
    :param scale_data: scaling method: 'minmax', 'standard', or anything else for no scaling
    :param dim_red_method: options are 'som', 'umap', 'tsne', None. If None, do clustering directly.
    :param use_elbow_method: if True, the elbow method is used to find the optimum number of
        clusters. If False, n_clusters needs to be specified.
        (Fixed: the default used to be the *string* 'True', so even passing
        the string 'False' kept the elbow method enabled.)
    :param cluster_method: options are 'kmeans' and 'hierarchical'. In either case KMeans is
        used for the elbow method (because of the time required).
    :param n_clusters: if use_elbow_method is False, n_clusters needs to be given
    :param verbose: if truthy, print progress of the clustering process
    :param perplexity: if dim_red_method is 'tsne', perplexity needs to be specified
    :return: tuple of (labels for each row of df, number of clusters used)
    """
    t = time.time()

    # Scale features so no single column dominates the distance metric.
    if scale_data == 'minmax':
        X = MinMaxScaler().fit_transform(df)
    elif scale_data == 'standard':
        X = StandardScaler().fit_transform(df)
    else:
        X = df.values

    if verbose:
        print(f'number of features = {df.shape[1]}')

    if dim_red_method == 'som':
        if verbose:
            print(
                'Self Organising Maps is being used for dimensionality reduction...'
            )
        opt_k = 2
        max_s = -1
        for mapsize in [(30, 30)]:
            if verbose:
                print(f'map size = {mapsize}')
            sm = SOMFactory().build(X,
                                    normalization='var',
                                    initialization='pca',
                                    mapsize=mapsize)
            sm.train(n_job=1,
                     verbose=False,
                     train_rough_len=100,
                     train_finetune_len=500)
            if use_elbow_method:
                model = KElbowVisualizer(KMeans(), k=20, timings=False)
                elbow = model.fit(sm.codebook.matrix).elbow_value_
                if elbow and verbose:
                    print(f'elbow value = {elbow}')
                if not elbow:
                    if verbose:
                        print('elbow not found')
                    # Fall back to the k with the best silhouette score on
                    # the SOM codebook.
                    ms = -1
                    for k in range(2, 20):
                        km_labels = KMeans(k).fit_predict(sm.codebook.matrix)
                        s = silhouette_score(sm.codebook.matrix, km_labels)
                        if s > ms:
                            # Fixed: ms was never updated, so every k passed
                            # the test and elbow always ended up as 19.
                            ms = s
                            elbow = k
            else:
                elbow = n_clusters
            # Map each sample to its BMU, then give it that neuron's cluster.
            x = sm.project_data(X)
            labels, _, _ = sm.cluster(opt=elbow, cl_type=cluster_method)
            clabels = [labels[x[i]] for i in range(X.shape[0])]
            s_score = silhouette_score(X, clabels)
            if verbose:
                print(f'silhouette score = {round(s_score, 3)}')
            max_s = max(s_score, max_s)
            if (max_s == s_score):
                opt_k = elbow
                opt_labels = clabels
                opt_size = mapsize
            if (max_s > s_score):
                # Scores stopped improving; keep the previous best map size.
                break
        if verbose:
            print(f'optimum mapsize = {opt_size}')
            print(
                f'optimum number of clusters = {opt_k} & silhouette score = {round(max_s,3)}'
            )
            print(f'time taken = {round(time.time()-t,1)}')
        return opt_labels, opt_k

    elif dim_red_method:
        if dim_red_method == 'umap':
            print('UMAP is being used for dimensionality reduction...')
            embedding = umap.UMAP(n_components=2,
                                  n_neighbors=5,
                                  min_dist=0.0001,
                                  metric='euclidean',
                                  random_state=1,
                                  spread=0.5,
                                  n_epochs=1000).fit_transform(X)
            print('UMAP embedding done...')
        elif dim_red_method == 'tsne':
            print('t-SNE is being used for dimensionality reduction...')
            embedding = TSNE(perplexity=perplexity).fit_transform(X)
            print('t-SNE embedding is done...')
        if use_elbow_method:
            model = KElbowVisualizer(KMeans(), k=20, timings=False)
            elbow = model.fit(embedding).elbow_value_
        else:
            elbow = n_clusters
        if cluster_method == 'kmeans':
            opt_labels = KMeans(elbow).fit_predict(embedding)
        elif cluster_method == 'hierarchical':
            opt_labels = AgglomerativeClustering(elbow).fit_predict(embedding)
        if verbose:
            # Note: the silhouette score is computed on the original feature
            # space X, not on the low-dimensional embedding.
            s_score = silhouette_score(X, opt_labels)
            print(
                f'number of clusters = {elbow} and silhouette_score = {s_score}'
            )
        return opt_labels, elbow

    else:
        # No dimensionality reduction: cluster the (scaled) features directly.
        if use_elbow_method:
            model = KElbowVisualizer(KMeans(), k=20, timings=False)
            elbow = model.fit(X).elbow_value_
        else:
            elbow = n_clusters
        if cluster_method == 'kmeans':
            opt_labels = KMeans(elbow).fit_predict(X)
        elif cluster_method == 'hierarchical':
            opt_labels = AgglomerativeClustering(elbow).fit_predict(X)
        print(f'silhouette score = {round(silhouette_score(X,opt_labels),3)}')
        return opt_labels, elbow
# NOTE(review): this fragment relies on names defined elsewhere in the
# original notebook/script (som_hc_cons, std_cons, sm_consump,
# mapsize_consump, final_clusters_consump) -- it is not runnable on its own.
# Cluster centroids of the hierarchical-clustering solution (per-cluster
# column means); the SOM neuron id column 'Labels' is not a feature.
centroids_hc_som_cons = som_hc_cons.groupby('Hierarchical Clustering').mean()
centroids_hc_som_cons = centroids_hc_som_cons.drop(columns='Labels')

# 14.1.1.1 Silhouette scores
# Average silhouette score
silhouette_avg_som_hc_cons = silhouette_score(std_cons, som_hc_cons['Hierarchical Clustering'].values)
# Silhouette scores individual to each observation
sample_silhouette_som_hc_cons = pd.DataFrame(
    silhouette_samples(std_cons, som_hc_cons['Hierarchical Clustering'].values), columns=['Value'])
# Number of positives silhouette scores
pos_sample_hc_cons = sample_silhouette_som_hc_cons[sample_silhouette_som_hc_cons.Value > 0].count()

# 14.1.2 K-Means Clustering on top of SOM
# Visualize to which of the k cluster from the k-means belongs each neuron
k = 3
# One k-means label per SOM neuron.
som_kmeans_cons = sm_consump.cluster(k)
hits = HitMapView(10, 10, "Clustering", text_size=7)
a = hits.show(sm_consump)

# 'som_kmeans_cons' is a dataframe with a column 'K-means' that specifies to which cluster belongs each client
som_kmeans_cons = pd.DataFrame(som_kmeans_cons, columns=['K_means'])
# Neuron ids 0 .. mapsize^2 - 1, used as the join key below.
som_kmeans_cons['Labels'] = range(mapsize_consump * mapsize_consump)
# NOTE(review): pandas normally rejects combining `on=` with
# `right_index=True` in merge -- verify this call against the pandas
# version actually used.
som_kmeans_cons = final_clusters_consump.merge(som_kmeans_cons, how='inner', on='Labels', right_index=True)
som_kmeans_cons = som_kmeans_cons.sort_index()

# Verify the number of observations associated of each cluster and the cluster centroids coordinates
count_obs_som_kmeans_cons = som_kmeans_cons.groupby('K_means').count()
centroids_som_kmeans_cons = som_kmeans_cons.groupby('K_means').mean()
centroids_som_kmeans_cons = centroids_som_kmeans_cons.drop(columns='Labels')

# 14.1.2.1 silhouette scores
# Beispiel #4 (Example #4)
# 0
# Build a 50x50 SOM from the dataframe's values; per-variable normalization,
# PCA weight initialization, and the dataframe columns as component names.
sm = SOMFactory().build(df.values,
                        [50, 50],
                        mask=None, mapshape='planar',
                        lattice='rect',
                        normalization='var',
                        initialization='pca',
                        component_names=list(df.columns))

sm.train(n_job=2, verbose='info', train_rough_len=30, train_finetune_len=20)

# Persist the trained map so it can be reloaded without retraining.
with open(
    '/content/drive/My Drive/IC_Cristine/SOM/som_segundo.pkl',
    'wb') as arq:
    pickle.dump(sm, arq)

# Map quality: topographic error (neighbourhood preservation) and
# quantization error (mean distance of each sample to its BMU).
topographic_error = sm.calculate_topographic_error()
quantization_error = np.mean(sm._bmu[1])
print("Topographic error = {0}; Quantization error = {1}".format(
    topographic_error, quantization_error))

# Component planes: one heat map per input variable.
view2D = sompy.mapview.View2D(100, 100, "rand data", text_size=14)
view2D.show(sm, col_sz=5, which_dim="all", denormalize=True)

# K-means on the SOM neurons, then a hit map of the cluster assignment.
cl = sm.cluster(n_clusters=4)

h = sompy.hitmap.HitMapView(10, 10, 'hitmap', text_size=8, show_text=True)
h.show(sm)

# U-matrix: inter-neuron distances; high ridges mark cluster boundaries.
u = sompy.umatrix.UMatrixView(50, 50, 'umatrix', show_axis=True, text_size=8, show_text=True)
UMAT = u.build_u_matrix(sm, distance=1, row_normalized=False)
UMAT = u.show(sm, distance2=1, row_normalized=False, show_data=True, contooor=True, blob=False)
# Beispiel #5 (Example #5)
# 0
# NOTE(review): relies on `sm` (a trained sompy SOM) defined elsewhere.
# component planes view
from sompy.visualization.mapview import View2D
view2D = View2D(10, 10, "rand data", text_size=12)
# 'desnormalize' is the parameter spelling used by this sompy fork --
# presumably it un-scales values for display; confirm against the installed
# sompy version.
view2D.show(sm, col_sz=4, which_dim="all", desnormalize=True)

# U-matrix plot
from sompy.visualization.umatrix import UMatrixView

umat = UMatrixView(width=10, height=10, title='U-matrix')
umat.show(sm)

# do the K-means clustering on the SOM grid, sweep across k = 2 to 20
from sompy.visualization.hitmap import HitMapView
K = 20  # stop at this k for SSE sweep
K_opt = 18  # optimal K already found
# NOTE(review): stock sompy's cluster() takes n_clusters only; this
# three-value unpacking assumes a modified cluster() -- verify.
[labels, km, norm_data] = sm.cluster(K, K_opt)
hits = HitMapView(20, 20, "Clustering", text_size=12)
a = hits.show(sm)

import gmplot

# Plot each k-means centroid on a Google map; assumes the first two
# centroid coordinates are (lat, lon) -- TODO confirm column order.
gmap = gmplot.GoogleMapPlotter(54.2, -124.875224, 6)
j = 0
for i in km.cluster_centers_:
    gmap.marker(i[0], i[1], 'red', title="Centroid " + str(j))
    j += 1

gmap.draw("centroids_map.html")

from bs4 import BeautifulSoup
# Beispiel #6 (Example #6)
# 0
# NOTE(review): the head of this SOMFactory().build(...) call was lost when
# the example was extracted -- the two argument lines below are a dangling
# fragment and this snippet is not runnable as-is.
                        initialization='random',
                        component_names=names)
sm.train(n_job=1, verbose='info', train_rough_len=2, train_finetune_len=300)

# Compute the topographic and quantization errors
topographic_error = sm.calculate_topographic_error()
quantization_error = np.mean(sm._bmu[1])
print("Topographic error = %s; Quantization error = %s" %
      (topographic_error, quantization_error))

# Plot the component planes
from sompy.visualization.mapview import View2D
view2D = View2D(10, 10, "rand data", text_size=10)
view2D.show(sm, col_sz=4, which_dim="all", desnormalize=True)

# Plot the BMU hits view (sample counts per neuron)
from sompy.visualization.bmuhits import BmuHitsView
vhts = BmuHitsView(4, 4, "Hits Map", text_size=12)
vhts.show(sm,
          anotate=True,
          onlyzeros=False,
          labelsize=12,
          cmap="Greys",
          logaritmic=False)

# Plot the hit map view of the clustered neurons
from sompy.visualization.hitmap import HitMapView
sm.cluster(4)  # number of clusters for the grouping
hits = HitMapView(20, 20, "Clustering", text_size=12)
a = hits.show(sm)
# NOTE(review): relies on `som` (a trained sompy SOM) and the error values
# computed elsewhere.
# Fixed: Python 2 print statement -> Python 3 print() call.
print("Topographic error = %s; Quantization error = %s" % (topographic_error, quantization_error))

from sompy.visualization.mapview import View2D

# Component planes: one heat map per input variable.
view2D = View2D(4, 4, "rand data", text_size=16)
view2D.show(som, col_sz=2, which_dim="all", desnormalize=True)

# U-matrix plot
from sompy.visualization.umatrix import UMatrixView

umat = UMatrixView(width=10, height=10, title='U-matrix')
umat.show(som)


from sompy.visualization.hitmap import HitMapView
K = 10
# One k-means label per SOM neuron, shown as a hit map.
Kluster = som.cluster(K)
hits = HitMapView(20, 20, "K-Means Clustering", text_size=16)
a = hits.show(som)


#
som.cluster(n_clusters=K)
# som.cluster() returns the k-means cluster labels for each neuron of the map,
# but it is straightforward to retrieve the cluster labels for the whole training set,
# by assigning them the label of the BMUs (best-matching units). You can do for example:
# Make sure indices line up....
map_labels = som.cluster(n_clusters=K)
# som._bmu[0] holds the BMU index (as a float) for every training sample.
data_labels = np.array([map_labels[int(k)] for k in som._bmu[0]])
clusters = pd.Series(data_labels)
clusters = clusters.rename('cluster').to_frame()
# Beispiel #8 (Example #8)
# 0
class MySOM:
    """Thin convenience wrapper around sompy's SOMFactory/SOM.

    Builds and trains a SOM from a dataframe and exposes helpers for the
    visualizations and per-sample labels the underlying package does not
    provide directly.
    """

    def __init__(self, df, mapsize, initialization='random'):
        """
        :param df:              input dataframe
        :param mapsize:         output-layer dimensions, usually 2-D, e.g. (20, 20)
        :param initialization:  'pca' or 'random' -- weight initialization method
                - 'pca' uses the variables' principal components as weights
                  (see sompy.codebook.pca_linear_initialization)
                - 'random' initializes the weights with random numbers
        """
        self.data = np.array(df)
        self.sm = SOMFactory().build(self.data,
                                     mapsize=mapsize,
                                     initialization=initialization,
                                     component_names=df.columns)
        self.train()

    def train(self):
        """Train the SOM with a fixed rough/fine-tuning schedule."""
        self.sm.train(n_job=1,
                      verbose=False,
                      train_rough_len=2,
                      train_finetune_len=5)

    def print_error(self):
        """Print the topographic and quantization errors of the trained map."""
        topographic_error = self.sm.calculate_topographic_error()
        quantization_error = np.mean(self.sm._bmu[1])
        print("Topographic error = %s; Quantization error = %s" %
              (topographic_error, quantization_error))

    def draw_input_weights(self):
        """Plot the component planes (one heat map per input feature)."""
        from sompy.visualization.mapview import View2D
        view2D = View2D(10, 10, "rand data", text_size=10)
        view2D.show(self.sm, col_sz=4, which_dim="all", desnormalize=True)
        plt.show()

    def draw_hit_map(self):
        """Plot how many samples map to each neuron (BMU hit counts)."""
        from sompy.visualization.bmuhits import BmuHitsView
        vhts = BmuHitsView(4, 4, "Hits Map", text_size=12)
        vhts.show(self.sm,
                  anotate=True,
                  onlyzeros=False,
                  labelsize=12,
                  cmap="Greys",
                  logaritmic=False)
        plt.show()

    def draw_cluster_map(self):
        """Plot each neuron's cluster assignment (call cluster() first)."""
        from sompy.visualization.hitmap import HitMapView
        hits = HitMapView(20, 20, "Clustering", text_size=12)
        hits.show(self.sm)
        plt.show()

    def cluster(self, n):
        """Run k-means with n clusters on the neurons.

        Now also returns the labels sompy produces (previously discarded);
        they remain available via get_cluster_label() as before.
        """
        return self.sm.cluster(n)

    def get_cluster_label(self):
        # One label per neuron; length equals mapsize[0] * mapsize[1].
        return self.sm.cluster_labels

    def get_neurons(self):
        """
        Return the BMU (best-matching unit) index of every input sample.
        The original package does not provide this, hence the helper.
        :return: array, length = self.df.shape[0]
        """
        return self.sm._bmu[0]

    def get_label(self):
        """
        Return the cluster label of every input sample, via its BMU.
        The original package does not provide this, hence the helper.
        :return: array, length = self.df.shape[0]
        """
        # cluster_labels is indexed by neuron id, so plain indexing replaces
        # the intermediate {neuron: label} dict the original built.
        labels = self.sm.cluster_labels
        return np.array([labels[int(i)] for i in self.sm._bmu[0]])

    def predict(self, x):
        """
        Placeholder: fit a supervised model using the cluster labels as y.
        :param x: feature matrix to predict on
        :return: not implemented
        """
        pass
# Beispiel #9 (Example #9)
# 0
# NOTE(review): relies on `som` (a trained sompy SOM) defined elsewhere.
# U-matrix plot
from sompy.visualization.umatrix import UMatrixView

umat = UMatrixView(width=20, height=20, title='U-matrix')
umat.show(som)

from sompy.visualization.hitmap import HitMapView
from sompy.visualization.bmuhits import BmuHitsView
# How many training samples land on each neuron.
bmuhitsview = BmuHitsView(12, 12, 'Data per node', text_size=24)
bmuhitsview.show(som,
                 anotate=False,
                 onlyzeros=False,
                 labelsize=7,
                 logaritmic=False)

# K-means with 5 clusters on the neurons, shown as a hit map.
Kluster = som.cluster(5)
hits = HitMapView(20, 20, "K-Means Clustering", text_size=16)
a = hits.show(som, anotate=False, labelsize=7, cmap='viridis')


def HowManyK(k):
    '''compute SSE for up to k clusters'''

    SSE = np.empty(0)
    K = np.arange(2, k)
    for i in K:
        totalERROR = 0
        map_labels = som.cluster(
            n_clusters=i)  # will eventually return more than labels....
        data_labels = np.array([
            map_labels[int(x)] for x in som._bmu[0]
# Beispiel #10 (Example #10)
# 0
# NOTE(review): relies on `sm` (a trained sompy SOM) defined elsewhere.
# Map quality: topographic error (neighbourhood preservation) and
# quantization error (mean distance of each sample to its BMU).
topographic_error = sm.calculate_topographic_error()
quantization_error = np.mean(sm._bmu[1])
# Fixed: Python 2 print statement -> Python 3 print() call.
print("Topographic error = %s; Quantization error = %s" % (topographic_error,
                                                           quantization_error))

from sompy.visualization.mapview import View2D

# Component planes: one heat map per input variable.
view2D = View2D(10, 10, "rand data", text_size=10)
view2D.show(sm, col_sz=4, which_dim="all", desnormalize=True, cmap='plasma')

k_val = 4  # number of k-means clusters on the SOM grid

from sompy.visualization.hitmap import HitMapView

sm.cluster(k_val)
hits = HitMapView(7, 7, "Clustering", text_size=9, cmap='Blues')
a = hits.show(sm)

from sompy.visualization.bmuhits import BmuHitsView

# Sample counts per neuron.
vhts = BmuHitsView(5, 5, "Hits Map", text_size=11)
vhts.show(sm,
          anotate=True,
          onlyzeros=False,
          labelsize=9,
          cmap="plasma",
          logaritmic=False)

# Get the labels for each BMU
# in the SOM (15 * 10 neurons)
# Beispiel #11 (Example #11)
# 0
# NOTE(review): relies on `som` (a trained sompy SOM) and the project-local
# ThirdSOM module defined elsewhere.
# U-matrix plot
from sompy.visualization.umatrix import UMatrixView

umat  = UMatrixView(width=10,height=10,title='U-matrix')
umat.show(som)



from sompy.visualization.hitmap import HitMapView
from sompy.visualization.bmuhits import BmuHitsView
# How many training samples land on each neuron.
bmuhitsview = BmuHitsView(12,12,'Data per node', text_size=24)
bmuhitsview.show(som, anotate=False, onlyzeros=False, labelsize=7, logaritmic=False)


# K-means with 5 clusters on the neurons, shown as a hit map.
Kluster = som.cluster(5)
hits  = HitMapView(20,20,"K-Means Clustering",text_size=16)
a=hits.show(som)


from ThirdSOM import bootstrap, HowManyK

# Bootstrap the SSE-vs-k sweep over 10 runs, then average across runs
# (axis=0) to get one SSE value per k.
SSE_Matrix = bootstrap(runs=10,k=20)
##################### average columns in 
SSE_Matrix = np.mean(SSE_Matrix, axis=0)

# SSE of K-means
# Elbow plot: x-axis is k = 2 .. k_max, matching bootstrap's sweep range.
plt.plot(np.arange(2,SSE_Matrix.size+2), SSE_Matrix)
plt.title('K-Means Optimal k')
plt.xlabel('Number of Clusters, k')
plt.ylabel('Sum Square Error')