Ejemplo n.º 1
0
    def dpath_from(class_object, dpath, fname=None):
        """Build an instance of ``class_object`` from a saved HDF5 file.

        Call as ``d = Som_Lmaped.dpath_from('....')``.

        Args:
            class_object: Class to instantiate; called as
                ``class_object(fname=fname)``.
            dpath: Directory that contains ``<self.fname>.h5``.
            fname: Forwarded to the constructor; the instance's ``fname``
                attribute names the file to read.

        Returns:
            The new instance with ``lmap``, ``bmusPlus`` and ``som`` populated
            from the file. ``surf_state`` is left as ``None`` (not stored).
        """
        import os  # local import: the file-level import block is not visible here

        self = class_object(fname=fname)

        # The context manager closes the file even when a dataset read fails.
        with h5py.File(os.path.join(dpath, self.fname + ".h5"), 'r') as hf:
            self.lmap = np.array(hf.get('lmap_dataset'))
            self.bmusPlus = np.array(hf.get('bmusPlus_dataset'))
            umatrix = np.array(hf.get('som_umatrix_dataset'))
            codebook = np.array(hf.get('som_codebook_dataset'))

        # The map size is recovered from axes 1 and 2 of the stored lmap.
        n_columns = self.lmap.shape[1]
        n_rows = self.lmap.shape[2]

        self.surf_state = None  # too big to save on disk?

        self.som = somoclu.Somoclu(n_columns,
                                   n_rows,
                                   data=None,
                                   kerneltype=0,
                                   verbose=2,
                                   initialization='pca')

        # NOTE(review): only the first column of bmusPlus is kept here; confirm
        # that a single-column bmus array is what downstream code expects.
        self.som.bmus = self.bmusPlus[:, 0:1]
        self.som.umatrix = umatrix
        self.som.codebook = codebook

        return self
Ejemplo n.º 2
0
 def __init__(self, X, k,homogeneous_codebook=True):
     """Build and train a square SOM whose size depends on the point count.

     The map side is ``int(sqrt(X.shape[0] // k))``. When
     ``homogeneous_codebook`` is true the initial codebook is a regular grid
     spanning the per-column min/max of ``X``; the min/max are unpacked into
     an x and a y value, so ``X`` must then have exactly two columns.
     The map is trained for 10 epochs before the constructor returns.
     """
     n_points = X.shape[0]
     self.X = X
     self.somn = int(np.sqrt(n_points // k))
     self.ndf = None
     self.ninfo = None
     self.nres = None
     if not homogeneous_codebook:
         self.som = somoclu.Somoclu(self.somn, self.somn)
     else:
         xmin, ymin = X.min(0)
         xmax, ymax = X.max(0)
         grid_x = np.linspace(xmin, xmax, self.somn)
         grid_y = np.linspace(ymin, ymax, self.somn)
         cobx, coby = np.meshgrid(grid_x, grid_y)
         # One (x, y) row per node, as float32.
         self.inicodebook = np.array([cobx.ravel(), coby.ravel()], np.float32).T
         print('using {0}*{0} SOM nodes for {1} points'.format(self.somn, n_points))
         self.som = somoclu.Somoclu(self.somn, self.somn,
                                    initialcodebook=self.inicodebook.copy())
     self.som.train(X, epochs=10)
def compute_som():
    """Train a SOM on three random 3-D point clouds and show its component planes."""
    # Three clouds of 50 points each; the second and third are shifted by a fixed offset.
    cloud_a = np.random.rand(50, 3) / 5
    cloud_b = (0.6, 0.1, 0.05) + np.random.rand(50, 3) / 5
    cloud_c = (0.4, 0.1, 0.7) + np.random.rand(50, 3) / 5
    data = np.concatenate((cloud_a, cloud_b, cloud_c)).astype(np.float32)

    n_columns = 160
    n_rows = 100
    som = somoclu.Somoclu(n_columns, n_rows, compactsupport=False)
    som.train(data)
    som.view_component_planes()
Ejemplo n.º 4
0
def grafica_som():
    """Train a 100x100 SOM on (MUT, MUTAA, GC2) triples and show its component planes.

    Calls ``normaliza()`` first, then reads the module-level sequences
    ``MUT``, ``MUTAA`` and ``GC2``, using ``len(GC2)`` as the row count.
    """
    normaliza()
    triples = [[MUT[i], MUTAA[i], GC2[i]] for i in range(len(GC2))]

    data = np.array(triples).astype(np.float32)
    som = somoclu.Somoclu(100, 100, data=data)
    som.train()
    som.view_component_planes()
Ejemplo n.º 5
0
def infer_map(nb_cols, nb_rows, dataset, **kwargs):
    """Train a self-organizing map on ``dataset.feature_vectors``.

    Extra keyword arguments are forwarded unchanged to ``somoclu.Somoclu``,
    e.g. initialcodebook, kerneltype, maptype, gridtype, compactsupport,
    neighborhood, std_coeff, initialization.

    Raises:
        NoFeatureVectorsError: If ``dataset`` has no ``feature_vectors`` attribute.

    Returns:
        The trained ``somoclu.Somoclu`` object.
    """
    if hasattr(dataset, 'feature_vectors'):
        som = somoclu.Somoclu(nb_cols, nb_rows, **kwargs)
        training_data = np.array(dataset.feature_vectors, dtype=np.float32)
        som.train(data=training_data)
        return som
    raise NoFeatureVectorsError("Attempted to train a Som model, "
                                "but did not find feature vectors in the dataset.")
Ejemplo n.º 6
0
 def fit(self, X, y=None):
     """Train a square hexagonal SOM on ``X`` and derive the threshold ``self.t``.

     ``y`` is accepted but not used. After training, ``self.distances`` holds
     the smallest surface-state value of each row of ``X``; the threshold is
     computed from those distances. Returns ``self``.
     """
     logger.info('<< SOM Model | Fit Method: RUN >>')
     som_options = {'gridtype': 'hexagonal', 'initialization': "pca"}
     self.som = somoclu.Somoclu(self.dim, self.dim, **som_options)
     self.som.train(data=X, epochs=self.epochs)
     surface_state = self.som.get_surface_state(X)
     self.distances = list(map(min, surface_state))
     self.t = self.__calc_treshold(self.distances)
     logger.info('<< SOM Model | Fit Method: DONE >>')
     return self
Ejemplo n.º 7
0
    def _cluster(self, X_sub):
        """
        Train a SOM of ``self.som_cols`` x ``self.som_rows`` nodes on ``X_sub``.

        The input is cast to float32 and trained for ``self.iterations`` epochs.

        Returns ----------
        som - the trained map object
        """
        grid = (self.som_cols, self.som_rows)
        som = somo_algorithm.Somoclu(*grid, compactsupport=False)
        samples = np.float32(X_sub)
        som.train(samples, epochs=self.iterations)
        return som
Ejemplo n.º 8
0
    def run(self, phase, **phase_ctx):
        """Train a SOM on the indexed matrix and save its U-matrix image.

        Args:
            phase: Not used by this method.
            **phase_ctx: Must provide ``phase_input`` (its first item is the
                path of a JSON index file with a ``"matrix"`` entry) and
                ``result_paths`` (its first item is the output image path).
        """
        curr_input = phase_ctx.get("phase_input")
        result_paths = phase_ctx.get("result_paths")
        filepath = curr_input[0]
        # Context manager so the index file is closed even if parsing fails.
        with open(filepath, 'r') as index_file:
            index = json.load(index_file)
        # Only the matrix is needed; the unused "term"/"doc" reads were dropped.
        matrix = index["matrix"]

        greys = plt.get_cmap("Greys")
        data = np.asarray(matrix)
        # The matrix is transposed before training.
        som = somoclu.Somoclu(self.__cols, self.__rows, data=data.T, initialization="pca", maptype="toroid", gridtype="hexagonal")
        som.train()
        som.view_umatrix(bestmatches=True, filename=result_paths[0], colormap=greys)
Ejemplo n.º 9
0
def test_som():
    """Print the companies most similar to company 13, then plot a SOM U-matrix.

    Fetches the company data and names through ``data_fetch``, prints the
    five most similar companies returned by ``similarity_func`` (names and
    distances), trains a toroid SOM for one epoch and saves the current
    figure to ``pic8.png``.
    """
    cur = enable_connection()

    companies_data = data_fetch.get_data(cur)
    companies_names = data_fetch.get_names(cur)

    most_similar_companies = similarity_func(companies_data[13], companies_data, companies_names, 5)
    print(list(most_similar_companies.values()))
    for company_id, company_distance in most_similar_companies.items():
        print(companies_names[int(company_id)])
        print(company_distance)

    # NOTE(review): the map size is derived from the data shape
    # (rows from sample count, columns from feature count); confirm intended.
    dim_multiplier = 2
    n_rows = companies_data.shape[0] * dim_multiplier
    n_columns = companies_data.shape[1] * dim_multiplier

    som = somoclu.Somoclu(n_columns, n_rows, maptype="toroid",
                          compactsupport=False)
    som.train(companies_data, epochs=1)
    som.view_umatrix(bestmatches=True, labels=companies_names)
    plt.savefig('pic8.png')
Ejemplo n.º 10
0
 def train(self, data, som_x, som_y, epochs=10, **kwargs):
     """Train the map and return results as a dict.

     Map and training options are taken from ``kwargs`` when present and
     otherwise fall back to the defaults listed in the body.

     :param data: Training data used in SOM.
     :type data: 2D numpy.array of float32.
     :param som_x: X-size of the map.
     :type som_x: int.
     :param som_y: Y-size of the map.
     :type som_y: int.
     :param epochs: Number of rounds the training is performed.
     :type epochs: int.
     :rtype: dict
     """
     take = kwargs.pop

     # Options consumed by the map constructor.
     map_options = {
         "kerneltype": take("kerneltype", 0),
         "verbose": take("verbose", 2),
         "neighborhood": take("neighborhood", "gaussian"),
         "std_coeff": take("std_coeff", 0.5),
         "maptype": take("maptype", "toroid"),
         "initialcodebook": take("initialcodebook", None),
         "initialization": take("initialization", 'random'),
         "gridtype": take("gridtype", "rectangular"),
     }
     self.som = somoclu.Somoclu(som_x, som_y, **map_options)

     # Radius / learning-rate schedule consumed by the training call.
     schedule = {
         "radius0": take("radius0", 0),
         "radiusN": take("radiusN", 1),
         "radiuscooling": take("radiuscooling", "linear"),
         "scale0": take("scale0", 0.1),
         "scaleN": take("scaleN", 0.01),
         "scalecooling": take("scalecooling", "linear"),
     }
     self.som.train(data, epochs, **schedule)

     result = {}
     result['codebook'] = self.som.codebook.copy()
     result['bmus'] = self.som.bmus.copy()
     result['umatrix'] = self.som.umatrix.copy()
     result['n_columns'] = self.som._n_columns
     result['n_rows'] = self.som._n_rows
     result['n_dim'] = self.som.n_dim
     result['clusters'] = None
     return result
Ejemplo n.º 11
0
 def add_layer(self,
               n_columns,
               n_rows,
               n_dim,
               initialcodebook=None,
               kerneltype=0,
               maptype="planar",
               gridtype="rectangular",
               compactsupport=True,
               neighborhood="gaussian",
               std_coeff=0.5,
               initialization=None,
               data=None,
               verbose=0):
     """Create a SOM layer and register it under the name ``som_<index>``.

     All map options are forwarded to ``somoclu.Somoclu``. ``n_dim`` is set
     on the map object directly and the layer shape
     ``(n_rows, n_columns, n_dim)`` is appended to ``self._dim_layers``.
     """
     # Options are passed by keyword so they cannot be mis-bound if the
     # constructor's positional order differs between somoclu versions.
     som = somoclu.Somoclu(n_columns, n_rows,
                           initialcodebook=initialcodebook,
                           kerneltype=kerneltype,
                           maptype=maptype,
                           gridtype=gridtype,
                           compactsupport=compactsupport,
                           neighborhood=neighborhood,
                           std_coeff=std_coeff,
                           initialization=initialization,
                           data=data,
                           verbose=verbose)
     som.n_dim = n_dim
     som_name = "som_{}".format(self._n_layers)
     self._layers[som_name] = som
     self._dim_layers.append((n_rows, n_columns, n_dim))
     self._n_layers += 1
    def run_som(self):
        """
        Run an unsupervised evaluation with a self-organizing map (SOM).

        Trains a 600x400 map (columns x rows) for 50 epochs on the data from
        ``_prepare_data_to_som`` and writes the U-matrix, with one randomly
        chosen CSS4 colour per label, to a PDF with a random file name.
        :return:
        """
        X_normalized = self._prepare_data_to_som()

        palette = list(mcolors.CSS4_COLORS.keys())
        colors = random.choices(palette, k=len(self.labels))

        som = somoclu.Somoclu(600, 400, data=X_normalized)
        som.train(epochs=50)

        map_filename = "map-" + get_random_string() + '.pdf'
        plot_options = {
            'bestmatches': True,
            'bestmatchcolors': colors,
            'labels': self.labels,
            'filename': map_filename,
        }
        som.view_umatrix(**plot_options)
Ejemplo n.º 13
0
def get_somoclu(data, mapsize, epochs=10):
    """Train a SOM on the columns of ``data`` and return the codebook.

    Each column of ``data`` (indexed through ``data.columns``) becomes one
    float32 training vector. ``mapsize`` is unpacked into the positional
    arguments of ``somoclu.Somoclu``.
    """
    som = somoclu.Somoclu(*mapsize)
    column_vectors = [list(data[name]) for name in data.columns]
    data = np.array(column_vectors).astype(np.float32)
    som.train(data=data, epochs=epochs)
    return som.codebook
Ejemplo n.º 14
0
def run_som(data, nrows, ncolumns, maptype="planar", gridtype="hexagonal", inicialization="pca"):
    """Train a SOM, cluster its neurons and export the results.

    Files written to the working directory:
      * ``mapa.png``     - U-matrix with the best matching units (BMUs)
      * ``clusters.csv`` - cluster of every neuron
      * ``classes.csv``  - columns ID, Classe: one row per input sample
      * ``bmus.txt``     - BMU coordinates of every input sample

    Args:
        data: 2-D training array, one sample per row.
        nrows: Number of map rows.
        ncolumns: Number of map columns.
        maptype: Forwarded to ``somoclu.Somoclu``.
        gridtype: Forwarded to ``somoclu.Somoclu``.
        inicialization: Forwarded as the ``initialization`` option.

    Returns:
        pandas.DataFrame with columns ``Id`` (1-based), ``x``, ``y`` and
        ``cluster``, one row per input sample.
    """
    labels = range(data.shape[0])

    som = somoclu.Somoclu(ncolumns, nrows, maptype=maptype, gridtype=gridtype,
                          compactsupport=True, initialization=inicialization)

    print("Training map...")
    som.train(data)
    print("Map trained!")

    # Labels every neuron and fills som.clusters.
    som.cluster()

    som.view_umatrix(bestmatches=True, labels=labels, filename='./mapa.png')
    print("Map image created at program's directory!")

    np.savetxt("./clusters.csv", som.clusters, delimiter=",")

    # A BMU is stored as (column, row) while som.clusters is indexed
    # [row][column], so the coordinates are swapped for the lookup. Using
    # them unswapped returns the wrong neuron and can overflow a
    # non-square map.
    id_classes = np.empty((len(data), 2), dtype=int)
    for i, (x, y) in enumerate(som.bmus):
        id_classes[i][0] = labels[i]            # id
        id_classes[i][1] = som.clusters[y][x]   # class

    output = pd.DataFrame(id_classes, columns=['ID', 'Classe'])
    output.to_csv('./classes.csv', sep=',', index=False)
    print("Classification file succesfully created at program's directory!")

    # Write the coordinates of every BMU; the file is closed even on error.
    coordinates = []
    with open("./bmus.txt", "w+") as f:
        for i, (x, y) in enumerate(som.bmus, start=1):
            print(("ID %d: (%d, %d)\n" % (i, x, y)), file=f)
            coordinates.append({'Id': i, 'x': x, 'y': y, 'cluster': som.clusters[y][x]})
    return pd.DataFrame(coordinates)
Ejemplo n.º 15
0
#         </a>
#     </div>
# </div> 
# 
# ## Map Training
# With `final_set` we can now train the map.
# 
# We will use the functionality of the [Somoclu](http://somoclu.readthedocs.io/en/stable/index.html) library. We initialize the map with `PCA` (Principal Component Analysis). `PCA` initializes the map from the first subspace spanned by the first 2 eigenvectors of the correlation matrix. We experimented with SOM sizes and clusters and show below a sample of the SOMs we trained. Training times were relatively short, partly because of the size of the optimized corpus:

# In[1]:


import somoclu as smcl

n_rows, n_columns = 30, 30
som = smcl.Somoclu(n_columns, n_rows, compactsupport = False, initialization = "pca", verbose = 2)
get_ipython().magic('time som.train(final_set, epochs = 100)')


# In[236]:


# som1 = joblib.load('som_23x23_5000_120.pkl')
# som2 = joblib.load('som_30x30_5000_100.pkl')
# som3 = joblib.load('som_45x45_5000_150.pkl')
# print('\nSize and Training Epochs on SOM Clustering Results\n')
# print('23x23 grid - 100 epochs:')
# som1.view_umatrix(bestmatches = False, colorbar = False, figsize = (7, 7))
# print('30x30 grid - 120 epochs:')
# som2.view_umatrix(bestmatches = False, colorbar = False, figsize = (7, 7))
# print('45x45 grid - 150 epochs:')
# install somoclu
get_ipython().system(u'pip install --upgrade somoclu')
# import somoclu, matplotlib
import somoclu
import matplotlib
# we will plot inside the notebook and not in separate window
get_ipython().magic(u'matplotlib inline')

# First read the somoclu [function reference](http://somoclu.readthedocs.io/en/stable/reference.html). We will work with a planar map with a rectangular neuron grid and random initialization (all of these are defaults). You can try various map sizes, but as the number of neurons grows, so does the training time. For training you do not need to exceed 100 epochs. In general we can rely on the default parameters until we are able to visualize and qualitatively analyze the results. Start with a 10 x 10 map, 100 training epochs and a subset of the movies (e.g. 2000). Use `time` to get a feel for the training times. As a rough guide, with a correct tf-idf encoding, small maps on little data (1000-2000) take around one minute, while larger maps on all the data can take 10-15 minutes or more.
#

# In[ ]:

# initialize Somoclu with 30x30 map
n_columns, n_rows = 30, 30
som = somoclu.Somoclu(n_columns, n_rows)

# import time to measure
import time

# train the final set
start_time = time.time()
som.train(final_set, epochs=100)
end_time = time.time()

# print training time (converted from seconds to minutes)
train_time = (end_time - start_time) / 60
print("Training time for the final set is :", train_time, "minutes")

#
# ## Best matching units
Ejemplo n.º 17
0
# The first column holds the IDs; keep them as labels.
labels = dataset.iloc[:, 0]
# Drop the IDs so they do not influence the clustering.
# Without the explicit np.float32() cast, somoclu warns at run time
# ("Warning: data was not float32. A 32-bit copy was made") and converts
# the data to float32 automatically.
data = np.float32(dataset.iloc[:, 1:].values)
# Number of neurons in the network. The row and column counts can be
# changed (very large values need a lot of processing).
n_rows, n_columns = 100, 100

som = somoclu.Somoclu(n_columns, n_rows,
                      maptype="planar", gridtype="rectangular",
                      compactsupport=True, initialization="pca")
"""SOMOCLU: Classe para treino e visualização do SOM.
    Atributos:
        codebook     Codebook do SOM
        bmus         As BMUs(best matching points) correspondentes as dados.
    :param n_columns: Número de colunas no mapa.
    :type n_columns: int.
    :param n_rows: Número de linhas no mapa.
    :type n_rows: int.
    :param initialcodebook: Parametro opcional para inicializar o treinamento com 
                            um dado codebook.
    :type initialcodebook: 2D numpy.array of float32.
    :param kerneltype: Parametro opcional para especificar qual kernel será usado
                           * 0: dense CPU kernel (padrão)
Ejemplo n.º 18
0
 def __init__(self, filename,
              filepath="datasets",
              rows=5,
              columns=5,
              epochs=10,
              delimiter=",",
              initialization="pca",
              error="euclid",
              maptype="planar",
              gridtype="rectangular"):
     """
     Constructor for the SOM class.

     Loads and preprocesses ``filepath/filename``, trains a SOM on the
     normalized data, computes one quantization error per sample (distance
     between the sample and the codebook vector of its best matching unit)
     and derives ROC / precision-recall / F1 / log-loss scores from it.

     ``error`` selects the distance: "euclid", "max", "manhattan",
     "cosine", "correlation" or "angle".

     Raises:
         ValueError: If ``error`` is not one of the names above.
     """
     # Metric name -> function(bmu_vector, sample) returning the error.
     error_functions = {
         "euclid": lambda bmu, sample: np.linalg.norm(bmu - sample),
         # NOTE(review): largest *signed* difference, not max |difference|;
         # confirm this is the intended "max" error.
         "max": lambda bmu, sample: np.amax(bmu - sample),
         "manhattan": spdist.cityblock,
         "cosine": spdist.cosine,
         "correlation": spdist.correlation,
         "angle": lambda bmu, sample: np.arccos(
             np.clip(np.dot(sample / np.linalg.norm(sample),
                            bmu / np.linalg.norm(bmu)),
                     -1.0, 1.0)),
     }
     # Fail fast with a clear message instead of failing later on an
     # empty error list when the scalers are fitted.
     if error not in error_functions:
         raise ValueError("unknown error metric %r; expected one of %s"
                          % (error, sorted(error_functions)))
     compute_error = error_functions[error]

     self.filename = filename
     self.filepath = filepath
     file = os.path.join(filepath, filename)
     self.epochs = epochs
     self.rows, self.columns = rows, columns
     self.maptype = maptype
     self.gridtype = gridtype
     input_data = Preprocessor(file, delimiter=delimiter)
     self.data = input_data.data
     self.data_norm = input_data.data_normalized.values
     self.labels = input_data.labels.values
     self.objects_count = input_data.objects_count
     self.inlier_count = input_data.inlier_count
     self.outlier_count = input_data.outlier_count
     self.dimensions_count = input_data.dimensions_count
     self.som = somoclu.Somoclu(columns, rows, compactsupport=False,
                                initialization=initialization,
                                maptype=maptype, gridtype=gridtype)
     self.som.train(self.data_norm, epochs=epochs)
     self.bmus = self.som.get_bmus(self.som.get_surface_state())
     self.qe = []
     self.qebmu = []
     self.bmulocations = []
     # The codebook is indexed with the second BMU coordinate first.
     for i, (bmu_first, bmu_second) in enumerate(self.bmus):
         bmu_vector = self.som.codebook[bmu_second][bmu_first]
         self.bmulocations.append(bmu_vector)
         self.qe.append(compute_error(bmu_vector, self.data_norm[i]))
     minmax_scaler = skpre.MinMaxScaler()
     standard_scaler = skpre.StandardScaler()
     qe_column = np.array(self.qe).reshape(-1, 1)
     self.qe_scaled = minmax_scaler.fit_transform(qe_column)
     self.qe_scaled_std = standard_scaler.fit_transform(qe_column)
     self.fpr, self.tpr, self.thresholds = roc_curve(self.labels, self.qe)
     self.roc_auc = auc(self.fpr, self.tpr)
     self.precision, self.recall, self.threshold_pr = \
         precision_recall_curve(self.labels, self.qe)
     self.pr_auc = auc(self.recall, self.precision)
     # 0/0 entries become NaN here and are zeroed before taking the max.
     self.f1_scores = 2 * (self.precision * self.recall) \
         / (self.precision + self.recall)
     self.f1_score = np.max(np.nan_to_num(self.f1_scores))
     # NOTE(review): log_loss is given the raw errors, not the scaled
     # ones; confirm that is intended.
     self.logloss = log_loss(self.labels, self.qe)
     self.data['qe'] = self.qe
Ejemplo n.º 19
0
Repositório original: https://github.com/peterwittek/somoclu
"""

dataset = pd.read_csv('./teste-colunas.csv')
# Select the IDs; they become the neuron labels on the map.
labels = dataset.iloc[:, 0]
# Drop the IDs so they do not influence the clustering.
# Without the explicit np.float32() cast, somoclu warns at run time
# ("Warning: data was not float32. A 32-bit copy was made") and converts
# the data to float32 automatically.
data = np.float32(dataset.iloc[:, 1:].values)
# The row and column counts can be changed.
n_rows, n_columns = 100, 100

som = somoclu.Somoclu(n_columns, n_rows, initialization="pca")

# Train the map with the data.
som.train(data, epochs=10)

# Classify the neurons, filling som.clusters.
som.cluster()

# Plot the U-matrix of the trained map.
som.view_umatrix(bestmatches=True, labels=labels, filename='./mapa.png')

np.savetxt("./bmus.txt", som.bmus)
np.savetxt("./clusters.csv", som.clusters, delimiter=",")
Ejemplo n.º 20
0
# DataFrame.as_matrix() was removed in pandas 1.0; .values returns the
# same NumPy array and works on old and new pandas alike.
data = df[["night", "morning", "afternoon", "evening"]].values

#data= np.float32(np.concatenate((night, morning, afternoon))) #, df["afternoon"], df["evening"])))
#print(data)
# Disabled earlier experiment (planar 150x100 map), kept as a string block.
"""n_rows, n_columns = 100, 150
som = somoclu.Somoclu(n_columns, n_rows, data=data, maptype="planar",gridtype="rectangular")
som.train(epochs=100)
#som.view_umatrix()
som.view_umatrix(bestmatches=True)
"""

colors = ["red"] * 400
colors.extend(["green"] * 400)
colors.extend(["blue"] * 400)

# Toroid map, 180 columns x 150 rows, PCA-initialised.
n_rows, n_columns = 150, 180
som = somoclu.Somoclu(n_columns,
                      n_rows,
                      maptype="toroid",
                      compactsupport=False,
                      initialization="pca")

som.train(data)
som.cluster()

print("plane")
som.view_component_planes()
print("U-matrix BMU")
som.view_umatrix(bestmatches=True)
print("U-matrix sans BMU")
som.view_umatrix()
Ejemplo n.º 21
0
    som_dimension = int(parameters['hyperparam']['som_dimension'])
    n_columns, n_rows = som_dimension, som_dimension

    num_epochs = int(parameters['hyperparam']['som_training_epochs'])
    print("n_columns, n_rows:", n_columns, n_rows)
    print("num_epochs:", num_epochs)

    if is_cuda:
        som_kernel = 1
    else:
        som_kernel = 0

    som = somoclu.Somoclu(n_columns,
                          n_rows,
                          maptype="planar",
                          kerneltype=som_kernel,
                          gridtype="rectangular",
                          initialization='pca',
                          verbose=2)
    som.train(all_vectors.numpy(), epochs=num_epochs)

    # Saving som object as pickle for later loading
    pickle_file_directory = "som_pickles"
    if not os.path.exists(pickle_file_directory):
        os.mkdir(pickle_file_directory)

    pickle.dump(
        som,
        open(
            os.path.join(
                pickle_file_directory,
Ejemplo n.º 22
0
import somoclu
import os




# Generate the data-transformation file on first run.
if not os.path.isfile('6PM_data_transformation.xlsx'):
    # Read the script inside a context manager so the file is closed.
    # NOTE: exec() runs the script in this module's namespace; this is only
    # acceptable because it is a trusted local file.
    with open('6PM_data_preparation.py') as _prep_script:
        _prep_source = _prep_script.read()
    exec(_prep_source)

dataset= pd.read_excel("6PM_data_transformation.xlsx")
product = dataset[['MntAcessoriesPercent','MntBagsPercent','MntClothingPercent','MntAthleticPercent','MntShoesPercent']]

# Training data as float32.
df= np.float32(product.values)

n_rows, n_columns=9,9

som=somoclu.Somoclu(n_columns,n_rows,gridtype='rectangular',neighborhood='bubble',initialization='pca')

som.train(df, epochs=25)
# Best matching units of the training data on the trained map.
map_state=som.get_surface_state()
BMUs=som.get_bmus(map_state)
som.cluster()


som.view_umatrix(colorbar=True,figsize=(5,5))
clusters=som.clusters
som.view_component_planes(colorbar=True,figsize=(5,5))


Ejemplo n.º 23
0
                                        Kmeans_clustering = np.zeros(6)

                                        print("grid: " + str(grid_size) +
                                              "; learning_rate: " +
                                              str(learning_rate) +
                                              "; neighboorhood_radius: " +
                                              str(neighboorhood_radius) +
                                              "; r_cooling: " + r_cooling +
                                              "; a_cooling: " + a_cooling +
                                              " ;neighboorhood:" +
                                              neighboorhood)

                                        for jj in range(1):
                                            som = somoclu.Somoclu(
                                                grid_size,
                                                grid_size,
                                                neighborhood=neighboorhood)
                                            som.train(
                                                data=come_xuchu,
                                                epochs=1000,
                                                radius0=neighboorhood_radius,
                                                radiusN=1,
                                                radiuscooling=r_cooling,
                                                scale0=learning_rate,
                                                scaleN=0.01,
                                                scalecooling=a_cooling)
                                            kmeaaanz = np.array(
                                                Parallel(
                                                    n_jobs=-1,
                                                    backend="threading")(
                                                        delayed(kmeanza)
Ejemplo n.º 24
0
# Dashed black mean curve. The format string no longer names a colour:
# 'r--' together with c='black' defined the colour twice.
plt.plot(TIME, MeanC, '--', c='black', linewidth=3)
plt.ylim([-6, 10])
plt.text(4, -1.5, 'Mean curve')
# Function-call form: valid on Python 3, same output on Python 2.
print(MeanC)
plt.show()
######################
#SOM:
######################
#PLANAR MAP:
ncl = 20

#initialization
som = somoclu.Somoclu(10,
                      10,
                      data=dw,
                      maptype="planar",
                      gridtype="rectangular",
                      neighborhood='gaussian',
                      initialization='random')
som.train()
####plot the component planes of the trained codebook of the ESOM
som.view_component_planes()
#############################################################
#we can plot the U-matrix, together with the best matching units for each data point,
#and add the labels of the data points
som.view_umatrix(bestmatches=True, labels=rownames)
#############################################################
#zooming in to a region of interest: upper right corner here
som.view_umatrix(bestmatches=True, labels=rownames, zoom=((8, 10), (8, 10)))
##########################
#repeating with Hexagonal topology, and hexagonal neurons
Ejemplo n.º 25
0
    ("/home/remi/Documents/internship/shared/script-experiments/decreased-rank-udp/output.json",
     "/home/remi/Documents/internship/shared/script-experiments/decreased-rank-udp/powertracker.log"
     ),
    ("/home/remi/Documents/internship/shared/script-experiments/flooding-udp/output.json",
     "/home/remi/Documents/internship/shared/script-experiments/flooding-udp/powertracker.log"
     ),
    ("/home/remi/Documents/internship/shared/script-experiments/increased-version-udp/output.json",
     "/home/remi/Documents/internship/shared/script-experiments/increased-version-udp/powertracker.log"
     )
])

# Plot, normalise and print the data held by `p` (built above this excerpt).
p.plot()
p.normalizeData()
print(p.normData)

#to print the vector in a file
# f = open("/home/remi/Documents/internship/shared/output.txt", "w")
# for line in p.normData:
#     f.write(line+"\n")
# f.close()

# Train a SOM of 160 columns x 100 rows on the normalised array, cast to float32.
data = np.float32(p.normDataArray)
n_rows, n_columns = 100, 160
som = somoclu.Somoclu(n_columns, n_rows, compactsupport=False)
som.train(data, scale0=0.1, epochs=10)
# Component planes, with best matching units coloured by p.colors.
som.view_component_planes(bestmatches=True, bestmatchcolors=p.colors)

# print(p.colors)
# print(len(p.colors), len(p.normDataArray))
# U-matrix, with best matching units coloured by p.colors.
som.view_umatrix(bestmatches=True, bestmatchcolors=p.colors)
print("FINISH")
Ejemplo n.º 26
0
import somoclu
#%%-------------------------------------------------------------------------------------
# Three synthetic 3-D clusters of 50 points each.
c1 = np.random.rand(50, 3) / 5
c2 = (0.6, 0.1, 0.05) + np.random.rand(50, 3) / 5
c3 = (0.4, 0.1, 0.7) + np.random.rand(50, 3) / 5
data = np.float32(np.concatenate((c1, c2, c3)))
colors = ["red"] * 50
colors.extend(["green"] * 50)
colors.extend(["blue"] * 50)
fig = plt.figure()
# Axes3D(fig) no longer attaches the axes to the figure on current
# matplotlib, leaving an empty plot; add_subplot creates and attaches it.
ax = fig.add_subplot(111, projection='3d')
ax.scatter(data[:, 0], data[:, 1], data[:, 2], c=colors)
labels = range(150)
#%%-------------------------------------------------------------------------------------
# Planar map, 160 columns x 100 rows.
n_rows, n_columns = 100, 160
som = somoclu.Somoclu(n_columns, n_rows, data=data)
som.train(epochs=10)
som.view_umatrix(bestmatches=True, bestmatchcolors=colors, labels=labels)
#%%-------------------------------------------------------------------------------------
som.view_component_planes()
#%%-------------------------------------------------------------------------------------
som.view_umatrix(bestmatches=True, bestmatchcolors=colors, labels=labels)
#%%-------------------------------------------------------------------------------------
# Zoom into rows 50..n_rows and columns 100..n_columns.
som.view_umatrix(bestmatches=True,
                 bestmatchcolors=colors,
                 labels=labels,
                 zoom=((50, n_rows), (100, n_columns)))
#%%-------------------------------------------------------------------------------------
# Same data on a toroid map.
som = somoclu.Somoclu(n_columns, n_rows, data=data, maptype="toroid")
som.train()
som.view_umatrix(bestmatches=True, bestmatchcolors=colors)
Ejemplo n.º 27
0
             (True, 'linear', 'reuters', 15, 0.7, 5, 2, 'Normal', 'linear',
              'TFIDF', '3k', come_xuchu6, 'bubble')]
    resposta = []

    for i, melhor in enumerate(top_5):
        print(str(i + 1) + "ª melhor configuracao")

        best_score = -1000
        best_kmeanz = None
        best_som = None
        silhueta_acumulador = 0

        for j in range(5):
            print(j + 1)
            som = somoclu.Somoclu(melhor[3],
                                  melhor[3],
                                  neighborhood=melhor[12])
            dados, mapeador = outs.return_new_ndarray_indices(melhor[11], 0.07)
            som.train(data=dados,
                      epochs=1000,
                      radius0=melhor[6],
                      radiusN=1,
                      radiuscooling=melhor[8],
                      scale0=melhor[4],
                      scaleN=0.01,
                      scalecooling=melhor[1])
            kmeans = KMeans(n_clusters=melhor[5])
            som.cluster(kmeans)
            kmeanz = Kmeanz()
            kmeanz.clusters = clusterizacao(som)
            kmeanz.dados = dados
# fit_transform() fits the model and projects the data in one step; the
# separate fit() that preceded it was a redundant second fit.
projected=pca.fit_transform(finalData)


# SOM PU
import numpy as np
import somoclu
import pandas as pd

data = sixpm[['MntAcessories_norm', 'MntClothing_norm', 'MntBags_norm',
              'MntAthletic_norm', 'MntPremiumProds_norm', 'MntShoes_norm']]
df=np.float32(data.values)

n_rows, n_columns = 20, 20

# Only the PCA-initialised map was ever trained: the toroid and rectangular
# maps built before it were overwritten immediately, so they are not built.
# NOTE(review): if those options were meant to be combined, pass
# maptype="toroid" and gridtype="rectangular" on this single call.
som = somoclu.Somoclu(n_columns, n_rows, initialization = 'pca')
som.train(df, epochs = 50)

# Second training pass on the same map with an explicit radius schedule.
som.train(df, radius0 = 0.1, radiusN = 0.01)

som.view_component_planes()
som.cluster()
som.view_umatrix(bestmatches=True)



"""SOM CV"""
sixpm['Education'] = np.where((sixpm['Education']=='Basic'), 1, sixpm['Education'])
sixpm['Education'] = np.where((sixpm['Education']=='2n Cycle'), 2, sixpm['Education'])
Ejemplo n.º 29
0
 def show_umatrix(self):
     """Train a fresh SOM on ``self.data`` and display its U-matrix with best matches.

     The map size comes from ``self.map.cols`` and ``self.map.rows``.
     """
     grid_size = (self.map.cols, self.map.rows)
     som = somoclu.Somoclu(*grid_size)
     samples = self.data.values
     som.train(samples)
     som.view_umatrix(bestmatches=True)
Ejemplo n.º 30
0
# coding: utf-8

# In[1]:


import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import somoclu
get_ipython().magic(u'matplotlib inline')

# Three synthetic 3-D clusters of 50 points each.
c1 = np.random.rand(50,3)/5
c2 = (0.6, 0.1, 0.05) + np.random.rand(50, 3)/5
c3 = (0.4, 0.1, 0.7) + np.random.rand(50, 3)/5

data = np.float32(np.concatenate((c1, c2, c3)))
colors = ['red'] * 50
colors.extend(['green'] * 50)
colors.extend(['blue'] * 50)
fig = plt.figure()
# Axes3D(fig) no longer attaches the axes to the figure on current
# matplotlib, leaving an empty plot; add_subplot creates and attaches it.
ax = fig.add_subplot(111, projection='3d')
ax.scatter(data[:,0], data[:, 1], data[:, 2], c = colors)

#training somoclu
n_rows, n_columns = 100, 160
som = somoclu.Somoclu(n_columns, n_rows, data = data)
get_ipython().magic(u'time som.train()')

som.view_component_planes()