def func(data, params):
    """Train a MiniSom on ``data`` and save each sample's winner coordinates.

    Parameters
    ----------
    data : np.ndarray
        2-D array of samples; ``data.shape[1]`` is the SOM input length.
    params : dict
        Must contain ``nx``, ``ny``, ``sigma``, ``learn_rate``,
        ``random_seed``, ``som_type`` ('random' or 'batch') and ``Niter``.

    Side effects: writes winner (row, col) pairs to 'som_res/som_index.dat'.
    """
    # NOTE(review): the original looped over params and used exec() to turn
    # each entry into a local variable. In Python 3, exec() inside a function
    # cannot create function locals, so the references below would raise
    # NameError — and exec on arbitrary dict contents is unsafe anyway.
    # Read the parameters from the dict explicitly instead.
    nx = params['nx']
    ny = params['ny']
    sigma = params['sigma']
    learn_rate = params['learn_rate']
    random_seed = params['random_seed']
    som_type = params['som_type']
    Niter = params['Niter']

    # set som
    som = minisom.MiniSom(nx, ny, data.shape[1], sigma=sigma,
                          learning_rate=learn_rate, random_seed=random_seed)

    # run som
    if som_type == 'random':
        som.train_random(data, int(Niter))
    elif som_type == 'batch':
        som.train_batch(data, int(Niter))

    # get and save som output
    w = np.array([som.winner(item) for item in data])
    np.savetxt('som_res/som_index.dat', w, fmt="%d %d")
    return
def som_quantization_error(space):
    """Hyperopt objective: quantization error of a MiniSom built from ``space``.

    ``space`` supplies 'sig' and 'learning_rate'; the SOM dimensions
    (x, y, input_len) and training data (X_train) come from the enclosing
    scope. Returns the dict shape hyperopt's fmin expects.
    """
    som = ms.MiniSom(x=x, y=y, input_len=input_len,
                     sigma=space['sig'],
                     learning_rate=space['learning_rate'])
    return {'loss': som.quantization_error(X_train), 'status': STATUS_OK}
def createSOM(data, epochs, height, width, num_features, sigma, learning_rate,
              neighborhood_function, random_seed=10, saveWeightsName=None):
    """Build, PCA-initialise and train a MiniSom; optionally persist weights.

    Parameters
    ----------
    data : array-like
        Training samples, shape (n, num_features).
    epochs : int
        Number of random-training iterations.
    height, width, num_features : int
        SOM grid dimensions and input length.
    sigma, learning_rate, neighborhood_function, random_seed :
        Forwarded to minisom.MiniSom.
    saveWeightsName : str or None
        When given, the trained weights are written via writeSOM().

    Returns the trained MiniSom instance.
    """
    som = minisom.MiniSom(height, width, num_features, sigma=sigma,
                          learning_rate=learning_rate,
                          neighborhood_function=neighborhood_function,
                          random_seed=random_seed)
    som.pca_weights_init(data)
    # NOTE(review): learn_curve is not a stock minisom argument — confirm the
    # installed MiniSom fork supports it.
    som.train_random(data, epochs, learn_curve=False)
    # Fix: compare to None by identity (PEP 8), not with !=.
    if saveWeightsName is not None:
        writeSOM(som, saveWeightsName)
    return som
def loadSOM(weightsFile, sigma, learning_rate, neighborhood_function='bubble',
            random_seed=10):
    """Reconstruct a MiniSom from weights previously saved to ``weightsFile``.

    The grid size (height, width) and input length are inferred from the
    weight array's shape; the remaining hyperparameters must be supplied by
    the caller. Returns the MiniSom with its weights restored.
    """
    # Fix: the original read the weights file twice — once to infer the
    # shape and again before set_weights(). A single read suffices.
    weights = readWeights(weightsFile)
    height = weights.shape[0]
    width = weights.shape[1]
    num_features = weights.shape[2]
    som = minisom.MiniSom(height, width, num_features, sigma=sigma,
                          learning_rate=learning_rate,
                          neighborhood_function=neighborhood_function,
                          random_seed=random_seed)
    som.set_weights(weights)
    return som
def som_10_fold_cross_validation(self, size, iterations):
    """Run 10-fold cross-validation of a MiniSom classifier and print accuracy.

    Parameters
    ----------
    size : int
        SOM grid is size x size.
    iterations : int
        Training iterations per fold.

    Uses self.data / self.target; prints the mean accuracy over the folds.
    """
    feature_cols = [
        'danceability', 'loudness', 'speechiness', 'acousticness',
        'instrumentalness', 'liveness', 'valence', 'tempo'
    ]
    data = self.data[[self.target] + feature_cols]
    data = data.sample(frac=1)  # shuffle rows before splitting into folds
    spl = 10
    n_rows = int(data.shape[0])
    a = int(np.floor(n_rows / spl))  # base fold size
    end = []
    for i in range(spl - 1):
        end.append(data.iloc[a * i:a * (i + 1)])
    # Fix: the last fold previously started at the hard-coded 8 * a, which
    # duplicated fold 9's rows and dropped none of the remainder correctly.
    # It must start where the loop above stopped: (spl - 1) * a.
    end.append(data.iloc[((spl - 1) * a):(n_rows + 1)])
    acc = []
    for i in end:
        test_frame = i
        # All folds except the current one form the training set.
        train_frame = pd.concat([x for x in end if not x.equals(i)])
        y_test = (test_frame[self.target]).to_numpy()
        y_train = (train_frame[self.target]).to_numpy()
        X_test = (test_frame[feature_cols]).to_numpy()
        X_train = (train_frame[feature_cols]).to_numpy()
        final_som = minisom.MiniSom(size, size, 8, sigma=5,
                                    learning_rate=0.01,
                                    neighborhood_function='triangle')
        final_som.train(X_train, iterations, verbose=False)
        class_assignment = final_som.labels_map(X_train, y_train)
        y_pred = self.classify_som(final_som, X_test, class_assignment)
        acc.append(
            np.average(
                sklearn.metrics.accuracy_score(y_test, y_pred,
                                               normalize=True)))
    print('AVERAGE 10-FOLD CROSS VALIDATION ACCURACY: ')
    print(np.average(acc))
def trainSom(data, x, y, epochs, verbose=False):
    """Train a self-organizing map on ``data`` and return its winner map.

    :param data: input samples, must be [Nx3]
    :param x: number of nodes in the x direction
    :param y: number of nodes in the y direction
    :param epochs: number of training iterations
    :param verbose: if True prints information during training
    :return: dict mapping winner coordinates to the samples they won
    """
    network = minisom.MiniSom(x, y, 3,
                              sigma=0.5,
                              learning_rate=0.5,
                              random_seed=1)
    # Seed the codebook from random samples, then train in random order.
    network.random_weights_init(data)
    network.train_random(data, epochs, verbose=verbose)
    return network.win_map(data)
def execute_som_model(self, size, iterations, sigma, learning_rate, neighborhood_function):
    """Train a size x size MiniSom classifier, plot its weight planes and print metrics.

    Parameters: SOM grid size, training iterations, and the MiniSom
    hyperparameters sigma / learning_rate / neighborhood_function.
    Uses self.data, self.target and self.feature_names; shows a figure and
    prints accuracy, confusion matrix and classification report.
    """
    labels = self.data[self.target]
    # L2-normalise each sample row before training.
    features = np.apply_along_axis(lambda x: x / np.linalg.norm(x), 1, self.data[self.feature_names])
    X_train, X_test, y_train, y_test = train_test_split(features, labels, stratify=labels)
    # input_len is fixed at 8 — assumes self.feature_names has 8 columns
    # (the audio features listed below); TODO confirm.
    final_som = minisom.MiniSom(
        size, size, 8, sigma=sigma, learning_rate=learning_rate,
        neighborhood_function=neighborhood_function)
    final_som.train(X_train, iterations, verbose=False)
    # Majority-label per node, then classify the held-out set.
    class_assignment = final_som.labels_map(X_train, y_train)
    y_pred = self.classify_som(final_som, X_test, class_assignment)
    print('METRICS FOR THE SOM with size: ' + str(size) + ', ' +
          str(iterations) + ' iterations, sigma ' + str(sigma) +
          ', learning rate: ' + str(learning_rate) +
          ' and neighborhood function: ' + str(neighborhood_function))
    w = final_som.get_weights()
    # One heatmap per feature plane in a 4x2 grid.
    plt.figure(figsize=[12.8, 15])
    for c, i in enumerate([
            'danceability', 'loudness', 'speechiness', 'acousticness',
            'instrumentalness', 'liveness', 'valence', 'tempo'
    ]):
        plt.subplot(421 + c)  # 4 rows x 2 cols, panels 1..8
        plt.title(i)
        plt.pcolor(w[:, :, c].T, cmap='Spectral')
        plt.xticks(np.arange(size + 1, step=size / 6))
        plt.yticks(np.arange(size + 1, step=size / 6))
    plt.tight_layout()
    plt.show()
    print('Accuracy Score: ' + str(
        sklearn.metrics.accuracy_score(y_test, y_pred, normalize=True)))
    print(sklearn.metrics.confusion_matrix(y_test, y_pred))
    print(sklearn.metrics.classification_report(y_test, y_pred))
def explore_som_classification_parameters(self):
    """Grid-search SOM hyperparameters and display the accuracy table.

    Sweeps learning rate, sigma and neighborhood function over a fixed
    50x50 SOM trained for 1000 iterations, scoring each combination on a
    stratified hold-out split. Shows the results as a matplotlib table.
    """
    labels = self.data[self.target]
    # L2-normalise each sample row before training.
    features = np.apply_along_axis(lambda x: x / np.linalg.norm(x), 1,
                                   self.data[self.feature_names])
    X_train, X_test, y_train, y_test = train_test_split(features,
                                                        labels,
                                                        stratify=labels)
    results = []
    for lr in [0.1, 0.01, 0.001]:
        for sig in [5, 10, 15]:
            for fct in ['gaussian', 'mexican_hat', 'bubble', 'triangle']:
                som = minisom.MiniSom(50, 50, 8, sigma=sig,
                                      learning_rate=lr,
                                      neighborhood_function=fct,
                                      random_seed=10)
                som.train(X_train, 1000, verbose=False)
                class_assignment = som.labels_map(X_train, y_train)
                y_pred = self.classify_som(som, X_test, class_assignment)
                results.append([
                    lr, sig, fct,
                    sklearn.metrics.accuracy_score(y_test, y_pred,
                                                   normalize=True)
                ])
    names = [
        'learning rate', 'sigma', 'neighborhood fct.', 'accuracy score'
    ]
    # Fix: the original called plt.figure(figsize=[20, 20]) and then
    # plt.subplots(), which opens a *second* figure — the sizing never
    # applied and an empty figure was shown. Pass figsize to subplots.
    fig, ax = plt.subplots(figsize=[20, 20])
    ax.axis('off')
    ax.axis('tight')
    df = pd.DataFrame(np.array(results), columns=names)
    ax.table(cellText=df.values, colLabels=df.columns, loc='center')
    plt.show()
learning_rate=learning_rate).quantization_error(X_train) return {'loss': error, 'status': STATUS_OK} # hyperparameter tuning to obtain sigma and learning rate trials = Trials() best = fmin(fn=som_quantization_error, space=space, algo=tpe.suggest, max_evals=100, trials=trials) print(best) som = ms.MiniSom(x=x, y=y, input_len=input_len, sigma=8.007684739287342, learning_rate=4.486348532872689) som.pca_weights_init(X_train) som.train_batch(X_train, 100) class_assignments = som.labels_map(X_train, y_train) print( sklearn.metrics.classification_report( y_test, classify(som, X_test, class_assignments))) # saving the som in the file som.p with open('synthetic_som.p', 'wb') as outfile: pickle.dump(som, outfile) #feature selection
import os
import numpy
import minisom

# Per-feature scale divisors used to normalise region features before
# querying the SOM; order must match the feature vector built in
# get_winner() — TODO confirm against the training pipeline.
normalisation = numpy.array([1.75737120e+02, 3.04404448e+05, 1.23669645e+06,
                             4.79486957e+02, 2.29390100e+06, 1.61616659e+01,
                             3.35020527e+06, 2.59328397e+05, 4.79121435e+04,
                             7.28746693e+04, 1.54229751e+06])
# Load pre-trained weights shipped next to this module and reshape to the
# 25x25 grid with one plane per feature.
weights = numpy.loadtxt(os.path.dirname(os.path.realpath(__file__)) +
                        '/data/som_weights.txt').reshape((25, 25,
                                                          len(normalisation)))
som = minisom.MiniSom(25, 25, len(normalisation))
# NOTE(review): assigns MiniSom's private attribute directly instead of a
# public setter — ties this code to minisom's internals.
som._weights = weights


def get_winner(region):
    """Return the SOM winner (row, col) for a region's feature vector.

    ``region`` is expected to be a mapping providing the six named features
    below plus a 'zoning' mapping keyed by zone-type letters — assumption
    inferred from the lookups here; verify against callers.
    """
    feature_names = [
        'rental_rate', 'median_rent', 'income', 'religious', 'population',
        'unemployment'
    ]
    rows_and_columns = []
    features = [region[feature] for feature in feature_names]
    # Append counts for the five zone types, defaulting to 0 when absent.
    for zone_type in "RCIWP":
        features.append(region['zoning'].get(zone_type, 0))
    rows_and_columns.append(numpy.nan_to_num(features))
    # list / array broadcasts to a (1, n_features) normalised array.
    features = rows_and_columns / normalisation
    return som.winner(features[0])
@author: MahZaky <<< SirMahZaky >>> """ import numpy as np import matplotlib.pyplot as plt import minisom as som import random_cluster_generation as gen x,y = gen.get_data() color = ['red','blue'] labels = ['cluster 1', 'cluster 2' ] ma = som.MiniSom(1,2,2,random_seed=10) ma.train_random(x, 1000) plt.subplot(1,2,1)#real Sample ploting for i in range(2): dt = x[np.where(y == i)[0], :] plt.scatter(dt[:,0], dt[:,1], c=color[i], label =labels[i] ) plt.legend() llabels = ['cluster 1 learned', 'cluster 2 learned' ] plt.subplot(1,2,2) #plting learned clusters clasters_labels = ma.win_map(x) for key, poins in clasters_labels.items(): i = np.ravel_multi_index(key,(1,2))