def test_fit(X, n_rows, n_columns, train_mode_unsupervised, random_state, expected):
    """Fit a SOMClustering and verify the trained weight grid.

    Also checks that an unknown unsupervised train mode raises on fit.
    """
    model = susi.SOMClustering(
        n_rows=n_rows,
        n_columns=n_columns,
        train_mode_unsupervised=train_mode_unsupervised,
        random_state=random_state,
    )
    model.fit(X)

    # trained SOM: one weight vector per grid node
    assert isinstance(model.unsuper_som_, np.ndarray)
    assert model.unsuper_som_.shape == (n_rows, n_columns, X.shape[1])
    assert np.allclose(model.unsuper_som_, expected, atol=1e-20)

    # an invalid training mode must raise during fit
    with pytest.raises(Exception):
        model = susi.SOMClustering(train_mode_unsupervised="alsdkf")
        model.fit(X)
def test_calc_neighborhood_func(radius_max, radius_min, max_it, curr_it, mode, expected):
    """Neighborhood function value at a given iteration matches expectation."""
    clusterer = susi.SOMClustering()
    # set the fitted attributes the method reads directly
    clusterer.radius_max_ = radius_max
    clusterer.radius_min_ = radius_min
    clusterer.max_iterations_ = max_it
    assert clusterer.calc_neighborhood_func(curr_it, mode) == expected
def test_calc_learning_rate(learning_rate_start, learning_rate_end, max_it, curr_it, mode, expected):
    """Learning rate decay at a given iteration matches expectation."""
    clusterer = susi.SOMClustering(
        learning_rate_start=learning_rate_start,
        learning_rate_end=learning_rate_end,
    )
    # max_iterations_ is normally set during fit; set it manually here
    clusterer.max_iterations_ = max_it
    assert clusterer.calc_learning_rate(curr_it, mode) == expected
def test_transform(n_rows, n_columns, X):
    """Transform returns one 2-D BMU coordinate per datapoint."""
    clusterer = susi.SOMClustering(n_rows=n_rows, n_columns=n_columns)
    clusterer.fit(X)
    bmus = clusterer.transform(X)
    assert len(bmus) == X.shape[0]
    assert len(bmus[0]) == 2
def test_get_node_distance_matrix(datapoint, som_array, distance_metric, expected):
    """Distances from one datapoint to every SOM node match expectation."""
    clusterer = susi.SOMClustering()
    clusterer.distance_metric = distance_metric
    # minimal fitted state needed before init_unsuper_som()
    clusterer.X_ = np.array([datapoint, datapoint])
    clusterer.n_rows = som_array.shape[0]
    clusterer.n_columns = som_array.shape[1]
    clusterer.init_unsuper_som()
    distances = clusterer.get_node_distance_matrix(datapoint, som_array)
    assert np.allclose(distances, expected, rtol=1e-2)
def test_modify_weight_matrix_online(n_rows, n_columns, random_state, n_iter_unsupervised, X, learningrate, neighborhood_func, bmu_pos, dp, expected):
    """Online weight update on a fitted SOM reproduces the expected matrix."""
    clusterer = susi.SOMClustering(
        n_rows=n_rows,
        n_columns=n_columns,
        n_iter_unsupervised=n_iter_unsupervised,
        random_state=random_state,
    )
    clusterer.fit(X)
    dist_weights = clusterer.get_nbh_distance_weight_matrix(
        neighborhood_func, bmu_pos)
    updated = susi.modify_weight_matrix_online(
        som_array=clusterer.unsuper_som_,
        learningrate=learningrate,
        dist_weight_matrix=dist_weights,
        true_vector=clusterer.X_[dp],
    )
    assert np.allclose(updated, expected, atol=1e-8)
def test_get_nbh_distance_weight_matrix(n_rows, n_columns, random_state, neighborhood_func, bmu_pos, X, mode, expected):
    """Neighborhood distance weight matrix matches the expected values.

    Fix: removed two leftover debug ``print`` calls that duplicated the
    asserted computation and cluttered the test output.
    """
    som_clustering = susi.SOMClustering(
        n_rows=n_rows,
        n_columns=n_columns,
        nbh_dist_weight_mode=mode,
        random_state=random_state,
    )
    som_clustering.X_ = X
    som_clustering.init_unsuper_som()
    assert np.allclose(
        som_clustering.get_nbh_distance_weight_matrix(
            neighborhood_func, bmu_pos),
        expected,
        atol=1e-8,
    )
def __init__(self, n_rows, n_columns, targets, seed):
    """Set up a SOM wrapper with a fixed grid size and seeded RNG.

    The training array is built from the first two target vectors,
    each repeated 1000 times along axis 0 and then concatenated.
    """
    self.n_rows_ = n_rows
    self.n_columns_ = n_columns
    self.targets_ = targets
    self.seed_ = seed
    self.som = susi.SOMClustering(
        n_rows=n_rows, n_columns=n_columns, random_state=seed)
    repeated = [np.repeat(target, 1000, axis=0) for target in targets[:2]]
    self.training = np.concatenate(repeated, axis=0)
    # placeholders populated later (weights, BMUs, trained SOM array)
    self.w1_ = None
    self.w2_ = None
    self.bmu1_ = None
    self.bmu2_ = None
    self.som_array_ = None
def test_init_unsuper_som(X, init_mode):
    """SOM initialization succeeds for known modes and raises otherwise."""
    clusterer = susi.SOMClustering(init_mode_unsupervised=init_mode)
    clusterer.X_ = X

    # unknown init modes must raise
    if init_mode not in ("random", "random_data", "pca"):
        with pytest.raises(Exception):
            clusterer.init_unsuper_som()
        return

    clusterer.init_unsuper_som()
    # type and shape of the freshly initialized weight grid
    assert isinstance(clusterer.unsuper_som_, np.ndarray)
    expected_shape = (clusterer.n_rows, clusterer.n_columns, X.shape[1])
    assert clusterer.unsuper_som_.shape == expected_shape
def test_modify_weight_matrix_batch(X, nbh_func, bmus, expected):
    """Batch weight update of a small fitted SOM matches the expectation."""
    som = susi.SOMClustering(
        n_rows=2, n_columns=2, n_iter_unsupervised=5, random_state=42)
    som.fit(X)

    # stack one neighborhood distance weight matrix per datapoint
    dist_weight_block = np.zeros((len(X), som.n_rows, som.n_columns))
    for index, bmu_pos in enumerate(bmus):
        weights = som.get_nbh_distance_weight_matrix(nbh_func, bmu_pos)
        dist_weight_block[index] = weights.reshape(
            (som.n_rows, som.n_columns))

    new_som = som.modify_weight_matrix_batch(
        som_array=som.unsuper_som_,
        dist_weight_matrix=dist_weight_block,
        data=som.X_,
    )
    assert np.allclose(new_som, expected, atol=1e-8)
def test_set_bmus(som_array, X, n_jobs, expected):
    """set_bmus stores the expected BMU list, independent of n_jobs."""
    clusterer = susi.SOMClustering(n_jobs=n_jobs)
    clusterer.set_bmus(X, som_array)
    assert np.array_equal(clusterer.bmus_, expected)
def test_get_bmu(som_array, datapoint, expected):
    """The best matching unit for a datapoint is the expected node."""
    clusterer = susi.SOMClustering()
    bmu = clusterer.get_bmu(datapoint, som_array)
    assert np.array_equal(bmu, expected)
def test_get_clusters():
    """Each datapoint maps to exactly one 2-D cluster (BMU) coordinate."""
    som = susi.SOMClustering()
    som.fit(X)
    clusters = som.get_clusters(X)
    assert len(clusters) == len(X)
    assert len(clusters[0]) == 2
def test_get_u_matrix(n_rows, n_columns, mode):
    """The u-matrix has shape (2*rows - 1, 2*cols - 1, 1)."""
    som = susi.SOMClustering(n_rows=n_rows, n_columns=n_columns)
    som.fit(X)
    u_matrix = som.get_u_matrix(mode=mode)
    assert isinstance(u_matrix, np.ndarray)
    assert u_matrix.shape == (n_rows * 2 - 1, n_columns * 2 - 1, 1)
def test_som_clustering_init(n_rows, n_columns):
    """The constructor stores the requested grid dimensions."""
    model = susi.SOMClustering(n_rows=n_rows, n_columns=n_columns)
    assert model.n_rows == n_rows
    assert model.n_columns == n_columns
def test_get_datapoints_from_node(n_rows, n_columns, som_array, X, node, expected):
    """A node returns exactly the datapoints whose BMU it is."""
    som = susi.SOMClustering(n_rows=n_rows, n_columns=n_columns)
    som.set_bmus(X, som_array)
    datapoints = som.get_datapoints_from_node(node)
    assert np.array_equal(datapoints, expected)
def SelfOrganisedMap(data, n_rows=30, n_columns=30):
    """Cluster *data* with a SOM and return one BMU coordinate per sample.

    ``n_rows``/``n_columns`` set the SOM grid size (default 30x30).
    """
    som = susi.SOMClustering(n_rows, n_columns)
    som.fit(data)
    return som.get_clusters(data)
# Build the combined dataset and per-class feature matrices.
entireDataset = pd.concat([allDataFeatSushi, allDataFeatSandwich], ignore_index=True)
# take the features only: drop the label and filename columns
entireFeat = entireDataset.drop(['Class', 'filename'], axis=1).values
featSushi = allDataFeatSushi.drop(['Class', 'filename'], axis=1).values
featSandwich = allDataFeatSandwich.drop(['Class', 'filename'], axis=1).values

# select number of som neurons as in
numNeurons = 15
# both branches use the same model path
somModelName = modelFolder + '/fittedSomModel.sav'

if SomFitON:
    # Apply SOM clustering
    somClus = susi.SOMClustering(
        n_rows=numNeurons,
        n_columns=numNeurons,
        verbose=1,
        n_jobs=1  # change this to use the number of cores you desire in your local machine...
    )
    somClus.fit(entireFeat)
    # save the model on the disk; `with` guarantees the handle is closed
    with open(somModelName, 'wb') as fileFeatPKL:
        pkl.dump(somClus, fileFeatPKL)
    print("SOM fitted!")
else:
    # just load the existing model file for the som
    # Fix: the original opened this handle and never closed it (leak).
    # NOTE(review): only unpickle model files you created yourself.
    with open(somModelName, 'rb') as fileFeatPKL:
        somClus = pkl.load(fileFeatPKL)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# ==== Preprocessing ====
# Load labeled transaction features; 'prediction' is the label column.
X = pd.read_csv('txsDataWithLabels/txsWithLabels.csv', header=0, usecols=[
    'prediction', 'n_in', 'n_out', 'amount_in', 'amount_out',
    'change', 'diffN', 'SA'
])
y = X['prediction'].to_numpy()
# keep only the feature columns as a numpy array
X = X.drop('prediction', axis=1).to_numpy()

# Scatter of two feature columns (amount_in vs amount_out by usecols order),
# colored by label
plt.scatter(X[:, 3], X[:, 4], c=y)
plt.show()

# Classify and plot
som = susi.SOMClustering(n_rows=30, n_columns=30, random_state=12)
som.fit(X)
print('SOM fitted!')

# U-Matrix
u_matrix = som.get_u_matrix()
plot_umatrix(u_matrix, 30, 30)  # plot_umatrix defined elsewhere in this project
plt.show()

# BMUs
som_array = som.unsuper_som_  # weight vectors of the SOM
bmus = som.get_bmus(X, som_array)  # Best Match Unit per sample
# Training the SOM
# The susi package needs to be installed. It can be found in https://pypi.org/project/susi/
# Fix: `import pickle` originally appeared only AFTER the first
# `pickle.dump` call; hoisted before first use.
import pickle

import susi

# n_iter_unsupervised: number of iterations
# n_rows: rows of the SOM grid
# n_columns: columns of the SOM grid
som = susi.SOMClustering(n_iter_unsupervised=1000000, n_rows=20, n_columns=15)
som.fit(X_train)

# export the trained SOM; `with` closes the file even on error
with open('SOM_trained.p', 'wb') as outfile:
    pickle.dump(som, outfile)

# load the trained SOM
# NOTE(review): only unpickle files you trust.
with open('SOM_trained.p', 'rb') as infile:
    som = pickle.load(infile)