def train_som(som_width: int, som_height: int, df: pd.core.frame.DataFrame,
              df_train: pd.core.frame.DataFrame, df_test: pd.core.frame.DataFrame,
              df_train_columns: pd.core.frame.DataFrame, n_iter: int,
              sigma=0.3, learning_rate=0.01):
    """
    Trains self-organizing map and returns train and test datasets with predicted clusters.

    Arguments:
    som_width - width of som map
    som_height - height of som map
    df - initially prepared dataset
    df_train - training dataset
    df_test - testing dataset
    df_train_columns - list of columns of training dataset
    n_iter - number of iteration during training
    sigma - sigma parameter for the model
    learning_rate - learning rate
    Returns:
    final_df_train - training dataset with predicted cluster
    final_df_test - testing dataset with predicted cluster
    """
    som = MiniSom(som_width, som_height, df_train.shape[1], sigma=sigma,
                  learning_rate=learning_rate, random_seed=0)
    som.train(df_train, n_iter)

    # converting numpy arrays to dataframes
    df_train = pd.DataFrame(df_train, columns=df_train_columns)
    df_test = pd.DataFrame(df_test, columns=df_train_columns)

    # creating column with cluster basing on model prediction
    df_train['cluster'] = df_train.apply(lambda x: som_predict(x, som), axis=1)
    df_test['cluster'] = df_test.apply(lambda x: som_predict(x, som), axis=1)

    # joining train and test dataframes with previously dropped columns, which will
    # be useful in the further part of the script
    extra_cols = df[['Date', 'Price', 'close_plus_20_days', 'profit']]
    # BUG FIX: the original sliced COLUMNS for the train part
    # (.iloc[:, :len(df_train)]) while slicing ROWS for the test part;
    # both joins must use row slices.
    final_df_train = df_train.join(extra_cols.iloc[:len(df_train)], lsuffix='_org')
    # Reset the index of the test slice so it aligns with df_test's fresh
    # 0-based index (otherwise the join produces all-NaN columns).
    final_df_test = df_test.join(
        extra_cols.iloc[len(df_train):].reset_index(drop=True), lsuffix='_org')
    return final_df_train, final_df_test
def test_train_random(self):
    """Quantization error must shrink after random-order training."""
    som = MiniSom(5, 5, 2, sigma=1.0, learning_rate=0.5, random_seed=1)
    # Two rounds on different data; the second exercises the verbose path.
    for samples, verbose in (([[4, 2], [3, 1]], False),
                             ([[1, 5], [6, 7]], True)):
        data = array(samples)
        error_before = som.quantization_error(data)
        som.train_random(data, 10, verbose=verbose)
        assert error_before > som.quantization_error(data)
def run_som(features, size_x, xize_y, niter=10000, sigma=0.3, learning_rate=.5,
            pca=True, plot_error=False, random_seed=1):
    """Train a SOM on `features`, optionally tracking error curves.

    Arguments:
    features -- 2-D array of samples (rows) x features (columns)
    size_x, xize_y -- map dimensions
      (NOTE(review): 'xize_y' looks like a typo for 'size_y'; kept so keyword
      callers do not break)
    niter -- number of training iterations
    sigma, learning_rate -- MiniSom hyperparameters
    pca -- initialize weights with PCA (True) or randomly (False)
    plot_error -- when True, sample quantization/topographic error every 1000
      iterations and return them alongside the model
    Returns:
    som                            when plot_error is False
    (som, iter_x, q_error, t_error) when plot_error is True
    """
    som = MiniSom(size_x, xize_y, features.shape[1], sigma=sigma,
                  learning_rate=learning_rate, random_seed=random_seed)
    if pca:  # idiom fix: no '== True' comparison
        som.pca_weights_init(features)
    else:
        som.random_weights_init(features)

    if not plot_error:
        som.train_random(features, niter)
        return som

    q_error, t_error, iter_x = [], [], []
    for i in range(niter):
        # Picking a random sample each step corresponds to train_random().
        rand_i = np.random.randint(len(features))
        som.update(features[rand_i], som.winner(features[rand_i]), i, niter)
        if (i + 1) % 1000 == 0:
            q_error.append(som.quantization_error(features))
            t_error.append(som.topographic_error(features))
            iter_x.append(i)
    return som, iter_x, q_error, t_error
def credit_fraud(X):
    """Scale X to [0, 1], print summary statistics, train a 10x10 SOM,
    visualize its distance map with per-sample winner markers, and extract
    samples mapped to a hard-coded "fraud" neuron.

    Returns (som, frauds).
    """
    sc = MinMaxScaler(feature_range=(0, 1))
    X = sc.fit_transform(X)
    # Summary statistics of the scaled data (labels say "price" but the
    # values come from whatever columns X holds).
    maximum_price = np.max(X)
    minimum_price = np.min(X)
    mean_price = np.mean(X)
    median_price = np.median(X)
    std_price = np.std(X)
    # Show the calculated statistics
    print("Conducting Analysis On The Data Given:\n")
    print('min price: {}'.format(minimum_price))
    print('max price: {}'.format(maximum_price))
    print('mean price: {}'.format(mean_price))
    print('median of prices: {}'.format(median_price))
    print("Standard deviation of prices: {} ".format(std_price))
    # Training the SOM
    from minisom import MiniSom
    som = MiniSom(x=10, y=10, input_len=14, sigma=1.0, learning_rate=0.5)  #Quanization = 1
    som.random_weights_init(X)
    som.train_random(data=X, num_iteration=300)
    # som.quantization(X)
    # Visualizing the results
    from pylab import bone, pcolor, colorbar, plot, show
    bone()
    pcolor(som.distance_map().T)
    colorbar()
    markers = ['o', 's']
    colors = ['r', 'g']
    for i, x in enumerate(X):
        w = som.winner(x)
        print(w)
        # NOTE(review): 'y' is not defined in this function — this raises a
        # NameError unless a global 'y' (class labels) exists; confirm and
        # consider passing it as a parameter.
        plot(w[0] + 0.5, w[1] + 0.5, markers[y[i]],
             markeredgecolor=colors[y[i]], markerfacecolor='b',
             markersize=10, markeredgewidth=2)
    plt.show()
    # Finding the frauds
    mappings = som.win_map(X)
    display(mappings)
    # NOTE(review): both halves of the concatenation use the SAME neuron
    # (8, 1) — presumably one of them should be a different coordinate.
    frauds = np.concatenate((mappings[(8, 1)], mappings[(8, 1)]), axis=0)
    # NOTE(review): 'frauds in w' compares a 2-D array against the last
    # winner tuple — almost certainly not the intended membership test.
    if frauds in w:
        print('fraud detected')
    else:
        print('Fraud Not Detected')
    return som, frauds
def __init__(self, path):
    """Preprocess the file at `path`, min-max scale it, and build a 10x10 SOM."""
    raw = PreProcess().pre_processing(path)
    self.X = raw
    self.sc = MinMaxScaler(feature_range=(0, 1))
    self.X = self.sc.fit_transform(self.X)
    print(self.X.shape)
    # 93 = expected number of features after preprocessing
    self.som = MiniSom(x=10, y=10, input_len=93, sigma=1.0, learning_rate=0.5)
def __init__(self, som_shape, data_dimension, sigma=1.0, learning_rate=0.5,
             neighborhood_function="gaussian", distance_metric="euclidean",
             random_seed=None, scaling_factor_plot=6):
    """Wrap a MiniSom of the given shape and remember plotting settings.

    NOTE: random_seed is accepted but not forwarded to MiniSom (as in the
    original implementation).
    """
    self.som_shape = som_shape
    self.sf = scaling_factor_plot
    self.distance_metric = distance_metric
    rows, cols = som_shape[0], som_shape[1]
    self.som = MiniSom(rows, cols, data_dimension,
                       sigma=sigma,
                       learning_rate=learning_rate,
                       neighborhood_function=neighborhood_function,
                       distance_metric=distance_metric)
def main():
    """Train a 20x20 SOM on the matrix in the input file, save the model,
    and show its distance map."""
    if len(sys.argv) != 2:
        print("Usage: python cellsSom.py inputFile")
        exit(1)
    samples = np.loadtxt(sys.argv[1])
    model = MiniSom(20, 20, 10561)  # 10561 = expected feature count
    model.random_weights_init(samples)
    model.train_batch(samples, 100)
    joblib.dump(model, 'cellsSomModel.sav')
    plt.pcolor(model.distance_map().T)
    plt.show()
def MySOM(im, imageType, numClusts):
    """Cluster an image into `numClusts` groups with a 1-D SOM.

    Arguments:
    im -- H x W x bands image array
    imageType -- 'RGB' (use bands directly) or 'Hyper' (PCA-reduce to 3 bands)
    numClusts -- number of clusters / SOM nodes
    Returns:
    (labels_filtered, cc_image) for 'RGB'; labels_filtered otherwise.
    """
    height = im.shape[0]
    width = im.shape[1]
    bands = im.shape[2]
    # FIX: the original used a Python 2 print statement; the rest of the
    # file is Python 3.
    print('image size is: ', height, '*', width, '*', bands)
    im_change = im.reshape(height * width, bands)
    if imageType == 'RGB':
        im_float = np.float32(im_change)
    elif imageType == 'Hyper':
        # Reduce hyperspectral bands to 3 principal components first.
        pca = PCA(n_components=3)
        im_float = np.float32(pca.fit_transform(im_change))
    # The SOM training was duplicated verbatim in both branches; hoisted out.
    som = MiniSom(numClusts, 1, 3, sigma=0.1, learning_rate=0.5)
    som.random_weights_init(im_float)
    som.train_random(im_float, 100)
    qnt = som.quantization(im_float)
    z = som.get_weights().reshape(numClusts, 3)
    # Identify each quantized pixel by the sum of its codebook vector.
    z = np.sum(z, axis=1).tolist()
    output = np.array([z.index(np.sum(x)) for x in qnt]).reshape(height, width)
    labels_filtered = median_filter(output, 7)
    if imageType == 'RGB':
        cc_image = cl(output, connectivity=2)
        return labels_filtered, cc_image
    return labels_filtered
def som_mapping(self, x_n, y_n, d, sigma, lr, batch_size,
                neighborhood='gaussian', seed=10):
    """
    Perform SOM on transform data

    Parameters
    ----------
    x_n : int
          the dimension of expected map
    y_n : int
          the dimension of expected map
    d : int
        vector length of input df
    sigma : float
            the standard deviation of initialized weights
    lr : float
         learning rate
    batch_size : int
                 iteration times
    neighborhood : string
                   e.g. 'gaussian', the initialized weights' distribution
    seed : int
           for reproducing
    """
    network = MiniSom(x_n, y_n, d, sigma, lr,
                      neighborhood_function=neighborhood,
                      random_seed=seed)
    # PCA-based weight initialization on the underlying numpy array.
    samples = self.data.values
    network.pca_weights_init(samples)
    print("Training...")
    network.train_batch(samples, batch_size, verbose=True)
    print("\n...ready!")
    # Remember the map geometry and both views of the learned codebook.
    self.x_n = x_n
    self.y_n = y_n
    self.weights = network.get_weights()
    self.flatten_weights = self.weights.reshape(x_n * y_n, d)
    self.map_som = network
def train_with_cnn_features(path):
    """Train a 16x16 SOM on CNN features stored in an HDF5 file and plot it.

    Arguments:
    path -- HDF5 file containing 'features', 'true_labels' and 'pred_labels'
    """
    # FIX: use a context manager so the HDF5 handle is always closed
    # (the original leaked the open file). Datasets are materialized into
    # memory before the file closes.
    with h5.File(path, 'r') as f:
        features = f['features'][:]
        pred_labels = f['pred_labels'][:]
        # 'true_labels' was read but never used in the original; skipped.
    som = MiniSom(16, 16, 128, sigma=0.7, learning_rate=0.2)  # SOM init
    som.random_weights_init(features)
    som.train_random(features, 10000)  # trains the SOM
    print("model trained")
    som_plot(som, features, data_name=None, labels=pred_labels)
def get_som(self):
    """Cluster the MFCC feature set with a SOM and return the quantized data."""
    mfcc = self.get_mfcc()
    # Map topology: self.x by self.y nodes; input vectors have 20 MFCC
    # coefficients; sigma controls the neighborhood radius and
    # learning_rate the weight-update magnitude.
    som = MiniSom(self.x, self.y, input_len=20, sigma=self.sigma,
                  learning_rate=self.learning_rate)
    # Initialize with small normalized random weights, then train in
    # random sample order.
    som.random_weights_init(mfcc)
    som.train_random(mfcc, self.epoch)
    # Replace each MFCC vector with its cluster (codebook) vector.
    som_data = som.quantization(mfcc)
    return som_data
def set_som(self, sigma, learning_rate):
    """Build the SOM for this instance's network size and randomly
    initialize its weights from self.data.

    Arguments:
    sigma -- neighborhood radius
    learning_rate -- weight-update magnitude
    """
    network = MiniSom(x=self.network_h, y=self.network_w, input_len=32,
                      sigma=sigma, learning_rate=learning_rate)
    network.random_weights_init(self.data)
    self.som = network
def create_fp2(word_vectors):
    """Map a single word-vector record to its best-matching unit (BMU).

    Arguments:
    word_vectors -- dict with at least 'vector' and 'counts' keys
    Returns:
    {counts: (bmu_row, bmu_col)} -- the record's count keyed to its BMU
    """
    SOM = MiniSom(var_dict['H'], var_dict['W'], var_dict['N'],
                  sigma=1.0, random_seed=1)
    # Reuse the pre-trained codebook instead of training.
    SOM._weights = var_dict['codebook']
    # FIX: removed the unused local 'idx = word_vectors["idx"]'.
    bmu = SOM.winner(word_vectors['vector'])
    return {word_vectors['counts']: bmu}
def soma(array, fcount):
    """Train a SOM on `array` and return each row prefixed with the column
    index of its winning neuron.

    Arguments:
    array -- 2-D sequence of samples
    fcount -- number of SOM columns
    """
    samples = np.array(array)
    vector_len = len(samples[0])
    net = MiniSom(len(samples), fcount, vector_len, sigma=0.8, learning_rate=0.7)
    net.random_weights_init(samples)
    net.train_batch(samples, 10)
    # winner() returns (row, col); keep only the column index.
    return [[net.winner(row)[1]] + row for row in samples.tolist()]
def som_mapping(self, x_n, y_n, d, sigma, lr, neighborhood='gaussian',
                seed=10, epochs=10000):
    """
    Perform SOM on transform data

    Parameters
    ----------
    x_n : int
          the dimension of expected map
    y_n : int
          the dimension of expected map
    d : int
        vector length of input df
    sigma : float
            the standard deviation of initialized weights
    lr : float
         learning rate
    neighborhood : string
                   e.g. 'gaussian', the initialized weights' distribution
    seed : int
           for reproducing
    :param epochs: number of training iterations
    """
    som = MiniSom(x_n, y_n, d, sigma, lr,
                  neighborhood_function=neighborhood,
                  random_seed=seed)  # initialize the map
    som.pca_weights_init(self.data)  # initialize the weights
    print("Training...")
    som.train(self.data, epochs)  # random training
    print("\n...ready!")
    self.model = som
    # FIX: reshape with the dimensions this SOM was actually built with
    # (the original used self.x_n/self.y_n/self.d, which can disagree with
    # the x_n/y_n/d arguments and then crash or silently mis-reshape).
    flatten_weights = self.model.get_weights().reshape(x_n * y_n, d)
    if not self.pretrained:
        # initialize consensus clustering around the expected cluster count
        cluster_ = ConsensusCluster(AgglomerativeClustering,
                                    self.clusters - self.explore_clusters,
                                    self.clusters + self.explore_clusters, 3)
        k = cluster_.get_optimal_number_of_clusters(flatten_weights, verbose=True)
        # fitting SOM weights into clustering algorithm
        self.cluster = cluster_.cluster_(n_clusters=k).fit(flatten_weights)
        if self.save:
            pickle.dump(self.cluster, open("models/som_clustering.p", "wb"))
    else:
        with open('models/som_clustering.p', 'rb') as infile:
            self.cluster = pickle.load(infile)
    self.flatten_weights = flatten_weights
def setUp(self):
    """Build hexagonal XPySom/MiniSom fixtures with deterministic fake weights."""
    self.som = XPySom(5, 5, 1, topology='hexagonal', std_coeff=np.sqrt(np.pi))
    self.minisom = MiniSom(5, 5, 1, topology='hexagonal')
    # Every initial codebook vector must be unit-norm.
    for row in range(5):
        for col in range(5):
            np.testing.assert_almost_equal(
                1.0, np.linalg.norm(self.som._weights[row, col]))
    # Replace the weights with a known fake configuration for the tests.
    fake_weights = np.zeros((5, 5, 1))
    fake_weights[2, 3] = 5.0
    fake_weights[1, 1] = 2.0
    self.som._weights = fake_weights
    np.random.seed(1234)
    cp.random.seed(1234)
def som(X, **kwargs):
    """Train a square SOM on X and return the fitted model.

    Keyword arguments:
    size -- map side length (default 50)
    epochs -- training iterations (default 10000)
    random_state -- MiniSom random seed (default 42)
    """
    size = kwargs.get("size", 50)
    epochs = kwargs.get("epochs", 10000)
    random_state = kwargs.get("random_state", 42)
    model = MiniSom(size, size, len(X[0]), neighborhood_function='gaussian',
                    sigma=1.5, random_seed=random_state)
    model.pca_weights_init(X)
    model.train_random(X, epochs, verbose=True)
    # FIX: the original trained the model and then discarded it; callers
    # had no way to use the result. Returning it is backward-compatible.
    return model
def init_som(self, widget=None, data=None):
    """Create and randomly initialize a SOM sized from the spin buttons,
    using only the column subset selected in the combobox.

    (widget/data parameters exist for the GUI callback signature; the
    incoming `data` argument is not used.)
    """
    cols = self.columns[self.combobox.get_active()]
    subset = self.data[:, 0:len(cols)]
    width = self.width_spin_button.get_value_as_int()
    height = self.height_spin_button.get_value_as_int()
    self.som = MiniSom(width, height, len(cols), sigma=1.2, learning_rate=0.5)
    self.som.random_weights_init(subset)
def main():
    """Train a 26x26 SOM on the gene-expression CSV given on the command
    line, save the model, and show its distance map."""
    if len(sys.argv) != 2:
        print("Usage: python genesSom.py input")
        exit(1)
    # Skip column 0 (row labels); use the 6482 expression columns.
    columns = [i for i in range(1, 6483)]
    X = np.loadtxt(sys.argv[1], delimiter=",", skiprows=1, usecols=columns)
    mdl = MiniSom(26, 26, 6482)
    mdl.pca_weights_init(X)
    mdl.train_batch(X, 100)
    # FIX: the original filename contained a stray double quote
    # ('"genesSomModel.sav'), producing a file literally named "genesSomModel.sav.
    joblib.dump(mdl, 'genesSomModel.sav')
    plt.pcolor(mdl.distance_map().T)
    plt.show()
def plot_SOM(data_name, data_df, mesh, style='jet', nc=5, learning_rate=0.5,
             sigma=0.5,
             drop=['ccx', 'ccy', 'ccz', 'T', 'Chi', 'PV', 'f_Bilger', 'non-eq',
                   'PV_norm', 'Chi_norm', 'PV_compute']):
    """Cluster a flame dataset with a 1-D SOM, plot the cluster field and a
    scatter of T vs f_Bilger colored by cluster, and compute a per-cluster
    summary via npc().

    Returns (data_df, cmap, sub).
    """
    X_ = data_df.copy()
    X = X_.drop(drop, axis=1)
    model = MiniSom(nc, 1, X.shape[1], sigma=sigma, learning_rate=learning_rate)
    # FIX: train/classify on the underlying array — iterating a DataFrame
    # yields column NAMES, so the original fed strings to model.winner().
    model.train_random(X.values, 200)
    z = [model.winner(row)[0] for row in X.values]
    # plot the clusters
    cmap = plt.get_cmap('jet', nc)
    plot_field(data_name, mesh, 'SOM', z, cmap)
    plt.figure()
    # FIX: 'zz' and 'n_clusters' were undefined names in the original;
    # the cluster labels are 'z' and the cluster count is 'nc'.
    plt.scatter(data_df['f_Bilger'], data_df['T'], s=0.5, c=z, cmap=cmap)
    plt.colorbar(ticks=range(nc))
    plt.title('DBSCAN cluster')
    X['label'] = z
    sub = pd.DataFrame()
    for label in set(z):
        data_sub = X[X['label'] == label].drop(['label'], axis=1)
        print(data_sub)
        sub[str(label)] = npc(data_sub)
    plt.show(block=False)
    # FIX: the original returned the undefined name 'df'.
    return data_df, cmap, sub
def somTrained(features, x=10, y=10, sigma=1.0, learning_rate=0.3,
               num_iteration=100):
    """Train and return a SOM fitted to `features`.

    Arguments:
    features -- 2-D array, one sample per row
    x, y -- map dimensions
    sigma, learning_rate -- MiniSom hyperparameters
    num_iteration -- number of random-order training steps
    """
    model = MiniSom(x=x, y=y, input_len=features.shape[1],
                    sigma=sigma, learning_rate=learning_rate)
    model.random_weights_init(features)
    model.train_random(data=features, num_iteration=num_iteration)
    return model
def graph(self, dataset, X, y):
    """Train a SOM on scaled X, show its distance map with class markers,
    then build Tk entry widgets for picking two neurons whose mapped
    samples are collected as suspected frauds.

    Arguments:
    dataset -- passed through to self.neural for the follow-up model
    X -- feature matrix
    y -- binary class labels used to pick marker shape/color
    """
    from sklearn.preprocessing import MinMaxScaler
    sc = MinMaxScaler(feature_range=(0, 1))
    X = sc.fit_transform(X)
    from minisom import MiniSom
    som = MiniSom(x=10, y=10, input_len=15, sigma=1.0, learning_rate=0.5)
    som.random_weights_init(X)
    som.train_random(data=X, num_iteration=100)
    from pylab import bone, pcolor, colorbar, plot, show
    bone()
    pcolor(som.distance_map().T)
    colorbar()
    markers = ['o', 's']
    colors = ['r', 'g']
    for i, x in enumerate(X):
        w = som.winner(x)
        plot(w[0] + 0.5, w[1] + 0.5, markers[y[i]],
             markeredgecolor=colors[y[i]], markerfacecolor='None',
             markersize=10, markeredgewidth=2)
    show()
    self.entry1_value = IntVar()
    self.entry2_value = IntVar()
    self.entry3_value = IntVar()
    self.entry4_value = IntVar()
    self.entry1 = Entry(root, textvariable=self.entry1_value, width=25)
    self.entry2 = Entry(root, textvariable=self.entry2_value, width=25)
    self.entry3 = Entry(root, textvariable=self.entry3_value, width=25)
    self.entry4 = Entry(root, textvariable=self.entry4_value, width=25)
    self.entry1.grid(row=6, column=2)
    self.entry2.grid(row=6, column=4)
    # FIX: the original gridded entry1 three times; entry3/entry4 were
    # never placed on the window.
    self.entry3.grid(row=7, column=2)
    self.entry4.grid(row=7, column=4)
    mappings = som.win_map(X)
    # FIX: win_map keys are (x, y) integer tuples; the original indexed it
    # with the Entry WIDGETS themselves, which always raises KeyError.
    coords1 = (self.entry1_value.get(), self.entry2_value.get())
    coords2 = (self.entry3_value.get(), self.entry4_value.get())
    frauds = np.concatenate((mappings[coords1], mappings[coords2]), axis=0)
    frauds = sc.inverse_transform(frauds)
    self.trainbutton = Button(text="Get probabilities",
                              command=lambda: self.neural(dataset, frauds))
    self.trainbutton.grid(row=8, column=4)
def generate_minisom(X, algo):
    """Train a 25x25 SOM on X with the requested algorithm and report timing.

    Arguments:
    X -- 2-D sample array
    algo -- 'BATCH' for train_batch, 'RANDOM' for train_random
    Returns:
    the trained MiniSom instance
    """
    map_dim = 25
    n_features = X.shape[1]  # Nb. features (vectors dimension)
    print('number of features in SOM: {}'.format(n_features))
    som = MiniSom(map_dim, map_dim, n_features, sigma=1.0, random_seed=1)
    t1 = time()
    if algo == 'BATCH':
        som.train_batch(X, 500)
    elif algo == 'RANDOM':
        som.train_random(X, 500)
    t2 = time()
    print("\nTime taken by training {} minisom\n----------\n{} s".format(
        algo, (t2 - t1)))
    # FIX: the original trained the model and then discarded it.
    return som
def train(
    img,
    img_data,
):
    """Color-quantize an image with a 3x3 SOM.

    Arguments:
    img -- original image array (used for its shape)
    img_data -- flattened pixel array fed to the SOM
    Returns:
    (starting_weights, reduced_image, learnt_weights), all uint8.
    """
    som = MiniSom(x=3, y=3, input_len=3, sigma=0.1, learning_rate=0.1)
    som.random_weights_init(img_data)
    # Snapshot the codebook before training for comparison afterwards.
    starting_weights = som.get_weights().copy().astype('uint8')
    som.train_random(img_data, 1250)
    quantized = som.quantization(img_data)
    # Scatter each quantized pixel back to its 2-D position.
    reduced = np.zeros(img.shape)
    grid = (img.shape[0], img.shape[1])
    for flat_idx, colour in enumerate(quantized):
        reduced[np.unravel_index(flat_idx, shape=grid)] = colour
    learnt_weights = som.get_weights().astype('uint8')
    return starting_weights, reduced.astype('uint8'), learnt_weights
def fake_data():
    """Train a 5x5 SOM on the global `fake` dataset, plot activation
    frequencies, and return for each sample the indices of all samples
    that share its winning neuron."""
    data = fake
    # One iteration per sample tends to work well for this dataset size.
    iteration = len(data)
    som = MiniSom(5, 5, pixels, sigma=0.5, learning_rate=0.5)
    som.random_weights_init(data)
    som.train_random(data, iteration)
    print("results for fake data\n")
    plt.figure(figsize=(8, 7))
    frequencies = som.activation_response(data)
    plt.pcolor(frequencies.T, cmap='Blues')
    plt.colorbar()
    plt.show()
    act = som.activation_response(data)
    # Winning neuron for every sample, then group indices by shared winner.
    winners = [som.winner(sample) for sample in data]
    pairs = [[j for j, other in enumerate(winners) if winner == other]
             for winner in winners]
    return pairs
def do_som(data):
    """Train a 10x10 SOM on `data` (100000 verbose random iterations) and
    display its distance map and activation response."""
    print('MiniSom wird erstellt.')
    som = MiniSom(10, 10, 20, sigma=1, learning_rate=0.2)
    print('Erstellung abgeschlossen.')
    print('Training beginnt.')
    som.train_random(data, 100000, True)  # verbose random-order training
    print('Training abgeschlossen.')
    print('')
    print('win_map')
    print('')
    print('distance map')
    distances = som.distance_map()
    print(distances)
    plt.imshow(distances, cmap='Blues', interpolation='nearest')
    plt.show()
    print('')
    print('activation_response')
    plt.imshow(som.activation_response(data), cmap='Blues',
               interpolation='nearest')
    plt.show()
    print(som.activation_response(data))
def main():
    """Train a small SOM on PCA face features for two classes and save a
    pie-chart visualization of each map cell's label mix."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--feature_path', default='data/features/', type=str)
    parser.add_argument('--train_num', default=5, type=int)
    parser.add_argument('--val_num', default=0, type=int)
    parser.add_argument('--test_num', default=5, type=int)
    parser.add_argument('--iter', default=1000, type=int)
    parser.add_argument('--map_size', default=3, type=int)
    parser.add_argument('--neighborhood_function', default='triangle', type=str)
    # FIX: these were declared type=int, so any CLI value was truncated to
    # an integer (learning_rate even had a float default of 0.06).
    parser.add_argument('--learning_rate', default=0.06, type=float)
    parser.add_argument('--sigma', default=2, type=float)
    opt = parser.parse_args()
    # ms = [50, 100, 150, 200, 250, 300, 350, 10304]
    ms = [350]
    faces = [0, 1]  # face classes to be used
    for m in ms:
        features = np.load(opt.feature_path + 'feature_' + str(m) + '.npy')  # pca features
        # keep only the 10 samples of each of the two selected classes
        features = np.concatenate((features[faces[0] * 10:faces[0] * 10 + 10],
                                   features[faces[1] * 10:faces[1] * 10 + 10]),
                                  axis=0)
        features_ = get_sets(features, opt.train_num, opt.val_num, opt.test_num)
        som = MiniSom(opt.map_size, opt.map_size, m, sigma=opt.sigma,
                      learning_rate=opt.learning_rate,
                      neighborhood_function=opt.neighborhood_function)
        som.random_weights_init(features_['x_train'])
        som.train_batch(features_['x_train'], opt.iter)
        # visualization - per-cell pie chart of label counts
        preds = som.labels_map(features_['x_train'], features_['d_train'])
        fig = plt.figure(figsize=(opt.map_size, opt.map_size))
        grid = plt.GridSpec(opt.map_size, opt.map_size)
        for position in preds.keys():
            label_fracs = [preds[position][l] for l in [0, 1]]
            plt.subplot(grid[position[0], position[1]], aspect=1)
            patches, texts = plt.pie(label_fracs)
        fig.legend(patches, ['face%d' % p for p in [0, 1]],
                   loc='upper center', ncol=2)
        fig.savefig('imgs/som_%d' % opt.map_size)
def _minisomrandom(self):
    """Clusters sentence vectors using minisomrandom algorithm

    Returns
    -------
    numpy ndarray
        codebook (weights) of the trained SOM
    """
    side = int(self.opts['size'])
    dim = self.X.shape[1]
    net = MiniSom(side, side, dim, sigma=1.0, random_seed=1)
    if self.opts['initialization']:
        net.random_weights_init(self.X)
    net.train_random(self.X, self.opts['niterations'])
    return net.get_weights()
def create_fp(word_vectors):
    """Accumulate best-matching-unit hit counts for one document on a SOM grid.

    Arguments:
    word_vectors -- {doc_key: [ {'vector': ..., 'counts': ..., ...}, ... ]}
    Returns:
    {doc_key: hits} where hits is an H x W integer count grid (keyed by the
    last key iterated, matching the original behavior).
    """
    SOM = MiniSom(var_dict['H'], var_dict['W'], var_dict['N'],
                  sigma=1.0, random_seed=1)
    # Reuse the pre-trained codebook instead of training.
    SOM._weights = var_dict['codebook']
    # FIX: np.int was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin int is the equivalent dtype.
    a = np.zeros((var_dict['H'], var_dict['W']), dtype=int)
    for key, value in word_vectors.items():
        for val in value:
            # NOTE: the original also read val['idx'] but never used it.
            bmu = SOM.winner(val['vector'])
            a[bmu[0], bmu[1]] += val['counts']
    return {key: a}
def som(self, image_to_segment, original_image, im_size, cluster):
    """Segment an image via SOM color quantization.

    Arguments:
    image_to_segment -- flattened pixel array to cluster
    original_image -- source image passed through to clustering()
    im_size -- image size (unused here; kept for the call signature)
    cluster -- number of clusters / SOM nodes
    Returns:
    (som_image, bounded_image) -- bounded_image is a binary image carrying
    the boundary information produced by clustering().
    """
    model = MiniSom(cluster, 1, 3, sigma=0.1, learning_rate=0.5)
    model.random_weights_init(image_to_segment)
    model.train_random(image_to_segment, 100)
    quantized = model.quantization(image_to_segment)
    # Identify each quantized pixel by the sum of its codebook vector.
    weight_sums = np.sum(model.get_weights().reshape(cluster, 3), axis=1).tolist()
    labels = np.array([weight_sums.index(np.sum(pixel)) for pixel in quantized])
    som_image, bounded_image = clustering(labels, original_image, cluster)
    return som_image, bounded_image