def main():
    """Fit a SOM on two synthetic Gaussian clusters and save the result plots.

    Draws two groups of 500 samples with 36 features each (means -0.25 and
    +0.25, standard deviation 0.5), trains ``SOM(10, 10)`` on the stacked
    data and writes the error history, point map, class-0 density map and
    distance map below ``images/``.
    """
    # One cluster per centre; the draw order (-0.25 first) matters for the RNG stream.
    clusters = [
        np.random.normal(loc=centre, scale=0.5, size=(500, 36))
        for centre in (-.25, .25)
    ]
    samples = np.vstack(clusters)

    som = SOM(10, 10)
    # 10000 epochs, recording the error every 100 steps.
    som.fit(samples, 10000, save_e=True, interval=100)
    som.plot_error_history(filename='images/som_error.png')

    # Dummy labels: the first 500 rows are class 0, the last 500 are class 1.
    labels = np.repeat([0, 1], 500)

    # Visualise the learned representation together with the class labels.
    som.plot_point_map(samples, labels, ['Class 0', 'Class 1'],
                       filename='images/som.png')
    som.plot_class_density(samples, labels, t=0, name='Class 0',
                           filename='images/class_0.png')
    som.plot_distance_map(filename='images/distance_map.png')
def main():
    """Train a SOM on virtual peptide libraries and map known AMPs onto it.

    Steps:
      1. Generate three sub-libraries of ``libnum`` sequences each
         (``Helices``, ``Random`` with ``proba='AMP'``, ``AMPngrams``).
      2. Compute 'pepcats' cross-correlation descriptors (window 7).
      3. Fit ``SOM(x=12, y=12)`` on those descriptors and plot the error history.
      4. Load the AMP-vs-TM data set, compute the same descriptors and draw
         the point, density, distance and per-class density maps.
      5. Query the neighbours of one sequence of interest.
    """
    # Generate some virtual peptide sequences.
    libnum = 1000  # 1000 sequences per sublibrary
    h = Helices(seqnum=libnum)
    r = Random(seqnum=libnum)
    n = AMPngrams(seqnum=libnum, n_min=4)
    h.generate_sequences()
    r.generate_sequences(proba='AMP')
    n.generate_sequences()

    # Calculate molecular descriptors for the peptides.
    d = PeptideDescriptor(
        seqs=np.hstack((h.sequences, r.sequences, n.sequences)),
        scalename='pepcats')
    d.calculate_crosscorr(window=7)

    # Train a SOM on the descriptors and print / plot the training error.
    som = SOM(x=12, y=12)
    som.fit(data=d.descriptor, epochs=100000, decay='hill')
    print("Fit error: %.4f" % som.error)
    som.plot_error_history(filename="som_error.png")

    # Load known antimicrobial peptides (AMPs) and transmembrane sequences.
    dataset = load_AMPvsTM()
    d2 = PeptideDescriptor(dataset.sequences, 'pepcats')
    d2.calculate_crosscorr(7)

    # One label per row of `labelled_data`: three libraries plus 206 reference rows.
    targets = np.array(libnum * [0] + libnum * [1] + libnum * [2] + 206 * [3])
    names = ['Helices', 'Random', 'nGrams', 'AMP']

    # Rows that `targets` describes.
    # NOTE(review): rows 206 onward of d2 are presumably the AMP half of the
    # data set -- confirm against load_AMPvsTM.
    labelled_data = np.vstack((d.descriptor, d2.descriptor[206:]))
    # Every descriptor, including the reference rows that carry no label.
    all_data = np.vstack((d.descriptor, d2.descriptor))

    # Plot SOM maps with the location of the AMPs.
    som.plot_point_map(labelled_data, targets, names, filename="peptidesom.png")
    som.plot_density_map(all_data, filename="density.png")
    som.plot_distance_map(colormap='Reds', filename="distances.png")

    colormaps = ['Oranges', 'Purples', 'Greens', 'Reds']
    # Sorted so that colormaps[i] is always paired with class i.
    for i, c in enumerate(sorted(set(targets))):
        # Use the same matrix the labels were built for; passing the full d2
        # descriptor here would shift class 3 onto the first 206 reference rows.
        som.plot_class_density(labelled_data, targets, c, names,
                               colormap=colormaps[i],
                               filename='class%i.png' % c)

    # Get neighboring peptides (AMPs / TMs) for a sequence of interest.
    my_d = PeptideDescriptor(seqs='GLFDIVKKVVGALLAG', scalename='pepcats')
    my_d.calculate_crosscorr(window=7)
    som.get_neighbors(datapoint=my_d.descriptor, data=d2.descriptor,
                      labels=dataset.sequences, d=0)
def run(data, musics):
    """Cluster the SOM winner map of ``data`` and tag each music with its cluster.

    A ``SOM(30, 30)`` is fitted on ``data``; every cell of its winner map is
    turned into an ``[i, j, value]`` point and the points are grouped into six
    clusters with Ward agglomerative clustering. The winner map is then
    overwritten with the cluster labels, and each item in ``musics`` receives
    the label of the cell it wins.

    Args:
        data: Training samples passed to ``SOM.fit`` and ``SOM.winner_map``.
        musics: Iterable of objects exposing ``toObject()`` (whose ``'valor'``
            entry is fed to ``SOM.winner``) and ``setGroupKon(label)``.

    Returns:
        None. The result is stored on the ``musics`` items.
    """
    som = SOM(30, 30)  # initialize the SOM
    som.fit(data, 20000)  # fit the SOM for 20000 epochs

    # Prepare the winner map for clustering: one [row, col, value] point per cell.
    winners = som.winner_map(data)
    points = [
        [i, j, winners[i][j]]
        for i in range(len(winners))
        for j in range(len(winners[0]))
    ]

    # Ward linkage only works with Euclidean distance, which is also the
    # default, so the argument is omitted. This avoids the `affinity` keyword
    # that newer scikit-learn releases no longer accept.
    hc = AgglomerativeClustering(n_clusters=6, linkage='ward')
    y_hc = hc.fit_predict(points)

    # Replace each cell's value with the cluster label of its point.
    for (i, j, _), label in zip(points, y_hc):
        winners[i, j] = label

    # Assign every music the cluster of its winning cell.
    for music in musics:
        winner = som.winner(music.toObject()['valor'])
        music.setGroupKon(winners[winner[0]][winner[1]])
def main(in_file, out_file, x, y, epochs, ref=None, test=False, verbose=0):
    """Embed the rows of a numeric table onto a SOM grid.

    Args:
        in_file: Path to a tab-separated table whose first column is the
            index; when ``test`` is true, an array-like with a ``shape``
            attribute that is wrapped in a DataFrame directly.
        out_file: Destination of the tab-separated result (ignored when
            ``test`` is true).
        x, y: Arguments forwarded to ``SOM(x, y)``.
        epochs: Number of epochs passed to ``SOM.fit``.
        ref: ``'IRCI'`` loads the stored SOM instead of fitting one;
            ``'Create'`` fits a SOM and stores it; anything else only fits.
        test: Return the embedding instead of writing it to ``out_file``.
        verbose: Forwarded to ``SOM.fit``.

    Returns:
        A DataFrame with columns ``ID``, ``X`` and ``Y`` when ``test`` is
        true, otherwise ``None``.
    """
    if test:
        df = pd.DataFrame(in_file, columns=range(in_file.shape[1]))
    else:
        df = pd.read_table(in_file, sep='\t', low_memory=True, index_col=0)

    # Drop rows with any missing value and report how many were lost.
    rows_before = df.shape[0]
    df.dropna(axis=0, how='any', inplace=True)
    rows_dropped = rows_before - df.shape[0]
    if rows_dropped:
        logger.warning('%d rows dropped due to missing values', rows_dropped)

    # Keep numeric columns only and report how many were lost.
    cols_before = df.shape[1]
    df = df.select_dtypes(include=[np.number])
    cols_dropped = cols_before - df.shape[1]
    if cols_dropped:
        logger.warning('%d columns dropped due to non-numeric data type',
                       cols_dropped)

    # The stored model lives next to this module. Loading and saving must use
    # the same location (the load path used to point at the filesystem root).
    basedir = os.path.dirname(os.path.abspath(__file__))
    model_path = basedir + '/SOM.pkl'

    som = SOM(x, y)
    if ref == 'IRCI':
        # NOTE(review): relies on SOM.load returning the loaded SOM -- confirm.
        som = som.load(model_path)
    else:
        som.fit(df.values, epochs, verbose=verbose)
    embedding = som.winner_neurons(df.values)
    if ref == 'Create':
        som.save(model_path)

    # X comes from embedding column 1 and Y from column 0.
    emb_df = pd.DataFrame({'ID': df.index})
    emb_df['X'] = embedding[:, 1]
    emb_df['Y'] = embedding[:, 0]

    if test:
        return emb_df
    emb_df.to_csv(out_file, index=False, sep='\t')
from data import gen_kura_data
from som import SOM
import params
from visualizer import visualize_history

if __name__ == '__main__':
    # Build the data set from the settings in `params`.
    X = gen_kura_data(params)

    # Collect the SOM constructor arguments from the matching `params` attributes.
    som_kwargs = {
        key: getattr(params, key)
        for key in ('resolution', 'latent_dim', 'sigma_max',
                    'sigma_min', 'tau', 'seed')
    }
    som = SOM(**som_kwargs)

    # Fit for the configured number of epochs, then visualise what fit() returned.
    history = som.fit(X, num_epoch=params.num_epoch)
    visualize_history(X, history, params)
import numpy as np
from som import SOM

# Generate some random data with 36 features: two clusters of 500 samples
# centred at -0.25 and +0.25.
shape = (500, 36)
data1 = np.random.normal(loc=-.25, scale=0.5, size=shape)
data2 = np.random.normal(loc=.25, scale=0.5, size=shape)
data = np.vstack([data1, data2])

som = SOM(10, 10)  # initialize the SOM
som.fit(data, 2000)  # fit the SOM for 2000 epochs

# Dummy target values: 500 zeros followed by 500 ones (kept as a plain list).
targets = [label for label in (0, 1) for _ in range(500)]

# Now visualize the learned representation with the class labels.
som.plot_point_map(data, targets, ['class 1', 'class 2'], filename='som.png')
# NOTE(review): the density map is drawn for target value 1 but written to
# 'class_0.png' -- confirm the intended file name.
som.plot_class_density(data, targets, 1, ['class 1', 'class 2'],
                       filename='class_0.png')
import numpy as np
from som import SOM

# Generate some random data with 36 features: two clusters of 500 samples
# centred at -0.25 and +0.25.
shape = (500, 36)
data1 = np.random.normal(loc=-.25, scale=0.5, size=shape)
data2 = np.random.normal(loc=.25, scale=0.5, size=shape)
data = np.vstack([data1, data2])

som = SOM(10, 10)  # initialize the SOM
# Fit the SOM for 10000 epochs, saving the error every 100 steps.
som.fit(data, 10000, save_e=True, interval=100)
som.plot_error_history(filename='images/som_error.png')  # training error history

# Dummy target values: 500 zeros followed by 500 ones.
targets = np.repeat([0, 1], 500)
print(targets.shape)
print(data.shape)

# Visualisation of the learned representation (currently disabled):
# som.plot_point_map(data, targets, ['Class 0', 'Class 1'], filename='images/som.png')
# som.plot_class_density(data, targets, t=0, name='Class 0', filename='images/class_0.png')
# som.plot_distance_map(filename='images/distance_map.png')  # plot the distance map after training
for col in range(1, num_cols): col_values.append(input_sheet.col_values(col)[1:]) x = np.array(col_values, dtype='|S4') y = x.astype(np.float) maxs = [max(y[col]) for col in range(0, num_cols - 1)] mins = [min(y[col]) for col in range(0, num_cols - 1)] data_points = [] for row in range(1, num_rows): values = [] for col in range(1, num_cols): values.append( (float(input_sheet.cell(row, col).value) - mins[col - 1]) / (maxs[col - 1] - mins[col - 1])) d = DataPoint(values, int(output_sheet.cell(row, 0).value)) data_points.append(d) print(num_rows - 1, " points with dimesion=", num_cols - 1, " are added") return data_points data_points = load_data("722.xlsx") s = SOM(2, 8, 27) s.load_input_data(data_points) s.fit(2, 0.1) v = LVQ(2, 6, 5) v.load_data(data_points) v.train(5, 0.01, 2) s.predict(data_points) v.predict(data_points)
# Generate some virtual peptide sequences.
libnum = 1000  # 1000 sequences per sublibrary
h = Helices(seqnum=libnum)
r = Random(seqnum=libnum)
n = AMPngrams(seqnum=libnum, n_min=4)
h.generate_sequences()
r.generate_sequences(proba='AMP')
n.generate_sequences()

# Calculate molecular descriptors for the peptides of all three libraries.
library_seqs = np.hstack((h.sequences, r.sequences, n.sequences))
d = PeptideDescriptor(seqs=library_seqs, scalename='pepcats')
d.calculate_crosscorr(window=7)

# Train a SOM on the descriptors and print / plot the training error.
som = SOM(x=12, y=12)
som.fit(data=d.descriptor, epochs=100000, decay='hill')
print("Fit error: %.4f" % som.error)
som.plot_error_history(filename="som_error.png")

# Load known antimicrobial peptides (AMPs) and transmembrane sequences.
dataset = load_AMPvsTM()
d2 = PeptideDescriptor(dataset.sequences, 'pepcats')
d2.calculate_crosscorr(7)

# Labels 0-2 mark the three libraries (libnum rows each); label 3 marks the
# 206 reference rows appended for the point map.
targets = np.repeat([0, 1, 2, 3], [libnum, libnum, libnum, 206])
names = ['Helices', 'Random', 'nGrams', 'AMP']

# Plot SOM maps with the location of the AMPs.
point_data = np.vstack((d.descriptor, d2.descriptor[206:]))
som.plot_point_map(point_data, targets, names, filename="peptidesom.png")

density_data = np.vstack((d.descriptor, d2.descriptor))
som.plot_density_map(density_data, filename="density.png")

som.plot_distance_map(colormap='Reds', filename="distances.png")
# Generate some virtual peptide sequences.
libnum = 1000  # 1000 sequences per sublibrary
h = Helices(seqnum=libnum)
r = Random(seqnum=libnum)
n = AMPngrams(seqnum=libnum, n_min=4)
h.generate_sequences()
r.generate_sequences(proba='AMP')
n.generate_sequences()

# Calculate molecular descriptors for the peptides of all three libraries.
library_seqs = np.hstack((h.sequences, r.sequences, n.sequences))
d = PeptideDescriptor(seqs=library_seqs, scalename='pepcats')
d.calculate_crosscorr(window=7)

# Train a SOM on the descriptors and print / plot the training error.
som = SOM(x=10, y=10)
som.fit(data=d.descriptor, epochs=10000)
print("Fit error: %.4f" % som.error)
som.plot_error_history(filename="som_error.png")

# Load known antimicrobial peptides (AMPs) and transmembrane sequences.
dataset = load_AMPvsTM()
d2 = PeptideDescriptor(dataset.sequences, 'pepcats')
d2.calculate_crosscorr(7)

# Labels 0-2 mark the three libraries (libnum rows each); label 3 marks the
# 206 reference rows appended for the point map.
targets = np.repeat([0, 1, 2, 3], [libnum, libnum, libnum, 206])
names = ['Helices', 'Random', 'nGrams', 'AMP']

# Plot SOM maps with the location of the AMPs.
point_data = np.vstack((d.descriptor, d2.descriptor[206:]))
som.plot_point_map(point_data, targets, names, filename="peptidesom.png")

density_data = np.vstack((d.descriptor, d2.descriptor))
som.plot_density_map(density_data, filename="density.png")

# One colormap per entry of `names`.
colormaps = ['Oranges', 'Purples', 'Greens', 'Reds']