# -3.63193668, 4.39840442, -1.52225387, -5.26723347, 1.34990893, # -5.72045913, 1.37062561]) energies = np.array([ -4.00896773, -7.54098962, -0.23402838, 3.14820036, -5.44986465, -6.16460973, 2.81354172, -3.84467562, -6.34558658, 0.13278807, -4.91226052, 2.78108339 ]) for ii in range(100): SEQUENCE = sequences[ii] print ii start = SEQUENCE.index(ms2_hairpin) end = start + len(ms2_hairpin) tf_bpp = np.array( tf.RNA(SEQUENCE, False, list(energies), True, False).get_bpp_full()) vienna_bpp = np.loadtxt( "vienna_bpp_data/{}_bpp_matrix.txt".format(SEQUENCE)) tf_bpp_matrix = pd.DataFrame(tf_bpp + tf_bpp.transpose(), index=list(SEQUENCE), columns=list(SEQUENCE)) vienna_bpp_matrix = pd.DataFrame(vienna_bpp + vienna_bpp.transpose(), index=list(SEQUENCE), columns=list(SEQUENCE)) subtitle_fs = 12. fig, ax = plt.subplots(figsize=(15, 5), ncols=2, nrows=1) if SEQUENCE in trouble:
import numpy as np
import random

# Benchmark: average time to construct a tf.RNA object as a function of
# sequence length, compared against a quadratic reference curve.

# time.clock() was removed in Python 3.8.  time.process_time() is its CPU-time
# replacement on Python 3; fall back to time.clock() where process_time does
# not exist (Python 2).  The `or` keeps time.clock from being looked up on
# interpreters that no longer have it.
cpu_clock = getattr(time, "process_time", None) or time.clock

L = 50  # longest sequence length that is timed
sequence_length = np.arange(5, L + 1)
seq_len_3 = []  # reference curve, one value per sequence length
bpp = []  # measured mean time per construction, in milliseconds
bpp_grad = []
part = []
grad = []
num = 100  # random sequences timed per length
energy = [5.69, 6., 4.09, -7.09]

for l in range(5, L + 1):
    # NOTE(review): the list and output file names say "3"/"n3" but the
    # reference curve is quadratic in the length - confirm which is intended.
    seq_len_3.append(7.6e-5 * l**2 + 0.01)

    # The random sequence generation is inside the timed region, exactly as
    # in the original measurement.
    start = cpu_clock()
    for i in range(num):
        sequence = ''.join(random.choice('AUGC') for _ in range(l))
        parts = tf.RNA(sequence, False, energy, False, False)
    end = cpu_clock()

    # Seconds for `num` constructions -> milliseconds per construction.
    bpp.append(1e3 * (end - start) / num)

np.savetxt("n3_partition_time.txt", bpp, delimiter=',')

# Black: reference curve.  Blue: measured timings.
plt.plot(sequence_length, seq_len_3, 'k')
plt.plot(sequence_length, bpp, 'b', linewidth=2)
plt.show()
]) '''BPP TRAINING DATA''' num_training_examples = 100 actual_bpp = [ ] # storing log(bpp) for closing stem of hairpin for each input sequence sequences = Sequence[: num_training_examples] #fil.Sequence[:num_training_examples] for i in range(num_training_examples): #ms2_prob = 1. #bp = ms2_hairpin_basepairs[i] #for mm in range(7): # ms2_prob *= tf.RNA(sequences[i], False, list(energies), True, False).get_bpp(bp[2*mm], bp[2*mm + 1]) bp = closing_bp_indices[i] #fil.closing_bp_indices[i] rna.append( np.log(10.) - tf.RNA(sequences[i], False, list(energies), True, False).get_log_bpp(bp[0], bp[1])) #rna.append(10./tf.RNA(sequences[i], False, list(energies), True, False).get_bpp(bp[0], bp[1])) #rna.append(np.log(10.) - np.log(ms2_prob)) actual_bpp.append(np.log(KDnoligand[i])) #actual_bpp.append(KDnoligand[i]) print 'finished gathering training data' rna = np.array(rna) actual_bpp = np.array(actual_bpp) RMSD = np.mean((rna - actual_bpp)**2) plt.title('R101 Synthetic w/ imperfect prior, hella far initial start') plt.text(4, 17.5, "RMSD = %.2f" % RMSD) #plt.scatter(actual_bpp, rna2, c='m', label = 'Full') #plt.scatter(actual_bpp, rna1, c='c', label = 'Kd < 25')
import filtering as fil

rna = []  # one tf.RNA object per training sequence
alpha = 1.  # weight of the data (squared-error) term in cost()
w = 1e-2  # weight of the quadratic prior term in cost()
energies = np.arange(-10, 2)  # initial energy parameters (12 values)
'''BPP TRAINING DATA'''
num_training_examples = 1000
# Target per sequence: log(1e-9) - log(KDnoligand), compared in cost()
# against the model's log base-pair probability.
actual_bpp = []
#energy_param = p.energies
sequences = fil.Sequence[:num_training_examples]  #p.training_sequences[:10]
for i in range(num_training_examples):
    rna.append(tf.RNA(sequences[i], False, list(energies), True, False))
    actual_bpp.append(np.log(1e-9) - np.log(fil.KDnoligand[i]))

# Parenthesised form prints the same text on Python 2 and Python 3.
print('finished gathering training data')

# Centre of the quadratic prior: one zero per energy parameter.
guess = np.zeros(len(energies))


def cost(param, i, j):
    """Return the regularised squared-error cost for examples i..j-1.

    For every index ``mm`` in ``range(i, j)`` the stored ``rna[mm]`` object
    is updated in place with ``param`` (via ``update_energy``), and the
    squared difference between ``actual_bpp[mm]`` and the model's
    ``get_log_bpp`` for that sequence's closing base pair is accumulated,
    weighted by ``alpha``.  A quadratic penalty ``w * |guess - param|**2``
    is then added.

    NOTE(review): the file's indentation was lost; the penalty is assumed to
    be applied once, after the loop, rather than once per example - confirm.

    Args:
        param: energy parameters, same length as ``guess``.
        i: first training-example index (inclusive).
        j: last training-example index (exclusive).

    Returns:
        The scalar cost.
    """
    l2 = 0.
    for mm in range(i, j):
        rna[mm].update_energy(list(param))
        bp = fil.closing_bp_indices[mm]
        residual = actual_bpp[mm] - rna[mm].get_log_bpp(bp[0], bp[1])
        l2 += alpha * residual**2
    prior = guess - param
    l2 += w * np.dot(prior, prior)
    return l2
0.45 ] Sequence = np.loadtxt("experimental/R101_Sequence.txt", dtype='string') closing_bp_indices = np.loadtxt("experimental/R101_closing_bp_indices.txt", dtype='int') ms2_hairpin_basepairs = np.loadtxt( "experimental/R101_ms2_hairpin_basepairs.txt", dtype='int') closingBP_kd = [] fullHP_kd = [] n = len(Sequence) for mm in range(n): bp = closing_bp_indices[mm] closingBP_kd.append(10. / tf.RNA(Sequence[mm], False, energies, True, False).get_bpp(bp[0], bp[1])) hp = ms2_hairpin_basepairs[mm] prob = 1. for ii in range(7): prob *= tf.RNA(Sequence[mm], False, energies, True, False).get_bpp(hp[2 * ii], hp[2 * ii + 1]) fullHP_kd.append(10. / prob) print mm #kd.append(np.log(1e-9) - tf.RNA(fil.Sequence[i], False, energies, True, False).get_log_bpp(bp[0],bp[1])) #log_exp_kd.append(np.log(fil.KDnoligand[i])) np.savetxt("synthetic/R101_closing_basepair_KD.txt", closingBP_kd, delimiter='\t') np.savetxt("synthetic/R101_full_ms2_KD.txt", fullHP_kd, delimiter='\t',
import tinyfold as tf import scipy.optimize as so import matplotlib.pyplot as plt rna = [] energies = [5.69, 6., 4.09, -7.09] actual_bpp = [] w = 0.01 f = open("sequences_train.txt", 'r') b = f.readline() i = 0 data = 0. while b: rna.append(tf.RNA(b, False, energies, True, True)) data += len(b)**2 actual_bpp.append(np.array(rna[i].get_bpp_full())) actual_bpp[i][np.isinf(actual_bpp[i])] = 0. b = f.readline() i += 1 f.close() #guess = np.array([5., 4., 4., -6.]) guess = np.arange(-20, -16) def cost(param, i, j): #param = np.array([p, 6., 4.09, -7.09]) l2 = 0. for mm in range(i, j):
'''R101 DATA'''
energies = np.array([])
#energies = np.array([-0.93, -1.1, -1.33, -2.08, -2.11, -2.24, -2.35, -2.36, -3.26, -3.42, 4.09, 0.45])
'''BPP TRAINING DATA'''
num_training_examples = 100
trouble_sequences = []
# Reference value per sequence, taken from vienna_closing_bpp.
actual_bpp = []
sequences = Sequence[:num_training_examples]  #fil.Sequence[:num_training_examples]

# Pair the tinyfold probability of each sequence's closing base pair (stored
# in `rna`) with the corresponding reference value (stored in `actual_bpp`).
for mm in range(num_training_examples):
    bp = closing_bp_indices[mm]  #fil.closing_bp_indices[i]
    model = tf.RNA(sequences[mm], False, list(energies), True, False)
    rna.append(model.get_bpp(bp[0], bp[1]))
    actual_bpp.append(vienna_closing_bpp[mm])

    # Alternative targets kept from earlier experiments (all disabled):
    #rna.append(np.log(10.) - tf.RNA(sequences[mm], False, list(energies), True, False).get_log_bpp(bp[0], bp[1]))
    #actual_bpp.append(np.log(vienna_closing_bpp[mm]))
    #ms2_prob = 1.
    #bp = ms2_hairpin_basepairs[mm]
    #for ii in range(7):
    #    ms2_prob *= tf.RNA(sequences[mm], False, list(energies), True, False).get_bpp(bp[2*ii], bp[2*ii + 1])
    #rna.append(ms2_prob)
    #if ((ms2_prob < 1e-5) and (vienna_ms2_bpp[mm] > 1e-5)):
    #    trouble_sequences.append(sequences[mm])
    #actual_bpp.append(vienna_ms2_bpp[mm])
    #rna.append(np.log(ms2_prob))
    #actual_bpp.append(np.log(vienna_ms2_bpp[mm]))
    #rna.append(ms2_prob)
    #actual_bpp.append(10./KDnoligand[mm])