Exemple #1
0
#       -3.63193668,  4.39840442, -1.52225387, -5.26723347,  1.34990893,
#       -5.72045913,  1.37062561])
# Energy parameters handed (as a plain list) to tf.RNA for every sequence
# processed in the loop below.
energies = np.array([
    -4.00896773, -7.54098962, -0.23402838, 3.14820036, -5.44986465,
    -6.16460973, 2.81354172, -3.84467562, -6.34558658, 0.13278807, -4.91226052,
    2.78108339
])

# For each of the first 100 sequences, build two labelled base-pair
# probability (bpp) matrices -- one computed by tf.RNA, one loaded from the
# vienna_bpp_data directory -- and open a two-panel figure for them.
for ii in range(100):
    SEQUENCE = sequences[ii]
    # Progress indicator. The parenthesised form prints the same text on
    # Python 2 and is also valid syntax on Python 3.
    print(ii)
    # Span of the MS2 hairpin inside this sequence; .index() raises
    # ValueError when the hairpin does not occur.
    start = SEQUENCE.index(ms2_hairpin)
    end = start + len(ms2_hairpin)

    tf_bpp = np.array(
        tf.RNA(SEQUENCE, False, list(energies), True, False).get_bpp_full())

    # Reference matrix read from a text file named after the sequence itself.
    vienna_bpp = np.loadtxt(
        "vienna_bpp_data/{}_bpp_matrix.txt".format(SEQUENCE))

    # Each matrix is added to its own transpose (presumably the raw matrices
    # only fill one triangle -- TODO confirm); rows and columns are labelled
    # with the individual characters of the sequence.
    tf_bpp_matrix = pd.DataFrame(tf_bpp + tf_bpp.transpose(),
                                 index=list(SEQUENCE),
                                 columns=list(SEQUENCE))
    vienna_bpp_matrix = pd.DataFrame(vienna_bpp + vienna_bpp.transpose(),
                                     index=list(SEQUENCE),
                                     columns=list(SEQUENCE))

    subtitle_fs = 12.

    # One row, two columns: one axis per matrix.
    fig, ax = plt.subplots(figsize=(15, 5), ncols=2, nrows=1)
    if SEQUENCE in trouble:
Exemple #2
0
import numpy as np
import random

# Benchmark: average time to construct tf.RNA for random sequences of each
# length from 5 to L, plotted against a quadratic reference curve.
L = 50

sequence_length = np.arange(5, L + 1)
seq_len_3 = []  # reference curve 7.6e-5 * l**2 + 0.01, one value per length
bpp = []  # measured time per construction, in milliseconds
bpp_grad = []
part = []
grad = []

num = 100  # random sequences timed per length

energy = [5.69, 6., 4.09, -7.09]

# time.clock() was deprecated in Python 3.3 and removed in Python 3.8.
# Prefer time.perf_counter() where it exists and fall back to time.clock()
# on interpreters that predate it, so the script runs on both.
try:
    _timer = time.perf_counter
except AttributeError:
    _timer = time.clock

for l in range(5, L + 1):
    seq_len_3.append(7.6e-5 * l**2 + 0.01)
    # NOTE: the timed region also includes generating the random sequence.
    start = _timer()
    for i in range(num):
        sequence = ''.join(random.choice('AUGC') for _ in range(l))
        parts = tf.RNA(sequence, False, energy, False, False)
    end = _timer()
    # Seconds for `num` constructions -> milliseconds per construction.
    bpp.append(1e3 * (end - start) / num)

np.savetxt("n3_partition_time.txt", bpp, delimiter=',')

# Black: reference curve. Blue: measured timings.
plt.plot(sequence_length, seq_len_3, 'k')
plt.plot(sequence_length, bpp, 'b', linewidth=2)
plt.show()
Exemple #3
0
])
# BPP training data
num_training_examples = 100
# Reference values: log(KDnoligand[i]) for each input sequence.
actual_bpp = []

sequences = Sequence[:
                     num_training_examples]  #fil.Sequence[:num_training_examples]
for i in range(num_training_examples):
    #ms2_prob = 1.
    #bp = ms2_hairpin_basepairs[i]
    #for mm in range(7):
    #    ms2_prob *= tf.RNA(sequences[i], False, list(energies), True, False).get_bpp(bp[2*mm], bp[2*mm + 1])
    bp = closing_bp_indices[i]  #fil.closing_bp_indices[i]
    # Model value on the same log scale as the reference:
    # log(10) - get_log_bpp(closing pair), the log-space counterpart of the
    # commented-out 10. / get_bpp(...) form below.
    rna.append(
        np.log(10.) - tf.RNA(sequences[i], False, list(energies), True,
                             False).get_log_bpp(bp[0], bp[1]))
    #rna.append(10./tf.RNA(sequences[i], False, list(energies), True, False).get_bpp(bp[0], bp[1]))
    #rna.append(np.log(10.) - np.log(ms2_prob))
    actual_bpp.append(np.log(KDnoligand[i]))
    #actual_bpp.append(KDnoligand[i])
# Parenthesised form: same output on Python 2, valid syntax on Python 3.
print('finished gathering training data')

rna = np.array(rna)
actual_bpp = np.array(actual_bpp)

# Root-mean-square deviation between model and reference values. The square
# root was missing before, so the figure labelled "RMSD" actually showed the
# mean squared deviation.
RMSD = np.sqrt(np.mean((rna - actual_bpp)**2))

plt.title('R101 Synthetic w/ imperfect prior, hella far initial start')
plt.text(4, 17.5, "RMSD = %.2f" % RMSD)
#plt.scatter(actual_bpp, rna2, c='m', label = 'Full')
#plt.scatter(actual_bpp, rna1, c='c', label = 'Kd < 25')
Exemple #4
0
import filtering as fil

# One tf.RNA object per training sequence; cost() updates and queries these.
rna = []

# Weight applied to each squared data residual in cost().
alpha = 1.

# Weight applied to the squared distance between the parameters and `guess`
# in cost().
w = 1e-2
energies = np.arange(-10, 2)
# BPP training data
num_training_examples = 1000
# Targets: log(1e-9) - log(KDnoligand[i]) for each input sequence.
actual_bpp = []
#energy_param = p.energies
sequences = fil.Sequence[:num_training_examples]  #p.training_sequences[:10]
for i in range(num_training_examples):
    rna.append(tf.RNA(sequences[i], False, list(energies), True, False))
    actual_bpp.append(np.log(1e-9) - np.log(fil.KDnoligand[i]))
# Parenthesised form: same output on Python 2, valid syntax on Python 3.
print('finished gathering training data')

# Centre of the quadratic penalty in cost(); 12 entries, the same length as
# `energies`.
guess = np.zeros(12)


def cost(param, i, j):
    """Return the penalised squared-error cost over examples i .. j-1.

    For every index in ``range(i, j)`` the stored ``rna`` object is updated
    with ``param``, and the squared difference between ``actual_bpp`` and the
    object's ``get_log_bpp`` value at the closing base pair is accumulated,
    scaled by ``alpha``. A penalty of ``w`` times the squared distance
    between ``guess`` and ``param`` is added at the end.
    """
    total = 0.
    for idx in range(i, j):
        molecule = rna[idx]
        # A fresh list per call, exactly as many copies as examples visited.
        molecule.update_energy(list(param))
        pair = fil.closing_bp_indices[idx]
        residual = actual_bpp[idx] - molecule.get_log_bpp(pair[0], pair[1])
        total += alpha * residual**2
    # Quadratic pull of the parameters towards the prior guess.
    deviation = guess - param
    total += w * np.dot(deviation, deviation)
    return total
Exemple #5
0
    0.45
]

# Inputs: one sequence per row, plus per-sequence base-pair index tables.
Sequence = np.loadtxt("experimental/R101_Sequence.txt", dtype='string')
closing_bp_indices = np.loadtxt("experimental/R101_closing_bp_indices.txt",
                                dtype='int')
ms2_hairpin_basepairs = np.loadtxt(
    "experimental/R101_ms2_hairpin_basepairs.txt", dtype='int')

closingBP_kd = []  # 10 / bpp of the closing base pair, per sequence
fullHP_kd = []  # 10 / product of the 7 hairpin base-pair bpps, per sequence
n = len(Sequence)

for mm in range(n):
    # Build the model once per sequence and reuse it for all eight base-pair
    # queries; previously an identical tf.RNA was constructed for each query.
    # NOTE(review): assumes get_bpp() is a read-only query, as the
    # stored-object usage elsewhere suggests -- confirm.
    rna_model = tf.RNA(Sequence[mm], False, energies, True, False)

    bp = closing_bp_indices[mm]
    closingBP_kd.append(10. / rna_model.get_bpp(bp[0], bp[1]))

    # hp is read as 7 consecutive (i, j) index pairs laid out flat.
    hp = ms2_hairpin_basepairs[mm]
    prob = 1.
    for ii in range(7):
        prob *= rna_model.get_bpp(hp[2 * ii], hp[2 * ii + 1])
    fullHP_kd.append(10. / prob)
    # Progress indicator; same output on Python 2, valid syntax on Python 3.
    print(mm)
    #kd.append(np.log(1e-9) - tf.RNA(fil.Sequence[i], False, energies, True, False).get_log_bpp(bp[0],bp[1]))
    #log_exp_kd.append(np.log(fil.KDnoligand[i]))
np.savetxt("synthetic/R101_closing_basepair_KD.txt",
           closingBP_kd,
           delimiter='\t')
np.savetxt("synthetic/R101_full_ms2_KD.txt",
           fullHP_kd,
           delimiter='\t',
Exemple #6
0
import tinyfold as tf
import scipy.optimize as so
import matplotlib.pyplot as plt

rna = []  # one tf.RNA object per line of the training file
energies = [5.69, 6., 4.09, -7.09]
actual_bpp = []  # full bpp matrix of each sequence, with inf entries zeroed

w = 0.01

i = 0  # number of sequences read so far
data = 0.  # running sum of len(line)**2 over all lines read

# The context manager closes the file even if constructing an RNA object or
# reading its matrix raises; the previous open()/close() pair leaked the
# handle in that case.
with open("sequences_train.txt", 'r') as f:
    b = f.readline()
    while b:
        # NOTE(review): readline() keeps the trailing newline, so both the
        # string given to tf.RNA and len(b) include it -- confirm that this
        # is intended. Behaviour is left unchanged here.
        rna.append(tf.RNA(b, False, energies, True, True))
        data += len(b)**2
        bpp_full = np.array(rna[i].get_bpp_full())
        # Replace infinite entries with zero before storing the matrix.
        bpp_full[np.isinf(bpp_full)] = 0.
        actual_bpp.append(bpp_full)
        b = f.readline()
        i += 1

#guess = np.array([5., 4., 4., -6.])
guess = np.arange(-20, -16)


def cost(param, i, j):
    #param = np.array([p, 6., 4.09, -7.09])
    l2 = 0.
    for mm in range(i, j):
Exemple #7
0
# Section marker (bare string literal; it has no runtime effect).
'''R101 DATA'''
# NOTE(review): the active energies array is empty while the commented-out
# line below carries 12 values -- confirm the empty array is intentional.
energies = np.array([])
#energies = np.array([-0.93, -1.1, -1.33, -2.08, -2.11, -2.24, -2.35, -2.36, -3.26, -3.42, 4.09, 0.45])
# Section marker (bare string literal; it has no runtime effect).
'''BPP TRAINING DATA'''
num_training_examples = 100
actual_bpp = [
]  # filled below with vienna_closing_bpp[mm], one entry per input sequence

# Only appended to by the commented-out check inside the loop below.
trouble_sequences = []

sequences = Sequence[:
                     num_training_examples]  #fil.Sequence[:num_training_examples]
# Pair each tf.RNA prediction with its reference value, one per sequence.
for mm in range(num_training_examples):
    bp = closing_bp_indices[mm]  #fil.closing_bp_indices[i]
    # Prediction: get_bpp at this sequence's closing base pair.
    rna.append(
        tf.RNA(sequences[mm], False, list(energies), True,
               False).get_bpp(bp[0], bp[1]))
    actual_bpp.append(vienna_closing_bpp[mm])
    # Alternative formulations kept for reference: log-scale values, the
    # product over the 7 MS2 hairpin base pairs, and experimental KD targets.
    #rna.append(np.log(10.) - tf.RNA(sequences[mm], False, list(energies), True, False).get_log_bpp(bp[0], bp[1]))
    #actual_bpp.append(np.log(vienna_closing_bpp[mm]))
    #ms2_prob = 1.
    #bp = ms2_hairpin_basepairs[mm]
    #for ii in range(7):
    #    ms2_prob *= tf.RNA(sequences[mm], False, list(energies), True, False).get_bpp(bp[2*ii], bp[2*ii + 1])
    #rna.append(ms2_prob)
    #if ((ms2_prob < 1e-5) and (vienna_ms2_bpp[mm] > 1e-5)):
    #    trouble_sequences.append(sequences[mm])
    #actual_bpp.append(vienna_ms2_bpp[mm])
    #rna.append(np.log(ms2_prob))
    #actual_bpp.append(np.log(vienna_ms2_bpp[mm]))
    #rna.append(ms2_prob)
    #actual_bpp.append(10./KDnoligand[mm])