def predict(self, seq):
        """Predict the secondary structure of an RNA sequence.

        The model is fed both ``seq`` and its reverse. The output row holding
        the larger maximum decides which orientation wins, and the position of
        that maximum is looked up among the values of ``self.a`` to recover the
        bracket string. When the reversed input wins, the bracket string is
        passed through ``rna.dot_reverse`` before being attached to ``seq``.

        Args:
            seq: RNA sequence.

        Returns:
            m: Molecule object with predicted bracket notation.

        Raises:
            ValueError: If ``self.library`` is neither 'mxnet' nor 'lasagne',
                or if the lasagne ``self.data_model`` is neither 'linear' nor
                'matrix'.
        """
        reversed_seq = seq[::-1]
        if self.library == 'mxnet':
            example = mx.io.NDArrayIter(np.array([rna.encode_rna(seq), rna.encode_rna(reversed_seq)]))
            prob = self.model.predict(example)
        elif self.library == 'lasagne':
            if self.data_model == 'linear':
                prob = self.model(np.array([rna.encode_rna(seq), rna.encode_rna(reversed_seq)]))
            elif self.data_model == 'matrix':
                prob = self.model(np.array([rna.complementarity_matrix(rna.Molecule(seq)),
                                            rna.complementarity_matrix(rna.Molecule(reversed_seq))]))
            else:
                # Fail early with a clear message instead of crashing later on
                # a placeholder that has no .max().
                raise ValueError("Unsupported data model: {!r}".format(self.data_model))
        else:
            raise ValueError("Unsupported library: {!r}".format(self.library))

        # Ties go to the reversed orientation, exactly as the strict '>' implies.
        backwards = not prob[0].max() > prob[1].max()
        best_index = prob[1].argmax() if backwards else prob[0].argmax()

        # self.a maps bracket string -> index; invert it for the winning index.
        # Falls back to an empty string when no entry matches.
        dot = next((bracket for bracket, index in self.a.items() if index == best_index), '')
        if backwards:
            dot = rna.dot_reverse(dot)
        return rna.Molecule(seq, dot)
Example #2
0
 def test_substrings_method(self):
     """Check the substrings returned for window sizes 6 and 3."""
     molecule = rna.Molecule("GGCCUGAGGAGACUCAGAAGCC", "((((((((....)))))..)))")
     six_long = molecule.get_substrings(6)
     # Exactly one substring of size 6 is expected, and it must be a hairpin.
     self.assertEqual(type(six_long), list)
     self.assertEqual(len(six_long), 1)
     self.assertEqual(six_long[0].dot, '(....)')
     three_long = molecule.get_substrings(3)
     self.assertEqual(len(three_long), 2)
Example #3
0
    def predict(self, molecule):
        """Predict molecule's secondary structure.

        Resets the network state for ``molecule`` (temperature, sequence,
        neurons and weights), runs ``self.num_epoch`` epochs, then converts
        sufficiently active neurons into base pairs. If ``molecule.dot`` is
        empty it is set in place to an all-unpaired string first.

        Args:
            molecule: Molecule object whose structure is to be predicted.

        Returns:
            A new Molecule with the sequence of ``molecule`` and the
            predicted bracket notation.
        """
        if not molecule.dot:
            molecule.dot = '.' * len(molecule.seq)

        # Per-molecule state; set before compute_weights() is invoked.
        self.t = self.start_t
        self.seq = molecule.seq
        self.n = len(molecule.seq)
        self.neurons = np.random.uniform(0, 0, self.n * (self.n - 1) // 2)
        self.w = self.compute_weights()

        for epoch_idx in range(self.num_epoch):
            self.epoch()
            self.t += self.t / (epoch_idx + 1)

        def paired(structure, left, right):
            """Return structure with '(' at left and ')' at right."""
            return structure[:left] + '(' + structure[left + 1: right] + ')' + structure[right + 1:]

        structure = molecule.dot
        for k in range(len(self.neurons)):
            # Map the flat neuron index k back to matrix coordinates (x, y).
            x = self.n - 2 - math.floor(math.sqrt(-8 * k + 4 * self.n * (self.n - 1) - 7) / 2.0 - 0.5)
            y = int(k + x + 1 - self.n * (self.n - 1) / 2 + (self.n - x) * ((self.n - x) - 1) / 2)
            # Only positions that are still unpaired may form a new pair.
            if structure[x] != '.' or structure[y] != '.':
                continue
            # Full-weight nodes need activation above 0.5 ...
            if self.node_weight(x, y) == self.ni and self.neurons[k] > 0.5:
                structure = paired(structure, x, y)
            # ... half-weight nodes need the stricter 0.7.
            if self.node_weight(x, y) == self.ni / 2 and self.neurons[k] > 0.7:
                structure = paired(structure, x, y)
        return rna.Molecule(molecule.seq, structure)
    def preprocess(self):
        """Preprocess loaded data.

        Each row of ``self.X`` supplies a sequence (``row[0, 0]``) and a
        bracket string (``row[0, 1]``). With ``self.substrings`` set, every
        substring of length ``self.sequence_length`` is used; otherwise only
        rows whose sequence has exactly that length are used. An example whose
        mirrored bracket string was already emitted is flipped, so both
        orientations share one label.

        Side effects: sets ``self.a`` (bracket string -> integer label) and
        ``self.num_labels``. Labels are numbered in sorted order of the
        bracket strings so the mapping is reproducible between runs.

        Returns:
            X, y: NDArray of sequences and NDArray of labels.
        """
        encoded = []
        labels = []
        seen = set()  # labels emitted so far, for O(1) membership tests

        def orient(seq, dot):
            """Flip the example if its mirrored label has been seen before."""
            mirrored = rna.dot_reverse(dot)
            if mirrored in seen:
                return seq[::-1], mirrored
            return seq, dot

        for row in self.X:
            if self.substrings:
                molecule = rna.Molecule(row[0, 0], row[0, 1])
                for sub in molecule.get_substrings(self.sequence_length):
                    seq, dot = orient(sub.seq, sub.dot)
                    # Substrings are always linearly encoded, whatever
                    # self.data_model says.
                    encoded.append(rna.encode_rna(seq))
                    labels.append(dot)
                    seen.add(dot)
            elif len(row[0, 0]) == self.sequence_length:
                seq, dot = orient(row[0, 0], row[0, 1])
                if self.data_model == 'linear':
                    encoded.append(rna.encode_rna(seq))
                elif self.data_model == 'matrix':
                    encoded.append(rna.complementarity_matrix(rna.Molecule(seq)))
                labels.append(dot)
                seen.add(dot)

        X = np.array(encoded)
        labels = labels[:self.max_examples]
        # Sorted enumeration gives a deterministic label numbering; plain set
        # iteration order depends on string hashing.
        self.a = {dot: idx for idx, dot in enumerate(sorted(set(labels)))}
        self.num_labels = len(self.a)
        y = np.array([self.a[dot] for dot in labels])
        # Slice the first axis only so an empty (1-D) X does not raise.
        return X[:self.max_examples], y
Example #5
0
 def test_complementarity_matrix_funtion(self):
     """Check selected entries of the complementarity matrix and its symmetry."""
     m = rna.Molecule("GGCCUGAGGAGACUCAGAAGCC", "((((((((....)))))..)))")
     p = rna.complementarity_matrix(m)
     last = len(m.seq) - 1
     self.assertEqual(p[0, last], 2)
     self.assertEqual(p[0, last - 1], 2)
     self.assertEqual(p[0, last - 2], 0)
     self.assertEqual(p[m.seq.find('G'), m.seq.find('U')], 1)
     # Compare element-wise: p.all() and p.T.all() reduce the same entries to
     # one boolean each, so comparing those two scalars can never fail.
     self.assertTrue((p == p.T).all())
Example #6
0
    def mutate(self, molecule):
        """Mutate molecule by inserting or deleting basepairs.

        Two random positions at least five apart are drawn. If the row of the
        first and the column of the second are empty in the pair matrix, a
        pair is inserted between them. If the two positions are already
        paired with each other, that pair is removed and the result is
        mutated again.

        Args:
            molecule: Molecule that should be mutated.

        Returns:
            mutated: Mutated Molecule.
        """
        pairs = rna.pair_matrix(molecule)
        sequence, structure = molecule.seq, molecule.dot
        size = len(sequence)
        left = random.randrange(size - 5)
        right = random.randrange(left + 5, size)

        def with_ends(brackets, first, second):
            """Return brackets with the characters at left/right replaced."""
            return brackets[:left] + first + brackets[left + 1: right] + second + brackets[right + 1:]

        if pairs[left, :].sum() == 0 and pairs[:, right].sum() == 0:
            structure = with_ends(structure, '(', ')')
        if pairs[left, right] == 1:
            # Drop the existing pair, then retry the mutation on the result.
            structure = with_ends(structure, '.', '.')
            structure = self.mutate(rna.Molecule(sequence, structure)).dot
        return rna.Molecule(sequence, structure)
Example #7
0
    def train(self, X=None, eta=0.001, limit=10, num_iter=5, log=False):
        """Train predictor on example data.

        For every example the network is run via ``self.predict``; then, for
        each ordered pair of neurons, one of ``self.alpha``, ``self.beta``,
        ``self.gamma`` or ``self.mi`` is decreased by the scaled difference
        between the response to the known pairing and the response to the
        current neuron state. The final parameter values are printed.

        Args:
            X: List of Molecule objects that are known examples. (use loaded data if None)
            eta: Learning ratio.
            limit: Maximum number of examples to train.
            num_iter: Number of training iterations.
            log: If true, print a line after each trained sequence.

        Raises:
            Exception: If X is None and no data has been loaded.
        """
        if X is None:
            if self.X.shape[0] == 0:
                raise Exception("Too few examples.")
            # Only loaded sequences shorter than 50 are used for training.
            X = [rna.Molecule(i[0, 0], i[0, 1]) for i in self.X if len(i[0, 0]) < 50]
        X = X[:limit]
        for _ in range(num_iter):
            for s in X:
                # Called for its side effects: the loops below read self.n,
                # self.neurons and self.w.
                self.predict(s)
                pair = rna.pair_matrix(s)
                # Flatten the strict upper triangle of the known pair matrix.
                example = np.array([pair[i, j]
                                    for i in range(self.n)
                                    for j in range(i + 1, self.n)])
                num_neurons = len(self.neurons)
                for i in range(num_neurons):
                    r, c = self.get_upper_triangular_coordinates(i)
                    # Depends on i only, so compute it once per outer step.
                    dif = eta * (math.tanh(np.dot(example, self.w[i])) - math.tanh(np.dot(self.neurons, self.w[i])))
                    for j in range(i + 1, num_neurons):
                        x, y = self.get_upper_triangular_coordinates(j)
                        if r == x:
                            self.alpha -= dif
                        elif c == y:
                            self.beta -= dif
                        # Crossing pairs: compare coordinates of both neurons
                        # (the flat indices i, j are not positions).
                        elif r < x < c < y or x < r < y < c:
                            self.gamma -= dif
                        else:
                            self.mi -= dif
                if log:
                    print("Sequence {} trained...".format(s))
        print(self.alpha, self.beta, self.gamma, self.mi)
Example #8
0
 def test_constructor_no_bracket(self):
     """A Molecule built from a sequence alone keeps that sequence."""
     sequence = 'AUGC'
     built = rna.Molecule(sequence)
     self.assertEqual(built.seq, sequence)
Example #9
0
 def test_constructor_incorrect_seq(self):
     """Constructing a Molecule from the sequence 'AUGB' must raise."""
     invalid_sequence = 'AUGB'
     with self.assertRaises(Exception):
         rna.Molecule(invalid_sequence)
Example #10
0
 def test_constructor_with_bracket(self):
     """A Molecule built with brackets keeps its sequence and matching lengths."""
     sequence, brackets = 'AUGC', '(..)'
     built = rna.Molecule(sequence, brackets)
     self.assertEqual(built.seq, sequence)
     seq_length = len(built.seq)
     dot_length = len(built.dot)
     self.assertEqual(seq_length, dot_length)
Example #11
0
 def test_constructor_with_incorrect_bracket2(self):
     """Constructing a Molecule with the bracket string ')..(' must raise."""
     sequence, bad_brackets = 'AUGC', ')..('
     with self.assertRaises(Exception):
         rna.Molecule(sequence, bad_brackets)
Example #12
0
 def test_pair_matrix_funtion(self):
     """Check the pair matrix total and that the matrix is symmetric."""
     m = rna.Molecule("GGCCUGAGGAGACUCAGAAGCC", "((((((((....)))))..)))")
     p = rna.pair_matrix(m)
     self.assertEqual(p.sum(), 16)
     # Compare element-wise: p.all() and p.T.all() reduce the same entries to
     # one boolean each, so comparing those two scalars can never fail.
     self.assertTrue((p == p.T).all())
Example #13
0
 def test_dot_reverse_function(self):
     """The reversed bracket string starts with '(((.' and stays balanced."""
     molecule = rna.Molecule("GGCCUGAGGAGACUCAGAAGCC", "((((((((....)))))..)))")
     mirrored = rna.dot_reverse(molecule.dot)
     self.assertEqual(mirrored[:4], '(((.')
     opening = mirrored.count('(')
     closing = mirrored.count(')')
     self.assertEqual(opening, closing)
Example #14
0
 def test_match_parentheses_function(self):
     """match_parentheses on position 3 of the sample structure gives 16."""
     molecule = rna.Molecule("GGCCUGAGGAGACUCAGAAGCC", "((((((((....)))))..)))")
     partner = rna.match_parentheses(molecule.dot, 3)
     self.assertEqual(partner, 16)
Example #15
0
 def test_evaluate_method(self):
     """The reference structure must score higher than two altered ones."""
     sequence = "GGCCUGAGGAGACUCAGAAGCC"
     reference = rna.Molecule(sequence, "((((((((....)))))..)))")
     outer_pair_removed = rna.Molecule(sequence, ".(((((((....)))))..)).")
     rearranged = rna.Molecule(sequence, "(((.(..((....))..).)))")
     for other in (outer_pair_removed, rearranged):
         self.assertGreater(reference.evaluate(), other.evaluate())
Example #16
0
 def test_repair_method(self):
     """repair() must rewrite the bracket string to the expected structure."""
     molecule = rna.Molecule("GGCCUGAGGAGACUCAGAAGCA", "(((((((((..))))))..)))")
     molecule.repair()
     expected = ".(((((((....)))))..))."
     self.assertEqual(molecule.dot, expected)
Example #17
0
 def test_show_method(self):
     """show() must raise on a molecule constructed from a sequence only."""
     sequence_only = rna.Molecule('AGGCU')
     with self.assertRaises(Exception):
         sequence_only.show()