def test_fold_rna(self):
    """RNA folding: compare computed minimum free energies to UNAFold's."""
    # UNAFold's free energy estimates for RNA oligos
    # most tests available at https://github.com/jaswindersingh2/SPOT-RNA/blob/master/sample_inputs/batch_seq.fasta
    reference_energies = {
        "ACCCCCUCCUUCCUUGGAUCAAGGGGCUCAA": -9.5,
        "AAGGGGUUGGUCGCCUCGACUAAGCGGCUUGGAAUUCC": -10.1,
        "UUGGAGUACACAACCUGUACACUCUUUC": -4.3,
        "AGGGAAAAUCCC": -3.3,
        "GCUUACGAGCAAGUUAAGCAAC": -4.6,
        "UGGGAGGUCGUCUAACGGUAGGACGGCGGACUCUGGAUCCGCUGGUGGAGGUUCGAGUCCUCCCCUCCCAGCCA": -32.8,
        "GGGCGAUGAGGCCCGCCCAAACUGCCCUGAAAAGGGCUGAUGGCCUCUACUG": -20.7,
        "GGGGGCAUAGCUCAGCUGGGAGAGCGCCUGCUUUGCACGCAGGAGGUCUGCGGUUCGAUCCCGCGCGCUCCCACCA": -31.4,
    }

    for sequence, reference in reference_energies.items():
        computed = dg(sequence, temp=37.0)

        # The allowed gap is half of |min(computed, reference)|: the factor
        # is 0.5, i.e. a 50% tolerance relative to the smaller value.
        tolerance = abs(0.5 * min(computed, reference))
        self.assertAlmostEqual(computed, reference, delta=tolerance)
def test_fold_dna(self):
    """DNA folding: compare computed minimum free energies to UNAFold's."""
    # UNAFold's free energy estimates for DNA oligos
    reference_energies = {
        # three branched structure
        "GGGAGGTCGTTACATCTGGGTAACACCGGTACTGATCCGGTGACCTCCC": -10.94,
        # four branched structure
        "GGGAGGTCGCTCCAGCTGGGAGGAGCGTTGGGGGTATATACCCCCAACACCGGTACTGATCCGGTGACCTCCC": -23.4,
        "CGCAGGGAUACCCGCG": -3.8,
        "TAGCTCAGCTGGGAGAGCGCCTGCTTTGCACGCAGGAGGT": -6.85,
        "GGGGGCATAGCTCAGCTGGGAGAGCGCCTGCTTTGCACGCAGGAGGTCTGCGGTTCGATCCCGCGCGCTCCCACCA": -15.50,
        "TGAGACGGAAGGGGATGATTGTCCCCTTCCGTCTCA": -18.10,
        "ACCCCCTCCTTCCTTGGATCAAGGGGCTCAA": -3.65,
    }

    for sequence, reference in reference_energies.items():
        computed = dg(sequence, temp=37.0)

        # The allowed gap is 60% of |min(computed, reference)|.
        tolerance = abs(0.6 * min(computed, reference))
        self.assertAlmostEqual(computed, reference, delta=tolerance)
def test_fold_cache(self):
    """Check the energy cache against the directly computed free energy."""
    sequence = "ATGGATTTAGATAGAT"
    energy_table = dg_cache(sequence)
    direct_energy = dg(sequence)

    # Compare against the cache entry at row 0, column len(sequence) - 1
    # (presumably the span covering the whole sequence), within +/- 1.
    last_index = len(sequence) - 1
    self.assertAlmostEqual(direct_energy, energy_table[0][last_index], delta=1)
def seqfoldScore(queries, returnSS=False):
    """
    Score sequences by the predicted minimum free energy of their fold.

    Uses seqfold here - identical features are available using nupack,
    though results are sometimes different.

    :param queries: encoded sequences; converted to letter strings with
        ``numbers2letters`` before folding
    :param returnSS: when True, also return secondary-structure information
    :return: ``energies`` (numpy array, one entry per sequence) when
        ``returnSS`` is False; otherwise the tuple
        ``(energies, strings, pairLists)`` where ``strings`` holds one
        dot-bracket string per sequence and ``pairLists`` one numpy array
        of ``ij`` index pairs (shifted by +1) per sequence
    """
    temperature = 37.0  # Celsius
    sequences = numbers2letters(queries)

    energies = np.zeros(len(sequences))
    strings = []
    pairLists = []
    # enumerate() owns the sequence index. It must not share a name with the
    # base-pair indices unpacked below, otherwise the energies of later
    # sequences are written to the wrong slot when returnSS is True.
    for seq_idx, sequence in enumerate(sequences):
        if len(sequence) == 1:
            en = np.inf
        else:
            # predicted minimum energy of the folded structure
            en = dg(sequence, temp=temperature)

        if np.isfinite(en):
            if en > 1500:
                # HACK: no idea why it does this but sometimes it adds 1600 -
                # we will upgrade this to nupack in the future
                energies[seq_idx] = en - 1600
            else:
                energies[seq_idx] = en
        else:
            # set infinities as being very unlikely
            energies[seq_idx] = 5

        if returnSS:
            structs = fold(sequence)  # identify structural features

            desc = ["."] * len(sequence)
            pairList = []
            for s in structs:
                pairList.append(s.ij[0])
                # only structures with exactly one ij pair are drawn as brackets
                if len(s.ij) == 1:
                    left, right = s.ij[0]
                    desc[left] = "("
                    desc[right] = ")"

            strings.append("".join(desc))  # secondary structure string
            # +1 shift on the indices (presumably to 1-based numbering)
            pairLists.append(np.asarray(pairList) + 1)

    if returnSS:
        return energies, strings, pairLists
    return energies
def test_fold(self):
    """Fold function: invalid sequences raise, a plain DNA sequence does not."""
    invalid_sequences = (
        "EASFEASFAST",  # nonsense sequence
        "ATGCATGACGATUU",  # both U and T, mix of RNA and DNA
    )
    for invalid in invalid_sequences:
        with self.assertRaises(RuntimeError):
            dg(invalid, 37.0)

    # should not throw
    dg("ATGGATTTAGATAGAT")