def test_get_secondary_structure(self):
    """get_secondary_structure should give correct result.

    Calls get_secondary_structure on a fixed 12-nt sequence and checks the
    three values it returns: the sequence, the structure string and the
    energy.
    """
    test_seq = 'AAACCCGGGUUU'
    expected_struct = '((((....))))'
    expected_energy = -0.80

    obs_seq, obs_struct, obs_energy = get_secondary_structure(test_seq)

    # The sequence must come back unchanged, paired with the expected
    # structure string.
    self.assertEqual(obs_seq, test_seq)
    self.assertEqual(obs_struct, expected_struct)
    # The expected energy is a float literal, so compare with a tolerance
    # instead of relying on exact floating-point equality.
    self.assertAlmostEqual(obs_energy, expected_energy)
def run_locarnap(seqsin, numkept, cpus=1, foldless=False):
    """Run locarna-p on a set of sequences; return alignment and structure.

    seqsin: list of (header, seq) pairs in MinimalFastaParser format,
        [(header, seq), (header, seq)]. Headers are expected in the form
        Header_#; the number after the first underscore is used as the
        abundance when ranking. The list is not modified.
    numkept: number of most abundant sequences handed to locarna-p. Groups
        with fewer sequences than this are rejected unless foldless is set.
    cpus: value passed to locarna-p as its '--cpus' parameter.
    foldless: when True, groups smaller than numkept are processed anyway.

    Returns a 2-tuple (alignment, structure):
      * ("", "") when the group has fewer than numkept sequences and
        foldless is False;
      * for a single sequence, the result of LoadSeqs on the input and the
        structure from get_secondary_structure for that sequence;
      * otherwise the alignment from create_locarnap_alignment and its
        structure string with every '-' replaced by '.'.
    """
    # make sure group has enough sequences before continuing
    if len(seqsin) < numkept and not foldless:
        return "", ""

    if len(seqsin) == 1:
        # locarna-p needs at least two sequences, so a lone sequence is
        # folded directly instead of being aligned.
        return (LoadSeqs(data=seqsin, moltype=RNA),
                get_secondary_structure(seqsin[0][1])[1])

    # headers come out in format Header_# so split to get # and rank by
    # abundance; sorted() leaves the caller's list in its original order
    ranked = sorted(seqsin,
                    key=lambda entry: int(entry[0].split('_')[1]),
                    reverse=True)
    # cut to numkept most abundant sequences
    kept = ranked[:numkept]

    aln, struct = create_locarnap_alignment(kept, RNA, struct=True,
                                            params={'--cpus': cpus})
    return aln, struct.replace('-', ".")
def plot_rna_structure(seq, path='', subseqs=None, name='test'):
    """plot RNA structure using Vienna package

    seq: sequence to fold with vienna.get_secondary_structure and draw
        with vienna.RNAplot.
    path: directory the PNG is written into.
    subseqs: optional subsequences of seq to highlight. Each one found in
        seq is marked at its first occurrence; ones that do not occur in
        seq are skipped. Colours cycle through the two built-in colours.
    name: base name of the output image, written as <name>.png.

    Returns the path of the PNG file.
    Raises OSError if the 'convert' executable cannot be started.
    """
    import subprocess
    import cogent.app.vienna_package as vienna

    colors = [" 1. 0. .2", " 0. .9 .5"]
    # The conversion step below reads test_ss.ps, so the record name stays
    # fixed; presumably RNAplot names its output after it (TODO confirm).
    seqname = 'test'

    seq, struct, _energy = vienna.get_secondary_structure(seq)
    rp = vienna.RNAplot()

    marks = []
    for i, sub in enumerate(subseqs or ()):
        pos = seq.find(sub)
        if pos < 0:
            # Not part of seq: nothing to highlight.
            continue
        # +1 presumably converts to the 1-based positions expected by
        # format_cmark_values -- confirm against that helper.
        start = pos + 1
        marks.append(format_cmark_values(range(start, start + len(sub)),
                                         rgb=colors[i % len(colors)]))
    if marks:
        rp.Parameters['--pre'].on('"%s"' % ''.join(marks))

    rp(['>' + seqname, seq, struct])

    filename = os.path.join(path, '%s.png' % name)
    # Argument list instead of a shell string, so paths containing spaces
    # or shell metacharacters are passed through unchanged.
    subprocess.call(['convert', 'test_ss.ps', filename])
    return filename
def plot_rna_structure(seq, path='', subseqs=None, name='test'):
    """plot RNA structure using Vienna package

    seq: sequence to fold with vienna.get_secondary_structure and draw
        with vienna.RNAplot.
    path: directory the PNG is written into.
    subseqs: optional subsequences of seq to highlight. Each one found in
        seq is marked at its first occurrence; ones that do not occur in
        seq are skipped. Colours cycle through the two built-in colours.
    name: base name of the output image, written as <name>.png.

    Returns the path of the PNG file.
    Raises OSError if the 'convert' executable cannot be started.
    """
    import subprocess
    import cogent.app.vienna_package as vienna

    colors = [" 1. 0. .2", " 0. .9 .5"]
    # The conversion step below reads test_ss.ps, so the record name stays
    # fixed; presumably RNAplot names its output after it (TODO confirm).
    seqname = 'test'

    seq, struct, _energy = vienna.get_secondary_structure(seq)
    rp = vienna.RNAplot()

    marks = []
    for i, sub in enumerate(subseqs or ()):
        pos = seq.find(sub)
        if pos < 0:
            # Not part of seq: nothing to highlight.
            continue
        # +1 presumably converts to the 1-based positions expected by
        # format_cmark_values -- confirm against that helper.
        start = pos + 1
        marks.append(format_cmark_values(range(start, start + len(sub)),
                                         rgb=colors[i % len(colors)]))
    if marks:
        rp.Parameters['--pre'].on('"%s"' % ''.join(marks))

    rp(['>' + seqname, seq, struct])

    filename = os.path.join(path, '%s.png' % name)
    # Argument list instead of a shell string, so paths containing spaces
    # or shell metacharacters are passed through unchanged.
    subprocess.call(['convert', 'test_ss.ps', filename])
    return filename
#Now need to iteratively refine the groups down #check to make sure we need to first if not skipiter: startcount = 1 endcount = 0 iteration = 1 secs = time() #try to match sequences in unfoldable clusters to other clusters that were folded #write seq to orphanseqs file if can't be grouped print len(nostruct), "of", count, "structureless" orphanseqs = open(otufolder + "orphanseqs.txt", 'w') orphanseqs.write("initial clustering\n") for cluster in nostruct: for seq in clusters[cluster]: found = False sequence, structure, energy = get_secondary_structure(seq[1]) for gstruct in structgroups: if score_rnaforester(structure, gstruct) >= foresterscore: structgroups[gstruct].append(seq) found = True break if not found: orphanseqs.write('>%s\n%s\n' % seq) structgroups[structure] = [seq] orphanseqs.close() #wipe out clusters dict to save memory clusters = 0 print "start: " + str(len(structgroups)) + " initial groups" #initial clustering by structures generated in first folding