Beispiel #1
0
    def test_get_secondary_structure(self):
        """get_secondary_structure should return the sequence, fold and energy."""
        test_seq = 'AAACCCGGGUUU'
        # expected (sequence, structure string, energy) for test_seq
        expected = (test_seq, '((((....))))', -0.80)

        # unpack explicitly so a result of the wrong length fails right here
        obs_seq, obs_struct, obs_energy = get_secondary_structure(test_seq)

        # compare field by field, in the order the function returns them
        observed = (obs_seq, obs_struct, obs_energy)
        for got, wanted in zip(observed, expected):
            self.assertEqual(got, wanted)
 def test_get_secondary_structure(self):
     """Check the sequence, structure and energy from get_secondary_structure."""
     query = 'AAACCCGGGUUU'
     folded = get_secondary_structure(query)
     seq_out, struct_out, energy_out = folded

     # the input sequence must come back unchanged
     self.assertEqual(seq_out, query)
     # structure and energy expected for this sequence
     self.assertEqual(struct_out, '((((....))))')
     self.assertEqual(energy_out, -0.80)
def run_locarnap(seqsin, numkept, cpus=1,foldless=False):
    """Align and fold a group of sequences with locarna-p.

    seqsin: list of (header, seq) tuples in MinimalFastaParser format, with
        headers of the form Header_# where # is the abundance count.
        NOTE: when the group is aligned, this list is sorted in place.
    numkept: how many of the most abundant sequences are aligned; groups
        smaller than this are rejected unless foldless is set.
    cpus: value handed to locarna-p as its --cpus parameter.
    foldless: when true, do not reject groups smaller than numkept.

    Returns an (alignment, structure) pair, or ("", "") for a rejected group.
    """
    group_size = len(seqsin)

    # reject groups without enough sequences before doing any work
    too_small = group_size < numkept
    if too_small and not foldless:
        return "", ""

    # a lone sequence is loaded with LoadSeqs and folded on its own
    if group_size == 1:
        lone_aln = LoadSeqs(data=seqsin, moltype=RNA)
        lone_struct = get_secondary_structure(seqsin[0][1])[1]
        return lone_aln, lone_struct

    def abundance(record):
        # header looks like Header_#; the field after the first '_' is the count
        return int(record[0].split('_')[1])

    # most abundant first, then keep at most numkept of them
    seqsin.sort(key=abundance, reverse=True)
    if group_size > numkept:
        seqsin = seqsin[:numkept]

    locarna_params = {'--cpus': cpus}
    aln, raw_struct = create_locarnap_alignment(
        seqsin, RNA, struct=True, params=locarna_params)
    # every '-' in the returned structure string is replaced by '.'
    return aln, raw_struct.replace('-', ".")
Beispiel #4
0
def plot_rna_structure(seq, path='', subseqs=(), name='test'):
    """Plot the secondary structure of seq using the Vienna package.

    The sequence is folded with vienna.get_secondary_structure, drawn with
    vienna.RNAplot, and test_ss.ps is then converted to a PNG file with the
    external `convert` command.

    seq: sequence to fold and plot.
    path: directory the PNG file name is built from.
    subseqs: subsequences of seq to highlight. The i-th subsequence uses
        the i-th colour of the palette, wrapping around when there are more
        subsequences than colours. Subsequences that do not occur in the
        folded sequence are skipped.
    name: base name of the PNG file (without the .png extension).

    Returns the PNG file name (path joined with name + '.png').
    """
    import cogent.app.vienna_package as vienna

    colors = [" 1. 0. .2", " 0. .9 .5"]
    # the energy is not needed here; keep it out of the way of other locals
    seq, struct, _energy = vienna.get_secondary_structure(seq)
    # NOTE(review): the convert call below reads test_ss.ps, which presumably
    # is the file RNAplot writes for a record named 'test' -- confirm before
    # changing seqname.
    seqname = 'test'
    rp = vienna.RNAplot()

    marks = []
    for i, s in enumerate(subseqs):
        pos = seq.find(s)
        if pos == -1:
            # not present in the folded sequence: nothing to highlight
            continue
        # str.find is 0-based; the marked positions are shifted up by one
        start = pos + 1
        rgb = colors[i % len(colors)]
        marks.append(format_cmark_values(range(start, start + len(s)), rgb=rgb))
    if marks:
        rp.Parameters['--pre'].on('"%s"' % ''.join(marks))

    rp(['>' + seqname, seq, struct])
    filename = os.path.join(path, '%s.png' % name)
    # NOTE(review): filename is interpolated into a shell command unquoted,
    # so paths containing spaces or shell metacharacters will not work.
    os.system('convert test_ss.ps %s' % filename)
    return filename
Beispiel #5
0
def plot_rna_structure(seq, path='', subseqs=(), name='test'):
    """Plot the secondary structure of seq using the Vienna package.

    The sequence is folded with vienna.get_secondary_structure, drawn with
    vienna.RNAplot, and test_ss.ps is then converted to a PNG file with the
    external `convert` command.

    seq: sequence to fold and plot.
    path: directory the PNG file name is built from.
    subseqs: subsequences of seq to highlight. The i-th subsequence uses
        the i-th colour of the palette, wrapping around when there are more
        subsequences than colours. Subsequences that do not occur in the
        folded sequence are skipped.
    name: base name of the PNG file (without the .png extension).

    Returns the PNG file name (path joined with name + '.png').
    """
    import cogent.app.vienna_package as vienna

    colors = [" 1. 0. .2", " 0. .9 .5"]
    # the energy is not needed here; keep it out of the way of other locals
    seq, struct, _energy = vienna.get_secondary_structure(seq)
    # NOTE(review): the convert call below reads test_ss.ps, which presumably
    # is the file RNAplot writes for a record named 'test' -- confirm before
    # changing seqname.
    seqname = 'test'
    rp = vienna.RNAplot()

    marks = []
    for i, s in enumerate(subseqs):
        pos = seq.find(s)
        if pos == -1:
            # not present in the folded sequence: nothing to highlight
            continue
        # str.find is 0-based; the marked positions are shifted up by one
        start = pos + 1
        rgb = colors[i % len(colors)]
        marks.append(format_cmark_values(range(start, start + len(s)), rgb=rgb))
    if marks:
        rp.Parameters['--pre'].on('"%s"' % ''.join(marks))

    rp(['>' + seqname, seq, struct])
    filename = os.path.join(path, '%s.png' % name)
    # NOTE(review): filename is interpolated into a shell command unquoted,
    # so paths containing spaces or shell metacharacters will not work.
    os.system('convert test_ss.ps %s' % filename)
    return filename
    #Now need to iteratively refine the groups down
    #check to make sure we need to first
    if not skipiter:
        startcount = 1
        endcount = 0
        iteration = 1
        secs = time()
        #try to match sequences in unfoldable clusters to other clusters that were folded
        #write seq to orphanseqs file if can't be grouped
        print len(nostruct), "of", count, "structureless"
        orphanseqs = open(otufolder + "orphanseqs.txt", 'w')
        orphanseqs.write("initial clustering\n")
        for cluster in nostruct:
            for seq in clusters[cluster]:
                found = False
                sequence, structure, energy = get_secondary_structure(seq[1])
                for gstruct in structgroups:
                    if score_rnaforester(structure, gstruct) >= foresterscore:
                        structgroups[gstruct].append(seq)
                        found = True
                        break  
                if not found:
                    orphanseqs.write('>%s\n%s\n' % seq)
                    structgroups[structure] = [seq]
        orphanseqs.close()

        #wipe out clusters dict to save memory
        clusters = 0              

        print "start: " + str(len(structgroups)) + " initial groups"
        #initial clustering by structures generated in first folding