def reduce_ambiguous_seqs(seqs, seqtype='DNA'):
    '''Get the most specific ambiguous DNA/RNA sequence of a set.

    The sequences are read column by column. The set of characters found in
    each column is looked up in the reversed IUPAC ambiguity table, and the
    matching one-letter code is emitted for that column.

    Parameters:
        seqs: iterable of equal-length sequences (strings or anything that
            iterates over single characters). Columns are formed with
            zip(), so sequences are truncated to the shortest one.
        seqtype: 'DNA' (default) or 'RNA'; selects the ambiguity table.

    Returns:
        A string with one ambiguity code per column ('' for no sequences).

    Raises:
        ValueError: if seqtype is not 'DNA' or 'RNA', or if a column
            contains a combination of characters that has no code in the
            ambiguity table.
    '''
    # Validate before importing so that a bad seqtype fails with a clear
    # message instead of an unbound local name.
    if seqtype == 'DNA':
        from Bio.Data.IUPACData import ambiguous_dna_values as shared_table
    elif seqtype == 'RNA':
        from Bio.Data.IUPACData import ambiguous_rna_values as shared_table
    else:
        raise ValueError("seqtype must be 'DNA' or 'RNA', got %r" % (seqtype,))

    # shared_table is the module-level dictionary owned by Bio.Data.IUPACData.
    # Deleting 'X' from it would change it for every other user and make a
    # second call fail with KeyError, so 'X' is filtered out while building
    # the reverse mapping instead.
    # NOTE(review): 'X' is presumably excluded because it shares its base set
    # with another code -- confirm against the Biopython tables.
    code_for_bases = {frozenset(bases): code
                      for code, bases in shared_table.items()
                      if code != 'X'}

    # zip()/items() rather than izip()/iteritems() so the function runs
    # unchanged on both Python 2 and Python 3.
    consensus = []
    for position, column in enumerate(zip(*seqs)):
        code = code_for_bases.get(frozenset(column))
        if code is None:
            raise ValueError(
                "no %s ambiguity code for characters %r at position %d"
                % (seqtype, sorted(set(column)), position))
        consensus.append(code)
    return ''.join(consensus)
def complement(sequence): #TODO - Add a complement function to Bio/Seq.py? #There is already a complement method on the Seq and MutableSeq objects. return Seq.reverse_complement(sequence)[::-1] def sorted_dict(d): """A sorted repr of a dictionary.""" return "{%s}" % ", ".join("%s: %s" % (repr(k), repr(v)) for k, v in sorted(d.iteritems())) print print "DNA Ambiguity mapping:", sorted_dict(ambiguous_dna_values) print "DNA Complement mapping:", sorted_dict(ambiguous_dna_complement) for ambig_char, values in sorted(ambiguous_dna_values.iteritems()): compl_values = complement(values) print "%s={%s} --> {%s}=%s" % \ (ambig_char, values, compl_values, ambiguous_dna_complement[ambig_char]) assert set(compl_values) == set( ambiguous_dna_values[ambiguous_dna_complement[ambig_char]]) print print "RNA Ambiguity mapping:", sorted_dict(ambiguous_rna_values) print "RNA Complement mapping:", sorted_dict(ambiguous_rna_complement) for ambig_char, values in sorted(ambiguous_rna_values.iteritems()): compl_values = complement(values).replace( "T", "U") # need to help as no alphabet print "%s={%s} --> {%s}=%s" % \ (ambig_char, values, compl_values, ambiguous_rna_complement[ambig_char]) assert set(compl_values) == set(
assert "?" not in ambiguous_dna_values def complement(sequence) : #TODO - Add a complement function to Bio/Seq.py? #There is already a complement method on the Seq and MutableSeq objects. return Seq.reverse_complement(sequence)[::-1] def sorted_dict(d) : """A sorted repr of a dictionary.""" return "{%s}" % ", ".join("%s: %s" % (repr(k),repr(v)) \ for k,v in sorted(d.iteritems())) print print "DNA Ambiguity mapping:", sorted_dict(ambiguous_dna_values) print "DNA Complement mapping:", sorted_dict(ambiguous_dna_complement) for ambig_char, values in sorted(ambiguous_dna_values.iteritems()) : compl_values = complement(values) print "%s={%s} --> {%s}=%s" % \ (ambig_char, values, compl_values, ambiguous_dna_complement[ambig_char]) assert set(compl_values) == set(ambiguous_dna_values[ambiguous_dna_complement[ambig_char]]) print print "RNA Ambiguity mapping:", sorted_dict(ambiguous_rna_values) print "RNA Complement mapping:", sorted_dict(ambiguous_rna_complement) for ambig_char, values in sorted(ambiguous_rna_values.iteritems()) : compl_values = complement(values).replace("T","U") #need to help as no alphabet print "%s={%s} --> {%s}=%s" % \ (ambig_char, values, compl_values, ambiguous_rna_complement[ambig_char]) assert set(compl_values) == set(ambiguous_rna_values[ambiguous_rna_complement[ambig_char]]) print
#See bug 2380, Bio.Nexus was polluting the dictionary. assert "-" not in ambiguous_dna_values assert "?" not in ambiguous_dna_values def complement(sequence): #TODO - Add a complement function to Bio/Seq.py? #There is already a complement method on the Seq and MutableSeq objects. return Seq.reverse_complement(sequence)[::-1] print print "DNA Ambiguity mapping:", ambiguous_dna_values print "DNA Complement mapping:", ambiguous_dna_complement for ambig_char, values in ambiguous_dna_values.iteritems(): compl_values = complement(values) print "%s={%s} --> {%s}=%s" % \ (ambig_char, values, compl_values, ambiguous_dna_complement[ambig_char]) assert set(compl_values) == set( ambiguous_dna_values[ambiguous_dna_complement[ambig_char]]) print print "RNA Ambiguity mapping:", ambiguous_rna_values print "RNA Complement mapping:", ambiguous_rna_complement for ambig_char, values in ambiguous_rna_values.iteritems(): compl_values = complement(values).replace( "T", "U") #need to help as no alphabet print "%s={%s} --> {%s}=%s" % \ (ambig_char, values, compl_values, ambiguous_rna_complement[ambig_char]) assert set(compl_values) == set(
print "Checking ambiguous complements" print "==============================" #See bug 2380, Bio.Nexus was polluting the dictionary. assert "-" not in ambiguous_dna_values assert "?" not in ambiguous_dna_values def complement(sequence) : #TODO - Add a complement function to Bio/Seq.py? #There is already a complement method on the Seq and MutableSeq objects. return Seq.reverse_complement(sequence)[::-1] print print "DNA Ambiguity mapping:", ambiguous_dna_values print "DNA Complement mapping:", ambiguous_dna_complement for ambig_char, values in ambiguous_dna_values.iteritems() : compl_values = complement(values) print "%s={%s} --> {%s}=%s" % \ (ambig_char, values, compl_values, ambiguous_dna_complement[ambig_char]) assert set(compl_values) == set(ambiguous_dna_values[ambiguous_dna_complement[ambig_char]]) print print "RNA Ambiguity mapping:", ambiguous_rna_values print "RNA Complement mapping:", ambiguous_rna_complement for ambig_char, values in ambiguous_rna_values.iteritems() : compl_values = complement(values).replace("T","U") #need to help as no alphabet print "%s={%s} --> {%s}=%s" % \ (ambig_char, values, compl_values, ambiguous_rna_complement[ambig_char]) assert set(compl_values) == set(ambiguous_rna_values[ambiguous_rna_complement[ambig_char]]) print