コード例 #1
0
ファイル: Seq.py プロジェクト: manucorreia/biopython
    def back_transcribe(self):
        """Returns the DNA sequence from an RNA sequence. New Seq object.

        >>> from Bio.Seq import Seq
        >>> from Bio.Alphabet import IUPAC
        >>> messenger_rna = Seq("AUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAG", \
                                IUPAC.unambiguous_rna)
        >>> messenger_rna
        Seq('AUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAG', IUPACUnambiguousRNA())
        >>> messenger_rna.back_transcribe()
        Seq('ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG', IUPACUnambiguousDNA())

        Trying to back-transcribe a protein or DNA sequence raises an
        exception.

        >>> my_protein = Seq("MAIVMGR", IUPAC.protein)
        >>> my_protein.back_transcribe()
        Traceback (most recent call last):
           ...
        ValueError: Proteins cannot be back transcribed!
        """
        if isinstance(Alphabet._get_base_alphabet(self.alphabet),
                      Alphabet.ProteinAlphabet):
            raise ValueError("Proteins cannot be back transcribed!")
        if isinstance(Alphabet._get_base_alphabet(self.alphabet),
                      Alphabet.DNAAlphabet):
            raise ValueError("DNA cannot be back transcribed!")

        if self.alphabet == IUPAC.unambiguous_rna:
            alphabet = IUPAC.unambiguous_dna
        elif self.alphabet == IUPAC.ambiguous_rna:
            alphabet = IUPAC.ambiguous_dna
        else:
            alphabet = Alphabet.generic_dna
        return Seq(str(self).replace("U", "T").replace("u", "t"), alphabet)
コード例 #2
0
ファイル: Seq.py プロジェクト: nuin/biopython
    def back_transcribe(self):
        """Returns the DNA sequence from an RNA sequence. New Seq object.

        >>> from Bio.Seq import Seq
        >>> from Bio.Alphabet import IUPAC
        >>> messenger_rna = Seq("AUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAG", \
                                IUPAC.unambiguous_rna)
        >>> messenger_rna
        Seq('AUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAG', IUPACUnambiguousRNA())
        >>> messenger_rna.back_transcribe()
        Seq('ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG', IUPACUnambiguousDNA())

        Trying to back-transcribe a protein or DNA sequence raises an
        exception.

        >>> my_protein = Seq("MAIVMGR", IUPAC.protein)
        >>> my_protein.back_transcribe()
        Traceback (most recent call last):
           ...
        ValueError: Proteins cannot be back transcribed!
        """
        if isinstance(Alphabet._get_base_alphabet(self.alphabet),
                      Alphabet.ProteinAlphabet) :
            raise ValueError("Proteins cannot be back transcribed!")
        if isinstance(Alphabet._get_base_alphabet(self.alphabet),
                      Alphabet.DNAAlphabet) :
            raise ValueError("DNA cannot be back transcribed!")

        if self.alphabet==IUPAC.unambiguous_rna:
            alphabet = IUPAC.unambiguous_dna
        elif self.alphabet==IUPAC.ambiguous_rna:
            alphabet = IUPAC.ambiguous_dna
        else:
            alphabet = Alphabet.generic_dna
        return Seq(str(self).replace("U", "T").replace("u", "t"), alphabet)
コード例 #3
0
ファイル: Seq.py プロジェクト: manucorreia/biopython
    def complement(self):
        """Returns the complement sequence. New Seq object.

        >>> from Bio.Seq import Seq
        >>> from Bio.Alphabet import IUPAC
        >>> my_dna = Seq("CCCCCGATAG", IUPAC.unambiguous_dna)
        >>> my_dna
        Seq('CCCCCGATAG', IUPACUnambiguousDNA())
        >>> my_dna.complement()
        Seq('GGGGGCTATC', IUPACUnambiguousDNA())

        Trying to complement a protein sequence raises an exception.

        >>> my_protein = Seq("MAIVMGR", IUPAC.protein)
        >>> my_protein.complement()
        Traceback (most recent call last):
           ...
        ValueError: Proteins do not have complements!
        """
        if isinstance(Alphabet._get_base_alphabet(self.alphabet),
                      Alphabet.ProteinAlphabet):
            raise ValueError("Proteins do not have complements!")
        if isinstance(Alphabet._get_base_alphabet(self.alphabet),
                      Alphabet.DNAAlphabet):
            d = ambiguous_dna_complement
        elif isinstance(Alphabet._get_base_alphabet(self.alphabet),
                        Alphabet.RNAAlphabet):
            d = ambiguous_rna_complement
        elif 'U' in self._data and 'T' in self._data:
            #TODO - Handle this cleanly?
            raise ValueError("Mixed RNA/DNA found")
        elif 'U' in self._data:
            d = ambiguous_rna_complement
        else:
            d = ambiguous_dna_complement
        ttable = self.__maketrans(d)
        #Much faster on really long sequences than the previous loop based one.
        #thx to Michael Palmer, University of Waterloo
        s = str(self).translate(ttable)
        return Seq(s, self.alphabet)
コード例 #4
0
ファイル: Seq.py プロジェクト: nuin/biopython
    def complement(self):
        """Returns the complement sequence. New Seq object.

        >>> from Bio.Seq import Seq
        >>> from Bio.Alphabet import IUPAC
        >>> my_dna = Seq("CCCCCGATAG", IUPAC.unambiguous_dna)
        >>> my_dna
        Seq('CCCCCGATAG', IUPACUnambiguousDNA())
        >>> my_dna.complement()
        Seq('GGGGGCTATC', IUPACUnambiguousDNA())

        Trying to complement a protein sequence raises an exception.

        >>> my_protein = Seq("MAIVMGR", IUPAC.protein)
        >>> my_protein.complement()
        Traceback (most recent call last):
           ...
        ValueError: Proteins do not have complements!
        """
        if isinstance(Alphabet._get_base_alphabet(self.alphabet),
                      Alphabet.ProteinAlphabet) :
            raise ValueError("Proteins do not have complements!")
        if isinstance(Alphabet._get_base_alphabet(self.alphabet),
                      Alphabet.DNAAlphabet) :
            d = ambiguous_dna_complement
        elif isinstance(Alphabet._get_base_alphabet(self.alphabet),
                        Alphabet.RNAAlphabet) :
            d = ambiguous_rna_complement
        elif 'U' in self._data and 'T' in self._data:
            #TODO - Handle this cleanly?
            raise ValueError("Mixed RNA/DNA found")
        elif 'U' in self._data:
            d = ambiguous_rna_complement
        else:
            d = ambiguous_dna_complement
        ttable = self.__maketrans(d)
        #Much faster on really long sequences than the previous loop based one.
        #thx to Michael Palmer, University of Waterloo
        s = str(self).translate(ttable)
        return Seq(s, self.alphabet)
コード例 #5
0
ファイル: Seq.py プロジェクト: manucorreia/biopython
    def complement(self):
        """Modify the mutable sequence to take on its complement.

        Trying to complement a protein sequence raises an exception.

        No return value"""
        if isinstance(Alphabet._get_base_alphabet(self.alphabet),
                      Alphabet.ProteinAlphabet):
            raise ValueError("Proteins do not have complements!")
        if self.alphabet in (IUPAC.ambiguous_dna, IUPAC.unambiguous_dna):
            d = ambiguous_dna_complement
        elif self.alphabet in (IUPAC.ambiguous_rna, IUPAC.unambiguous_rna):
            d = ambiguous_rna_complement
        elif 'U' in self.data and 'T' in self.data:
            #TODO - Handle this cleanly?
            raise ValueError("Mixed RNA/DNA found")
        elif 'U' in self.data:
            d = ambiguous_rna_complement
        else:
            d = ambiguous_dna_complement
        c = dict([(x.lower(), y.lower()) for x, y in d.iteritems()])
        d.update(c)
        self.data = map(lambda c: d[c], self.data)
        self.data = array.array('c', self.data)
コード例 #6
0
ファイル: Seq.py プロジェクト: nuin/biopython
    def complement(self):
        """Modify the mutable sequence to take on its complement.

        Trying to complement a protein sequence raises an exception.

        No return value"""
        if isinstance(Alphabet._get_base_alphabet(self.alphabet),
                      Alphabet.ProteinAlphabet) :
            raise ValueError("Proteins do not have complements!")
        if self.alphabet in (IUPAC.ambiguous_dna, IUPAC.unambiguous_dna):
            d = ambiguous_dna_complement
        elif self.alphabet in (IUPAC.ambiguous_rna, IUPAC.unambiguous_rna):
            d = ambiguous_rna_complement
        elif 'U' in self.data and 'T' in self.data :
            #TODO - Handle this cleanly?
            raise ValueError("Mixed RNA/DNA found")
        elif 'U' in self.data:
            d = ambiguous_rna_complement
        else:
            d = ambiguous_dna_complement
        c = dict([(x.lower(), y.lower()) for x,y in d.iteritems()])
        d.update(c)
        self.data = map(lambda c: d[c], self.data)
        self.data = array.array('c', self.data)
コード例 #7
0
ファイル: Seq.py プロジェクト: manucorreia/biopython
    def translate(self, table="Standard", stop_symbol="*", to_stop=False):
        """Turns a nucleotide sequence into a protein sequence. New Seq object.

        Trying to back-transcribe a protein sequence raises an exception.
        This method will translate DNA or RNA sequences.

        Trying to translate a protein sequence raises an exception.

        table - Which codon table to use?  This can be either a name
                (string) or an NCBI identifier (integer).  This defaults
                to the "Standard" table.
        stop_symbol - Single character string, what to use for terminators.
                This defaults to the asterisk, "*".
        to_stop - Boolean, defaults to False meaning do a full translation
                continuing on past any stop codons (translated as the
                specified stop_symbol).  If True, translation is terminated
                at the first in frame stop codon (and the stop_symbol is
                not appended to the returned protein sequence).

        e.g. Using the standard table,
        
        >>> coding_dna = Seq("GTGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG")
        >>> coding_dna.translate()
        Seq('VAIVMGR*KGAR*', HasStopCodon(ExtendedIUPACProtein(), '*'))
        >>> coding_dna.translate(stop_symbol="@")
        Seq('VAIVMGR@KGAR@', HasStopCodon(ExtendedIUPACProtein(), '@'))
        >>> coding_dna.translate(to_stop=True)
        Seq('VAIVMGR', ExtendedIUPACProtein())
        
        Now using NCBI table 2, where TGA is not a stop codon:
        
        >>> coding_dna.translate(table=2)
        Seq('VAIVMGRWKGAR*', HasStopCodon(ExtendedIUPACProtein(), '*'))
        >>> coding_dna.translate(table=2, to_stop=True)
        Seq('VAIVMGRWKGAR', ExtendedIUPACProtein())

        If the sequence has no in-frame stop codon, then the to_stop argument
        has no effect:

        >>> coding_dna2 = Seq("TTGGCCATTGTAATGGGCCGC")
        >>> coding_dna2.translate()
        Seq('LAIVMGR', ExtendedIUPACProtein())
        >>> coding_dna2.translate(to_stop=True)
        Seq('LAIVMGR', ExtendedIUPACProtein())

        NOTE - Ambiguous codons like "TAN" or "NNN" could be an amino acid
        or a stop codon.  These are translated as "X".  Any invalid codon
        (e.g. "TA?" or "T-A") will throw a TranslationError.

        NOTE - Does NOT support gapped sequences.

        NOTE - This does NOT behave like the python string's translate
        method.  For that use str(my_seq).translate(...) instead.
        """
        try:
            table_id = int(table)
        except ValueError:
            table_id = None
        if isinstance(table, str) and len(table) == 256:
            raise ValueError("The Seq object translate method DOES NOT take " \
                             + "a 256 character string mapping table like " \
                             + "the python string object's translate method. " \
                             + "Use str(my_seq).translate(...) instead.")
        if isinstance(Alphabet._get_base_alphabet(self.alphabet),
                      Alphabet.ProteinAlphabet):
            raise ValueError("Proteins cannot be translated!")
        if self.alphabet == IUPAC.unambiguous_dna:
            if table_id is None:
                codon_table = CodonTable.unambiguous_dna_by_name[table]
            else:
                codon_table = CodonTable.unambiguous_dna_by_id[table_id]
        elif self.alphabet == IUPAC.ambiguous_dna:
            if table_id is None:
                codon_table = CodonTable.ambiguous_dna_by_name[table]
            else:
                codon_table = CodonTable.ambiguous_dna_by_id[table_id]
        elif self.alphabet == IUPAC.unambiguous_rna:
            if table_id is None:
                codon_table = CodonTable.unambiguous_rna_by_name[table]
            else:
                codon_table = CodonTable.unambiguous_rna_by_id[table_id]
        elif self.alphabet == IUPAC.ambiguous_rna:
            if table_id is None:
                codon_table = CodonTable.ambiguous_rna_by_name[table]
            else:
                codon_table = CodonTable.ambiguous_rna_by_id[table_id]
        else:
            if table_id is None:
                codon_table = CodonTable.ambiguous_generic_by_name[table]
            else:
                codon_table = CodonTable.ambiguous_generic_by_id[table_id]
        protein = _translate_str(str(self), codon_table, stop_symbol, to_stop)
        if stop_symbol in protein:
            alphabet = Alphabet.HasStopCodon(codon_table.protein_alphabet,
                                             stop_symbol=stop_symbol)
        else:
            alphabet = codon_table.protein_alphabet
        return Seq(protein, alphabet)
コード例 #8
0
ファイル: Seq.py プロジェクト: nuin/biopython
    def translate(self, table="Standard", stop_symbol="*", to_stop=False):
        """Turns a nucleotide sequence into a protein sequence. New Seq object.

        Trying to back-transcribe a protein sequence raises an exception.
        This method will translate DNA or RNA sequences.

        Trying to translate a protein sequence raises an exception.

        table - Which codon table to use?  This can be either a name
                (string) or an NCBI identifier (integer).  This defaults
                to the "Standard" table.
        stop_symbol - Single character string, what to use for terminators.
                This defaults to the asterisk, "*".
        to_stop - Boolean, defaults to False meaning do a full translation
                continuing on past any stop codons (translated as the
                specified stop_symbol).  If True, translation is terminated
                at the first in frame stop codon (and the stop_symbol is
                not appended to the returned protein sequence).

        e.g. Using the standard table,
        
        >>> coding_dna = Seq("GTGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG")
        >>> coding_dna.translate()
        Seq('VAIVMGR*KGAR*', HasStopCodon(ExtendedIUPACProtein(), '*'))
        >>> coding_dna.translate(stop_symbol="@")
        Seq('VAIVMGR@KGAR@', HasStopCodon(ExtendedIUPACProtein(), '@'))
        >>> coding_dna.translate(to_stop=True)
        Seq('VAIVMGR', ExtendedIUPACProtein())
        
        Now using NCBI table 2, where TGA is not a stop codon:
        
        >>> coding_dna.translate(table=2)
        Seq('VAIVMGRWKGAR*', HasStopCodon(ExtendedIUPACProtein(), '*'))
        >>> coding_dna.translate(table=2, to_stop=True)
        Seq('VAIVMGRWKGAR', ExtendedIUPACProtein())

        If the sequence has no in-frame stop codon, then the to_stop argument
        has no effect:

        >>> coding_dna2 = Seq("TTGGCCATTGTAATGGGCCGC")
        >>> coding_dna2.translate()
        Seq('LAIVMGR', ExtendedIUPACProtein())
        >>> coding_dna2.translate(to_stop=True)
        Seq('LAIVMGR', ExtendedIUPACProtein())

        NOTE - Ambiguous codons like "TAN" or "NNN" could be an amino acid
        or a stop codon.  These are translated as "X".  Any invalid codon
        (e.g. "TA?" or "T-A") will throw a TranslationError.

        NOTE - Does NOT support gapped sequences.

        NOTE - This does NOT behave like the python string's translate
        method.  For that use str(my_seq).translate(...) instead.
        """
        try:
            table_id = int(table)
        except ValueError:
            table_id = None
        if isinstance(table, str) and len(table)==256 :
            raise ValueError("The Seq object translate method DOES NOT take " \
                             + "a 256 character string mapping table like " \
                             + "the python string object's translate method. " \
                             + "Use str(my_seq).translate(...) instead.")
        if isinstance(Alphabet._get_base_alphabet(self.alphabet),
                      Alphabet.ProteinAlphabet) :
            raise ValueError("Proteins cannot be translated!")
        if self.alphabet==IUPAC.unambiguous_dna:
            if table_id is None:
                codon_table = CodonTable.unambiguous_dna_by_name[table]
            else:
                codon_table = CodonTable.unambiguous_dna_by_id[table_id]
        elif self.alphabet==IUPAC.ambiguous_dna:
            if table_id is None:
                codon_table = CodonTable.ambiguous_dna_by_name[table]
            else:
                codon_table = CodonTable.ambiguous_dna_by_id[table_id]
        elif self.alphabet==IUPAC.unambiguous_rna:
            if table_id is None:
                codon_table = CodonTable.unambiguous_rna_by_name[table]
            else:
                codon_table = CodonTable.unambiguous_rna_by_id[table_id]
        elif self.alphabet==IUPAC.ambiguous_rna:
            if table_id is None:
                codon_table = CodonTable.ambiguous_rna_by_name[table]
            else:
                codon_table = CodonTable.ambiguous_rna_by_id[table_id]
        else:
            if table_id is None:
                codon_table = CodonTable.ambiguous_generic_by_name[table]
            else:
                codon_table = CodonTable.ambiguous_generic_by_id[table_id]
        protein = _translate_str(str(self), codon_table, stop_symbol, to_stop)
        if stop_symbol in protein :
            alphabet = Alphabet.HasStopCodon(codon_table.protein_alphabet,
                                             stop_symbol = stop_symbol)
        else :
            alphabet = codon_table.protein_alphabet
        return Seq(protein, alphabet)