def codon_counter(nt, codons, nt_type='dna'):
    """Tabulate relative codon usage per amino acid for a coding region.

    The sequence is scanned in steps of three from index 0 for the first
    ``ATG`` and the first ``TAG``.  The slice between them (stop codon
    included) is cut into codons, each codon is filed under the first amino
    acid whose entry in ``codons`` contains it, and the share of every codon
    among all codons filed under that amino acid is computed.  The result of
    ``GC123`` for the trimmed sequence is printed before returning.

    Args:
        nt: Nucleotide sequence, either a plain string or a ``Seq``.
        codons: Mapping of amino acid -> collection of codons encoding it.
        nt_type: ``'rna'`` back-transcribes ``nt`` to DNA first; any other
            value leaves ``nt`` untouched.

    Returns:
        dict mapping amino acid -> list of ``(codon, fraction)`` tuples with
        fractions rounded to three decimals.  Codons that appear in no entry
        of ``codons`` are grouped under the key ``None``.
    """
    from collections import Counter

    # Grabs the key (aa) for the given value (codon)
    def get_key(val):
        for key, value in codons.items():
            if val in value:
                return key
        return None

    # Handles a RNA string passed to the codon counter
    if nt_type == 'rna':
        if not isinstance(nt, Seq):
            nt = Seq(nt)
        nt = nt.back_transcribe()

    start = None
    stop = None

    # Start and stop codons identified for the sequence
    for frame in range(0, len(nt), 3):
        codon = nt[frame:frame + 3]
        # Compare against None explicitly: a start codon at index 0 is falsy
        # and would otherwise be overwritten by a later in-frame ATG.
        if start is None and codon == 'ATG':
            print(f'Start codon {codon} identified at position {frame}')
            start = frame
        # mRNA-1273 contains all three stop codons at the end of the sequence
        # TAG was the last one before the 3' UTR so all stop codons included in the codon table
        if stop is None and codon == 'TAG':
            print(f'Stop codon {codon} identified at position {frame}')
            stop = frame + 3

    # Trimmed nt sequence starting at ATG and ending at TAG
    nt_cds = nt[start:stop]

    # Collect, per amino acid, every codon occurrence in reading order
    usage = {}
    for frame in range(0, len(nt_cds), 3):
        codon = nt_cds[frame:frame + 3]
        usage.setdefault(get_key(codon), []).append(str(codon))

    # (codon, times used for this aa / total codons seen for this aa)
    codon_table = {}
    for aa, used in usage.items():
        total = len(used)
        codon_table[aa] = [(codon, round(count / total, 3))
                           for codon, count in Counter(used).items()]
    print(GC123(nt_cds))

    return codon_table
コード例 #2
0
def main():
    (opts, args) = getoptions()

    # Load PWMs
    pssms = load_motifs(opts.pwm_dir, opts.pseudocount)

    if opts.testseq is not None:
        if opts.seqtype == 'RNA':
            seq = Seq(opts.testseq,
                      IUPAC.IUPACUnambiguousRNA()).back_transcribe()
            seq.alphabet = IUPAC.IUPACUnambiguousDNA()
        else:
            seq = Seq(opts.testseq, IUPAC.IUPACUnambiguousDNA())
        final = scan_all(pssms, seq, opts)
        print final.to_csv(sep="\t", index=False)
    else:
        # Scan in sequence
        print >> sys.stderr, "Scanning sequences ",
        tic = time.time()
        for seqrecord in SeqIO.parse(open(args[0]), "fasta"):

            seq = seqrecord.seq
            if opts.seqtype == "RNA":
                seq = seq.back_transcribe()
            seq.alphabet = IUPAC.IUPACUnambiguousDNA()

            final = scan_all(pssms, seq, opts)
            print final.to_csv(sep="\t", index=False)

        toc = time.time()
        print >> sys.stderr, "done in %0.2f seconds!" % (float(toc - tic))
コード例 #3
0
ファイル: app.py プロジェクト: christelvanharen/blok4afvink4
def web_pagina():
    """
    Classify the submitted sequence and render the matching result page.

    Reads the ``seq`` query parameter (upper-cased; empty string when
    absent) and tests it with check_dna, check_rna and check_eiwit, in that
    order.  DNA is shown with its transcript and translation, RNA with its
    back-transcript and translation, and a protein with a pre-filled NCBI
    BLAST link.
    :return: The rendered "afvink4.html" page for DNA, RNA, protein or
        unrecognised input.
    """
    seq = request.args.get("seq", '')
    seq = seq.upper()
    if check_dna(seq):
        bio_dna = Seq(seq, generic_dna)
        return render_template("afvink4.html", soort='DNA',
                               een=(bio_dna.transcribe()),
                               twee=(bio_dna.translate()))
    elif check_rna(seq):
        bio_rna = Seq(seq, generic_rna)
        # Same template as every other branch; the name used to be misspelled
        # "avink4.html" here, so RNA input pointed at a different file.
        return render_template("afvink4.html",
                               soort='RNA',
                               een=(bio_rna.back_transcribe()),
                               twee=(bio_rna.translate()))

    elif check_eiwit(seq):
        return render_template("afvink4.html",
                               soort='Eiwit',
                               een="Klik op de link en druk op BLAST.",
                               twee="https://blast.ncbi.nlm.nih.gov/Blast.cgi?PROGRAM=blastp&PAGE_TYPE=BlastSearch&QUERY=" + str(seq))
    else:
        return render_template("afvink4.html",
                               soort = 'Geen DNA, RNA of eiwit',
                               een='',
                               twee='')
コード例 #4
0
ファイル: motif_scan.py プロジェクト: miha-skalic/motif_scan
def main():
	(opts, args) = getoptions()

	# Load PWMs
	pssms = load_motifs(opts.pwm_dir, opts.pseudocount)

	if opts.testseq is not None:
		if opts.seqtype == 'RNA':
			seq = Seq(opts.testseq, IUPAC.IUPACUnambiguousRNA()).back_transcribe()
			seq.alphabet = IUPAC.IUPACUnambiguousDNA()
		else:
			seq = Seq(opts.testseq, IUPAC.IUPACUnambiguousDNA())
		final = scan_all(pssms, seq, opts)
		print final.to_csv(sep="\t", index = False)
	else:
		# Scan in sequence
		print >> sys.stderr, "Scanning sequences ",
		tic = time.time()
		for seqrecord in SeqIO.parse(open(args[0]), "fasta"):

			seq = seqrecord.seq
			if opts.seqtype == "RNA":
				seq = seq.back_transcribe()
			seq.alphabet = IUPAC.IUPACUnambiguousDNA()

			final = scan_all(pssms, seq, opts)
			print final.to_csv(sep="\t", index = False)

		toc = time.time()
		print >> sys.stderr, "done in %0.2f seconds!" % (float(toc - tic))
コード例 #5
0
    def manage_rna(data):
        """Derive the DNA, RNA and protein views of an RNA record.

        ``data.sequence`` is wrapped as an unambiguous-RNA ``Seq``.  From it
        the coding DNA, its complement and reverse complement, the RNA
        complement, and the full and to-first-stop translations (using
        ``data.translation_table``) are rendered as strings and packed into a
        ``Processed_dna_rna``, which is handed to
        ``Sequencer.extract_sequence_data``; that call's result is returned.
        """
        rna = Seq(data.sequence, IUPAC.unambiguous_rna)

        stamp = data.creation_date.strftime("%d/%m/%Y, %H:%M:%S")
        table = data.translation_table
        # Back-transcribe once and reuse the DNA form for all three DNA fields
        dna = rna.back_transcribe()

        fields = dict(
            creation_date=stamp,
            translation_table=table,
            coding_dna=str(dna),
            dna_c=str(dna.complement()),
            dna_rc=str(dna.reverse_complement()),
            rna_m=str(rna),
            rna_m_c=str(rna.complement()),
            protein=str(rna.translate(table=table)),
            protein_to_stop=str(rna.translate(table=table, to_stop=True)),
        )
        treated_data = Processed_dna_rna(**fields)

        return Sequencer.extract_sequence_data(treated_data)
コード例 #6
0
def get_switch_recognition_seq(trigger, sequence_type, length_unpaired):
    ''' Return the RNA reverse complement of a target trigger sequence.

    The trigger is wrapped as an RNA ``Seq``, back-transcribed to DNA,
    reverse complemented and transcribed to RNA again.

    ``sequence_type`` and ``length_unpaired`` are accepted but not used: the
    trigger is always handled as RNA (a DNA-specific branch keyed on
    ``sequence_type`` was previously commented out here).
    '''
    as_rna = Seq(trigger, generic_rna)
    as_dna = as_rna.back_transcribe()
    antisense_dna = as_dna.reverse_complement()
    return antisense_dna.transcribe()
コード例 #7
0
class RNA:
    """Wrap a sequence and expose its translation and back-transcription."""

    def __init__(self, input, path=True):
        """Store ``input`` and derive ``self.sequence`` from it.

        With ``path`` true, ``input`` is passed to ``SeqIO.parse`` as a FASTA
        source; otherwise it is stringified and wrapped in a ``Seq``.
        """
        self.input = input
        # NOTE(review): with path=True the attribute is whatever SeqIO.parse
        # returns (a record iterator), which presumably lacks translate() and
        # back_transcribe() -- confirm the path=True mode is usable as-is.
        self.sequence = SeqIO.parse(input, 'fasta') if path else Seq(str(input))

    def do_translation(self):
        """Return ``self.sequence.translate()``."""
        protein = self.sequence.translate()
        return protein

    def do_reverse_transcription(self):
        """Return ``self.sequence.back_transcribe()``."""
        dna = self.sequence.back_transcribe()
        return dna
コード例 #8
0
def web_pagina():
    """
    Fetch the sequence entered on the web page, decide whether it is DNA,
    RNA or protein, and render the result accordingly.
    :return: The rendered web application page
    """
    seq = request.args.get("seq", '').upper()

    if check_dna(seq):
        # DNA: report the type plus the matching RNA and protein strands
        bio_dna = Seq(seq, generic_dna)
        soort = 'DNA'
        een = bio_dna.transcribe()
        twee = bio_dna.translate()
    elif check_rna(seq):
        # Not DNA but RNA: report the type plus the matching DNA and
        # protein strands
        bio_rna = Seq(seq, generic_rna)
        soort = 'RNA'
        een = bio_rna.back_transcribe()
        twee = bio_rna.translate()
    elif check_eiwit(seq):
        # Neither DNA nor RNA but a protein: link to the NCBI site with the
        # query pre-filled so the protein sequence can be BLASTed
        soort = 'Eiwit'
        een = "klik op de link en druk op blast"
        twee = ("https://blast.ncbi.nlm.nih.gov/Blast.cgi?PROGRAM=blastp&PAGE_TYPE=BlastSearch&QUERY="
                + str(seq))
    else:
        # None of DNA, RNA or protein: say so and leave both fields empty
        soort = 'Geen DNA, RNA of eiwit'
        een = ''
        twee = ''

    return render_template("Afvink4.html", soort=soort, een=een, twee=twee)
コード例 #9
0
def transcribe(records, transcribe):
    """
    Lazily transcribe or back-transcribe each record.

    transcribe selects the operation and must be one of the following:
        dna2rna
        rna2dna
    Records are consumed but not emitted for any other value.  Each emitted
    SeqRecord keeps the source record's id and description.
    """
    logging.info('Applying _transcribe generator: '
                 'operation to perform is ' + transcribe + '.')
    to_rna = transcribe == 'dna2rna'
    to_dna = transcribe == 'rna2dna'
    for rec in records:
        residues = str(rec.seq)
        desc = rec.description
        ident = rec.id
        if to_rna:
            converted = Seq(residues, IUPAC.ambiguous_dna).transcribe()
        elif to_dna:
            converted = Seq(residues, IUPAC.ambiguous_rna).back_transcribe()
        else:
            continue
        yield SeqRecord(converted, id=ident, description=desc)
コード例 #10
0
ファイル: transform.py プロジェクト: pansapiens/seqmagick
def transcribe(records, transcribe):
    """
    Generator applying transcription or back-transcription to records.

    transcribe names the operation and must be one of the following:
        dna2rna
        rna2dna
    For any other value the records are still iterated but nothing is
    yielded.  Output records carry over the input id and description.
    """
    logging.info('Applying _transcribe generator: '
                 'operation to perform is ' + transcribe + '.')
    for entry in records:
        text = str(entry.seq)
        description = entry.description
        identifier = entry.id
        if transcribe == 'dna2rna':
            product = Seq(text, generic_dna).transcribe()
        elif transcribe == 'rna2dna':
            product = Seq(text, generic_rna).back_transcribe()
        else:
            # Unrecognised operation: consume the record without emitting it
            continue
        yield SeqRecord(product, id=identifier, description=description)
コード例 #11
0
ファイル: clip_tools.py プロジェクト: sskhon-2014/Graphy
def getSeed(mature_miRNA):
    '''takes in a mature miRNA sequence, returns its seed-match variants

    mRNA    | | N N N N N N N N | | |
    miRNA       8 7 6 5 4 3 2 1 
    8mer    | | N N N N N N N A | | |
    7mer-m8 | | N N N N N N N | | | |
    7mer-A1 | | | N N N N N N A | | |
    6mer    | | | N N N N N N | | | |

    The miRNA is back-transcribed to DNA and reverse complemented; the last
    eight bases of that string are the target-side match of miRNA
    positions 1-8 (for miRNAs of at least eight nucleotides).

    Returns a dict with the keys "8mer", "7mer-m8", "7mer1a" and "6mer",
    each mapping to a DNA string.
    '''
    from Bio.Seq import Seq
    from Bio.Alphabet import generic_rna

    my_rna = Seq(mature_miRNA, generic_rna)  #sequence is RNA
    mRNA = str(my_rna.back_transcribe().reverse_complement()[-8:])
    # Drop the final base (the one pairing with miRNA position 1):
    # what remains pairs with positions 2-8.
    seed = mRNA[:-1]
    # Dropping the first base as well leaves the match to positions 2-7.
    mer6 = seed[1:7]
    mer8 = seed + "A"
    mer7a = seed[1:7] + "A"
    mer7m8 = str(seed)
    seeds = {"8mer": mer8, "7mer-m8": mer7m8, "7mer1a": mer7a, "6mer": mer6}
    return seeds
コード例 #12
0
def write_output_records(args, p_lncrna_w_tfos):
    """ Write every TFO of every lncRNA to a FASTA file as two DNA records.

    For each TFO the RNA sequence is back-transcribed; the complement of the
    resulting DNA and the DNA itself are written, in that order, under the
    same id ``<id_lncrna>|<dg>|<tm - 273.15>`` (both numbers rounded to two
    decimals).  The output path is ``args['o']``.  A summary line with the
    number of TFOs is printed at the end.
    """
    destination = args['o']

    records = []
    total_found = 0
    for lncrna in p_lncrna_w_tfos:
        for tfo in lncrna.lnctfos:
            id_parts = [tfo.id_lncrna, str(round(tfo.thermo["dg"], 2)),
                        str(round(tfo.thermo["tm"] - 273.15, 2))]
            record_id = "|".join(id_parts)
            dna = Seq(tfo.seq, IUPAC.unambiguous_rna).back_transcribe()
            dna_complement = dna.complement()
            # Everything is held in memory until the single write below; if it
            # gets too large a different solution should be implemented
            records.append(
                SeqRecord(dna_complement, id=record_id, description=""))
            records.append(SeqRecord(dna, id=record_id, description=""))
            total_found += 1

    SeqIO.write(records, destination, "fasta")
    print("\nDone! Found a total of {0} possible TFOs\n".format(total_found))
コード例 #13
0
# Reproducing the translation process
from Bio.Seq import Seq
mySeq = Seq("ATG")

# Translate a messenger RNA sequence into a protein sequence

# Transcription
seqRNA = mySeq.transcribe()
# NOTE(review): back_transcribe() is called on the original sequence, not on
# seqRNA -- confirm this is intended
seqDNA = mySeq.back_transcribe()

# Translation
seqProteineRNA = seqRNA.translate()
print(seqProteineRNA)

seqProteineDNA = seqDNA.translate()
print(seqProteineDNA)

コード例 #14
0
# Output file receiving one tab-separated line per accepted window
# NOTE(review): no close() for this handle appears in the visible part of the
# script -- confirm it is closed further down
f1 = open(output_file, 'w')

# End index of the seed slice taken from each miRNA (str(miR.seq)[1:slen])
slen = seed_size + 1

# Graphic features collected for every accepted window
features = []

for miR in SeqIO.parse(mirna, "fasta"):
    # Seed: characters at indices 1 .. slen-1 of the miRNA, typed as RNA
    mir_seed = Seq(str(miR.seq)[1:slen], generic_rna)  #"AAGGCAC"
    print(miR.name)
    print(str(mir_seed))
    for utr in SeqIO.parse(utr_Database, "fasta"):
        # NOTE(review): pos is reset for every UTR but is not advanced anywhere
        # in the visible code -- confirm it is incremented further down
        pos = 0
        # Slide a seed-length window along the UTR; a window is accepted when
        # hamming2 (presumably a Hamming distance -- verify) between its
        # upper-cased text and the reverse-complemented DNA form of the seed
        # is at most nb_max_mismatch
        for seq in window(str(utr.seq), len(str(mir_seed))):
            if (hamming2(str(seq.upper()),
                         str(mir_seed.back_transcribe().reverse_complement()))
                    <= nb_max_mismatch):
                # Columns: UTR id, start, end, miRNA id, hamming2 value,
                # "+", upper-cased window text
                f1.write(utr.id + "\t" + str(pos) + "\t" +
                         str(pos + len(str(mir_seed))) + "\t" + miR.id + "\t" +
                         str(
                             hamming2(
                                 str(seq.upper()),
                                 str(mir_seed.back_transcribe().
                                     reverse_complement()))) + "\t" + "+" +
                         "\t" + str(seq.upper()) + "\n")
                # Same span as a plot feature, labelled with the miRNA id
                # minus any "mmu-" substring
                features.append(
                    GraphicFeature(start=pos,
                                   end=pos + len(str(mir_seed)),
                                   strand=+1,
                                   color="#ccccff",
                                   label=re.sub(r'mmu-', '', miR.id)))
コード例 #15
0
print(f"Sequencia complementar: {seq_comp}")

# Reverse complement: the normal 5'--- 3' strand reversed to 3'--- 5' - DNA Seq
seq_reverse_comp = my_seq.reverse_complement()

print(f"Sequencia complementar reversa: {seq_reverse_comp}")

# Transcription
seq_rna = my_seq.transcribe()

print(f"Sequencia RNA: {seq_rna}")

# Reverse transcription
# NOTE(review): back_transcribe() is applied to my_seq rather than to seq_rna
# -- confirm which was intended

seq_rna_rev = my_seq.back_transcribe()

print(f"Sequencia RNA reversa: {seq_rna_rev}")

# Translation: RNA --> (amino acids) protein

seq_proteina_rna = seq_rna.translate()
seq_proteina_dna = my_seq.translate()

print(f"Sequencia proteina form RNA: {seq_proteina_rna}")
print(f"Sequencia proteina form DNA: {seq_proteina_dna}")


# Parsing FASTA files

for fasta in SeqIO.parse("./seq.fasta","fasta"):
コード例 #16
0
# load my biopython library
from Bio.Seq import Seq

# Input DNA sequence (made up at random)
dna_str = "atgcgcgctagatcgatagta"

sequence = Seq(dna_str)

# Derived sequences. Despite the *_str suffix these names hold the objects
# returned by the Seq methods, not plain strings.

# transcription step: DNA -> RNA
RNAfromDNA_str = sequence.transcribe()
# back-transcription: RNA -> DNA
DNAfromRNA_str = RNAfromDNA_str.back_transcribe()
# translation of the RNA into protein
PROTfromRNA_str = RNAfromDNA_str.translate()

# Report the input and everything derived from it

print("\t 1 Original DNA     :", dna_str, ", length is :", len(dna_str))
print("\t 1 RNA from DNA     :", RNAfromDNA_str)
print("\t 1 DNA from RNA     :", DNAfromRNA_str)
print("\t 1 PROTEIN from RNA :", PROTfromRNA_str)

##################################################
# new sequence
##################################################
dna_str = "atgcgcgctagattcgatagta"
コード例 #17
0
ファイル: crisprdesign.py プロジェクト: nmorris/crisprdesign
class SgRna:
	"""Holds information about a single guide RNA.
	Args:
		protospacer(Bio.Seq): sequence of the protospacer (sans constant portion). Can only be set on initialization.
		target_site(GenomicLocation): location targeted by the protospacer
		target_seq(str): sequence window +/- 10 bases around protospacer (can be used to find PAM)
		offtarget_sites{GenomicLocation: [geneid1, geneid2,...]}: holds info about potential offtarget sites found in genome of interest, including if those offtargets fall within genes
		pam(str): protospacer adjacent motif for this guide
		constant_region(Bio.Seq): constant region associated with this guide. Used to calculate secondary structure.
		score(float): score of this guide
	"""
	# instance variables
	def __init__(self, seq, constant_region="GUUUUAGAGCUAGAAAUAGCAAGUUAAAAUAAGGCUAGUCCGUUAUCAACUUGAAAAAGUGGCACCGAGUCGGUGCUUUUUU", target_site=None, target_seq="", pam="" ):
		"""Store the guide; ``seq`` is converted to RNA letters (T -> U) first."""
		# Alternative constant regions:
		#   weissman constant = "GUUUAAGAGCUAAGCUGGAAACAGCAUAGCAAGUUUAAAUAAGGCUAGUCCGUUAUCAACUUGAAAAAGUGGCACCGAGUCGGUGCUUUUUUU"
		#   broad constant    = "GUUUUAGAGCUAGAAAUAGCAAGUUAAAAUAAGGCUAGUCCGUUAUCAACUUGAAAAAGUGGCACCGAGUCGGUGCUUUUUU"
		# turn DNA input into RNA. find() returns -1 (truthy) when "T" is
		# absent and 0 (falsy) when the sequence starts with "T", so it cannot
		# serve as a presence test: a leading "T" used to skip the conversion.
		# Replacing unconditionally is a no-op when there is no "T".
		seq_copy = seq.replace( "T", "U" )
		self.protospacer = Seq(seq_copy, generic_rna) # sequence sans constant portion. can only set protospacer on initialization. always stored as RNA
		self.target_site = target_site # will eventually become a GenomicLocation. strand (+ means sgrna seq same as + strand, - means sgrna seq same as - strand), 
		self.target_seq = target_seq # 10 bases on either side of site
		self.offtarget_sites = {} # dict, format = {GenomicLocation: [gene1, gene2, gene3...]}
		self.pam = pam
		self.constant_region = Seq( constant_region, generic_rna )
		self.score = 0
	def __eq__( self, other ):
		"""Compare every stored field except ``pam``."""
		return( (self.protospacer, self.target_site, self.target_seq, self.offtarget_sites, self.constant_region, self.score) == (other.protospacer, other.target_site, other.target_seq, other.offtarget_sites, other.constant_region, other.score))
	def __ne__( self, other ):
		return not self == other
			
	def build_fullseq( self ):
		"""Return protospacer + constant region, prefixed with "G" unless it already starts with one."""
		fullseq = self.protospacer + self.constant_region
		if fullseq[0] != "G":
			fullseq = "G" + fullseq
		return fullseq
	
	def build_protospacerpam( self ): # returns DNA
		"""Return the protospacer (as DNA) followed by the PAM.

		A PAM already stored on the guide is used directly.  Without one,
		the protospacer is searched for in ``target_seq`` (and then in its
		reverse complement).  With no ``target_seq`` the bare protospacer
		is returned; if the protospacer cannot be found, "" is returned.
		"""
		p = self.protospacer.back_transcribe()
		if self.pam != "":
			return Seq( str(p)+str(self.pam), generic_dna )
		if self.target_seq == "":
			print("Can't build PAM without sequence context. Defaulting to protospacer %s" % self.protospacer.back_transcribe())
			return self.protospacer.back_transcribe()
		temp_target = self.target_seq
		index = temp_target.find( p )
		if index == -1:
			temp_target = temp_target.reverse_complement()
			index = temp_target.find( p )
		if index == -1:
			print("Can't find protospacer in target sequence")
			# print p, temp_target
			return ""
		# self.pam is "" here (a non-empty PAM returned above), so the former
		# "reuse self.pam" branch could never run and has been dropped.
		# NOTE(review): the slice width is len(self.pam), which is zero at this
		# point, so the extracted PAM is always empty -- confirm the intended
		# PAM length.
		pam = temp_target[ index+len(p):index+len(p)+len(self.pam) ]
		self.pam = pam
		return Seq( str(p)+str(self.pam), generic_dna )
コード例 #18
0
ファイル: __init__.py プロジェクト: JuantonioMS/pyngs
 def retrotranscription(self):
     """Back-transcribe ``self.sequence`` (taken as unambiguous RNA).

     Returns the tuple ``(True, <DNA string>, self.quality)``.
     """
     rna = Seq(self.sequence, IUPAC.unambiguous_rna)
     dna_text = str(rna.back_transcribe())
     return True, dna_text, self.quality
コード例 #19
0
ファイル: BioPython .py プロジェクト: hossainlab/PY4B
import Bio

# Installed Biopython version. Like the other bare expressions below, the
# value is only displayed when run in an interactive session or notebook.
Bio.__version__

from Bio.Seq import Seq

# Example sequence used by every call below
s1 = Seq('ATGGCTTTATTTTCCCGGGA')

# Complement strand
s1.complement()

# Reverse complement strand
s1.reverse_complement()

# DNA -> RNA
s1.transcribe()

# Back-transcription, applied here directly to the original sequence
s1.back_transcribe()

# Compare the back-transcribed sequence with a fresh Seq of the same letters
s1.back_transcribe() == Seq('ATGGCTTTATTTTCCCGGGA')

# Translate to protein
s1.translate()
コード例 #20
0
    def __init__(self,
                 sequence,
                 origin_id,
                 host_id,
                 translation_table_origin=1,
                 translation_table_host=1,
                 use_frequency=False,
                 lower_threshold=None,
                 strong_stop=True,
                 lower_alternative=True,
                 use_replacement_table=True,
                 use_highest_frequency_if_ambiguous=True):
        """
        Initialize the Sequence object
        sequence                    - DNA or RNA sequence as Bio.Seq object or string. This can for example be
                                      generated by using BioPython directly or by loading a FASTA file using
                                      LibCharm.IO.load_file. Strings are upper-cased, stripped of whitespace and,
                                      if they contain U but no T, back-transcribed to DNA.
        origin_id                   - Species id of the origin organism (can be found in the URL at
                                      http://www.kazusa.or.jp/codon)
        host_id                     - Species id of the host organism (can be found in the URL at
                                      http://www.kazusa.or.jp/codon)
        translation_table_host      - Integer; Genetic code used by the target host organism. Corresponds to one of
                                      the translation tables listed here:
                                      http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi
        translation_table_origin    - Integer; Genetic code used to translate the original sequence. Corresponds to
                                      one of the translation tables listed here:
                                      http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi
        use_frequency               - Boolean; Use frequency per thousand instead of fraction during the assessment of
                                      the codon usage
        lower_threshold             - Float; Threshold that defines the minimum codon usage that is considered
                                      appropriate. By default, a harmonized codon can only be lower than this
                                      threshold, if the original codon usage in the original organism is lower than
                                      this threshold, too. Falls back to 5 (use_frequency) or 0.1 when not given.
        strong_stop                 - Boolean; Defines whether a strong stop codon (e.g. TAA in bacterial hosts) should
                                      be used. This may cause the stop codon not to be perfectly harmonized.
        lower_alternative           - Boolean; Defines whether the lower or higher usage codon should be used if df for
                                      two alternative codons is equal.
        use_replacement_table       - Boolean; If true, do not compute the harmonization for every single codon in the
                                      sequence, but for every unique codon in the sequence. This is done by default as
                                      it is much faster.
        use_highest_frequency_if_ambiguous - Boolean: If the sequence contains ambiguous codons (e.g. GCN), always
                                             assume that the most frequent unambiguous codon is used. If set to 'False',
                                             the least frequent unambiguous codon will be used.

        Raises ValueError if either translation table id is greater than 15.
        """

        # setting threshold if provided (any truthy value), otherwise fall back
        # to the default matching the chosen usage measure
        if lower_threshold:
            self.lower_threshold = lower_threshold
        else:
            self.lower_threshold = 5 if use_frequency else 0.1

        # set other variables to provided values or defaults
        self.strong_stop = strong_stop
        self.lower_alternative = lower_alternative
        self.use_replacement_table = use_replacement_table
        self.use_frequency = use_frequency
        self.use_highest_frequency_if_ambiguous = use_highest_frequency_if_ambiguous

        # generate a list of ambiguous DNA letters only (IUPACData.ambiguous_dna_letters also includes the unambiguous
        # G, C, A and T.
        self.ambiguous_dna_letters = list(
            set(IUPACData.ambiguous_dna_letters) -
            set(IUPACData.unambiguous_dna_letters))

        # check if translation table id is > 15. Values > 15 cannot be mapped to http://www.kazusa.or.jp/codon/!
        if translation_table_origin > 15 or translation_table_host > 15:
            raise ValueError(
                'Though the NCBI lists more than 15 translation tables, CHarm is limited to the '
                'first 15 as listed on \'http://www.kazusa.or.jp/codon/\'.')
        # Set translation table for original sequence
        self.translation_table_origin = CodonTable.ambiguous_dna_by_id[int(
            translation_table_origin)]
        self.translation_table_host = CodonTable.ambiguous_dna_by_id[int(
            translation_table_host)]
        # Reformat and sanitize sequence string (remove whitespaces, change to uppercase).
        # isinstance() is required here: the former `type(sequence) is 'str'` compared a
        # type object with a string literal and was therefore never true, so string input
        # bypassed this branch entirely.
        if isinstance(sequence, str):
            try:
                cleaned = ''.join(sequence.upper().split())
                # if a string is provided, check if it contains U and not T to distinguish between RNA and DNA
                # (checked on the upper-cased text so lowercase input is classified the same way)
                if 'U' in cleaned and 'T' not in cleaned:
                    seq = Seq(cleaned, IUPAC.ambiguous_rna)
                    # if RNA, convert to DNA alphabet
                    self.original_sequence = seq.back_transcribe()
                else:
                    self.original_sequence = Seq(cleaned, IUPAC.ambiguous_dna)
            except ValueError as error:
                print('ERROR: {}'.format(error))
                exit(1)
        else:
            self.original_sequence = sequence
        # Translate original DNA sequence to amino acid sequence
        self.original_translated_sequence = self.translate_sequence(
            self.original_sequence, self.translation_table_origin, cds=True)
        # Initialize empty harmonize sequence
        self.harmonized_sequence = ''

        # Fetch codon usage tables for original and host organism
        self.usage_origin = CodonUsageTable(
            'http://www.kazusa.or.jp/codon/cgi-bin/showcodon.cgi?'
            'species={}&aa={}&style=N'.format(origin_id,
                                              translation_table_origin),
            self.use_frequency)
        self.usage_host = CodonUsageTable(
            'http://www.kazusa.or.jp/codon/cgi-bin/showcodon.cgi?'
            'species={}&aa={}&style=N'.format(host_id, translation_table_host),
            self.use_frequency)
        # Split DNA sequence into list of codons
        self.codons = self.split_original_sequence_to_codons()
        # Harmonize codon usage
        self.harmonize_codons()
        # Construct new sequence out of harmonized codons
        self.harmonized_sequence = self.construct_new_sequence()
        # Translate harmonized DNA sequence to amino acid sequence
        self.harmonized_translated_sequence = self.translate_sequence(
            self.harmonized_sequence, self.translation_table_host, cds=True)
コード例 #21
0
    print("\t User picks option: nt")
    if orient_str == '3-5':
        print("\t nt and 3'- 5'")
        dnaSeq_str = dnaSeq_str.reverse_complement()

    elif orient_str == '5-3':
        print("\t nt and 5'-3'")
        dnaSeq_str = dnaSeq_str.complement()
    else:
        print("\t Unknown primality")
        exit()

else:
    print("\t Unknown template type")
    exit()
print("\t + End of DNA Manipulation algorithm. DNASeq is: ", dnaSeq_str, "\n")

# if you want to add some translation functionality ...
print("\t __Translation__")
sequence = dnaSeq_str
# The Seq methods are invoked through the class with the sequence passed
# explicitly. Despite the *_str suffix, the names below hold whatever those
# methods return rather than plain strings.
RNAfromDNA_str = Seq.transcribe(sequence)  # gives RNA sequence
DNAfromRNA_str = Seq.back_transcribe(
    RNAfromDNA_str)  # gives DNA sequence from the RNA conversion
# Protein translated from the RNA obtained above
PROTfromRNA_str = Seq.translate(RNAfromDNA_str)

# Report the input sequence and everything derived from it
print(" Original DNA  :", dnaSeq_str)
print(" RNA from DNA  :", RNAfromDNA_str)
print(" DNA from RNA  :", DNAfromRNA_str)
print(" PROT from RNA :", PROTfromRNA_str)
print(" End of program!")
コード例 #22
0
 def back_transcribe(self):
     """Return a ``Dna`` built from the back-transcribed text of ``self.string``."""
     dna_text = str(Seq(self.string).back_transcribe())
     return Dna(dna_text)
コード例 #23
0
ファイル: __init__.py プロジェクト: Athemis/charm
    def __init__(self, sequence, origin_id, host_id, translation_table_origin=1, translation_table_host=1,
                 use_frequency=False, lower_threshold=None, strong_stop=True, lower_alternative=True,
                 use_replacement_table=True, use_highest_frequency_if_ambiguous=True):
        """
        Initialize the Sequence object
        sequence                    - DNA or RNA sequence as Bio.Seq object or string. This can for example be
                                      generated by using BioPython directly or by loading a FASTA file using
                                      LibCharm.IO.load_file
        origin_id                   - Species id of the origin organism (can be found in the URL at
                                      http://www.kazusa.or.jp/codon)
        host_id                     - Species id of the host organism (can be found in the URL at
                                      http://www.kazusa.or.jp/codon)
        translation_table_host      - Integer; Genetic code used by the target host organism. Corrensponds to one of
                                      the translation tables listed here:
                                      http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi
        translation_table_origin    - Integer; Genetic code used by the target host organism. Corrensponds to one of
                                      the translation tables listed here:
                                      http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi
        use_frequency               - Boolean; Use frequency per thousand instead of fraction during the assessment of
                                      the codon usage
        lower_threshold             - Float; Threshold that defines the minimum codon usage that is considered
                                      appropriate. By default, a harmonized codon can only be lower than this
                                      threshold, if the original codon usage in the original organism is lower than
                                      this threshold, too.
        strong_stop                 - Boolean; Defines whether a strong stop codon (e.g. TAA in bacterial hosts) should
                                      be used. This may cause the stop codon not to be perfectly harmonized.
        lower_alternative           - Boolean; Defines whether the lower or higher usage codon should be used if df for
                                      two alternative codons is equal.
        use_replacement_table       - Boolean; If true, do not compute the harmonization for every single codon in the
                                      sequence, but for every unique codon in the sequence. This is done by default as
                                      it is much faster.
        use_highest_frequency_if_ambiguous - Boolean: If the sequence contains ambiguous codons (e.g. GCN), always
                                             assume that the most frequent unambiguous codon is used. If set to 'False',
                                             the least frequent unambiguous codon will be used.
        """

        # setting threshold if provided, otherwise fall back to defaults
        if not lower_threshold:
            if use_frequency:
                if not lower_threshold:
                    self.lower_threshold = 5
            else:
                if not lower_threshold:
                    self.lower_threshold = 0.1
        else:
            self.lower_threshold = lower_threshold

        # set other variables to provided values or defaults
        self.strong_stop = strong_stop
        self.lower_alternative = lower_alternative
        self.use_replacement_table = use_replacement_table
        self.use_frequency = use_frequency
        self.use_highest_frequency_if_ambiguous = use_highest_frequency_if_ambiguous

        # generate a list of ambiguous DNA letters only (IUPACData.ambiguous_dna_letters also includes the unambiguous
        # G, C, A and T).
        self.ambiguous_dna_letters = list(set(IUPACData.ambiguous_dna_letters) - set(IUPACData.unambiguous_dna_letters))

        # check if translation table id is > 15. Values > 15 cannot be mapped to http://www.kazusa.or.jp/codon/!
        if translation_table_origin > 15 or translation_table_host > 15:
            raise ValueError('Though the NCBI lists more than 15 translation tables, CHarm is limited to the '
                             'first 15 as listed on \'http://www.kazusa.or.jp/codon/\'.')
        # Set translation table for original sequence
        self.translation_table_origin = CodonTable.ambiguous_dna_by_id[int(translation_table_origin)]
        self.translation_table_host = CodonTable.ambiguous_dna_by_id[int(translation_table_host)]
        # Reformat and sanitize sequence string (remove whitespaces, change to uppercase)
        if type(sequence) is 'str':
            try:
                # if a string is provided, check if it contains U and not T to distinguish between RNA and DNA
                if 'U' in sequence and not 'T' in sequence:
                    seq = Seq(''.join(sequence.upper().split()), IUPAC.ambiguous_rna)
                    # if RNA, convert to DNA alphabet
                    self.original_sequence = seq.back_transcribe()
                else:
                    self.original_sequence = Seq(''.join(sequence.upper().split()), IUPAC.ambiguous_dna)
            except ValueError as error:
                print('ERROR: {}'.format(error))
                exit(1)
        else:
            self.original_sequence = sequence
        # Translate original DNA sequence to amino acid sequence
        self.original_translated_sequence = self.translate_sequence(self.original_sequence,
                                                                    self.translation_table_origin, cds=True)
        # Initialize empty harmonize sequence
        self.harmonized_sequence = ''

        # Fetch codon usage tables for original and host organism
        self.usage_origin = CodonUsageTable('http://www.kazusa.or.jp/codon/cgi-bin/showcodon.cgi?'
                                            'species={}&aa={}&style=N'.format(origin_id,
                                                                              translation_table_origin),
                                            self.use_frequency)
        self.usage_host = CodonUsageTable('http://www.kazusa.or.jp/codon/cgi-bin/showcodon.cgi?'
                                          'species={}&aa={}&style=N'.format(host_id,
                                                                            translation_table_host),
                                          self.use_frequency)
        # Split DNA sequence into list of codons
        self.codons = self.split_original_sequence_to_codons()
        # Harmonize codon usage
        self.harmonize_codons()
        # Construct new sequence out of harmonized codons
        self.harmonized_sequence = self.construct_new_sequence()
        # Translate harmonized DNA sequence to amino acid sequence
        self.harmonized_translated_sequence = self.translate_sequence(self.harmonized_sequence,
                                                                      self.translation_table_host, cds=True)
コード例 #24
0
# Walk-through of basic Biopython Seq usage: alphabets, transcription,
# translation, string-like access and mutable sequences.
# print() is always called with exactly one argument so that the script prints
# the same text under Python 2 and Python 3 (the former `print x` statement is
# a SyntaxError on Python 3).
print(IUPAC.ambiguous_dna.letters)  # IUPAC letters for DNA bases
print(IUPAC.ExtendedIUPACProtein.letters)  # letters of the extended IUPAC protein alphabet
print(IUPAC.ExtendedIUPACDNA.letters)  # letters of the extended IUPAC DNA alphabet

from Bio.Seq import Seq
seq = Seq('CCGGTT', IUPAC.unambiguous_dna)
print(seq)
seq = seq.transcribe()  # must be DNA to transcribe to RNA
print(seq)
# Translate to protein.
# NOTE(review): seq holds the transcribe() result at this point, not the
# original DNA, so translate() is being applied to the transcribed sequence.
seq = seq.translate()
print(seq)

# sequence data type
seq = Seq('CCGGUU', IUPAC.IUPACUnambiguousRNA())  # alphabet built via the IUPAC...RNA class constructor
print(seq)
print(seq.back_transcribe())  # must be RNA to back-transcribe to DNA

seq = Seq('ATGGTCTTTCCAGACGCG', IUPAC.unambiguous_dna)
print(Seq.transcribe(seq))  # called as a function here; above it is called as a method

print(seq[:5])  # string-style operations (slicing) work on Seq
print(len(seq))
#seq[0]='C'  # not allowed: Seq objects are not mutable
st = str(seq)  # convert to a plain string
print(st)

# editable (mutable) sequence data type
from Bio.Seq import MutableSeq
mut_seq = seq.tomutable()  # convert it to the mutable sequence type
print(mut_seq)
mut_seq[0] = 'C'