def revtrans(
    input, output, codon_table, codon_usage, sampler, codon_freq_threshold, amber_only
):
    """Reverse translate amino acid sequences into DNA

    This operation randomly samples codons for each amino acid, so multiple runs
    of this tool on the same input can produce different results

    Note: only the Standard codon table is currently implemented.
    Note: only the E. coli codon usage is currently implemented.

    INPUT and OUTPUT are paths to fasta files or "-" to specify STDIN/STDOUT.
    """
    # Parameter notes. They live here rather than in the docstring, whose
    # wording is left untouched because it reads like user-facing command
    # help (NOTE(review): confirm how the docstring is surfaced).
    #   input                -- handed to SeqIO.parse(..., "fasta").
    #   output               -- handed to print_fasta for every record.
    #   codon_table,
    #   codon_usage          -- accepted but never read in this function; the
    #                           standard table / E. coli usage are hardcoded.
    #   sampler              -- "weighted" or "uniform"; anything else raises
    #                           ValueError.
    #   codon_freq_threshold -- only consulted for the weighted sampler; when
    #                           not None it is passed to zero_low_freq_codons.
    #   amber_only           -- only consulted for the weighted sampler; when
    #                           truthy the usage goes through
    #                           zero_non_amber_stops.
    if sampler == "weighted":
        usage = ecoli_codon_usage
        if codon_freq_threshold is not None:
            # TODO: this is hardcoded in and there's a leaky abstraction here
            usage = zero_low_freq_codons(
                usage, standard_dna_table, codon_freq_threshold
            )
        if amber_only:
            usage = zero_non_amber_stops(usage)
        codon_sampler = FreqWeightedCodonSampler(usage=usage)
    elif sampler == "uniform":
        codon_sampler = UniformCodonSampler()
    else:
        # Fail before touching any input: without this branch an unknown
        # sampler left `codon_sampler` unbound and only blew up later with an
        # UnboundLocalError inside the record loop.
        raise ValueError(
            "unknown sampler: {!r} (expected 'weighted' or 'uniform')".format(sampler)
        )

    for seqrecord in tqdm(SeqIO.parse(input, "fasta"), desc="revtrans", unit="seq"):
        dna_id = seqrecord.id
        dna_seq = reverse_translate(seqrecord.seq, codon_sampler)
        # Empty description so only the record id is carried into the new record.
        print_fasta(SeqRecord(dna_seq, dna_id, description=""), output)
def test_zero_non_amber(self):
    """Check zero_non_amber_stops against the E. coli codon usage.

    The ochre and opal codons must end up with frequency 0, and every other
    codon's frequency must equal its original frequency divided by
    (1 - combined original frequency of ochre and opal).
    """
    # Combined frequency that is expected to be removed; read before the
    # function under test is called.
    zeroed_weight = (
        ecoli_codon_usage.freq[ochre_codon] + ecoli_codon_usage.freq[opal_codon]
    )
    remaining_weight = 1 - zeroed_weight

    new_usage = zero_non_amber_stops(ecoli_codon_usage)

    for codon in new_usage.freq:
        is_zeroed_stop = codon == ochre_codon or codon == opal_codon
        if not is_zeroed_stop:
            expected = ecoli_codon_usage.freq[codon] / remaining_weight
            assert isclose(new_usage.freq[codon], expected)

    # The two non-amber stops themselves must be exactly zero.
    for stop_codon in (ochre_codon, opal_codon):
        assert new_usage.freq[stop_codon] == 0
def test_zero_non_amber(self):
    """Check zero_non_amber_stops against the E. coli codon usage.

    Runs with all warnings ignored. The ochre and opal codons must end up with
    frequency 0, and every other codon's frequency must equal its original
    frequency divided by (1 - combined original frequency of ochre and opal).
    """
    with warnings.catch_warnings():
        # biopython Seq.__hash__
        warnings.simplefilter("ignore")

        # Combined frequency that is expected to be removed; read before the
        # function under test is called.
        zeroed_weight = (
            ecoli_codon_usage.freq[ochre_codon] + ecoli_codon_usage.freq[opal_codon]
        )
        remaining_weight = 1 - zeroed_weight

        new_usage = zero_non_amber_stops(ecoli_codon_usage)

        for codon in new_usage.freq:
            is_zeroed_stop = codon == ochre_codon or codon == opal_codon
            if not is_zeroed_stop:
                expected = ecoli_codon_usage.freq[codon] / remaining_weight
                assert isclose(new_usage.freq[codon], expected)

        # The two non-amber stops themselves must be exactly zero.
        for stop_codon in (ochre_codon, opal_codon):
            assert new_usage.freq[stop_codon] == 0
def recodesite(
    input,
    output,
    site,
    clip_left,
    clip_right,
    codon_table,
    codon_usage,
    sampler,
    codon_freq_threshold,
    amber_only,
):
    """Recode a DNA sequence to remove a particular site (e.g., restriction site)

    The site needs to be recognized by Biopython, or it will be treated as a DNA
    sequence. The clipping options should determine the boundaries of the coding
    sequence, which will correspond to the part of the sequence that is
    "recodable".

    INPUT and OUTPUT are paths to fasta files or "-" to specify STDIN/STDOUT.
    """
    # Parameter notes. They live here rather than in the docstring, whose
    # wording is left untouched because it reads like user-facing command
    # help (NOTE(review): confirm how the docstring is surfaced).
    #   input                -- handed to SeqIO.parse(..., "fasta").
    #   output               -- handed to print_fasta for every record.
    #   site                 -- iterable; each element goes through site2dna.
    #   clip_left            -- used directly as the CDS start index.
    #   clip_right           -- subtracted from the record length to get the
    #                           CDS end index.
    #   codon_table,
    #   codon_usage          -- accepted but never read in this function; the
    #                           standard table / E. coli usage are hardcoded.
    #   sampler              -- "weighted" or "uniform"; anything else raises
    #                           ValueError.
    #   codon_freq_threshold -- only consulted for the weighted sampler; when
    #                           not None it is passed to zero_low_freq_codons.
    #   amber_only           -- only consulted for the weighted sampler; when
    #                           truthy the usage goes through
    #                           zero_non_amber_stops.
    if sampler == "weighted":
        usage = ecoli_codon_usage
        if codon_freq_threshold is not None:
            # TODO: this is hardcoded in and there's a leaky abstraction here
            usage = zero_low_freq_codons(
                usage, standard_dna_table, codon_freq_threshold
            )
        if amber_only:
            usage = zero_non_amber_stops(usage)
        codon_sampler = FreqWeightedCodonSampler(usage=usage)
    elif sampler == "uniform":
        codon_sampler = UniformCodonSampler()
    else:
        # Fail before touching any input: without this branch an unknown
        # sampler left `codon_sampler` unbound and only blew up later with an
        # UnboundLocalError inside the record loop.
        raise ValueError(
            "unknown sampler: {!r} (expected 'weighted' or 'uniform')".format(sampler)
        )

    sites = [site2dna(s) for s in site]
    # sites is now a list[Bio.Seq.Seq]

    for seqrecord in SeqIO.parse(input, "fasta"):
        id_ = seqrecord.id
        # The recodable region is whatever remains after clipping both ends.
        cds_start = clip_left
        cds_end = len(seqrecord) - clip_right
        seq = recode_sites_from_cds(
            seqrecord.seq, sites, codon_sampler, cds_start, cds_end
        )
        # Empty description so only the record id is carried into the new record.
        print_fasta(SeqRecord(seq, id_, description=""), output)