Esempio n. 1
0
def revtrans(
    input, output, codon_table, codon_usage, sampler, codon_freq_threshold, amber_only
):
    """Reverse translate amino acid sequences into DNA

    This operation randomly samples codons for each amino acid, so multiple runs
    of this tool on the same input can produce different results

    Note: only the Standard codon table is currently implemented.
    Note: only the E. coli codon usage is currently implemented.

    INPUT and OUTPUT are paths to fasta files or "-" to specify STDIN/STDOUT.

    """
    # NOTE(review): the docstring above is kept verbatim because it reads like
    # user-facing CLI help text -- confirm before rewording it.
    #
    # Parameters, as used by this body:
    #   input                 -- handed to SeqIO.parse(..., "fasta").
    #   output                -- handed to print_fasta for every record.
    #   codon_table,
    #   codon_usage           -- accepted but not read here (see the Notes in
    #                            the docstring).
    #   sampler               -- "weighted" or "uniform"; any other value
    #                            raises ValueError.
    #   codon_freq_threshold  -- when not None, applied to the usage table via
    #                            zero_low_freq_codons ("weighted" only).
    #   amber_only            -- when truthy, applied to the usage table via
    #                            zero_non_amber_stops ("weighted" only).
    if sampler == "weighted":
        usage = ecoli_codon_usage
        if codon_freq_threshold is not None:
            # TODO: this is hardcoded in and there's a leaky abstraction here
            table = standard_dna_table
            usage = zero_low_freq_codons(usage, table, codon_freq_threshold)
        if amber_only:
            usage = zero_non_amber_stops(usage)
        codon_sampler = FreqWeightedCodonSampler(usage=usage)
    elif sampler == "uniform":
        codon_sampler = UniformCodonSampler()
    else:
        # Fail early and clearly; without this branch codon_sampler would be
        # unbound and the loop below would die with UnboundLocalError (or do
        # nothing at all when the input has no records).
        raise ValueError(
            f"unknown sampler {sampler!r}; expected 'weighted' or 'uniform'"
        )

    for seqrecord in tqdm(SeqIO.parse(input, "fasta"), desc="revtrans", unit="seq"):
        dna_id = seqrecord.id
        dna_seq = reverse_translate(seqrecord.seq, codon_sampler)
        # Empty description so only the record id is passed along as header text.
        print_fasta(SeqRecord(dna_seq, dna_id, description=""), output)
Esempio n. 2
0
def recodesite(
    input,
    output,
    site,
    clip_left,
    clip_right,
    codon_table,
    codon_usage,
    sampler,
    codon_freq_threshold,
    amber_only,
):
    """Recode a DNA sequence to remove a particular site (e.g., restriction site)

    The site needs to be recognized by Biopython, or it will be treated as a DNA
    sequence. The clipping options should determine the boundaries of the coding
    sequence, which will correspond to the part of the sequence that is
    "recodable".

    INPUT and OUTPUT are paths to fasta files or "-" to specify STDIN/STDOUT.

    """
    # NOTE(review): the docstring above is kept verbatim because it reads like
    # user-facing CLI help text -- confirm before rewording it.
    #
    # Parameters, as used by this body:
    #   input                 -- handed to SeqIO.parse(..., "fasta").
    #   output                -- handed to print_fasta for every record.
    #   site                  -- iterable; each element goes through site2dna.
    #   clip_left             -- used directly as the CDS start offset.
    #   clip_right            -- subtracted from the record length to get the
    #                            CDS end offset.
    #   codon_table,
    #   codon_usage           -- accepted but not read here.
    #   sampler               -- "weighted" or "uniform"; any other value
    #                            raises ValueError.
    #   codon_freq_threshold  -- when not None, applied to the usage table via
    #                            zero_low_freq_codons ("weighted" only).
    #   amber_only            -- when truthy, applied to the usage table via
    #                            zero_non_amber_stops ("weighted" only).
    if sampler == "weighted":
        usage = ecoli_codon_usage
        if codon_freq_threshold is not None:
            # TODO: this is hardcoded in and there's a leaky abstraction here
            table = standard_dna_table
            usage = zero_low_freq_codons(usage, table, codon_freq_threshold)
        if amber_only:
            usage = zero_non_amber_stops(usage)
        codon_sampler = FreqWeightedCodonSampler(usage=usage)
    elif sampler == "uniform":
        codon_sampler = UniformCodonSampler()
    else:
        # Fail early and clearly; without this branch codon_sampler would be
        # unbound and the loop below would die with UnboundLocalError (or do
        # nothing at all when the input has no records).
        raise ValueError(
            f"unknown sampler {sampler!r}; expected 'weighted' or 'uniform'"
        )

    sites = [site2dna(s) for s in site]
    # sites is now a list[Bio.Seq.Seq]

    for seqrecord in SeqIO.parse(input, "fasta"):
        id_ = seqrecord.id
        # The recodable window is the record minus the clipped flanks.
        cds_start = clip_left
        cds_end = len(seqrecord) - clip_right
        seq = recode_sites_from_cds(
            seqrecord.seq, sites, codon_sampler, cds_start, cds_end
        )
        print_fasta(SeqRecord(seq, id_, description=""), output)