Ejemplo n.º 1
0
def recodesite(
    input,
    output,
    site,
    clip_left,
    clip_right,
    codon_table,
    codon_usage,
    sampler,
    codon_freq_threshold,
    amber_only,
):
    """Recode a DNA sequence to remove a particular site (e.g., restriction site)

    The site needs to be recognized by Biopython, or it will be treated as a DNA
    sequence. The clipping options should determine the boundaries of the coding
    sequence, which will correspond to the part of the sequence that is
    "recodable".

    INPUT and OUTPUT are paths to fasta files or "-" to specify STDIN/STDOUT.

    """
    # NOTE(review): the docstring above reads like user-facing CLI help, so it
    # is kept verbatim; developer-facing notes live in comments instead.
    #
    # Parameters as used by this function:
    #   input                 -- handed to SeqIO.parse(..., "fasta").
    #   output                -- handed to print_fasta for every recoded record.
    #   site                  -- iterable; each element goes through site2dna.
    #   clip_left             -- index where the recodable region starts.
    #   clip_right            -- number of trailing positions excluded from the
    #                            recodable region (end = len(record) - clip_right).
    #   codon_table,
    #   codon_usage           -- accepted but not read here (see TODO below).
    #   sampler               -- "weighted" or "uniform".
    #   codon_freq_threshold  -- weighted only: when not None, passed to
    #                            zero_low_freq_codons.
    #   amber_only            -- weighted only: when truthy, usage is passed
    #                            through zero_non_amber_stops.
    #
    # Raises ValueError when `sampler` is not one of the two supported names.
    if sampler == "weighted":
        usage = ecoli_codon_usage
        if codon_freq_threshold is not None:
            # TODO: this is hardcoded in and there's a leaky abstraction here
            table = standard_dna_table
            usage = zero_low_freq_codons(usage, table, codon_freq_threshold)
        if amber_only:
            usage = zero_non_amber_stops(usage)
        codon_sampler = FreqWeightedCodonSampler(usage=usage)
    elif sampler == "uniform":
        codon_sampler = UniformCodonSampler()
    else:
        # Fail fast: without this branch `codon_sampler` stays unbound and the
        # error only surfaces (as UnboundLocalError) once a record is processed.
        raise ValueError(
            "unknown sampler {!r}; expected 'weighted' or 'uniform'".format(
                sampler
            )
        )

    sites = [site2dna(s) for s in site]
    # sites is now a list[Bio.Seq.Seq]

    for seqrecord in SeqIO.parse(input, "fasta"):
        id_ = seqrecord.id
        # The recodable window is the record minus the clipped flanks.
        cds_start = clip_left
        cds_end = len(seqrecord) - clip_right
        seq = recode_sites_from_cds(
            seqrecord.seq, sites, codon_sampler, cds_start, cds_end
        )
        # Emit with an empty description so only the id appears in the header.
        print_fasta(SeqRecord(seq, id_, description=""), output)
Ejemplo n.º 2
0
 def test_with_two_sites_in_cds(self):
     """Recode a CDS against two sites at once and check the invariants.

     The input contains the motifs AAGCTT and GAATTC back to back
     (presumably what the HindIII / EcoRI fixtures resolve to -- confirm
     against the fixture setup). After recoding, neither site may be found,
     the flanks outside the CDS must be untouched, and the translation of
     the CDS must be unchanged.
     """
     original = Seq("GAGATCCGGTCAAGCTTGAATTCAACGCAAGTTGTTAT")
     start, end = self.cds_start, self.cds_end
     sampler = self.codon_sampler
     targets = [self.EcoRI, self.HindIII]

     recoded = recode_sites_from_cds(original, targets, sampler, start, end)

     # Translate only the coding window, before and after recoding.
     protein_before = original[start:end].translate(table=sampler.table)
     protein_after = recoded[start:end].translate(table=sampler.table)

     # Both sites are gone.
     assert recoded.find(self.EcoRI) == -1
     assert recoded.find(self.HindIII) == -1

     # Something changed, but the overall length did not.
     assert recoded != original
     assert len(recoded) == len(original)

     # Sequence outside the CDS is preserved exactly.
     assert recoded[:start] == original[:start]
     assert recoded[end:] == original[end:]

     # Recoding is synonymous: the encoded protein is identical.
     assert protein_after == protein_before