Exemple #1
0
def remove_restriction_sites(dna_sequence, restrict_sites):
    """Mutate one codon at every detected restriction-enzyme cut.

    Args:
        dna_sequence: Sequence object exposing ``tomutable()``; it is handed
            to ``Analysis`` as the sequence to search.
        restrict_sites: Restriction batch passed to ``Analysis`` as
            ``restrictionbatch``.

    Returns:
        The result of calling ``toseq()`` on the mutated working copy.

    NOTE(review): ``codon_use_table`` is not a parameter of this function; it
    is resolved from the enclosing (module) scope -- confirm it is defined
    there before calling.
    """
    logger.info("===== REMOVE RESTRICTION SITES =====")

    # check each unwanted restriction site
    analysis = Analysis(restrictionbatch=restrict_sites, sequence=dna_sequence)
    result = analysis.full()

    mutable_seq = dna_sequence.tomutable()
    for enz, cuts in result.items():
        for cut in cuts:
            # Report the single position being handled, not the whole list.
            logger.info(
                "The restriction enzyme {0} can cut the sequence before position {1}!"
                .format(str(enz), cut))
            # map sequence position to codon position
            # subtract 1 from `cut` to convert from sequence to string indexing
            codon_pos, offset = divmod((cut - 1) - (enz.size // 2), 3)

            # ensure the whole codon we mutate is being recognized by the restriction enzyme
            if offset:
                codon_pos += 1
            codon_idx = slice(codon_pos * 3, (codon_pos + 1) * 3)

            new_codon = mutate_codon(mutable_seq[codon_idx], codon_use_table)
            mutable_seq[codon_idx] = new_codon

    return mutable_seq.toseq()
Exemple #2
0
 def test_change(self):
     """Verify that Analysis.change() swaps the sequence and enzyme batch.

     Also checks that unknown option names are rejected with AttributeError.
     """
     template = ('CCAGTCTATAATTCG' + BamHI.site + 'GCGGCATCATACTCGA'
                 + BamHI.site + 'ATATCGCGTGATGATA' + EcoRV.site
                 + 'CGTAGTAATTACGCATG')
     seq = Seq(template)
     initial_batch = NdeI + EcoRI + BamHI + BsmBI
     analysis = Analysis(initial_batch, seq)
     bamhi_cuts = analysis.full()[BamHI]
     self.assertEqual(bamhi_cuts, [17, 39])

     # Swap in a batch without BamHI and a sequence ending in an NdeI site.
     reduced_batch = NdeI + EcoRI + BsmBI
     seq += NdeI.site
     analysis.change(sequence=seq)
     analysis.change(rb=reduced_batch)
     enzyme_count = len(analysis.full())
     self.assertEqual(enzyme_count, 3)
     ndei_cuts = analysis.full()[NdeI]
     self.assertEqual(ndei_cuts, [85])

     # 'KonsoleWidth' is a deliberate misspelling of Console.
     bad_options = {'NameWidth': 3, 'KonsoleWidth': 40}
     with self.assertRaises(AttributeError):
         analysis.change(**bad_options)
Exemple #3
0
 def test_analysis_restrictions(self):
     """Exercise the result-filtering helpers of Analysis."""
     poly_a_seq = Seq('TTCAAAAAAAAAAAAAAAAAAAAAAAAAAAAGAA',
                      IUPACAmbiguousDNA())
     enzymes = RestrictionBatch([EcoRI, KpnI, EcoRV])
     ana = Analysis(enzymes, poly_a_seq, linear=False)

     ecori_hit = {EcoRI: [33]}
     nothing = {}

     # Only the enzymes which cut blunt are reported.
     self.assertEqual(ana.blunt(), {EcoRV: []})
     self.assertEqual(ana.full(), {KpnI: [], EcoRV: [], EcoRI: [33]})
     # Only the enzymes which have a site are reported.
     self.assertEqual(ana.with_sites(), ecori_hit)
     # Only the enzymes which have no site are reported.
     self.assertEqual(ana.without_site(), {KpnI: [], EcoRV: []})
     self.assertEqual(ana.with_site_size([32]), nothing)

     # Enzymes which cut between two positions; argument order is free.
     self.assertEqual(ana.only_between(1, 20), nothing)
     self.assertEqual(ana.only_between(20, 34), ecori_hit)
     self.assertEqual(ana.only_between(34, 20), ecori_hit)
     self.assertEqual(ana.only_outside(20, 34), nothing)

     with self.assertWarns(BiopythonWarning):
         ana.with_name(['fake'])
     self.assertEqual(ana.with_name([EcoRI]), ecori_hit)

     # _boundaries() normalises its arguments; compare the first two items.
     in_order = ana._boundaries(1, 20)[:2]
     self.assertEqual(in_order, (1, 20))
     reversed_order = ana._boundaries(20, 1)[:2]
     self.assertEqual(reversed_order, (1, 20))
     negative_start = ana._boundaries(-1, 20)[:2]
     self.assertEqual(negative_start, (20, 33))
 def test_change(self):
     """Check that change() replaces both the sequence and the batch."""
     dna = Seq(
         'CCAGTCTATAATTCG' + BamHI.site
         + 'GCGGCATCATACTCGA' + BamHI.site
         + 'ATATCGCGTGATGATA' + EcoRV.site
         + 'CGTAGTAATTACGCATG'
     )
     four_enzymes = NdeI + EcoRI + BamHI + BsmBI
     analysis = Analysis(four_enzymes, dna)
     expected_bamhi = [17, 39]
     self.assertEqual(analysis.full()[BamHI], expected_bamhi)

     # Drop BamHI from the batch and append an NdeI site to the sequence.
     three_enzymes = NdeI + EcoRI + BsmBI
     dna += NdeI.site
     analysis.change(sequence=dna)
     analysis.change(rb=three_enzymes)
     self.assertEqual(len(analysis.full()), 3)
     expected_ndei = [85]
     self.assertEqual(analysis.full()[NdeI], expected_ndei)

     # Option names that do not exist must raise ('Konsole' is intentional).
     unknown_options = {'NameWidth': 3, 'KonsoleWidth': 40}
     with self.assertRaises(AttributeError):
         analysis.change(**unknown_options)
Exemple #5
0
 def test_analysis_restrictions(self):
     """Exercise the filtering helpers of Analysis on two sequences."""
     poly_a_seq = Seq('TTCAAAAAAAAAAAAAAAAAAAAAAAAAAAAGAA',
                      IUPACAmbiguousDNA())
     enzymes = RestrictionBatch([EcoRI, KpnI, EcoRV])
     ana = Analysis(enzymes, poly_a_seq, linear=False)

     ecori_hit = {EcoRI: [33]}
     all_enzymes = {KpnI: [], EcoRV: [], EcoRI: [33]}
     nothing = {}

     # Blunt cutters only.
     self.assertEqual(ana.blunt(), {EcoRV: []})
     self.assertEqual(ana.full(), all_enzymes)
     # Enzymes which have a site.
     self.assertEqual(ana.with_sites(), ecori_hit)
     # Enzymes which have no site.
     self.assertEqual(ana.without_site(), {KpnI: [], EcoRV: []})
     self.assertEqual(ana.with_site_size([32]), nothing)
     # Enzymes which produce 5' overhangs.
     self.assertEqual(ana.overhang5(), ecori_hit)
     # Enzymes which produce 3' overhangs.
     self.assertEqual(ana.overhang3(), {KpnI: []})
     # Enzymes which produce defined ends.
     self.assertEqual(ana.defined(), all_enzymes)
     # Enzymes which cut N times.
     self.assertEqual(ana.with_N_sites(2), nothing)

     # Non-integer boundaries are rejected, whichever argument is wrong.
     for bad_bounds in (('t', 20), (1, 't')):
         with self.assertRaises(TypeError):
             ana.only_between(*bad_bounds)
     # Enzymes which cut between two positions; argument order is free.
     self.assertEqual(ana.only_between(1, 20), nothing)
     self.assertEqual(ana.only_between(20, 34), ecori_hit)
     self.assertEqual(ana.only_between(34, 20), ecori_hit)
     self.assertEqual(ana.only_outside(20, 34), nothing)

     with self.assertWarns(BiopythonWarning):
         ana.with_name(['fake'])
     self.assertEqual(ana.with_name([EcoRI]), ecori_hit)

     # _boundaries(): in order, reversed, negative start, negative end.
     in_order = ana._boundaries(1, 20)[:2]
     self.assertEqual(in_order, (1, 20))
     reversed_order = ana._boundaries(20, 1)[:2]
     self.assertEqual(reversed_order, (1, 20))
     negative_start = ana._boundaries(-1, 20)[:2]
     self.assertEqual(negative_start, (20, 33))
     negative_end = ana._boundaries(1, -1)[:2]
     self.assertEqual(negative_end, (1, 33))

     # Sites inside and outside of the boundaries.
     two_site_seq = Seq('GAATTCAAAAAAGAATTC', IUPACAmbiguousDNA())
     ana = Analysis(RestrictionBatch([EcoRI]), two_site_seq)
     both_cuts = {EcoRI: [2, 14]}
     # Cut at least inside.
     self.assertEqual(ana.between(1, 7), both_cuts)
     # Cut at least inside, reporting only the inside site.
     self.assertEqual(ana.show_only_between(1, 7), {EcoRI: [2]})
     # Cut at least outside.
     self.assertEqual(ana.outside(1, 7), both_cuts)
     # Do not cut within.
     self.assertEqual(ana.do_not_cut(7, 12), both_cuts)
 def test_analysis_restrictions(self):
     """Run the Analysis filter methods and compare against known results."""
     first_seq = Seq('TTCAAAAAAAAAAAAAAAAAAAAAAAAAAAAGAA',
                     IUPACAmbiguousDNA())
     first_batch = RestrictionBatch([EcoRI, KpnI, EcoRV])
     ana = Analysis(first_batch, first_seq, linear=False)

     only_ecori = {EcoRI: [33]}
     empty = {}

     # --- filters by enzyme property -------------------------------------
     blunt_cutters = ana.blunt()
     self.assertEqual(blunt_cutters, {EcoRV: []})
     complete = ana.full()
     self.assertEqual(complete, {KpnI: [], EcoRV: [], EcoRI: [33]})
     with_a_site = ana.with_sites()
     self.assertEqual(with_a_site, only_ecori)
     without_a_site = ana.without_site()
     self.assertEqual(without_a_site, {KpnI: [], EcoRV: []})
     by_site_size = ana.with_site_size([32])
     self.assertEqual(by_site_size, empty)
     five_prime = ana.overhang5()
     self.assertEqual(five_prime, only_ecori)
     three_prime = ana.overhang3()
     self.assertEqual(three_prime, {KpnI: []})
     defined_ends = ana.defined()
     self.assertEqual(defined_ends, {KpnI: [], EcoRV: [], EcoRI: [33]})
     cutting_twice = ana.with_N_sites(2)
     self.assertEqual(cutting_twice, empty)

     # --- filters by position --------------------------------------------
     # A non-integer start or end must raise TypeError.
     with self.assertRaises(TypeError):
         ana.only_between('t', 20)
     with self.assertRaises(TypeError):
         ana.only_between(1, 't')
     self.assertEqual(ana.only_between(1, 20), empty)
     self.assertEqual(ana.only_between(20, 34), only_ecori)
     # Start and end may be given in either order.
     self.assertEqual(ana.only_between(34, 20), only_ecori)
     self.assertEqual(ana.only_outside(20, 34), empty)

     # --- filters by name ------------------------------------------------
     with self.assertWarns(BiopythonWarning):
         ana.with_name(['fake'])
     self.assertEqual(ana.with_name([EcoRI]), only_ecori)

     # --- boundary normalisation (first two returned items) --------------
     self.assertEqual(ana._boundaries(1, 20)[:2], (1, 20))
     # Reverse order:
     self.assertEqual(ana._boundaries(20, 1)[:2], (1, 20))
     # Negative start:
     self.assertEqual(ana._boundaries(-1, 20)[:2], (20, 33))
     # Negative end:
     self.assertEqual(ana._boundaries(1, -1)[:2], (1, 33))

     # --- sites inside and outside of the boundaries ---------------------
     second_seq = Seq('GAATTCAAAAAAGAATTC', IUPACAmbiguousDNA())
     second_batch = RestrictionBatch([EcoRI])
     ana = Analysis(second_batch, second_seq)
     two_cuts = {EcoRI: [2, 14]}
     # Cut at least inside
     self.assertEqual(ana.between(1, 7), two_cuts)
     # Cut at least inside and report only the inside site
     self.assertEqual(ana.show_only_between(1, 7), {EcoRI: [2]})
     # Cut at least outside
     self.assertEqual(ana.outside(1, 7), two_cuts)
     # Don't cut within
     self.assertEqual(ana.do_not_cut(7, 12), two_cuts)
Exemple #7
0
def eval_restriction_sites(individual, restrict_sites):
    """Score a sequence by the number of restriction-enzyme cuts found in it.

    TODO: Make it remove rest sites

    Args:
        individual: A ``SequenceContainer`` instance; its ``sequence``
            attribute is the sequence that gets analysed.
        restrict_sites: Restriction batch passed to ``Analysis`` as
            ``restrictionbatch``.

    Returns:
        int: Total number of cut positions reported by ``Analysis.full()``
        across all enzymes (0 when nothing cuts).

    Raises:
        AssertionError: If ``individual`` is not a ``SequenceContainer``.
    """
    # The previous `individual is SequenceContainer` compared the argument to
    # the class object itself, so it failed for every real instance.
    assert isinstance(individual, SequenceContainer)

    # check unwanted restriction sites
    analysis = Analysis(
        restrictionbatch=restrict_sites, sequence=individual.sequence)

    # score the sequence based on the number of restriction sites
    return sum(len(cuts) for cuts in analysis.full().values())
def remove_restriction_sites(dna_sequence, codon_use_table, restrict_sites):
    """Identify and remove sequences recognized by a set of restriction
    enzymes.

    For every cut position reported by the restriction analysis, one codon
    near the cut is replaced with the codon returned by ``mutate_codon``.

    Args:
        dna_sequence (Bio.Seq.Seq): A read-only representation of
            the DNA sequence.
        codon_use_table (dict{str, list[list, list]}): A dictionary with
            each amino acid three-letter code as keys, and a list of two
            lists as values. The first list is the synonymous codons that
            encode the amino acid, the second is the frequency with which
            each synonymous codon is used.
        restrict_sites (Bio.Restriction.RestrictionBatch): RestrictionBatch
            instance configured with the input restriction enzymes.

    Returns:
        Bio.Seq.Seq: A read-only representation of the new DNA sequence.
    """

    logger.info("Removing restriction sites")

    # check each unwanted restriction site
    analysis = Analysis(restrictionbatch=restrict_sites, sequence=dna_sequence)
    result = analysis.full()

    mutable_seq = dna_sequence.tomutable()
    for enz, cuts in result.items():
        for cut in cuts:
            # Report the single position being handled, not the whole list.
            logger.info(
                "Restriction enzyme ({}) cut site detected at position {}.".
                format(str(enz), cut))
            # map sequence position to codon position
            # subtract 1 from `cut` to convert from sequence to string indexing
            codon_pos, offset = divmod((cut - 1) - (enz.size // 2), 3)

            # ensure the whole codon we mutate is being recognized by the restriction enzyme
            if offset:
                codon_pos += 1
            codon_idx = slice(codon_pos * 3, (codon_pos + 1) * 3)

            new_codon = mutate_codon(mutable_seq[codon_idx], codon_use_table)
            mutable_seq[codon_idx] = new_codon

    return mutable_seq.toseq()
Exemple #9
0
def findRestrictionSites(sequence, restr_batch):
    """Run a restriction analysis and return its full result.

    ``sequence`` is wrapped in a ``Seq`` with the ``IUPACAmbiguousDNA``
    alphabet and ``restr_batch`` in a ``RestrictionBatch``; the value
    returned is whatever ``Analysis.full()`` yields for that pair.
    """
    dna = Seq(sequence, IUPACAmbiguousDNA())
    return Analysis(RestrictionBatch(restr_batch), dna).full()
Exemple #10
0
def get_restriction_table(seq, enzyme, circular=False):
    """
    Get the restriction table for a single genomic sequence.

    Parameters
    ----------
    seq : Seq object
        A biopython Seq object representing a chromosomes or contig.
    enzyme : int, str or list of str
        The name of the restriction enzyme used, or a list of restriction
        enzyme names. Can also be an integer, to digest by fixed chunk size.
        Chunk sizes below DEFAULT_MIN_CHUNK_SIZE are raised to that value.
    circular : bool
        Whether the genome is circular.

    Returns
    -------
    numpy.array:
        List of restriction fragment boundary positions for the input sequence.

    Raises
    ------
    ValueError
        If `enzyme` is neither a valid restriction enzyme (or list of
        enzymes) nor convertible to an integer chunk size.

    >>> from Bio.Seq import Seq
    >>> get_restriction_table(Seq("AAGATCGATCGG"),"DpnII")
    array([ 0,  2,  6, 12])
    >>> get_restriction_table(Seq("AA"),["DpnII", "HinfI"])
    array([0, 2])
    >>> get_restriction_table(Seq("AA"),"aeiou1")
    Traceback (most recent call last):
        ...
    ValueError: aeiou1 is not a valid restriction enzyme.
    >>> get_restriction_table("AA","DpnII")
    Traceback (most recent call last):
        ...
    TypeError: Expected Seq or MutableSeq instance, got <class 'str'> instead

    """
    chrom_len = len(seq)
    # Restriction batch containing the restriction enzyme
    try:
        enz = [enzyme] if isinstance(enzyme, str) else enzyme
        cutter = RestrictionBatch(enz)
    except (TypeError, ValueError):
        # Not a known enzyme: fall back to interpreting it as a chunk size.
        try:
            cutter = max(int(enzyme), DEFAULT_MIN_CHUNK_SIZE)
        except (TypeError, ValueError):
            # int() raises TypeError (not ValueError) for lists or None, so
            # both must be caught to report a list of bad names consistently.
            raise ValueError(
                "{} is not a valid restriction enzyme.".format(enzyme)
            ) from None

    # Conversion from string type to restriction type
    if isinstance(cutter, int):
        sites = list(range(0, chrom_len, cutter))
        # `not sites` covers an empty sequence, where range() yields nothing
        # and indexing sites[-1] would raise IndexError.
        if not sites or sites[-1] < chrom_len:
            sites.append(chrom_len)
    else:
        # Find sites of all restriction enzymes given
        ana = Analysis(cutter, seq, linear=not circular)
        # Gets all sites into a single flat list with 0-based index,
        # sorted by position
        sites = sorted(
            site - 1 for cuts in ana.full().values() for site in cuts
        )
        # Add start and end of seq as the outer fragment boundaries
        sites.insert(0, 0)
        sites.append(chrom_len)

    return np.array(sites)