def remove_restriction_sites(dna_sequence, restrict_sites):
    """Mutate codons that overlap unwanted restriction enzyme cut sites.

    Every cut position reported by the restriction analysis is mapped to a
    codon, and that codon is replaced with whatever ``mutate_codon`` returns.

    NOTE(review): ``codon_use_table`` is not a parameter of this function; it
    is looked up as a free (presumably module-level) name -- confirm it is
    defined before this function is called.

    Args:
        dna_sequence: Sequence object providing ``tomutable()``; it is passed
            to ``Analysis`` as ``sequence``.
        restrict_sites: Enzymes to look for; passed to ``Analysis`` as
            ``restrictionbatch``.

    Returns:
        The result of ``toseq()`` on the mutated copy of ``dna_sequence``.
    """
    logger.info("===== REMOVE RESTRICTION SITES =====")

    # check each unwanted restriction site
    analysis = Analysis(restrictionbatch=restrict_sites, sequence=dna_sequence)
    result = analysis.full()

    mutable_seq = dna_sequence.tomutable()
    for enz, cuts in result.items():
        for cut in cuts:
            # Report the individual cut being handled, not the whole list.
            logger.info(
                "The restriction enzyme {0} can cut the sequence before position {1}!"
                .format(str(enz), cut))

            # map sequence position to codon position
            # subtract 1 from `cut` to convert from sequence to string indexing
            codon_pos, offset = divmod((cut - 1) - (enz.size // 2), 3)

            # ensure the whole codon we mutate is being recognized by the
            # restriction enzyme: if the computed start is not codon-aligned,
            # move on to the next full codon
            if offset:
                codon_pos += 1
            codon_idx = slice(codon_pos * 3, (codon_pos + 1) * 3)

            new_codon = mutate_codon(mutable_seq[codon_idx], codon_use_table)
            mutable_seq[codon_idx] = new_codon

    return mutable_seq.toseq()
def test_change(self):
    """Verify that Analysis.change() swaps in a new sequence and batch."""
    # Sequence with two BamHI sites followed by one EcoRV site.
    seq = Seq(
        'CCAGTCTATAATTCG' + BamHI.site
        + 'GCGGCATCATACTCGA' + BamHI.site
        + 'ATATCGCGTGATGATA' + EcoRV.site
        + 'CGTAGTAATTACGCATG'
    )
    batch = NdeI + EcoRI + BamHI + BsmBI
    analysis = Analysis(batch, seq)
    bamhi_cuts = analysis.full()[BamHI]
    self.assertEqual(bamhi_cuts, [17, 39])

    # Drop BamHI from the batch and append an NdeI site to the sequence,
    # then push both changes into the existing Analysis object.
    batch = NdeI + EcoRI + BsmBI
    seq += NdeI.site
    analysis.change(sequence=seq)
    analysis.change(rb=batch)
    self.assertEqual(len(analysis.full()), 3)
    self.assertEqual(analysis.full()[NdeI], [85])

    # 'KonsoleWidth' is presumably a deliberate misspelling of "Console";
    # the call is expected to be rejected with AttributeError.
    bad_options = {'NameWidth': 3, 'KonsoleWidth': 40}
    with self.assertRaises(AttributeError):
        analysis.change(**bad_options)
def test_analysis_restrictions(self):
    """Exercise the result-filtering helpers of Analysis (non-linear mode)."""
    dna = Seq('TTCAAAAAAAAAAAAAAAAAAAAAAAAAAAAGAA', IUPACAmbiguousDNA())
    enzymes = RestrictionBatch([EcoRI, KpnI, EcoRV])
    ana = Analysis(enzymes, dna, linear=False)

    # Expected value shared by several assertions below.
    ecori_hit = {EcoRI: [33]}

    # Only enzymes which cut blunt.
    self.assertEqual(ana.blunt(), {EcoRV: []})
    # Everything in the batch.
    self.assertEqual(ana.full(), {KpnI: [], EcoRV: [], EcoRI: [33]})
    # Only enzymes which have a site.
    self.assertEqual(ana.with_sites(), ecori_hit)
    # Only enzymes which have no site.
    self.assertEqual(ana.without_site(), {KpnI: [], EcoRV: []})
    self.assertEqual(ana.with_site_size([32]), {})

    # Enzymes cutting between two positions; the last case passes the
    # bounds in swapped order.
    between_cases = (
        (1, 20, {}),
        (20, 34, ecori_hit),
        (34, 20, ecori_hit),
    )
    for start, end, expected in between_cases:
        self.assertEqual(ana.only_between(start, end), expected)
    self.assertEqual(ana.only_outside(20, 34), {})

    # An unknown name must trigger a warning; a known enzyme is returned.
    with self.assertWarns(BiopythonWarning):
        ana.with_name(['fake'])
    self.assertEqual(ana.with_name([EcoRI]), ecori_hit)

    # Boundary normalisation: in order, reversed order, negative start.
    boundary_cases = (
        ((1, 20), (1, 20)),
        ((20, 1), (1, 20)),
        ((-1, 20), (20, 33)),
    )
    for bounds, expected in boundary_cases:
        self.assertEqual((ana._boundaries(*bounds)[:2]), expected)
def test_analysis_restrictions(self):
    """Exercise the result-filtering helpers of Analysis."""
    dna = Seq('TTCAAAAAAAAAAAAAAAAAAAAAAAAAAAAGAA', IUPACAmbiguousDNA())
    enzymes = RestrictionBatch([EcoRI, KpnI, EcoRV])
    ana = Analysis(enzymes, dna, linear=False)

    # Expected values shared by several assertions below.
    ecori_hit = {EcoRI: [33]}
    all_enzymes = {KpnI: [], EcoRV: [], EcoRI: [33]}

    # Only enzymes which cut blunt.
    self.assertEqual(ana.blunt(), {EcoRV: []})
    # Everything in the batch.
    self.assertEqual(ana.full(), all_enzymes)
    # Only enzymes which have a site.
    self.assertEqual(ana.with_sites(), ecori_hit)
    # Only enzymes which have no site.
    self.assertEqual(ana.without_site(), {KpnI: [], EcoRV: []})
    self.assertEqual(ana.with_site_size([32]), {})
    # Only enzymes which produce 5' overhangs.
    self.assertEqual(ana.overhang5(), ecori_hit)
    # Only enzymes which produce 3' overhangs.
    self.assertEqual(ana.overhang3(), {KpnI: []})
    # Only enzymes which produce defined ends.
    self.assertEqual(ana.defined(), all_enzymes)
    # Only enzymes which cut N times.
    self.assertEqual(ana.with_N_sites(2), {})

    # Non-numeric bounds must be rejected, whichever side they are on.
    for bad_start, bad_end in (('t', 20), (1, 't')):
        with self.assertRaises(TypeError):
            ana.only_between(bad_start, bad_end)

    # Enzymes cutting between two positions; the last case passes the
    # bounds in swapped order.
    between_cases = (
        (1, 20, {}),
        (20, 34, ecori_hit),
        (34, 20, ecori_hit),
    )
    for start, end, expected in between_cases:
        self.assertEqual(ana.only_between(start, end), expected)
    self.assertEqual(ana.only_outside(20, 34), {})

    # An unknown name must trigger a warning; a known enzyme is returned.
    with self.assertWarns(BiopythonWarning):
        ana.with_name(['fake'])
    self.assertEqual(ana.with_name([EcoRI]), ecori_hit)

    # Boundary normalisation: in order, reversed order, negative start,
    # negative end.
    boundary_cases = (
        ((1, 20), (1, 20)),
        ((20, 1), (1, 20)),
        ((-1, 20), (20, 33)),
        ((1, -1), (1, 33)),
    )
    for bounds, expected in boundary_cases:
        self.assertEqual((ana._boundaries(*bounds)[:2]), expected)

    # Sites in- and outside of boundaries, on a fresh sequence with two
    # EcoRI sites.
    dna = Seq('GAATTCAAAAAAGAATTC', IUPACAmbiguousDNA())
    enzymes = RestrictionBatch([EcoRI])
    ana = Analysis(enzymes, dna)
    both_cuts = {EcoRI: [2, 14]}

    # Cut at least inside.
    self.assertEqual(ana.between(1, 7), both_cuts)
    # Cut at least inside and report only the inside site.
    self.assertEqual(ana.show_only_between(1, 7), {EcoRI: [2]})
    # Cut at least outside.
    self.assertEqual(ana.outside(1, 7), both_cuts)
    # Don't cut within.
    self.assertEqual(ana.do_not_cut(7, 12), both_cuts)
def eval_restriction_sites(individual, restrict_sites):
    """Score an individual by the number of restriction cut sites it has.

    TODO: Make it remove rest sites

    Args:
        individual (SequenceContainer): Candidate whose ``sequence``
            attribute is analysed.
        restrict_sites: Enzymes to look for; passed to ``Analysis`` as
            ``restrictionbatch``.

    Returns:
        int: Total number of cut positions reported across all enzymes.

    Raises:
        AssertionError: If ``individual`` is not a ``SequenceContainer``
            instance (only when assertions are enabled).
    """
    # An identity comparison against the class would reject every real
    # instance, so the type is checked with isinstance().
    assert isinstance(individual, SequenceContainer), (
        "individual must be a SequenceContainer instance")
    sequence = individual.sequence

    # check unwanted restriction sites
    analysis = Analysis(restrictionbatch=restrict_sites, sequence=sequence)
    result = analysis.full()

    # score the sequence based on the number of restriction sites
    return sum(len(cuts) for cuts in result.values())
def remove_restriction_sites(dna_sequence, codon_use_table, restrict_sites):
    """Identify and remove sequences recognized by a set of restriction enzymes.

    Every cut position reported by the restriction analysis is mapped to a
    codon, and that codon is replaced with whatever ``mutate_codon`` returns
    for it.

    Args:
        dna_sequence (Bio.Seq.Seq): A read-only representation of
            the DNA sequence.
        codon_use_table (dict{str, list[list, list]}): A dictionary with
            each amino acid three-letter code as keys, and a list of two
            lists as values. The first list is the synonymous codons that
            encode the amino acid, the second is the frequency with which
            each synonymous codon is used.
        restrict_sites (Bio.Restriction.RestrictionBatch): RestrictionBatch
            instance configured with the input restriction enzymes.

    Returns:
        Bio.Seq.Seq: A read-only representation of the new DNA sequence.
    """
    logger.info("Removing restriction sites")

    # check each unwanted restriction site
    analysis = Analysis(restrictionbatch=restrict_sites, sequence=dna_sequence)
    result = analysis.full()

    mutable_seq = dna_sequence.tomutable()
    for enz, cuts in result.items():
        for cut in cuts:
            # Report the individual cut being handled, not the whole list.
            logger.info(
                "Restriction enzyme ({}) cut site detected at position {}.".
                format(str(enz), cut))

            # map sequence position to codon position
            # subtract 1 from `cut` to convert from sequence to string indexing
            codon_pos, offset = divmod((cut - 1) - (enz.size // 2), 3)

            # ensure the whole codon we mutate is being recognized by the
            # restriction enzyme: if the computed start is not codon-aligned,
            # move on to the next full codon
            if offset:
                codon_pos += 1
            codon_idx = slice(codon_pos * 3, (codon_pos + 1) * 3)

            new_codon = mutate_codon(mutable_seq[codon_idx], codon_use_table)
            mutable_seq[codon_idx] = new_codon

    return mutable_seq.toseq()
def findRestrictionSites(sequence, restr_batch):
    """Run a full restriction analysis of ``sequence``.

    ``sequence`` is wrapped in a ``Seq`` using the ``IUPACAmbiguousDNA``
    alphabet and ``restr_batch`` in a ``RestrictionBatch``; the value
    returned is the result of ``Analysis.full()`` for that pair.
    """
    dna = Seq(sequence, IUPACAmbiguousDNA())
    return Analysis(RestrictionBatch(restr_batch), dna).full()
def get_restriction_table(seq, enzyme, circular=False):
    """
    Get the restriction table for a single genomic sequence.

    Parameters
    ----------
    seq : Seq object
        A biopython Seq object representing a chromosome or contig.
    enzyme : int, str or list of str
        The name of the restriction enzyme used, or a list of restriction
        enzyme names. Can also be an integer, to digest by fixed chunk size.
        Chunk sizes below DEFAULT_MIN_CHUNK_SIZE are raised to that value.
    circular : bool
        Whether the genome is circular.

    Returns
    -------
    numpy.array:
        List of restriction fragment boundary positions for the input
        sequence. Positions are 0-based and include the start (0) and the
        end (sequence length). In enzyme mode, positions are not
        deduplicated.

    Raises
    ------
    ValueError
        If ``enzyme`` is neither a valid enzyme name (or list of names) nor
        convertible to an integer chunk size.

    >>> from Bio.Seq import Seq
    >>> get_restriction_table(Seq("AAGATCGATCGG"),"DpnII")
    array([ 0,  2,  6, 12])
    >>> get_restriction_table(Seq("AA"),["DpnII", "HinfI"])
    array([0, 2])
    >>> get_restriction_table(Seq("AA"),"aeiou1")
    Traceback (most recent call last):
        ...
    ValueError: aeiou1 is not a valid restriction enzyme.
    >>> get_restriction_table("AA","DpnII")
    Traceback (most recent call last):
        ...
    TypeError: Expected Seq or MutableSeq instance, got <class 'str'> instead
    """
    chrom_len = len(seq)
    wrong_enzyme = "{} is not a valid restriction enzyme.".format(enzyme)
    # Restriction batch containing the restriction enzyme
    try:
        enz = [enzyme] if isinstance(enzyme, str) else enzyme
        cutter = RestrictionBatch(enz)
    except (TypeError, ValueError):
        # Not usable as enzyme name(s): fall back to a fixed chunk size.
        try:
            cutter = max(int(enzyme), DEFAULT_MIN_CHUNK_SIZE)
        except (TypeError, ValueError) as err:
            # int() raises TypeError for lists/None and ValueError for
            # non-numeric strings; both mean the input is unusable.
            raise ValueError(wrong_enzyme) from err

    # Conversion from string type to restriction type
    if isinstance(cutter, int):
        # range() never yields chrom_len itself, so the end boundary is
        # always appended; this also keeps an empty sequence from failing.
        sites = list(range(0, chrom_len, cutter))
        sites.append(chrom_len)
    else:
        # Find sites of all restriction enzymes given
        ana = Analysis(cutter, seq, linear=not circular)
        sites = ana.full()
        # Gets all sites into a single flat list with 0-based index
        sites = [site - 1 for enz in sites.values() for site in enz]
        # Sort by position, then add start and end of seq
        sites.sort()
        sites.insert(0, 0)
        sites.append(chrom_len)

    return np.array(sites)