def test_analysis_restrictions(self):
    """Check the Analysis filtering methods on a non-linear sequence."""
    check = self.assertEqual

    dna = Seq('TTCAAAAAAAAAAAAAAAAAAAAAAAAAAAAGAA', IUPACAmbiguousDNA())
    batch = RestrictionBatch([EcoRI, KpnI, EcoRV])
    analysis = Analysis(batch, dna, linear=False)

    # Only the enzymes that cut blunt are reported.
    check(analysis.blunt(), {EcoRV: []})
    # Every enzyme of the batch, with or without a site.
    check(analysis.full(), {KpnI: [], EcoRV: [], EcoRI: [33]})
    # Only the enzymes that have a site.
    check(analysis.with_sites(), {EcoRI: [33]})
    # Only the enzymes that have no site.
    check(analysis.without_site(), {KpnI: [], EcoRV: []})
    check(analysis.with_site_size([32]), {})

    # Enzymes cutting between two positions; the order of the two
    # positions must not matter.
    check(analysis.only_between(1, 20), {})
    check(analysis.only_between(20, 34), {EcoRI: [33]})
    check(analysis.only_between(34, 20), {EcoRI: [33]})
    check(analysis.only_outside(20, 34), {})

    # An unknown enzyme name triggers a warning.
    with self.assertWarns(BiopythonWarning):
        analysis.with_name(['fake'])
    check(analysis.with_name([EcoRI]), {EcoRI: [33]})

    # _boundaries: ordered input, reversed input, negative start.
    boundary_cases = (
        ((1, 20), (1, 20)),
        ((20, 1), (1, 20)),
        ((-1, 20), (20, 33)),
    )
    for limits, expected in boundary_cases:
        check(analysis._boundaries(*limits)[:2], expected)
def enzyme_selector(sequence, restriction_interval, genome_frequency=False, deterministic_overhangs=False, rb=False):
    """Select the enzymes reported by ``Analysis.only_between`` for an interval.

    The sequence is analysed with ``Bio.Restriction.Analysis`` and the
    result of ``only_between(restriction_interval[0], restriction_interval[1])``
    is optionally narrowed down by two independent filters.

    Parameters:
        sequence: object exposing the sequence to analyse as ``sequence.seq``.
        restriction_interval: two-item sequence whose items are forwarded to
            ``Analysis.only_between``.
        genome_frequency: ``False`` (no filtering) or a two-item sequence
            ``[lowest, highest]``. An enzyme is kept only when
            ``lowest <= enzyme.frequency() <= highest``.
        deterministic_overhangs: when true, an enzyme is dropped if
            ``utils.overhangs(enzyme)`` is an empty string or contains any
            of the markers listed in ``ambiguous_markers`` below.
        rb: collection of enzymes to analyse; when falsy, ``AllEnzymes``
            is used.

    Returns:
        A new dict mapping each retained enzyme to the value that
        ``only_between`` reported for it.

    Usage Example:
        from utils import extract_feature
        sequence,_ = extract_feature(sequence_id="AJ627603", data_dir="/home/chymera/data2/gt.ep/sequences/", feature_names=["Cre", "cre", "CRE"])
        outp = enzyme_selector(sequence=sequence, restriction_interval=[0,690], genome_frequency=[700,2000], deterministic_overhangs=True)
        print(outp)
    """
    from Bio.Restriction import Analysis, AllEnzymes

    # Markers whose presence in an overhang means it is not fully defined.
    ambiguous_markers = ("N", "R", "Y", "!!!", "S", "W", "M", "K", "B", "D", "H", "V")

    if not rb:
        basic_analysis = Analysis(AllEnzymes, sequence.seq)
    else:
        basic_analysis = Analysis(rb, sequence.seq)
    respect_target = basic_analysis.only_between(restriction_interval[0], restriction_interval[1])

    if deterministic_overhangs:
        # Imported once, and only when the filter is actually requested.
        from utils import overhangs

    # Build a fresh dict instead of deleting from the one being iterated:
    # deleting during iteration raises RuntimeError on Python 3, and the
    # result is now defined even when no frequency bounds are given.
    selected = {}
    for enzyme, sites in respect_target.items():
        if genome_frequency:
            frequency = enzyme.frequency()
            if frequency < genome_frequency[0] or frequency > genome_frequency[1]:
                continue
        if deterministic_overhangs:
            overhang = overhangs(enzyme)
            if any(marker in overhang for marker in ambiguous_markers) or overhang == "":
                continue
        selected[enzyme] = sites
    return selected
def test_analysis_restrictions(self):
    """Check the Analysis filtering methods, including boundary handling."""
    check = self.assertEqual

    dna = Seq('TTCAAAAAAAAAAAAAAAAAAAAAAAAAAAAGAA', IUPACAmbiguousDNA())
    batch = RestrictionBatch([EcoRI, KpnI, EcoRV])
    analysis = Analysis(batch, dna, linear=False)

    # Only the enzymes that cut blunt are reported.
    check(analysis.blunt(), {EcoRV: []})
    check(analysis.full(), {KpnI: [], EcoRV: [], EcoRI: [33]})
    # Only the enzymes that have a site.
    check(analysis.with_sites(), {EcoRI: [33]})
    # Only the enzymes that have no site.
    check(analysis.without_site(), {KpnI: [], EcoRV: []})
    check(analysis.with_site_size([32]), {})
    # Only the enzymes that produce 5' overhangs.
    check(analysis.overhang5(), {EcoRI: [33]})
    # Only the enzymes that produce 3' overhangs.
    check(analysis.overhang3(), {KpnI: []})
    # Only the enzymes that produce defined ends.
    check(analysis.defined(), {KpnI: [], EcoRV: [], EcoRI: [33]})
    # Only the enzymes which cut N times.
    check(analysis.with_N_sites(2), {})

    # Enzymes cutting between position x and y: a non-numeric limit on
    # either side must be rejected with TypeError.
    for bad_limits in (('t', 20), (1, 't')):
        with self.assertRaises(TypeError):
            analysis.only_between(*bad_limits)
    check(analysis.only_between(1, 20), {})
    check(analysis.only_between(20, 34), {EcoRI: [33]})
    # Start and end given in mixed order.
    check(analysis.only_between(34, 20), {EcoRI: [33]})
    check(analysis.only_outside(20, 34), {})

    # An unknown enzyme name triggers a warning.
    with self.assertWarns(BiopythonWarning):
        analysis.with_name(['fake'])
    check(analysis.with_name([EcoRI]), {EcoRI: [33]})

    # _boundaries: ordered input, reversed input, negative start,
    # negative end.
    boundary_cases = (
        ((1, 20), (1, 20)),
        ((20, 1), (1, 20)),
        ((-1, 20), (20, 33)),
        ((1, -1), (1, 33)),
    )
    for limits, expected in boundary_cases:
        check(analysis._boundaries(*limits)[:2], expected)

    # Sites inside and outside of the boundaries.
    dna = Seq('GAATTCAAAAAAGAATTC', IUPACAmbiguousDNA())
    batch = RestrictionBatch([EcoRI])
    analysis = Analysis(batch, dna)
    # Cut at least inside.
    check(analysis.between(1, 7), {EcoRI: [2, 14]})
    # Cut at least inside and report only the inside site.
    check(analysis.show_only_between(1, 7), {EcoRI: [2]})
    # Cut at least outside.
    check(analysis.outside(1, 7), {EcoRI: [2, 14]})
    # Don't cut within.
    check(analysis.do_not_cut(7, 12), {EcoRI: [2, 14]})