Exemple #1
0
 def test_analysis_restrictions(self):
     """Test Fancier restriction analysis
     """
     new_seq = Seq('TTCAAAAAAAAAAAAAAAAAAAAAAAAAAAAGAA',
                   IUPACAmbiguousDNA())
     rb = RestrictionBatch([EcoRI, KpnI, EcoRV])
     ana = Analysis(rb, new_seq, linear=False)
     self.assertEqual(
         ana.blunt(),
         {EcoRV: []})  # output only the result for enzymes which cut blunt
     self.assertEqual(ana.full(), {KpnI: [], EcoRV: [], EcoRI: [33]})
     self.assertEqual(
         ana.with_sites(),
         {EcoRI: [33]
          })  # output only the result for enzymes which have a site
     self.assertEqual(ana.without_site(), {
         KpnI: [],
         EcoRV: []
     })  # output only the enzymes which have no site
     self.assertEqual(ana.with_site_size([32]), {})
     self.assertEqual(ana.only_between(1, 20),
                      {})  # the enzymes which cut between position 1 and 20
     self.assertEqual(ana.only_between(20, 34), {EcoRI: [33]})  # etc...
     self.assertEqual(ana.only_between(34, 20),
                      {EcoRI: [33]})  # mix start end order
     self.assertEqual(ana.only_outside(20, 34), {})
     with self.assertWarns(BiopythonWarning):
         ana.with_name(['fake'])
     self.assertEqual(ana.with_name([EcoRI]), {EcoRI: [33]})
     self.assertEqual((ana._boundaries(1, 20)[:2]), (1, 20))
     self.assertEqual((ana._boundaries(20, 1)[:2]),
                      (1, 20))  # reverse order
     self.assertEqual((ana._boundaries(-1, 20)[:2]),
                      (20, 33))  # fix negative start
def enzyme_selector(sequence, restriction_interval, genome_frequency=False, deterministic_overhangs=False, rb=False):
	"""
	Usage Example:
		from utils import extract_feature
		sequence,_ = extract_feature(sequence_id="AJ627603", data_dir="/home/chymera/data2/gt.ep/sequences/", feature_names=["Cre", "cre", "CRE"])
		outp = enzyme_selector(sequence=sequence, restriction_interval=[0,690], genome_frequency=[700,2000], deterministic_overhangs=True)
		print outp
	"""


	from Bio.Restriction import Analysis, AllEnzymes, RestrictionBatch

	if not rb:
		basic_analysis = Analysis(AllEnzymes, sequence.seq)
	else:
		basic_analysis = Analysis(rb, sequence.seq)
	respect_target = basic_analysis.only_between(restriction_interval[0],restriction_interval[1])
	# print respect_target

	if genome_frequency:
		respect_frequency = respect_target
	 	for enzyme, item in respect_target.items():
			if enzyme.frequency() < genome_frequency[0] or enzyme.frequency() > genome_frequency[1]:
				del respect_frequency[enzyme]
			else:
				if deterministic_overhangs:
					from utils import overhangs
					if any(bp_ID in overhangs(enzyme) for bp_ID in ["N", "R", "Y", "!!!", "S", "W", "M", "K", "B", "D", "H", "V"]) or overhangs(enzyme) == "":
						del respect_frequency[enzyme]

	return respect_frequency
Exemple #3
0
 def test_analysis_restrictions(self):
     """Test Fancier restriction analysis."""
     new_seq = Seq('TTCAAAAAAAAAAAAAAAAAAAAAAAAAAAAGAA',
                   IUPACAmbiguousDNA())
     rb = RestrictionBatch([EcoRI, KpnI, EcoRV])
     ana = Analysis(rb, new_seq, linear=False)
     # Output only the result for enzymes which cut blunt:
     self.assertEqual(ana.blunt(), {EcoRV: []})
     self.assertEqual(ana.full(), {KpnI: [], EcoRV: [], EcoRI: [33]})
     # Output only the result for enzymes which have a site:
     self.assertEqual(ana.with_sites(), {EcoRI: [33]})
     # Output only the enzymes which have no site:
     self.assertEqual(ana.without_site(), {KpnI: [], EcoRV: []})
     self.assertEqual(ana.with_site_size([32]), {})
     # Output only enzymes which produce 5' overhangs
     self.assertEqual(ana.overhang5(), {EcoRI: [33]})
     # Output only enzymes which produce 3' overhangs
     self.assertEqual(ana.overhang3(), {KpnI: []})
     # Output only enzymes which produce defined ends
     self.assertEqual(ana.defined(), {KpnI: [], EcoRV: [], EcoRI: [33]})
     # Output only enzymes hich cut N times
     self.assertEqual(ana.with_N_sites(2), {})
     # The enzymes which cut between position x and y:
     with self.assertRaises(TypeError):
         ana.only_between('t', 20)
     with self.assertRaises(TypeError):
         ana.only_between(1, 't')
     self.assertEqual(ana.only_between(1, 20), {})
     self.assertEqual(ana.only_between(20, 34), {EcoRI: [33]})
     # Mix start/end order:
     self.assertEqual(ana.only_between(34, 20), {EcoRI: [33]})
     self.assertEqual(ana.only_outside(20, 34), {})
     with self.assertWarns(BiopythonWarning):
         ana.with_name(['fake'])
     self.assertEqual(ana.with_name([EcoRI]), {EcoRI: [33]})
     self.assertEqual((ana._boundaries(1, 20)[:2]), (1, 20))
     # Reverse order:
     self.assertEqual((ana._boundaries(20, 1)[:2]), (1, 20))
     # Fix negative start:
     self.assertEqual((ana._boundaries(-1, 20)[:2]), (20, 33))
     # Fix negative end:
     self.assertEqual((ana._boundaries(1, -1)[:2]), (1, 33))
     # Sites in- and outside of boundaries
     new_seq = Seq('GAATTCAAAAAAGAATTC', IUPACAmbiguousDNA())
     rb = RestrictionBatch([EcoRI])
     ana = Analysis(rb, new_seq)
     # Cut at least inside
     self.assertEqual(ana.between(1, 7), {EcoRI: [2, 14]})
     # Cut at least inside and report only inside site
     self.assertEqual(ana.show_only_between(1, 7), {EcoRI: [2]})
     # Cut at least outside
     self.assertEqual(ana.outside(1, 7), {EcoRI: [2, 14]})
     # Don't cut within
     self.assertEqual(ana.do_not_cut(7, 12), {EcoRI: [2, 14]})
 def test_analysis_restrictions(self):
     """Test Fancier restriction analysis."""
     new_seq = Seq('TTCAAAAAAAAAAAAAAAAAAAAAAAAAAAAGAA',
                   IUPACAmbiguousDNA())
     rb = RestrictionBatch([EcoRI, KpnI, EcoRV])
     ana = Analysis(rb, new_seq, linear=False)
     # Output only the result for enzymes which cut blunt:
     self.assertEqual(ana.blunt(), {EcoRV: []})
     self.assertEqual(ana.full(), {KpnI: [], EcoRV: [], EcoRI: [33]})
     # Output only the result for enzymes which have a site:
     self.assertEqual(ana.with_sites(), {EcoRI: [33]})
     # Output only the enzymes which have no site:
     self.assertEqual(ana.without_site(), {KpnI: [], EcoRV: []})
     self.assertEqual(ana.with_site_size([32]), {})
     # Output only enzymes which produce 5' overhangs
     self.assertEqual(ana.overhang5(), {EcoRI: [33]})
     # Output only enzymes which produce 3' overhangs
     self.assertEqual(ana.overhang3(), {KpnI: []})
     # Output only enzymes which produce defined ends
     self.assertEqual(ana.defined(), {KpnI: [], EcoRV: [], EcoRI: [33]})
     # Output only enzymes hich cut N times
     self.assertEqual(ana.with_N_sites(2), {})
     # The enzymes which cut between position x and y:
     with self.assertRaises(TypeError):
         ana.only_between('t', 20)
     with self.assertRaises(TypeError):
         ana.only_between(1, 't')
     self.assertEqual(ana.only_between(1, 20), {})
     self.assertEqual(ana.only_between(20, 34), {EcoRI: [33]})
     # Mix start/end order:
     self.assertEqual(ana.only_between(34, 20), {EcoRI: [33]})
     self.assertEqual(ana.only_outside(20, 34), {})
     with self.assertWarns(BiopythonWarning):
         ana.with_name(['fake'])
     self.assertEqual(ana.with_name([EcoRI]), {EcoRI: [33]})
     self.assertEqual((ana._boundaries(1, 20)[:2]), (1, 20))
     # Reverse order:
     self.assertEqual((ana._boundaries(20, 1)[:2]), (1, 20))
     # Fix negative start:
     self.assertEqual((ana._boundaries(-1, 20)[:2]), (20, 33))
     # Fix negative end:
     self.assertEqual((ana._boundaries(1, -1)[:2]), (1, 33))
     # Sites in- and outside of boundaries
     new_seq = Seq('GAATTCAAAAAAGAATTC', IUPACAmbiguousDNA())
     rb = RestrictionBatch([EcoRI])
     ana = Analysis(rb, new_seq)
     # Cut at least inside
     self.assertEqual(ana.between(1, 7), {EcoRI: [2, 14]})
     # Cut at least inside and report only inside site
     self.assertEqual(ana.show_only_between(1, 7), {EcoRI: [2]})
     # Cut at least outside
     self.assertEqual(ana.outside(1, 7), {EcoRI: [2, 14]})
     # Don't cut within
     self.assertEqual(ana.do_not_cut(7, 12), {EcoRI: [2, 14]})