def test_too_small_k(self):
    # Building the pigeonholed k-mer map must fail with
    # PigeonholeRequiresTooSmallKmerSizeError whenever the k-mer size
    # it would pick is smaller than the requested min_k.
    probes = [probe.Probe.from_str(seq)
              for seq in ('ABCDEFGHIJ', 'ZYXWVUTSRQ')]
    expected_error = probe.PigeonholeRequiresTooSmallKmerSizeError

    # Each case is (second positional argument, min_k); the second
    # positional argument is presumably the number of tolerated
    # mismatches -- confirm against the function's signature.
    cases = (
        (1, 6),  # Should pick k=5, but requires k=6
        (3, 3),  # Should pick k=2, but requires k=3
    )
    for mismatches, required_k in cases:
        with self.assertRaises(expected_error):
            probe._construct_pigeonholed_kmer_probe_map(
                probes, mismatches, min_k=required_k)
def test_pigeonholed_kmer_map(self):
    # Build a positional pigeonholed k-mer map for two probes, wrap it
    # in a SharedKmerProbeMap, and check that lookups through the
    # shared map return (probe sequence string, position) pairs.
    a = probe.Probe.from_str('ABCDEFGH')
    b = probe.Probe.from_str('ZYXWVUAB')
    kmer_map = probe._construct_pigeonholed_kmer_probe_map(
        [a, b], 3, min_k=2, include_positions=True)
    shared_kmer_map = probe.SharedKmerProbeMap.construct(kmer_map)

    # Should pick k=2
    a_str, b_str = a.seq_str, b.seq_str
    self.assertEqual(len(kmer_map), 7)

    # Expected hits per k-mer; 'AB' is the only k-mer present in both
    # probes (offset 0 in a, offset 6 in b).
    expected_hits = {
        'AB': [(a_str, 0), (b_str, 6)],
        'CD': [(a_str, 2)],
        'EF': [(a_str, 4)],
        'GH': [(a_str, 6)],
        'ZY': [(b_str, 0)],
        'XW': [(b_str, 2)],
        'VU': [(b_str, 4)],
    }
    for kmer, hits in expected_hits.items():
        self.assertCountEqual(shared_kmer_map.get(kmer), hits)

    # A k-mer that occurs in neither probe yields None.
    self.assertIsNone(shared_kmer_map.get('MN'))
    self.assertEqual(shared_kmer_map.k, 2)
def test_shared_kmer(self):
    # Two probes that share the k-mer 'ABCDE' must both be listed
    # under that key, while their other k-mers map to one probe each.
    a = probe.Probe.from_str('ABCDEFGHIJ')
    b = probe.Probe.from_str('ZYXWVABCDE')
    kmer_map = probe._construct_pigeonholed_kmer_probe_map(
        [a, b], 1, min_k=2)

    # Should pick k=5
    self.assertEqual(len(kmer_map), 3)
    expected_probes = {
        'ABCDE': [a, b],
        'FGHIJ': [a],
        'ZYXWV': [b],
    }
    for kmer, owners in expected_probes.items():
        self.assertCountEqual(kmer_map[kmer], owners)
def test_no_mismatches(self):
    # With 0 passed as the second argument (no mismatches, per the
    # test name), each probe should be indexed under its own full
    # sequence and not under the other probe's sequence.
    a = probe.Probe.from_str('ABCDEFGHIJ')
    b = probe.Probe.from_str('ZYXWVUTSRQ')
    probes = [a, b]
    kmer_map = probe._construct_pigeonholed_kmer_probe_map(
        probes, 0, min_k=5)

    # k-mers equal to the full length of the probe should be
    # chosen
    # assertIn/assertNotIn are used instead of assertTrue/assertFalse
    # on an `in` expression so a failure reports the member and the
    # container rather than just "False is not true".
    self.assertIn(a, kmer_map[a.seq_str])
    self.assertIn(b, kmer_map[b.seq_str])
    self.assertNotIn(a, kmer_map[b.seq_str])
    self.assertNotIn(b, kmer_map[a.seq_str])
def test_positions(self):
    # With include_positions=True, each k-mer maps to (probe, offset)
    # pairs rather than to bare probes.
    a = probe.Probe.from_str('ABCDEFGH')
    b = probe.Probe.from_str('ZYXWVUAB')
    kmer_map = probe._construct_pigeonholed_kmer_probe_map(
        [a, b], 3, min_k=2, include_positions=True)

    # Should pick k=2
    self.assertEqual(len(kmer_map), 7)

    # 'AB' is the only k-mer present in both probes (offset 0 in a,
    # offset 6 in b); every other k-mer belongs to a single probe.
    expected_entries = {
        'AB': [(a, 0), (b, 6)],
        'CD': [(a, 2)],
        'EF': [(a, 4)],
        'GH': [(a, 6)],
        'ZY': [(b, 0)],
        'XW': [(b, 2)],
        'VU': [(b, 4)],
    }
    for kmer, entries in expected_entries.items():
        self.assertCountEqual(kmer_map[kmer], entries)