Example no. 1
0
 def test_fragment_with_duplicate_in_pair_1(self):
     """The reducer must detect a fragment that duplicates pair[0].

     A complete pair and a fragment copy of its first read are added
     under the same key; only the pair may be emitted.
     """
     pair = list(test_utils.pair1())
     # the intact pair goes in first
     self.__ctx.add_value(test_utils.make_key(pair[0]), proto.serialize_pair(pair))
     # then the same pair with read 2 erased -- a fragment duplicate
     test_utils.erase_read2(pair)
     self.__ctx.add_value(test_utils.make_key(pair[0]), proto.serialize_pair(pair))
     self.__reducer.reduce(self.__ctx)
     # the pair must come through while the fragment is suppressed
     self.__ensure_only_pair1_emitted()
     self.assertEqual(1, len(self.__ctx.emitted.keys()))
     # two SAM records under the key -- one per read of the pair
     self.assertEqual(2, len(self.__ctx.emitted.values()[0]))
     # only the fragment-duplicate counter may have been incremented
     self.assertFalse(self.__ctx.counters.has_key(self.__pair_counter_name()))
     self.assertTrue(self.__ctx.counters.has_key(self.__frag_counter_name()))
     self.assertEqual(1, self.__ctx.counters[self.__frag_counter_name()])
Example no. 2
0
    def test_fragment_with_duplicate_in_pair_1_no_discard(self):
        """With discard_duplicates disabled, a fragment duplicate of
        pair[0] must still be emitted, but flagged as a duplicate.
        """
        p = list(test_utils.pair1())
        self.__ctx.add_value(test_utils.make_key(p[0]),
                             proto.serialize_pair(p))
        p = test_utils.erase_read2(p)
        self.__ctx.add_value(test_utils.make_key(p[0]),
                             proto.serialize_pair(p))
        self.__reducer.discard_duplicates = False
        self.__reducer.reduce(self.__ctx)
        # now ensure that both were emitted, but the fragment is marked as duplicate
        self.__ensure_pair1_emitted()
        self.assertEqual(1, len(self.__ctx.emitted.keys()))
        # 3 SAM records under the key: the pair (2) plus the fragment (1)
        self.assertEqual(3, len(self.__ctx.emitted.values()[0]))

        # make sure we have a read with the duplicate flag set.
        # Raw string keeps the \d and \s escapes literal for the regex engine.
        regexp = r"(\d+)\s+.*"
        flags = [int(re.match(regexp, value).group(1))
                 for value in self.__ctx.emitted.values()[0]]
        dup_flags = [flag for flag in flags if flag & sam_flags.SAM_FDP]
        self.assertEqual(1, len(dup_flags))
        f = dup_flags[0]
        self.assertTrue(f & sam_flags.SAM_FR1 > 0)   # ensure the duplicate read is r1
        self.assertTrue(f & sam_flags.SAM_FPD == 0)  # ensure the duplicate read is unpaired

        # check counters: only the fragment-duplicate counter is touched
        self.assertFalse(self.__ctx.counters.has_key(self.__pair_counter_name()))
        self.assertTrue(self.__ctx.counters.has_key(self.__frag_counter_name()))
        self.assertEqual(1, self.__ctx.counters[self.__frag_counter_name()])
Example no. 3
0
    def test_duplicate_fragments_read1_no_discard(self):
        """Two identical read-1 fragments with duplicates kept: both SAM
        records are emitted and exactly one carries the duplicate flag.
        """
        # load pair 1 and erase its second read
        p = list(test_utils.pair1())
        p = test_utils.erase_read2(p)
        p0 = p[0]
        # insert the fragment into the context, twice
        self.__ctx.add_value(test_utils.make_key(p[0]),
                             proto.serialize_pair(p))
        self.__ctx.add_value(test_utils.make_key(p[0]),
                             proto.serialize_pair(p))
        self.__reducer.discard_duplicates = False
        self.__reducer.reduce(self.__ctx)
        self.assertEqual(1, len(self.__ctx.emitted.keys()))
        # Two SAM records associated with the key
        self.assertEqual(2, len(self.__ctx.emitted.values()[0]))
        short_name = p0.get_name()[0:-2]
        self.assertEqual(short_name, self.__ctx.emitted.keys()[0])
        # Extract the leading flag field of each record.  Raw string plus
        # group(1) replaces the former int(*...groups(1)) contortion.
        flags = map(lambda sam: int(re.match(r"(\d+).*", sam).group(1)),
                    self.__ctx.emitted.values()[0])
        # ensure we have one marked as duplicate
        self.assertEqual(
            1, len(filter(lambda flag: flag & sam_flags.SAM_FDP, flags)))
        # and ensure we have one NOT marked as duplicate
        self.assertEqual(
            1, len(filter(lambda flag: flag & sam_flags.SAM_FDP == 0, flags)))

        # check counter
        self.assertFalse(
            self.__ctx.counters.has_key(self.__pair_counter_name()))
        self.assertTrue(self.__ctx.counters.has_key(
            self.__frag_counter_name()))
        self.assertEqual(1, self.__ctx.counters[self.__frag_counter_name()])
Example no. 4
0
 def test_duplicate_fragments_read1(self):
     """Two identical read-1 fragments with duplicate discarding on:
     only a single SAM record may be emitted.
     """
     # load pair 1 and keep only its first read
     p = list(test_utils.pair1())
     p = test_utils.erase_read2(p)
     p0 = p[0]
     # insert the fragment into the context, twice
     self.__ctx.add_value(test_utils.make_key(p[0]),
                          proto.serialize_pair(p))
     self.__ctx.add_value(test_utils.make_key(p[0]),
                          proto.serialize_pair(p))
     self.__reducer.reduce(self.__ctx)
     self.assertEqual(1, len(self.__ctx.emitted.keys()))
     # only one SAM record associated with the key
     self.assertEqual(1, len(self.__ctx.emitted.values()[0]))
     short_name = p0.get_name()[0:-2]
     self.assertEqual(short_name, self.__ctx.emitted.keys()[0])
     # Raw string keeps the \d and \s escapes literal for the regex engine.
     self.assertTrue(
         re.match(r"\d+\s+%s\s+%d\s+.*" % (p0.tid, p0.pos),
                  self.__ctx.emitted[short_name][0]))
     # check counter
     self.assertFalse(
         self.__ctx.counters.has_key(self.__pair_counter_name()))
     self.assertTrue(self.__ctx.counters.has_key(
         self.__frag_counter_name()))
     self.assertEqual(1, self.__ctx.counters[self.__frag_counter_name()])
Example no. 5
0
 def test_fragment_with_duplicate_in_pair_1(self):
     """A fragment that duplicates pair[0] must be suppressed while the
     complete pair is emitted unchanged.
     """
     pair = list(test_utils.pair1())
     # add the intact pair under its key
     self.__ctx.add_value(test_utils.make_key(pair[0]),
                          proto.serialize_pair(pair))
     # add the same pair reduced to a fragment (read 2 erased)
     test_utils.erase_read2(pair)
     self.__ctx.add_value(test_utils.make_key(pair[0]),
                          proto.serialize_pair(pair))
     self.__reducer.reduce(self.__ctx)
     # only the pair may come out the other side
     self.__ensure_only_pair1_emitted()
     self.assertEqual(1, len(self.__ctx.emitted.keys()))
     # two SAM records under the single key (one per read of the pair)
     self.assertEqual(2, len(self.__ctx.emitted.values()[0]))
     # the fragment counter, and only the fragment counter, was bumped
     self.assertFalse(
         self.__ctx.counters.has_key(self.__pair_counter_name()))
     self.assertTrue(
         self.__ctx.counters.has_key(self.__frag_counter_name()))
     self.assertEqual(1, self.__ctx.counters[self.__frag_counter_name()])
	def test_emit_forward_fragment2(self):
		"""A pair whose second read was erased is processed as a lone
		forward fragment keyed on pair[0].
		"""
		self.pair1 = test_utils.erase_read2(list(self.pair1))
		self.link.process(self.pair1)
		# exactly one key, holding a single serialized (read, None) pair
		self.assertEqual(1, len(self.ctx.emitted.keys()))
		expected_key = test_utils.make_key(self.pair1[0])
		self.assertEqual(1, len(self.ctx.emitted[expected_key]))
		restored = proto.unserialize_pair(self.ctx.emitted[expected_key][0])
		self.assertTrue(restored[1] is None)
		# the surviving read keeps its mapping coordinates
		self.assertEqual(self.pair1[0].tid, restored[0].tid)
		self.assertEqual(self.pair1[0].pos, restored[0].pos)
		self.assertTrue(self.ctx.counters.has_key("Test:MAPPED COORDINATES"))
		self.assertEqual(1, self.ctx.counters["Test:MAPPED COORDINATES"])
Example no. 7
0
 def test_emit_forward_fragment2(self):
     """Processing a pair with read 2 erased emits one forward fragment
     under pair[0]'s key, serialized as (read, None).
     """
     self.pair1 = test_utils.erase_read2(list(self.pair1))
     self.link.process(self.pair1)
     # a single key was emitted, holding exactly one value
     self.assertEqual(1, len(self.ctx.emitted.keys()))
     expected_key = test_utils.make_key(self.pair1[0])
     self.assertEqual(1, len(self.ctx.emitted[expected_key]))
     decoded = proto.unserialize_pair(self.ctx.emitted[expected_key][0])
     # second slot must be empty; first keeps its mapping coordinates
     self.assertTrue(decoded[1] is None)
     self.assertEqual(self.pair1[0].tid, decoded[0].tid)
     self.assertEqual(self.pair1[0].pos, decoded[0].pos)
     self.assertTrue(self.ctx.counters.has_key("Test:MAPPED COORDINATES"))
     self.assertEqual(1, self.ctx.counters["Test:MAPPED COORDINATES"])
Example no. 8
0
 def test_duplicate_fragments_read1(self):
     """Duplicate read-1 fragments with discarding enabled: exactly one
     SAM record survives.
     """
     # load pair 1 and drop its second read
     p = list(test_utils.pair1())
     p = test_utils.erase_read2(p)
     p0 = p[0]
     # insert the fragment into the context, twice
     self.__ctx.add_value(test_utils.make_key(p[0]), proto.serialize_pair(p))
     self.__ctx.add_value(test_utils.make_key(p[0]), proto.serialize_pair(p))
     self.__reducer.reduce(self.__ctx)
     self.assertEqual(1, len(self.__ctx.emitted.keys()))
     self.assertEqual(1, len(self.__ctx.emitted.values()[0])) # only one SAM record associated with the key
     short_name = p0.get_name()[0:-2]
     self.assertEqual(short_name, self.__ctx.emitted.keys()[0])
     # raw string so \d and \s reach the regex engine un-mangled
     self.assertTrue( re.match(r"\d+\s+%s\s+%d\s+.*" % (p0.tid, p0.pos), self.__ctx.emitted[short_name][0]) )
     # check counter
     self.assertFalse(self.__ctx.counters.has_key(self.__pair_counter_name()))
     self.assertTrue(self.__ctx.counters.has_key(self.__frag_counter_name()))
     self.assertEqual(1, self.__ctx.counters[self.__frag_counter_name()])
Example no. 9
0
    def test_fragment_with_duplicate_in_pair_1_no_discard(self):
        """When duplicates are kept, a fragment duplicate of pair[0] is
        emitted alongside the pair but carries the duplicate flag.
        """
        p = list(test_utils.pair1())
        self.__ctx.add_value(test_utils.make_key(p[0]),
                             proto.serialize_pair(p))
        p = test_utils.erase_read2(p)
        self.__ctx.add_value(test_utils.make_key(p[0]),
                             proto.serialize_pair(p))
        self.__reducer.discard_duplicates = False
        self.__reducer.reduce(self.__ctx)
        # now ensure that both were emitted, but the fragment is marked as duplicate
        self.__ensure_pair1_emitted()
        self.assertEqual(1, len(self.__ctx.emitted.keys()))
        # 3 SAM records associated with the key (pair + fragment)
        self.assertEqual(3, len(self.__ctx.emitted.values()[0]))

        # make sure we have a read with the duplicate flag set.
        # Raw string keeps the \d and \s escapes literal for the regex engine.
        regexp = r"(\d+)\s+.*"
        flags = [
            int(re.match(regexp, value).group(1))
            for value in self.__ctx.emitted.values()[0]
        ]
        dup_flags = [flag for flag in flags if flag & sam_flags.SAM_FDP]
        self.assertEqual(1, len(dup_flags))
        f = dup_flags[0]
        self.assertTrue(
            f & sam_flags.SAM_FR1 > 0)  # ensure the duplicate read is r1
        self.assertTrue(
            f & sam_flags.SAM_FPD == 0)  # ensure the duplicate read is unpaired

        # check counter
        self.assertFalse(
            self.__ctx.counters.has_key(self.__pair_counter_name()))
        self.assertTrue(self.__ctx.counters.has_key(
            self.__frag_counter_name()))
        self.assertEqual(1, self.__ctx.counters[self.__frag_counter_name()])
Example no. 10
0
    def test_duplicate_fragments_read1_no_discard(self):
        """Identical read-1 fragments, duplicates kept: both records come
        out and exactly one is flagged as a duplicate.
        """
        # load pair 1 and erase its second read
        p = list(test_utils.pair1())
        p = test_utils.erase_read2(p)
        p0 = p[0]
        # insert the fragment into the context, twice
        self.__ctx.add_value(test_utils.make_key(p[0]), proto.serialize_pair(p))
        self.__ctx.add_value(test_utils.make_key(p[0]), proto.serialize_pair(p))
        self.__reducer.discard_duplicates = False
        self.__reducer.reduce(self.__ctx)
        self.assertEqual(1, len(self.__ctx.emitted.keys()))
        self.assertEqual(2, len(self.__ctx.emitted.values()[0])) # Two SAM records associated with the key
        short_name = p0.get_name()[0:-2]
        self.assertEqual(short_name, self.__ctx.emitted.keys()[0])
        # Extract each record's leading flag field.  Raw string plus
        # group(1) replaces the former int(*...groups(1)) contortion.
        flags = map(lambda sam: int(re.match(r"(\d+).*", sam).group(1)), self.__ctx.emitted.values()[0])
        # ensure we have one marked as duplicate
        self.assertEqual(1, len(filter(lambda flag: flag & sam_flags.SAM_FDP, flags)) )
        # and ensure we have one NOT marked as duplicate
        self.assertEqual(1, len(filter(lambda flag: flag & sam_flags.SAM_FDP == 0, flags)) )

        # check counter
        self.assertFalse(self.__ctx.counters.has_key(self.__pair_counter_name()))
        self.assertTrue(self.__ctx.counters.has_key(self.__frag_counter_name()))
        self.assertEqual(1, self.__ctx.counters[self.__frag_counter_name()])