コード例 #1
0
    def test_rmdup_bug(self):
        """Regression test: two duplicate pairs must share the same keys.

        Both pairs have one read at position 6181919 (101M).  Their other
        reads start at different positions (6181935 with 5S96M, 6181938
        with 8S93M), yet both pairs are expected under the same two keys.

        After the first pair exactly two keys must have been emitted; after
        the second pair each of those keys must hold two values.  Values
        under the forward key are serialized pairs whose reads are ordered
        by position; values under the reverse key are ``PAIR_STRING``.
        """
        test_case_data = [
            "HWI-ST200R_251:5:1208:19924:124635#GCCAAT\t83\t20\t6181935\t60\t5S96M\t=\t6181919\t-112\tAAGTGGAAGATTTGGGAATCTGAGTGGATTTGGTAACAGTAGAGGGGTGGATCTGGCTTGGAAAACAATCGAGGTACCAATATAGGTGGTAGATGAATTTT\t?<?AADADBFBF<EHIGHGGGEAF3AF<CHGGDG9?GHFFACDHH)?@AHEHHIIIIE>A=A:?);B27@;@?>,;;C(5::>>>@5:()4>@@@######\tXC:i:96\tXT:A:U\tNM:i:1\tSM:Z:raw_merged-1.2.3.4.5.6.7.8.bam\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:13G82\tRG:Z:raw_merged-1.2.3.4.5.6.7.8.bam\tOQ:Z:######@@@>4)(:5@>>>::5(C;;,>?@;@72B;)?:A=A>EIIIIHHEHA@?)HHDCAFFHG?9GDGGHC<FA3FAEGGGHGIHE<FBFBDADAA?<?",
            "HWI-ST200R_251:5:1208:19924:124635#GCCAAT\t163\t20\t6181919\t60\t101M\t=\t6181935\t112\tCTGAGCACACCAAAATTCATCTACCACCTATATTGGTACCTCGATTGTTTTCCAAGCCAGATCCACACCTCTACTGTTACCAAATCCACTCAGATTCCCAA\t@@@FFFDDFHG??;EEH>HHGIGHEGCGEGGIGJG31?DDBBD>FGG@HG??DFBBADFAGII3@EH;;CEHECBB7?>CE.;...5>ACDDA:C:;>:>?\tXT:A:U\tNM:i:2\tSM:Z:raw_merged-1.2.3.4.5.6.7.8.bam\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:29G36C34\tRG:Z:raw_merged-1.2.3.4.5.6.7.8.bam\tOQ:Z:@@@FFFDDFHG??;EEH>HHGIGHEGCGEGGIGJG31?DDBBD>FGG@HG??DFBBADFAGII3@EH;;CEHECBB7?>CE.;...5>ACDDA:C:;>:>?",
            "HWI-ST200R_251:6:2207:18561:163438#GCCAAT\t83\t20\t6181938\t60\t8S93M\t=\t6181919\t-112\tAAAATTCATCTACCACCTATATTGGTACCTCGATTGTTTTCCAAGCCAGATCCACCCCTCTACTGTTACCAAATCCACTCAGATTCCCAAATCTTCCACTT\t@@@DDFDFHHHHHJJJEHGGHIHHAEGHJJIJJFGGHGIDIGIJJ?BBGGGIIIJJIJGFHGIJEC(=3?C;?B9?@C>CECECAA(;;3>C#########\tXC:i:93\tXT:A:U\tNM:i:2\tSM:Z:raw_merged-1.2.3.4.5.6.7.8.bam\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:10G36C45\tRG:Z:raw_merged-1.2.3.4.5.6.7.8.bam\tOQ:Z:#########C>3;;(AACECEC>C@?9B?;C?3=(CEJIGHFGJIJJIIIGGGBB?JJIGIDIGHGGFJJIJJHGEAHHIHGGHEJJJHHHHHFDFDD@@@",
            "HWI-ST200R_251:6:2207:18561:163438#GCCAAT\t163\t20\t6181919\t60\t101M\t=\t6181938\t112\tCTGAGCACACCAAAATTCATCTACCACCTATATTGGTACCTCGATTGTTTTCCAAGCCAGATCCACACCTCTACTGTTACCAAATCCACTCAGATTCCCAA\t@CCFFDDFHHHHHIJJJIIJJJIJJIIJGJIIIJII?DGHIGHDGHIIIJIJIJIIDCHGIJIIGGHIFEHHHHFFFFFDC.6.66;@CCCDCCDC>CCCA\tXT:A:U\tNM:i:2\tSM:Z:raw_merged-1.2.3.4.5.6.7.8.bam\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:29G36C34\tRG:Z:raw_merged-1.2.3.4.5.6.7.8.bam\tOQ:Z:@CCFFDDFHHHHHIJJJIIJJJIJJIIJGJIIIJII?DGHIGHDGHIIIJIJIJIIDCHGIJIIGGHIFEHHHHFFFFFDC.6.66;@CCCDCCDC>CCCA",
        ]
        # Build a real list: a lazy ``map`` object cannot be sliced.
        sams = [SAMMapping(line) for line in test_case_data]
        pair1 = sams[0:2]
        pair2 = sams[2:]
        forward_key = "0020:000006181919:F"
        reverse_key = "0020:000006182030:R"

        self.link.process(pair1)

        self.assertEqual(2, len(self.ctx.emitted.keys()))
        key_list = sorted(self.ctx.emitted.keys())
        self.assertEqual(forward_key, key_list[0])
        self.assertEqual(reverse_key, key_list[1])

        self.link.process(pair2)

        # The duplicate pair must not introduce any new key ...
        self.assertEqual(2, len(self.ctx.emitted.keys()))
        # ... and each key should have two pairs at its position.
        for value_list in self.ctx.emitted.values():
            self.assertEqual(2, len(value_list))

        for value in self.ctx.emitted[forward_key]:
            unserialized = proto.unserialize_pair(value)
            self.assertTrue(unserialized[0].pos < unserialized[1].pos)
        for value in self.ctx.emitted[reverse_key]:
            self.assertEqual(PAIR_STRING, value)
コード例 #2
0
	def test_rmdup_bug(self):
		"""Regression test: two duplicate pairs must share the same keys.

		Both pairs have one read at position 6181919 (101M).  Their other
		reads start at different positions (6181935 with 5S96M, 6181938
		with 8S93M), yet both pairs are expected under the same two keys.

		After the first pair exactly two keys must have been emitted; after
		the second pair each of those keys must hold two values.  Values
		under the forward key are serialized pairs whose reads are ordered
		by position; values under the reverse key are ``PAIR_STRING``.
		"""
		test_case_data = [
			"HWI-ST200R_251:5:1208:19924:124635#GCCAAT\t83\t20\t6181935\t60\t5S96M\t=\t6181919\t-112\tAAGTGGAAGATTTGGGAATCTGAGTGGATTTGGTAACAGTAGAGGGGTGGATCTGGCTTGGAAAACAATCGAGGTACCAATATAGGTGGTAGATGAATTTT\t?<?AADADBFBF<EHIGHGGGEAF3AF<CHGGDG9?GHFFACDHH)?@AHEHHIIIIE>A=A:?);B27@;@?>,;;C(5::>>>@5:()4>@@@######\tXC:i:96\tXT:A:U\tNM:i:1\tSM:Z:raw_merged-1.2.3.4.5.6.7.8.bam\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:1\tXO:i:0\tXG:i:0\tMD:Z:13G82\tRG:Z:raw_merged-1.2.3.4.5.6.7.8.bam\tOQ:Z:######@@@>4)(:5@>>>::5(C;;,>?@;@72B;)?:A=A>EIIIIHHEHA@?)HHDCAFFHG?9GDGGHC<FA3FAEGGGHGIHE<FBFBDADAA?<?",
			"HWI-ST200R_251:5:1208:19924:124635#GCCAAT\t163\t20\t6181919\t60\t101M\t=\t6181935\t112\tCTGAGCACACCAAAATTCATCTACCACCTATATTGGTACCTCGATTGTTTTCCAAGCCAGATCCACACCTCTACTGTTACCAAATCCACTCAGATTCCCAA\t@@@FFFDDFHG??;EEH>HHGIGHEGCGEGGIGJG31?DDBBD>FGG@HG??DFBBADFAGII3@EH;;CEHECBB7?>CE.;...5>ACDDA:C:;>:>?\tXT:A:U\tNM:i:2\tSM:Z:raw_merged-1.2.3.4.5.6.7.8.bam\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:29G36C34\tRG:Z:raw_merged-1.2.3.4.5.6.7.8.bam\tOQ:Z:@@@FFFDDFHG??;EEH>HHGIGHEGCGEGGIGJG31?DDBBD>FGG@HG??DFBBADFAGII3@EH;;CEHECBB7?>CE.;...5>ACDDA:C:;>:>?",
			"HWI-ST200R_251:6:2207:18561:163438#GCCAAT\t83\t20\t6181938\t60\t8S93M\t=\t6181919\t-112\tAAAATTCATCTACCACCTATATTGGTACCTCGATTGTTTTCCAAGCCAGATCCACCCCTCTACTGTTACCAAATCCACTCAGATTCCCAAATCTTCCACTT\t@@@DDFDFHHHHHJJJEHGGHIHHAEGHJJIJJFGGHGIDIGIJJ?BBGGGIIIJJIJGFHGIJEC(=3?C;?B9?@C>CECECAA(;;3>C#########\tXC:i:93\tXT:A:U\tNM:i:2\tSM:Z:raw_merged-1.2.3.4.5.6.7.8.bam\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:10G36C45\tRG:Z:raw_merged-1.2.3.4.5.6.7.8.bam\tOQ:Z:#########C>3;;(AACECEC>C@?9B?;C?3=(CEJIGHFGJIJJIIIGGGBB?JJIGIDIGHGGFJJIJJHGEAHHIHGGHEJJJHHHHHFDFDD@@@",
			"HWI-ST200R_251:6:2207:18561:163438#GCCAAT\t163\t20\t6181919\t60\t101M\t=\t6181938\t112\tCTGAGCACACCAAAATTCATCTACCACCTATATTGGTACCTCGATTGTTTTCCAAGCCAGATCCACACCTCTACTGTTACCAAATCCACTCAGATTCCCAA\t@CCFFDDFHHHHHIJJJIIJJJIJJIIJGJIIIJII?DGHIGHDGHIIIJIJIJIIDCHGIJIIGGHIFEHHHHFFFFFDC.6.66;@CCCDCCDC>CCCA\tXT:A:U\tNM:i:2\tSM:Z:raw_merged-1.2.3.4.5.6.7.8.bam\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:2\tXO:i:0\tXG:i:0\tMD:Z:29G36C34\tRG:Z:raw_merged-1.2.3.4.5.6.7.8.bam\tOQ:Z:@CCFFDDFHHHHHIJJJIIJJJIJJIIJGJIIIJII?DGHIGHDGHIIIJIJIJIIDCHGIJIIGGHIFEHHHHFFFFFDC.6.66;@CCCDCCDC>CCCA",
		]
		# Build a real list: a lazy ``map`` object cannot be sliced.
		sams = [SAMMapping(line) for line in test_case_data]
		pair1 = sams[0:2]
		pair2 = sams[2:]
		forward_key = "0020:000006181919:F"
		reverse_key = "0020:000006182030:R"

		self.link.process(pair1)

		self.assertEqual(2, len(self.ctx.emitted.keys()))
		key_list = sorted(self.ctx.emitted.keys())
		self.assertEqual(forward_key, key_list[0])
		self.assertEqual(reverse_key, key_list[1])

		self.link.process(pair2)

		# The duplicate pair must not introduce any new key ...
		self.assertEqual(2, len(self.ctx.emitted.keys()))
		# ... and each key should have two pairs at its position.
		for value_list in self.ctx.emitted.values():
			self.assertEqual(2, len(value_list))

		for value in self.ctx.emitted[forward_key]:
			unserialized = proto.unserialize_pair(value)
			self.assertTrue(unserialized[0].pos < unserialized[1].pos)
		for value in self.ctx.emitted[reverse_key]:
			self.assertEqual(PAIR_STRING, value)
コード例 #3
0
    def test_fw_rev_with_indels(self):
        """
        Here we have two duplicate pairs.  Read 1 in both pairs is positioned
        at the same location (15609040).  For both pairs, read 2 is mapped on
        the reverse strand.  In the first pair read 2 is mapped with an
        insertion (37M1I63M), so its leftmost position (15609197) is shifted
        by one with respect to read 2 of the second pair (101M at 15609196).
        Yet, the read end is in the same location, so both pairs are expected
        under the same forward and reverse keys.
        """
        test_case_data = [
            "HWI-ST200R_251:7:2207:3236:93050#CGATGT\t99\t7\t15609040\t60\t101M\t=\t15609197\t257\tCTAGCTTGTAACAATTGCTATAACTCCCCCACTTTGGATGGTAAATTTCTCCTCAGCTGTCATTGGCCCTCAAAGCCAAAATGACTCCAATTAGAATGTAT\tCCCFFFFFHHHHHJJJJJJJJJIIJJJJJJJIJJJJJJJJJFIJJJJJJJJJJJIJJIJGIGIJJJJJJJJJ>EHHEFFFFFEEDEDEDDCDDDACCA:CD",
            "HWI-ST200R_251:7:2207:3236:93050#CGATGT\t147\t7\t15609197\t60\t37M1I63M\t=\t15609040\t-257\tCTCCATTACAGCAGAGGAAAGAAACTTTTTTTTTTTCTTTTTTTTTTTTTTTTTTTAAAGAAACTGGGTTGAAGAAGTAGTTCATTGAATGGTTGTCTTAC\t################CAC@3DA:))&BDDDDDDB<&BDDDDDDDDHIJJIJJIJJJJJIIHHJJJJJJJJJJJJJJIJJJJJJJJJJHHHHHFFFFFBBC",
            "HWI-ST200R_251:1:1101:10006:13364#CGATGT\t99\t7\t15609040\t60\t101M\t=\t15609196\t257\tCTAGCTTGTAACAATTGCTATAACTCCCCCACTTTGGATGGTAAATTTCTCCTCAGCTGTCATTGGCCCTCAAAGCCAAAATGACTCCAATTAGAATGTAT\tCCCFFFFFHHHHHJJJJJJJJJJJGIJJJJIJJJJJJJJJIHIIJJJJJJJJJJJJIIJGGGIIIJDHIGIHFGFEHEF>??>@CDEECC@CCC(>;A:>5",
            "HWI-ST200R_251:1:1101:10006:13364#CGATGT\t147\t7\t15609196\t60\t101M\t=\t15609040\t-257\tTACCATTTAAAGCAGAGGAAAAAAACTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTAAAGAAACTGGGTTGAAGAAGTAGTTCATTGAATGGTTGTCTTAC\t############################BBDDDDB803DDDDDDDDHJJJIIJJJJJJJIHF?JJJJJJJJJIIJJJJJJJJJJJJJJHHHHHFFFFFB@C",
        ]

        # Build a real list: a lazy ``map`` object cannot be sliced.
        sams = [SAMMapping(line) for line in test_case_data]
        pair1 = sams[0:2]
        pair2 = sams[2:]
        forward_key = "0007:000015609040:F"
        reverse_key = "0007:000015609296:R"

        self.link.process(pair1)
        self.link.process(pair2)

        self.assertEqual(2, len(self.ctx.emitted.keys()))
        key_list = sorted(self.ctx.emitted.keys())
        self.assertEqual(forward_key, key_list[0])
        self.assertEqual(reverse_key, key_list[1])

        # each key should have two pairs at its position
        for value_list in self.ctx.emitted.values():
            self.assertEqual(2, len(value_list))

        for value in self.ctx.emitted[forward_key]:
            unserialized = proto.unserialize_pair(value)
            self.assertTrue(unserialized[0].pos < unserialized[1].pos)
        for value in self.ctx.emitted[reverse_key]:
            self.assertEqual(PAIR_STRING, value)
コード例 #4
0
	def test_fw_rev_with_indels(self):
		"""
		Here we have two duplicate pairs.  Read 1 in both pairs is positioned
		at the same location (15609040).  For both pairs, read 2 is mapped on
		the reverse strand.  In the first pair read 2 is mapped with an
		insertion (37M1I63M), so its leftmost position (15609197) is shifted
		by one with respect to read 2 of the second pair (101M at 15609196).
		Yet, the read end is in the same location, so both pairs are expected
		under the same forward and reverse keys.
		"""
		test_case_data = [
			"HWI-ST200R_251:7:2207:3236:93050#CGATGT\t99\t7\t15609040\t60\t101M\t=\t15609197\t257\tCTAGCTTGTAACAATTGCTATAACTCCCCCACTTTGGATGGTAAATTTCTCCTCAGCTGTCATTGGCCCTCAAAGCCAAAATGACTCCAATTAGAATGTAT\tCCCFFFFFHHHHHJJJJJJJJJIIJJJJJJJIJJJJJJJJJFIJJJJJJJJJJJIJJIJGIGIJJJJJJJJJ>EHHEFFFFFEEDEDEDDCDDDACCA:CD",
			"HWI-ST200R_251:7:2207:3236:93050#CGATGT\t147\t7\t15609197\t60\t37M1I63M\t=\t15609040\t-257\tCTCCATTACAGCAGAGGAAAGAAACTTTTTTTTTTTCTTTTTTTTTTTTTTTTTTTAAAGAAACTGGGTTGAAGAAGTAGTTCATTGAATGGTTGTCTTAC\t################CAC@3DA:))&BDDDDDDB<&BDDDDDDDDHIJJIJJIJJJJJIIHHJJJJJJJJJJJJJJIJJJJJJJJJJHHHHHFFFFFBBC",
			"HWI-ST200R_251:1:1101:10006:13364#CGATGT\t99\t7\t15609040\t60\t101M\t=\t15609196\t257\tCTAGCTTGTAACAATTGCTATAACTCCCCCACTTTGGATGGTAAATTTCTCCTCAGCTGTCATTGGCCCTCAAAGCCAAAATGACTCCAATTAGAATGTAT\tCCCFFFFFHHHHHJJJJJJJJJJJGIJJJJIJJJJJJJJJIHIIJJJJJJJJJJJJIIJGGGIIIJDHIGIHFGFEHEF>??>@CDEECC@CCC(>;A:>5",
			"HWI-ST200R_251:1:1101:10006:13364#CGATGT\t147\t7\t15609196\t60\t101M\t=\t15609040\t-257\tTACCATTTAAAGCAGAGGAAAAAAACTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTAAAGAAACTGGGTTGAAGAAGTAGTTCATTGAATGGTTGTCTTAC\t############################BBDDDDB803DDDDDDDDHJJJIIJJJJJJJIHF?JJJJJJJJJIIJJJJJJJJJJJJJJHHHHHFFFFFB@C",
		]

		# Build a real list: a lazy ``map`` object cannot be sliced.
		sams = [SAMMapping(line) for line in test_case_data]
		pair1 = sams[0:2]
		pair2 = sams[2:]
		forward_key = "0007:000015609040:F"
		reverse_key = "0007:000015609296:R"

		self.link.process(pair1)
		self.link.process(pair2)

		self.assertEqual(2, len(self.ctx.emitted.keys()))
		key_list = sorted(self.ctx.emitted.keys())
		self.assertEqual(forward_key, key_list[0])
		self.assertEqual(reverse_key, key_list[1])

		# each key should have two pairs at its position
		for value_list in self.ctx.emitted.values():
			self.assertEqual(2, len(value_list))

		for value in self.ctx.emitted[forward_key]:
			unserialized = proto.unserialize_pair(value)
			self.assertTrue(unserialized[0].pos < unserialized[1].pos)
		for value in self.ctx.emitted[reverse_key]:
			self.assertEqual(PAIR_STRING, value)
コード例 #5
0
    def reduce(self, ctx):
        """Process every value grouped under the current input key.

        The key is split on ':'.  When its first field equals
        ``seqal_app.UNMAPPED_STRING`` the values are handed to
        ``__process_unmapped_pairs``.  Otherwise the key must have the form
        ``ref_id:pos:orient``; each value is then either the
        ``seqal_app.PAIR_STRING`` marker or a serialized pair, and the
        deserialized reads are collected into the pair / fragment workspaces
        before ``__process_pairs`` and ``__process_fragments`` are run.

        Args:
            ctx: reduce context; must provide ``getInputKey()``,
                ``nextValue()`` and ``getInputValue()``.

        Raises:
            RuntimeError: if a coordinate key does not have exactly three
                ':'-separated fields.
            ValueError: if the first read of a deserialized pair is None or
                unmapped.
        """
        # create the "workspace"
        self.__pairs = []
        self.__unpaired = []

        # gather input
        key_values = ctx.getInputKey().split(':')

        if key_values[0] == seqal_app.UNMAPPED_STRING:
            # pair of unmapped sequences
            self.__process_unmapped_pairs(ctx)
        else:
            if len(key_values) != 3:
                # Report the number of fields actually found.  ``key`` is not
                # assigned yet at this point, so it must not be used here.
                raise RuntimeError(
                    "Unexpected key length %d.  Expected key format is ref_id:pos:orient"
                    % len(key_values))
            # convert key values and make it a tuple;
            # last value is True if reverse strand.
            # Within this method the tuple is only used in the error message.
            key = (int(key_values[0]), int(key_values[1]),
                   key_values[2] == 'R')

            have_pairs = False  # keep track of whether we have at least one real pair.
            # load mappings
            while ctx.nextValue():
                value = ctx.getInputValue()
                if value == seqal_app.PAIR_STRING:
                    # Marker value: there is a pair at this position, but the
                    # value itself carries no reads to deserialize.
                    have_pairs = True
                else:
                    pair = protobuf_mapping.unserialize_pair(value)
                    if pair[0] is None or pair[0].is_unmapped():
                        # Sanity check. pair[0] should never be None or unmapped here.
                        raise ValueError(
                            "Error!  Got None or unmapped in first read for key %s.  pair: %s"
                            % (key, pair))

                    if pair[1] and pair[1].is_unmapped():
                        # Mapped read with an unmapped mate: the mate goes
                        # straight to the output sink, the mapped read is
                        # kept as a lone fragment.
                        self.__output_sink.process((pair[1], None))
                        self.__unpaired.append((pair[0], None))
                    elif pair[1] is None:
                        self.__unpaired.append(pair)
                    else:
                        # Two mapped reads.
                        # pair[0] should never be unmapped.  That case should be handled by
                        # __process_unmapped_pairs.
                        self.__pairs.append(pair)
                        have_pairs = True

            self.__process_pairs()
            self.__process_fragments(have_pairs)

        # clean-up the workspace
        self.__pairs = None
        self.__unpaired = None
コード例 #6
0
	def test_emit_reverse_fragment1(self):
		"""A lone reverse-strand fragment in pair[1] is emitted under its own key.

		Read 1 is erased (per the helper's name, pair[0] becomes None) and
		the remaining read is flagged as reverse.  Exactly one key holding
		one value is expected; the deserialized value carries the fragment
		in slot 0 and None in slot 1.
		"""
		pair = test_utils.erase_read1(list(self.pair1))
		self.pair1 = pair
		fragment = pair[1]
		fragment.set_on_reverse(True)
		self.link.process(pair)

		emitted = self.ctx.emitted
		self.assertEqual(1, len(emitted.keys()))
		fragment_key = test_utils.make_key(fragment)
		values = emitted[fragment_key]
		self.assertEqual(1, len(values))

		emitted_pair = proto.unserialize_pair(values[0])
		self.assertTrue(emitted_pair[1] is None)
		emitted_read = emitted_pair[0]
		self.assertEqual(fragment.tid, emitted_read.tid)
		self.assertEqual(fragment.pos, emitted_read.pos)
		self.assertTrue(emitted_read.is_on_reverse())
コード例 #7
0
	def test_emit_forward_fragment2(self):
		"""A lone fragment in pair[0] is emitted once under its own key.

		Read 2 is erased (per the helper's name, pair[1] becomes None).
		Exactly one key holding one value is expected, the deserialized
		value has None as its second element, and the
		"Test:MAPPED COORDINATES" counter must be present and equal to 1.
		"""
		# Fragment in pair[0].  None in pair[1]
		self.pair1 = test_utils.erase_read2(list(self.pair1))
		self.link.process(self.pair1)
		self.assertEqual(1, len(self.ctx.emitted.keys()))
		expected_key = test_utils.make_key(self.pair1[0])
		self.assertEqual(1, len(self.ctx.emitted[expected_key]))
		unserialized = proto.unserialize_pair(self.ctx.emitted[expected_key][0])
		self.assertTrue(unserialized[1] is None)
		self.assertEqual(self.pair1[0].tid, unserialized[0].tid)
		self.assertEqual(self.pair1[0].pos, unserialized[0].pos)
		# ``in`` instead of has_key(), which no longer exists on Python 3
		# dicts (assumes counters is a dict-like mapping).
		self.assertTrue("Test:MAPPED COORDINATES" in self.ctx.counters)
		self.assertEqual(1, self.ctx.counters["Test:MAPPED COORDINATES"])
コード例 #8
0
 def test_emit_reverse_fragment1(self):
     """Check that a lone reverse fragment held in pair[1] is emitted.

     After erasing read 1 (pair[0] is then None, per the helper's name)
     and marking the remaining read as reverse, one key with one value is
     expected.  The deserialized value holds the fragment first and None
     second.
     """
     self.pair1 = test_utils.erase_read1(list(self.pair1))
     lone_read = self.pair1[1]
     lone_read.set_on_reverse(True)
     self.link.process(self.pair1)

     self.assertEqual(1, len(self.ctx.emitted.keys()))
     key_for_read = test_utils.make_key(lone_read)
     emitted_values = self.ctx.emitted[key_for_read]
     self.assertEqual(1, len(emitted_values))

     decoded = proto.unserialize_pair(emitted_values[0])
     self.assertTrue(decoded[1] is None)
     self.assertEqual(lone_read.tid, decoded[0].tid)
     self.assertEqual(lone_read.pos, decoded[0].pos)
     self.assertTrue(decoded[0].is_on_reverse())
コード例 #9
0
 def test_emit_forward_fragment2(self):
     """A lone fragment in pair[0] is emitted once under its own key.

     Read 2 is erased (per the helper's name, pair[1] becomes None).
     Exactly one key holding one value is expected, the deserialized
     value has None as its second element, and the
     "Test:MAPPED COORDINATES" counter must be present and equal to 1.
     """
     # Fragment in pair[0].  None in pair[1]
     self.pair1 = test_utils.erase_read2(list(self.pair1))
     self.link.process(self.pair1)
     self.assertEqual(1, len(self.ctx.emitted.keys()))
     expected_key = test_utils.make_key(self.pair1[0])
     self.assertEqual(1, len(self.ctx.emitted[expected_key]))
     unserialized = proto.unserialize_pair(
         self.ctx.emitted[expected_key][0])
     self.assertTrue(unserialized[1] is None)
     self.assertEqual(self.pair1[0].tid, unserialized[0].tid)
     self.assertEqual(self.pair1[0].pos, unserialized[0].pos)
     # ``in`` instead of has_key(), which no longer exists on Python 3
     # dicts (assumes counters is a dict-like mapping).
     self.assertTrue("Test:MAPPED COORDINATES" in self.ctx.counters)
     self.assertEqual(1, self.ctx.counters["Test:MAPPED COORDINATES"])
コード例 #10
0
	def test_unmapped1(self):
		"""A pair whose first read is unmapped is emitted under read 2's key.

		Exactly one key with one value is expected.  The deserialized value
		must contain both reads, with the mapped read (originally pair1[1])
		in the first slot, and the "Test:UNMAPPED READS" counter must be 1.
		"""
		self.pair1[0].set_mapped(False)
		self.pair1[1].set_mate_mapped(False)
		self.link.process(self.pair1)

		self.assertEqual(1, len(self.ctx.emitted.keys()))
		expected_key = test_utils.make_key(self.pair1[1])
		self.assertTrue(expected_key in self.ctx.emitted.keys())
		# Only one key exists and it is expected_key, so look the values up
		# by key rather than indexing ``values()``, which is not
		# subscriptable on Python 3.
		emitted_values = self.ctx.emitted[expected_key]
		self.assertEqual(1, len(emitted_values))
		unserialized = proto.unserialize_pair(emitted_values[0])
		self.assertFalse(unserialized[0] is None)
		self.assertFalse(unserialized[1] is None)
		self.assertEqual(self.pair1[1].tid, unserialized[0].tid)
		self.assertEqual(self.pair1[1].pos, unserialized[0].pos)
		self.assertEqual(1, self.ctx.counters["Test:UNMAPPED READS"])
コード例 #11
0
    def test_unmapped1(self):
        """A pair whose first read is unmapped is emitted under read 2's key.

        Exactly one key with one value is expected.  The deserialized value
        must contain both reads, with the mapped read (originally pair1[1])
        in the first slot, and the "Test:UNMAPPED READS" counter must be 1.
        """
        self.pair1[0].set_mapped(False)
        self.pair1[1].set_mate_mapped(False)
        self.link.process(self.pair1)

        self.assertEqual(1, len(self.ctx.emitted.keys()))
        expected_key = test_utils.make_key(self.pair1[1])
        self.assertTrue(expected_key in self.ctx.emitted.keys())
        # Only one key exists and it is expected_key, so look the values up
        # by key rather than indexing ``values()``, which is not
        # subscriptable on Python 3.
        emitted_values = self.ctx.emitted[expected_key]
        self.assertEqual(1, len(emitted_values))
        unserialized = proto.unserialize_pair(emitted_values[0])
        self.assertFalse(unserialized[0] is None)
        self.assertFalse(unserialized[1] is None)
        self.assertEqual(self.pair1[1].tid, unserialized[0].tid)
        self.assertEqual(self.pair1[1].pos, unserialized[0].pos)
        self.assertEqual(1, self.ctx.counters["Test:UNMAPPED READS"])
コード例 #12
0
ファイル: reducer.py プロジェクト: QwertyManiac/seal-cdh4
	def reduce(self, ctx):
		"""Process every value grouped under the current input key.

		The key is split on ':'.  When its first field equals
		``seqal_app.UNMAPPED_STRING`` the values are handed to
		``__process_unmapped_pairs``.  Otherwise the key must have the form
		``ref_id:pos:orient``; each value is then either the
		``seqal_app.PAIR_STRING`` marker or a serialized pair, and the
		deserialized reads are collected into the pair / fragment workspaces
		before ``__process_pairs`` and ``__process_fragments`` are run.

		Args:
			ctx: reduce context; must provide ``getInputKey()``,
				``nextValue()`` and ``getInputValue()``.

		Raises:
			RuntimeError: if a coordinate key does not have exactly three
				':'-separated fields.
			ValueError: if the first read of a deserialized pair is None or
				unmapped.
		"""
		# create the "workspace"
		self.__pairs = []
		self.__unpaired = []

		# gather input
		key_values = ctx.getInputKey().split(':')

		if key_values[0] == seqal_app.UNMAPPED_STRING:
			# pair of unmapped sequences
			self.__process_unmapped_pairs(ctx)
		else:
			if len(key_values) != 3:
				# Report the number of fields actually found.  ``key`` is not
				# assigned yet at this point, so it must not be used here.
				raise RuntimeError("Unexpected key length %d.  Expected key format is ref_id:pos:orient" % len(key_values))
			# convert key values and make it a tuple;
			# last value is True if reverse strand.
			# Within this method the tuple is only used in the error message.
			key = (int(key_values[0]), int(key_values[1]), key_values[2] == 'R')

			have_pairs = False # keep track of whether we have at least one real pair.
			# load mappings
			while ctx.nextValue():
				value = ctx.getInputValue()
				if value == seqal_app.PAIR_STRING:
					# Marker value: there is a pair at this position, but the
					# value itself carries no reads to deserialize.
					have_pairs = True
				else:
					pair = protobuf_mapping.unserialize_pair(value)
					if pair[0] is None or pair[0].is_unmapped():
						# Sanity check. pair[0] should never be None or unmapped here.
						raise ValueError("Error!  Got None or unmapped in first read for key %s.  pair: %s" % (key, pair))

					if pair[1] and pair[1].is_unmapped():
						# Mapped read with an unmapped mate: the mate goes
						# straight to the output sink, the mapped read is
						# kept as a lone fragment.
						self.__output_sink.process( (pair[1], None) )
						self.__unpaired.append( (pair[0], None) )
					elif pair[1] is None:
						self.__unpaired.append(pair)
					else:
						# Two mapped reads.
						# pair[0] should never be unmapped.  That case should be handled by
						# __process_unmapped_pairs.
						self.__pairs.append(pair)
						have_pairs = True

			self.__process_pairs()
			self.__process_fragments(have_pairs)

		# clean-up the workspace
		self.__pairs = None
		self.__unpaired = None
コード例 #13
0
	def test_emit_forward_pair(self):
		"""A forward pair is emitted under read 1's key, a marker under read 2's.

		We expect to get the pair emitted with the key generated from
		read 1 (the left read).  On the other hand, we expect to get
		PAIR_STRING with the key generated from read 2.
		"""
		self.link.process(self.pair1)
		# A real list is needed because the keys are indexed below; a lazy
		# ``map`` object does not support indexing.
		expected_keys = [test_utils.make_key(read) for read in self.pair1]
		for i in 0, 1:
			# ``in`` instead of has_key(), which no longer exists on
			# Python 3 dicts.
			self.assertTrue(expected_keys[i] in self.ctx.emitted)
			self.assertEqual(1, len(self.ctx.emitted[expected_keys[i]]))

		unserialized = proto.unserialize_pair(self.ctx.emitted[expected_keys[0]][0])
		for j in 0, 1:
			self.assertEqual(self.pair1[j].tid, unserialized[j].tid)
			self.assertEqual(self.pair1[j].pos, unserialized[j].pos)

		second_value = self.ctx.emitted[expected_keys[1]][0]
		self.assertEqual(PAIR_STRING, second_value)
コード例 #14
0
    def test_emit_forward_pair(self):
        """A forward pair is emitted under read 1's key, a marker under read 2's.

        We expect to get the pair emitted with the key generated from
        read 1 (the left read).  On the other hand, we expect to get
        PAIR_STRING with the key generated from read 2.
        """
        self.link.process(self.pair1)
        # A real list is needed because the keys are indexed below; a lazy
        # ``map`` object does not support indexing.
        expected_keys = [test_utils.make_key(read) for read in self.pair1]
        for i in 0, 1:
            # ``in`` instead of has_key(), which no longer exists on
            # Python 3 dicts.
            self.assertTrue(expected_keys[i] in self.ctx.emitted)
            self.assertEqual(1, len(self.ctx.emitted[expected_keys[i]]))

        unserialized = proto.unserialize_pair(
            self.ctx.emitted[expected_keys[0]][0])
        for j in 0, 1:
            self.assertEqual(self.pair1[j].tid, unserialized[j].tid)
            self.assertEqual(self.pair1[j].pos, unserialized[j].pos)

        second_value = self.ctx.emitted[expected_keys[1]][0]
        self.assertEqual(PAIR_STRING, second_value)
コード例 #15
0
	def test_emit_backward_pair(self):
		"""A backward pair is emitted with its reads swapped.

		Similar to test_emit_forward_pair, but here we expect to have the
		reads reordered.  So,
		  key2 => reversed and serialized pair
		  key1 => PAIR_STRING
		"""
		self.link.process(self.pair2)
		# A real list is needed because the keys are indexed below; a lazy
		# ``map`` object does not support indexing.
		expected_keys = [test_utils.make_key(read) for read in self.pair2]
		for i in 0, 1:
			# ``in`` instead of has_key(), which no longer exists on
			# Python 3 dicts.
			self.assertTrue(expected_keys[i] in self.ctx.emitted)
			self.assertEqual(1, len(self.ctx.emitted[expected_keys[i]]))

		unserialized = proto.unserialize_pair(self.ctx.emitted[expected_keys[1]][0])
		for j in 0, 1:
			# j ^ 1 flips 0 <-> 1: input read j must sit in the other slot
			# of the emitted pair.
			self.assertEqual(self.pair2[j].tid, unserialized[j ^ 1].tid)
			self.assertEqual(self.pair2[j].pos, unserialized[j ^ 1].pos)

		second_value = self.ctx.emitted[expected_keys[0]][0]
		self.assertEqual(PAIR_STRING, second_value)
コード例 #16
0
    def test_emit_backward_pair(self):
        """A backward pair is emitted with its reads swapped.

        Similar to test_emit_forward_pair, but here we expect to have the
        reads reordered.  So,
          key2 => reversed and serialized pair
          key1 => PAIR_STRING
        """
        self.link.process(self.pair2)
        # A real list is needed because the keys are indexed below; a lazy
        # ``map`` object does not support indexing.
        expected_keys = [test_utils.make_key(read) for read in self.pair2]
        for i in 0, 1:
            # ``in`` instead of has_key(), which no longer exists on
            # Python 3 dicts.
            self.assertTrue(expected_keys[i] in self.ctx.emitted)
            self.assertEqual(1, len(self.ctx.emitted[expected_keys[i]]))

        unserialized = proto.unserialize_pair(
            self.ctx.emitted[expected_keys[1]][0])
        for j in 0, 1:
            # j ^ 1 flips 0 <-> 1: input read j must sit in the other slot
            # of the emitted pair.
            self.assertEqual(self.pair2[j].tid, unserialized[j ^ 1].tid)
            self.assertEqual(self.pair2[j].pos, unserialized[j ^ 1].pos)

        second_value = self.ctx.emitted[expected_keys[0]][0]
        self.assertEqual(PAIR_STRING, second_value)
コード例 #17
0
ファイル: test_protobuf_mapping.py プロジェクト: okulev/seal
 def __pipe_pair_through(self, pair):
     """Round-trip ``pair`` through serialization and return the result.

     ``io`` is whatever module the file binds to that name; it must
     provide ``serialize_pair`` and ``unserialize_pair``
     (presumably the project's protobuf mapping module -- confirm).
     """
     serialized = io.serialize_pair(pair)
     round_tripped = io.unserialize_pair(serialized)
     return round_tripped
コード例 #18
0
 def __process_unmapped_pairs(self, ctx):
     """Deserialize every remaining value in ``ctx`` and pass it to the sink.

     Each value is decoded with ``protobuf_mapping.unserialize_pair`` and
     handed, unchanged, to ``self.__output_sink.process``.
     """
     while ctx.nextValue():
         unmapped_pair = protobuf_mapping.unserialize_pair(
             ctx.getInputValue())
         self.__output_sink.process(unmapped_pair)
コード例 #19
0
 def __pipe_pair_through(self, pair):
     """Serialize ``pair`` and immediately deserialize it again.

     Returns whatever ``io.unserialize_pair`` produces for the serialized
     message; ``io`` is the module bound to that name in this file
     (presumably not the stdlib ``io`` -- confirm against the imports).
     """
     wire_message = io.serialize_pair(pair)
     decoded_pair = io.unserialize_pair(wire_message)
     return decoded_pair
コード例 #20
0
ファイル: reducer.py プロジェクト: QwertyManiac/seal-cdh4
	def __process_unmapped_pairs(self, ctx):
		"""Forward every value of the current key to the output sink.

		Values are read from ``ctx`` one at a time, decoded with
		``protobuf_mapping.unserialize_pair`` and passed as-is to
		``self.__output_sink.process``.
		"""
		while ctx.nextValue():
			serialized = ctx.getInputValue()
			decoded_pair = protobuf_mapping.unserialize_pair(serialized)
			self.__output_sink.process(decoded_pair)