def _build_coordinate_pairs(connor_alignments, excluded_writer):
    """Pair mate alignments, yielding each pair as soon as it is complete.

    Alignments are consumed in iteration order. An alignment whose
    ``orientation`` is ``'left'`` is held until its mate arrives; one whose
    orientation is ``'neither'`` is paired with an already-held alignment of
    the same name and key, or held itself if none is waiting; any other
    orientation is treated as the closing (right) mate.

    Held alignments are indexed by ``(reference_id, next_reference_start)``;
    a closing mate is looked up by ``(reference_id, reference_start)``, and
    within a key alignments are matched on ``query_name``.

    Args:
        connor_alignments: iterable of alignment objects exposing
            ``orientation``, ``reference_id``, ``reference_start``,
            ``next_reference_start``, ``query_name`` and a writable
            ``filter_value``.
        excluded_writer: object whose ``write(None, None, alignment)`` is
            called for every alignment that ends up without a mate.

    Yields:
        ``samtools.PairedAlignment(first_mate, second_mate)`` for each
        completed pair.

    Side effects:
        Every unpaired alignment gets ``filter_value`` set to the
        missing-mate message and is passed to ``excluded_writer``: closing
        mates immediately, still-held alignments after the input is
        exhausted.
    """
    MISSING_MATE_FILTER = 'read mate was missing or excluded'
    # (reference_id, expected mate start) -> {query_name: held alignment}
    coords = defaultdict(dict)

    for alignment in connor_alignments:
        if alignment.orientation == 'left':
            key = (alignment.reference_id, alignment.next_reference_start)
            coords[key][alignment.query_name] = alignment
        elif alignment.orientation == 'neither':
            key = (alignment.reference_id, alignment.next_reference_start)
            # .get() avoids auto-creating an entry just to test membership.
            pending = coords.get(key)
            if pending is not None and alignment.query_name in pending:
                align1 = pending.pop(alignment.query_name)
                # Drop the per-coordinate dict once it is empty, as the
                # closing-mate branch does; otherwise empty dicts pile up
                # in coords for the lifetime of the generator.
                if not pending:
                    del coords[key]
                yield samtools.PairedAlignment(align1, alignment)
            else:
                coords[key][alignment.query_name] = alignment
        else:
            key = (alignment.reference_id, alignment.reference_start)
            pending = coords.get(key)
            l_align = None
            if pending is not None:
                l_align = pending.pop(alignment.query_name, None)
                # Clear empty coordinate dict
                if not pending:
                    del coords[key]
            # Compare against None explicitly so a held alignment is paired
            # even if its class happens to define a falsy truth value.
            if l_align is not None:
                yield samtools.PairedAlignment(l_align, alignment)
            else:
                alignment.filter_value = MISSING_MATE_FILTER
                excluded_writer.write(None, None, alignment)

    # Anything still held never saw its mate.
    for aligns in coords.values():
        for align in aligns.values():
            align.filter_value = MISSING_MATE_FILTER
            excluded_writer.write(None, None, align)
def test_eq(self):
    # Two PairedAlignments built from the same left/right alignments must
    # compare equal; replacing either mate with a different alignment must
    # make them unequal.
    left = mock_align(reference_start=100, next_reference_start=200)
    right = mock_align(reference_start=200, next_reference_start=100)
    other = mock_align(reference_start=0, next_reference_start=500)

    base = samtools.PairedAlignment(left, right)

    # assertEqual/assertNotEqual replace the assertEquals/assertNotEquals
    # aliases, which were deprecated and then removed in Python 3.12.
    self.assertEqual(base, samtools.PairedAlignment(left, right))
    self.assertNotEqual(base, samtools.PairedAlignment(other, right))
    self.assertNotEqual(base, samtools.PairedAlignment(left, other))
def test_hash(self):
    # Equivalent PairedAlignments must hash alike: adding the same pair
    # again, a new pair over the same mates, or a pair over separately
    # built but identical mates must never grow the set past one element.
    left_A = mock_align(query_name="alignA", reference_start=100)
    right_A = mock_align(query_name="alignA", reference_start=200)
    left_B = mock_align(query_name="alignA", reference_start=100)
    right_B = mock_align(query_name="alignA", reference_start=200)

    actual_set = set()

    # assertEqual replaces the assertEquals alias removed in Python 3.12.
    base = samtools.PairedAlignment(left_A, right_A)
    actual_set.add(base)
    self.assertEqual(1, len(actual_set))

    # Same object again.
    actual_set.add(base)
    self.assertEqual(1, len(actual_set))

    # New pair wrapping the very same mate objects.
    actual_set.add(samtools.PairedAlignment(left_A, right_A))
    self.assertEqual(1, len(actual_set))

    # New pair wrapping distinct mate objects with identical attributes.
    equivalent_pair = samtools.PairedAlignment(left_B, right_B)
    actual_set.add(equivalent_pair)
    self.assertEqual(1, len(actual_set))
def test_filter_value(self):
    # PairedAlignment.filter_value is expected to be None when neither mate
    # carries a (non-empty) filter value, and otherwise the
    # (left, right) tuple of the mates' filter values.
    scenarios = (
        # (left filter, right filter, expected pair filter_value)
        (None, None, None),
        ('', '', None),
        ('foo', None, ('foo', None)),
        (None, 'bar', (None, 'bar')),
    )
    for left_filter, right_filter, expected in scenarios:
        left = ConnorAlign(mock_align(), filter_value=left_filter)
        right = ConnorAlign(mock_align(), filter_value=right_filter)
        paired_alignment = samtools.PairedAlignment(left,
                                                    right,
                                                    tag_length=1)
        self.assertEqual(expected, paired_alignment.filter_value)
def test_cigars(self):
    # cigars() is expected to return the (left, right) cigar strings as a
    # tuple, or a single string when given a '{left}'/'{right}' template.
    left_mate = MicroMock(query_name='A',
                          cigarstring='1S2M4S',
                          query_sequence='AAAAAA')
    right_mate = MicroMock(query_name='A',
                           cigarstring='16S32M64S',
                           query_sequence='AAAAAA')
    pair = samtools.PairedAlignment(left_mate, right_mate, tag_length=1)

    as_tuple = pair.cigars()
    self.assertEqual(('1S2M4S', '16S32M64S'), as_tuple)

    as_text = pair.cigars('{left}~{right}')
    self.assertEqual('1S2M4S~16S32M64S', as_text)
def test_positions(self):
    # positions() is expected to report (101, 251) for a left mate with
    # reference_start=100 and a right mate with reference_end=250, either
    # as a tuple or rendered through a '{left}'/'{right}' template.
    # NOTE(review): the +1 on each value is presumably a conversion to
    # 1-based coordinates -- confirm against PairedAlignment.positions.
    left_mate = MicroMock(query_name='A',
                          reference_start=100,
                          reference_end=150,
                          query_sequence='AAAAAA')
    right_mate = MicroMock(query_name='A',
                           reference_start=200,
                           reference_end=250,
                           query_sequence='AAAAAA')
    pair = samtools.PairedAlignment(left_mate, right_mate, tag_length=1)

    as_tuple = pair.positions()
    self.assertEqual((101, 251), as_tuple)

    as_text = pair.positions('{left}~{right}')
    self.assertEqual('101~251', as_text)
def test_init(self):
    # The constructor must keep references to the exact mate objects and
    # derive the UMT pair from the first tag_length bases of the left
    # sequence and the last tag_length bases of the right sequence.
    # The adjacent string literals mark where the tag ends/begins.
    left_align = mock_align(query_name="alignA",
                            query_sequence="AAATTT" "GGGG")
    right_align = mock_align(query_name="alignA",
                             query_sequence="TTTT" "CCCGGG")
    tag_length = 6

    actual_paired_alignment = samtools.PairedAlignment(left_align,
                                                       right_align,
                                                       tag_length)

    self.assertIs(left_align, actual_paired_alignment.left)
    self.assertIs(right_align, actual_paired_alignment.right)
    left_umt = self.byte_array_to_string(actual_paired_alignment.umt[0])
    right_umt = self.byte_array_to_string(actual_paired_alignment.umt[1])
    # assertEqual replaces the assertEquals alias removed in Python 3.12.
    self.assertEqual(("AAATTT", "CCCGGG"), (left_umt, right_umt))
def test_replace_umt(self):
    # replace_umt must overwrite the leading tag_length bases of the left
    # mate and the trailing tag_length bases of the right mate, leaving the
    # remaining bases and all base qualities untouched.
    left_A = mock_align(query_sequence='AANN', query_qualities=[1,2,3,4])
    right_A = mock_align(query_sequence='NNCC', query_qualities=[5,6,7,8])
    paired_align = samtools.PairedAlignment(left_A, right_A, tag_length=2)

    paired_align.replace_umt(('GG','TT'))

    left = paired_align.left
    right = paired_align.right
    # assertEqual replaces the assertEquals alias removed in Python 3.12.
    self.assertEqual('GGNN',
                     self.byte_array_to_string(left.query_sequence))
    self.assertEqual('NNTT',
                     self.byte_array_to_string(right.query_sequence))
    self.assertEqual([1,2,3,4], left.query_qualities)
    self.assertEqual([5,6,7,8], right.query_qualities)
def _build_consensus(self, umt, align_pairs):
    """Build the consensus PairedAlignment for this family.

    Pairs with a truthy ``filter_value`` are ignored. A template pair is
    chosen from the remaining pairs, its mates are deep-copied, the copies
    receive the consensus sequences, and the resulting pair has its UMT
    replaced with ``umt``.

    Args:
        umt: pair of UMT values; the length of ``umt[0]`` is used as the
            tag length of the consensus pair.
        align_pairs: iterable of paired alignments exposing
            ``filter_value``, ``left`` and ``right``.

    Returns:
        A new ``samtools.PairedAlignment`` built from copies of the
        template mates; the template pair itself is not modified here.
    """
    unfiltered_pairs = [pair for pair in align_pairs
                        if not pair.filter_value]
    template = _TagFamily._select_template_alignment_pair(unfiltered_pairs)

    # Work on private copies; each copy gets its own fresh memo dict.
    consensus_left = deepcopy(template.left, {})
    consensus_right = deepcopy(template.right, {})

    left_seq, right_seq = \
        self._generate_consensus_sequence(unfiltered_pairs)
    consensus_left.query_sequence = left_seq
    consensus_right.query_sequence = right_seq

    # NOTE(review): qualities are re-applied from the template *after* the
    # sequence assignment -- presumably because assigning query_sequence
    # on a pysam-backed alignment resets its qualities; confirm.
    consensus_left.query_qualities = template.left.query_qualities
    consensus_right.query_qualities = template.right.query_qualities

    result = samtools.PairedAlignment(consensus_left,
                                      consensus_right,
                                      tag_length=len(umt[0]))
    result.replace_umt(umt)
    return result
def test_replace_umt_errorIfInconsistentUmtLength(self):
    # replace_umt must raise ValueError whenever the supplied UMT tuple
    # does not provide exactly two tags of length tag_length.
    left_A = mock_align(query_sequence='AANN', query_qualities=[1,2,3,4])
    right_A = mock_align(query_sequence='NNCC', query_qualities=[5,6,7,8])
    paired_align = samtools.PairedAlignment(left_A, right_A, tag_length=2)

    # assertRaisesRegexp was removed in Python 3.12. Prefer the current
    # assertRaisesRegex spelling and fall back to the legacy name only on
    # interpreters that predate it.
    assert_raises_regex = getattr(self, 'assertRaisesRegex', None)
    if assert_raises_regex is None:
        assert_raises_regex = self.assertRaisesRegexp

    invalid_umts = (
        ('G','TT'),      # left tag too short
        ('GG','T'),      # right tag too short
        (None, None),    # tags missing
        ('G',),          # only one tag supplied
    )
    for invalid_umt in invalid_umts:
        assert_raises_regex(ValueError,
                            r'Each UMT must match tag_length \(2\)',
                            paired_align.replace_umt,
                            invalid_umt)
def test_query_name(self):
    # A PairedAlignment exposes the query name shared by its two mates.
    shared_name = "alignA"
    left_mate = mock_align(query_name=shared_name, reference_start=100)
    right_mate = mock_align(query_name=shared_name, reference_start=200)

    pair = samtools.PairedAlignment(left_mate, right_mate, tag_length=1)

    self.assertEqual("alignA", pair.query_name)