def test_cigars(self):
    """cigars() yields the left/right CIGAR strings as a tuple, or formatted."""
    left_cigar = '1S2M4S'
    right_cigar = '16S32M64S'
    left_mate = MicroMock(query_name='A',
                          cigarstring=left_cigar,
                          query_sequence='AAAAAA')
    right_mate = MicroMock(query_name='A',
                           cigarstring=right_cigar,
                           query_sequence='AAAAAA')

    pair = samtools.PairedAlignment(left_mate, right_mate, tag_length=1)

    # No argument: a (left, right) tuple is expected.
    self.assertEqual((left_cigar, right_cigar), pair.cigars())
    # With a format string: left/right are substituted into the template.
    self.assertEqual('1S2M4S~16S32M64S', pair.cigars('{left}~{right}'))
def test_end_median_odd(self):
    """After end(), median is 3 for families holding 2, 3 and 5 align_pairs."""
    families = [MicroMock(align_pairs=[1] * pair_count)
                for pair_count in (2, 3, 5)]
    handler = _FamilySizeStatHandler(self.mock_logger)

    for family in families:
        handler.handle(family)
    handler.end()

    self.assertEqual(3, handler.median)
def test_positions(self):
    """positions() yields (101, 251) for mates spanning 100-150 and 200-250."""
    shared_fields = {'query_name': 'A', 'query_sequence': 'AAAAAA'}
    left_mate = MicroMock(reference_start=100,
                          reference_end=150,
                          **shared_fields)
    right_mate = MicroMock(reference_start=200,
                           reference_end=250,
                           **shared_fields)

    pair = samtools.PairedAlignment(left_mate, right_mate, tag_length=1)

    # No argument: a tuple is expected.
    self.assertEqual((101, 251), pair.positions())
    # With a format string: left/right are substituted into the template.
    self.assertEqual('101~251', pair.positions('{left}~{right}'))
def test_write_passThroughToBaseWriter(self):
    """Each LoggingWriter.write() is recorded by the base writer as (family, align)."""
    base_writer = MockAlignWriter()
    writer = samtools.LoggingWriter(base_writer, self.mock_logger)
    family = MicroMock(umi_sequence=1, filter_value=None)
    unfiltered_align = MicroMock(filter_value=None)
    filtered_align = MicroMock(filter_value='foo')

    writer.write(family, None, unfiltered_align)
    writer.write(family, None, filtered_align)

    # Both alignments reach the base writer, whether or not they carry a
    # filter_value.
    expected_calls = [(family, unfiltered_align), (family, filtered_align)]
    self.assertEqual(expected_calls, base_writer._write_calls)
def test_end_quantiles_even(self):
    """Quartiles for families holding 2, 3, 9 and 12 align_pairs are 2.75 and 9.75."""
    pair_counts = (2, 3, 9, 12)
    families = [MicroMock(align_pairs=[1] * count) for count in pair_counts]
    handler = _FamilySizeStatHandler(self.mock_logger)

    for family in families:
        handler.handle(family)
    handler.end()

    self.assertEqual(2.75, handler.quartile_1)
    self.assertEqual(9.75, handler.quartile_3)
def test_end_mean(self):
    """After end(), mean is 6.2 for families holding 1, 2, 4, 8 and 16 align_pairs."""
    posAfam1 = MicroMock(align_pairs=[1] * 1)
    posAfam2 = MicroMock(align_pairs=[1] * 2)
    posBfam1 = MicroMock(align_pairs=[1] * 4)
    posBfam2 = MicroMock(align_pairs=[1] * 8)
    posBfam3 = MicroMock(align_pairs=[1] * 16)
    families = [posAfam1, posAfam2, posBfam1, posBfam2, posBfam3]
    stat_handler = _FamilySizeStatHandler(self.mock_logger)

    for family in families:
        stat_handler.handle(family)
    stat_handler.end()

    # 6.2 has no exact binary floating-point representation, so compare
    # with a tolerance instead of exact equality; this keeps the test
    # independent of how the handler accumulates the mean.
    self.assertAlmostEqual(6.2, stat_handler.mean)
def test_build_bam_tags_x0_filter(self):
    """X0 is a 'Z' tag whose value combines family and alignment filter_values."""
    tag = SamtoolsTest.get_tag(samtools._build_bam_tags(), 'X0')
    self.assertEqual('X0', tag._tag_name)
    self.assertEqual('Z', tag._tag_type)
    self.assertRegexpMatches(tag._description, 'filter')

    # Nothing supplied, or nothing filtered: no value is expected.
    self.assertEqual(None, tag._get_value(None, None, None))
    unfiltered_family = MicroMock(filter_value=None)
    unfiltered_align = MicroMock(filter_value=None)
    self.assertEqual(None,
                     tag._get_value(unfiltered_family, None, unfiltered_align))

    # Family filter first, then alignment filter, joined with ';'.
    filtered_family = MicroMock(filter_value='foo')
    filtered_align = MicroMock(filter_value='bar')
    cases = [
        ('foo', filtered_family, None),
        ('bar', None, filtered_align),
        ('foo;bar', filtered_family, filtered_align),
    ]
    for expected_value, family, connor_align in cases:
        self.assertEqual(expected_value,
                         tag._get_value(family, None, connor_align))
def test_write_UnplacedAlignWritesFamilyNone(self):
    """Writing with UNPLACED_FAMILY reaches the base writer with family None."""
    base_writer = MockAlignWriter()
    writer = samtools.LoggingWriter(base_writer, self.mock_logger)
    unplaced_align = MicroMock(filter_value='foo')

    writer.write(samtools.LoggingWriter.UNPLACED_FAMILY, None, unplaced_align)

    self.assertEqual([(None, unplaced_align)], base_writer._write_calls)
def test_build_bam_tags_x6_consensus_template(self):
    """X6 is an 'i' tag that is set only when is_consensus_template gives a value."""
    tag = SamtoolsTest.get_tag(samtools._build_bam_tags(), 'X6')
    self.assertEqual('i', tag._tag_type)
    self.assertRegexpMatches(tag._description,
                             'template for the consensus alignment')

    # (family, tags expected on a fresh alignment after set_tag)
    cases = [
        (MicroMock(is_consensus_template=lambda x: 1), [('X6', 1)]),
        (MicroMock(is_consensus_template=lambda x: None), []),
        (None, []),
    ]
    for family, expected_tags in cases:
        align = mock_align()
        tag.set_tag(family, None, align)
        self.assertEqual(expected_tags, align.get_tags())
def test_build_bam_tags_x5_family_size(self):
    """X5 is an 'i' tag carrying the family's included_pair_count."""
    tag = SamtoolsTest.get_tag(samtools._build_bam_tags(), 'X5')
    self.assertEqual('i', tag._tag_type)
    self.assertRegexpMatches(tag._description, 'family size')

    tagged_align = mock_align()
    tag.set_tag(MicroMock(included_pair_count=42), None, tagged_align)
    self.assertEqual([('X5', 42)], tagged_align.get_tags())

    # Without a family, no tag is added.
    untagged_align = mock_align()
    tag.set_tag(None, None, untagged_align)
    self.assertEqual([], untagged_align.get_tags())
def test_build_bam_tags_x3_unique_identifier(self):
    """X3 is an 'i' tag carrying the family's umi_sequence."""
    tag = SamtoolsTest.get_tag(samtools._build_bam_tags(), 'X3')
    self.assertEqual('i', tag._tag_type)
    self.assertRegexpMatches(tag._description, 'unique identifier')

    # (family, tags expected on a fresh alignment after set_tag)
    cases = (
        (MicroMock(umi_sequence=42), [('X3', 42)]),
        (None, []),
    )
    for family, expected_tags in cases:
        align = mock_align()
        tag.set_tag(family, None, align)
        self.assertEqual(expected_tags, align.get_tags())
def test_close_whenAllPlaced(self):
    """close() logs three INFO summary lines and no DEBUG lines when nothing is filtered."""
    base_writer = MockAlignWriter()
    writer = samtools.LoggingWriter(base_writer, self.mock_logger)
    family = MicroMock(umi_sequence=4, filter_value=None)
    align = MicroMock(filter_value=None)

    # The same unfiltered alignment is written twice under one family.
    writer.write(family, None, align)
    writer.write(family, None, align)
    writer.close()

    expected_info = [
        '0.00% (0/2) alignments unplaced or discarded',
        '100.00% (2/2) alignments included in 1 families',
        '50.00% deduplication rate (1 - 1 families/2 included alignments)',
    ]
    info_lines = self.mock_logger._log_calls['INFO']
    for index, expected_line in enumerate(expected_info):
        self.assertEqual(expected_line, info_lines[index])
    self.assertEqual(3, len(info_lines))

    debug_lines = self.mock_logger._log_calls['DEBUG']
    self.assertEqual(0, len(debug_lines))
def test_build_bam_tags_x4_umt_barcodes(self):
    """X4 is a 'Z' tag carrying whatever the family's umt() call returns."""
    tag = SamtoolsTest.get_tag(samtools._build_bam_tags(), 'X4')
    self.assertEqual('Z', tag._tag_type)
    self.assertRegexpMatches(tag._description, 'UMT barcodes')

    barcoded_family = MicroMock(umt=lambda *args: 'AAA~CCC')
    tagged_align = mock_align()
    tag.set_tag(barcoded_family, None, tagged_align)
    self.assertEqual([('X4', 'AAA~CCC')], tagged_align.get_tags())

    # Without a family, no tag is added.
    untagged_align = mock_align()
    tag.set_tag(None, None, untagged_align)
    self.assertEqual([], untagged_align.get_tags())
def test_build_bam_tags_x1_positions(self):
    """X1 is a 'Z' tag carrying whatever the pair's positions() call returns."""
    tag = SamtoolsTest.get_tag(samtools._build_bam_tags(), 'X1')
    self.assertEqual('Z', tag._tag_type)
    self.assertRegexpMatches(tag._description,
                             'leftmost~rightmost matched pair positions')

    # (pair, tags expected on a fresh alignment after set_tag)
    cases = (
        (MicroMock(positions=lambda x: '100~150'), [('X1', '100~150')]),
        (None, []),
    )
    for pair, expected_tags in cases:
        align = mock_align()
        tag.set_tag(None, pair, align)
        self.assertEqual(expected_tags, align.get_tags())
def test_build_bam_tags_x2_cigars(self):
    """X2 is a 'Z' tag carrying whatever the pair's cigars() call returns."""
    tag = SamtoolsTest.get_tag(samtools._build_bam_tags(), 'X2')
    self.assertEqual('Z', tag._tag_type)
    self.assertRegexpMatches(tag._description, 'L~R CIGARs')

    tagged_align = mock_align()
    cigar_pair = MicroMock(cigars=lambda x: '1S2M4S~8S16M32S')
    tag.set_tag(None, cigar_pair, tagged_align)
    self.assertEqual([('X2', '1S2M4S~8S16M32S')], tagged_align.get_tags())

    # Without a pair, no tag is added.
    untagged_align = mock_align()
    tag.set_tag(None, None, untagged_align)
    self.assertEqual([], untagged_align.get_tags())
def test_close_logsFilterStats(self):
    """close() logs INFO summaries and per-filter DEBUG breakdowns for mixed input."""
    base_writer = MockAlignWriter()
    writer = samtools.LoggingWriter(base_writer, self.mock_logger)

    family_3 = MicroMock(umi_sequence=3, filter_value=None)
    family_4 = MicroMock(umi_sequence=4, filter_value=None)
    family_5 = MicroMock(umi_sequence=5, filter_value='small family')

    # (family, alignment filter_value) in write order. The rows written
    # with a family of None are the ones the expected DEBUG lines report
    # as "unplaced".
    scenario = [
        (None, 'low mapping qual'),
        (None, 'low mapping qual'),
        (None, 'unpaired read'),
        (None, 'unpaired read'),
        (family_3, 'minority CIGAR'),
        (family_3, None),
        (family_4, None),
        (family_4, None),
        (family_5, None),
        (family_5, None),
    ]
    # Build every alignment up front, each a distinct object, before writing.
    family_aligns = [(family, MicroMock(filter_value=align_filter))
                     for family, align_filter in scenario]

    for family, align in family_aligns:
        writer.write(family, None, align)
    writer.close()

    expected_info = [
        '70.00% (7/10) alignments unplaced or discarded',
        'families discarded: 33.33% (1/3) small family',
        '30.00% (3/10) alignments included in 2 families',
        '33.33% deduplication rate (1 - 2 families/3 included alignments)',
    ]
    info_lines = self.mock_logger._log_calls['INFO']
    for index, expected_line in enumerate(expected_info):
        self.assertEqual(expected_line, info_lines[index])
    self.assertEqual(4, len(info_lines))

    expected_debug = [
        'alignments unplaced: 20.00% (2/10) low mapping qual',
        'alignments unplaced: 20.00% (2/10) unpaired read',
        'alignments discarded: 20.00% (2/10) small family',
        'alignments discarded: 10.00% (1/10) minority CIGAR',
    ]
    debug_lines = self.mock_logger._log_calls['DEBUG']
    for index, expected_line in enumerate(expected_debug):
        self.assertEqual(expected_line, debug_lines[index])
    self.assertEqual(4, len(debug_lines))
def _mock_align_pair(query_name, filter_value=None):
    """Return a MicroMock pair with ConnorAlign left/right mates.

    Both mates wrap a fresh mock_align(query_name=query_name) and are
    constructed with the same filter_value; the returned mock also exposes
    query_name directly.
    """
    def build_mate():
        return ConnorAlign(mock_align(query_name=query_name), filter_value)

    left_mate = build_mate()
    right_mate = build_mate()
    return MicroMock(query_name=query_name, left=left_mate, right=right_mate)