def test_close_sortsAndIndexes(self):
    """Alignments written out of position order are read back ordered by position.

    Three alignments are handed to the writer with reference starts
    300, 100, 200; once the writer is closed, ``fetch()`` on the resulting
    BAM is expected to yield align1, align2, align3.
    """
    sam_header = {
        'HD': {'VN': '1.0'},
        'SQ': [{'LN': 1575, 'SN': 'chr1'}, {'LN': 1584, 'SN': 'chr2'}],
    }
    with TempDirectory() as tmp_dir:
        bam_path = os.path.join(tmp_dir.path, 'destination.bam')
        first = ConnorAlign(mock_align(query_name='align1', reference_start=100))
        second = ConnorAlign(mock_align(query_name='align2', reference_start=200))
        third = ConnorAlign(mock_align(query_name='align3', reference_start=300))
        family_tag = BamTag('X1', 'Z', 'desc',
                            get_value=lambda family, pair, align: family)
        name_tag = BamTag('X2', 'Z', 'desc',
                          get_value=lambda family, pair, align: align.query_name)

        writer = samtools.AlignWriter(sam_header, bam_path, [family_tag, name_tag])
        # Deliberately written out of coordinate order.
        write_order = (('familyC', third), ('familyA', first), ('familyB', second))
        for family_name, connor_align in write_order:
            writer.write(family_name, None, connor_align)
        writer.close()

        bamfile = samtools.alignment_file(bam_path, 'rb')
        actual_aligns = list(bamfile.fetch())
        bamfile.close()

        self.assertEqual(3, len(actual_aligns))
        actual_names = [read.query_name for read in actual_aligns]
        self.assertEqual(['align1', 'align2', 'align3'], actual_names)
def test_set_tag(self):
    """``set_tag`` stores the value computed by ``get_value`` under the tag's name."""
    def get_value(family, pair, align):
        # Fold all three arguments into the value so the assertion shows
        # that each one reached the callback.
        return ':'.join([family, pair, align.query_name])

    connor_align = ConnorAlign(mock_align())
    tag = BamTag('X9', 'Z', 'foo description', get_value)

    tag.set_tag('family1', 'pair1', connor_align)

    expected_tags = [('X9', 'family1:pair1:align1')]
    self.assertEqual(expected_tags, connor_align.get_tags())
def test_set_tag_NoneReplacedWIthNullObject(self):
    """``None`` family/pair arguments reach ``get_value`` as ``_NullObject`` instances."""
    def get_value(family, pair, align):
        # Report the runtime type of each argument the callback received.
        type_names = [type(arg).__name__ for arg in (family, pair, align)]
        return ':'.join(type_names)

    connor_align = ConnorAlign(mock_align(query_name='baz'))
    tag = BamTag('X9', 'Z', 'foo description', get_value)

    tag.set_tag(None, None, connor_align)

    expected_tags = [('X9', '_NullObject:_NullObject:ConnorAlign')]
    self.assertEqual(expected_tags, connor_align.get_tags())
def test_write_removesTagsWhenValueIsNone(self):
    """A tag whose ``get_value`` returns None is absent from the written alignment.

    The alignment starts out carrying an ``X1`` tag; after being written
    through a writer whose ``X1`` tag evaluates to None, the alignment read
    back from the BAM is expected to have no tags at all.
    """
    sam_header = {
        'HD': {'VN': '1.0'},
        'SQ': [{'LN': 1575, 'SN': 'chr1'}, {'LN': 1584, 'SN': 'chr2'}],
    }
    with TempDirectory() as tmp_dir:
        bam_path = os.path.join(tmp_dir.path, 'destination.bam')
        align1 = ConnorAlign(mock_align(query_name='align1'))
        align1.set_tag('X1', 'No', 'Z')
        none_tag = BamTag('X1', 'Z', 'desc',
                          get_value=lambda family, pair, align: None)

        writer = samtools.AlignWriter(sam_header, bam_path, [none_tag])
        writer.write('familyA', None, align1)
        writer.close()

        bamfile = samtools.alignment_file(bam_path, 'rb')
        actual_aligns = list(bamfile.fetch())
        bamfile.close()

        # Map (query_name, tag_name) -> "name:type:value" for every tag found.
        align_tags = {
            (read.query_name, name): "{}:{}:{}".format(
                name,
                AlignWriterTest.fix_pysam_inconsistent_tag_type(kind),
                value)
            for read in actual_aligns
            for name, value, kind in read.get_tags(with_value_type=True)
        }

        self.assertEqual(1, len(actual_aligns))
        self.assertEqual(0, len(align_tags))
def test_write_addsAlignTags(self):
    """Each written alignment carries the value every configured tag computes for it.

    Three tags (family, pair, alignment name) are applied to three
    alignments; the BAM is read back and each of the nine
    (alignment, tag) combinations is checked as ``name:type:value``.
    """
    sam_header = {
        'HD': {'VN': '1.0'},
        'SQ': [{'LN': 1575, 'SN': 'chr1'}, {'LN': 1584, 'SN': 'chr2'}],
    }
    with TempDirectory() as tmp_dir:
        bam_path = os.path.join(tmp_dir.path, 'destination.bam')
        align1 = ConnorAlign(mock_align(query_name='align1'))
        align2 = ConnorAlign(mock_align(query_name='align2'))
        align3 = ConnorAlign(mock_align(query_name='align3'))
        family_tag = BamTag('X1', 'Z', 'desc',
                            get_value=lambda family, pair, align: family)
        pair_tag = BamTag('X2', 'Z', 'desc',
                          get_value=lambda family, pair, align: pair)
        name_tag = BamTag('X3', 'Z', 'desc',
                          get_value=lambda family, pair, align: align.query_name)

        writer = samtools.AlignWriter(sam_header,
                                      bam_path,
                                      [family_tag, pair_tag, name_tag])
        writes = (
            ('familyA', 'pair1', align1),
            ('familyB', 'pair2', align2),
            ('familyC', 'pair3', align3),
        )
        for family_name, pair_name, connor_align in writes:
            writer.write(family_name, pair_name, connor_align)
        writer.close()

        bamfile = samtools.alignment_file(bam_path, 'rb')
        actual_aligns = list(bamfile.fetch())
        bamfile.close()

        # Map (query_name, tag_name) -> "name:type:value" for every tag found.
        align_tags = {
            (read.query_name, name): "{}:{}:{}".format(
                name,
                AlignWriterTest.fix_pysam_inconsistent_tag_type(kind),
                value)
            for read in actual_aligns
            for name, value, kind in read.get_tags(with_value_type=True)
        }

        self.assertEqual(3, len(actual_aligns))
        expected_tags = (
            (('align1', 'X1'), "X1:Z:familyA"),
            (('align2', 'X1'), "X1:Z:familyB"),
            (('align3', 'X1'), "X1:Z:familyC"),
            (('align1', 'X2'), "X2:Z:pair1"),
            (('align2', 'X2'), "X2:Z:pair2"),
            (('align3', 'X2'), "X2:Z:pair3"),
            (('align1', 'X3'), "X3:Z:align1"),
            (('align2', 'X3'), "X3:Z:align2"),
            (('align3', 'X3'), "X3:Z:align3"),
        )
        for key, expected in expected_tags:
            self.assertEqual(expected, align_tags[key])
def test_write_addsHeaderTags(self):
    """The writer appends one header comment per tag after the existing comments.

    The tags are passed as ``[tag2, tag1]`` but the expected comments list
    X1 before X2. Only the ``CO`` entries of the written header are
    checked; the other header sections are not compared.
    """
    with TempDirectory() as tmp_dir:
        bam_path = os.path.join(tmp_dir.path, 'destination.bam')
        header = {
            'HD': {'VN': '1.0'},
            'SQ': [{'LN': 1575, 'SN': 'chr1'}, {'LN': 1584, 'SN': 'chr2'}],
            'CO': ['comment1', 'comment2'],
        }
        tag1 = BamTag('X1', 'Z', 'annotates family', get_value=None)
        tag2 = BamTag('X2', 'Z', 'annotates alignment', get_value=None)

        writer = samtools.AlignWriter(header, bam_path, [tag2, tag1])
        writer.close()

        bamfile = samtools.alignment_file(bam_path, 'rb')
        actual_header = dict(bamfile.header)
        bamfile.close()

        # pop() raises KeyError if the written header has no comments at all.
        actual_comments = actual_header.pop('CO')
        expected_comments = [
            'comment1',
            'comment2',
            'connor\tBAM tag\tX1: annotates family',
            'connor\tBAM tag\tX2: annotates alignment',
        ]
        self.assertEqual(expected_comments, actual_comments)
def test_init_setsHeaderComment(self):
    """Constructing a ``BamTag`` derives ``header_comment`` from its name and description."""
    # The callable is a placeholder that this test never exercises; it takes
    # (family, pair, align) like the other get_value callables in this file.
    tag = BamTag('foo',
                 'Z',
                 'foo description',
                 lambda family, pair, align: None)
    self.assertEqual('connor\tBAM tag\tfoo: foo description',
                     tag.header_comment)
def test_lt_sortsByNameThenDescription(self):
    """``BamTag.__lt__`` compares by tag name first, then by description."""
    base = BamTag('X2', 'i', 'Desc B', None)
    # Each row: (expected result of base.__lt__(other), other).
    comparisons = [
        (False, base),
        (False, BamTag('X2', 'i', 'Desc B', None)),
        (True, BamTag('X2', 'i', 'Desc C', None)),
        (True, BamTag('X3', 'i', 'Desc B', None)),
        (False, BamTag('X1', 'i', 'Desc B', None)),
        (False, BamTag('X2', 'i', 'Desc A', None)),
    ]
    for expected, other in comparisons:
        self.assertEqual(expected, base.__lt__(other))