Beispiel #1
0
    def test_close_sortsAndIndexes(self):
        """Check read-back order after writing alignments out of order.

        Alignments with reference starts 300, 100 and 200 are written in
        that order; after ``close()`` the file is re-opened and ``fetch()``
        is expected to yield align1, align2, align3.
        """
        sequences = [{'LN': 1575, 'SN': 'chr1'},
                     {'LN': 1584, 'SN': 'chr2'}]
        header = {'HD': {'VN': '1.0'}, 'SQ': sequences}
        family_tag = BamTag('X1', 'Z', 'desc',
                            get_value=lambda family, pair, align: family)
        name_tag = BamTag('X2', 'Z', 'desc',
                          get_value=lambda family, pair, align: align.query_name)

        with TempDirectory() as tmp_dir:
            bam_path = os.path.join(tmp_dir.path, 'destination.bam')
            early = ConnorAlign(mock_align(query_name='align1',
                                           reference_start=100))
            middle = ConnorAlign(mock_align(query_name='align2',
                                            reference_start=200))
            late = ConnorAlign(mock_align(query_name='align3',
                                          reference_start=300))

            writer = samtools.AlignWriter(header,
                                          bam_path,
                                          [family_tag, name_tag])
            # Deliberately written out of coordinate order (300, 100, 200).
            write_order = [('familyC', late),
                           ('familyA', early),
                           ('familyB', middle)]
            for family_name, connor_align in write_order:
                writer.write(family_name, None, connor_align)
            writer.close()

            bamfile = samtools.alignment_file(bam_path, 'rb')
            actual_aligns = list(bamfile.fetch())
            bamfile.close()

            self.assertEqual(3, len(actual_aligns))
            expected_names = ['align1', 'align2', 'align3']
            for position, expected_name in enumerate(expected_names):
                self.assertEqual(expected_name,
                                 actual_aligns[position].query_name)
Beispiel #2
0
    def test_write_removesTagsWhenValueIsNone(self):
        """Check that a tag whose value function returns None is not written.

        The input alignment starts with X1 set; the writer is given an X1
        tag whose ``get_value`` returns None, and the single alignment read
        back is expected to carry no tags.
        """
        sequences = [{'LN': 1575, 'SN': 'chr1'},
                     {'LN': 1584, 'SN': 'chr2'}]
        header = {'HD': {'VN': '1.0'}, 'SQ': sequences}
        none_tag = BamTag('X1', 'Z', 'desc',
                          get_value=lambda family, pair, align: None)

        with TempDirectory() as tmp_dir:
            bam_path = os.path.join(tmp_dir.path, 'destination.bam')
            tagged_align = ConnorAlign(mock_align(query_name='align1'))
            tagged_align.set_tag('X1', 'No', 'Z')

            writer = samtools.AlignWriter(header, bam_path, [none_tag])
            writer.write('familyA', None, tagged_align)
            writer.close()

            bamfile = samtools.alignment_file(bam_path, 'rb')
            actual_aligns = list(bamfile.fetch())
            bamfile.close()

        # Map (query name, tag name) -> "NAME:TYPE:VALUE" for every tag that
        # survived the round trip.
        fix_type = AlignWriterTest.fix_pysam_inconsistent_tag_type
        align_tags = {
            (aln.query_name, name): "{}:{}:{}".format(name,
                                                      fix_type(kind),
                                                      value)
            for aln in actual_aligns
            for name, value, kind in aln.get_tags(with_value_type=True)
        }

        self.assertEqual(1, len(actual_aligns))
        self.assertEqual(0, len(align_tags))
Beispiel #3
0
 def test_init_defaultToNoTags(self):
     """Check that omitting the tags argument leaves ``_tags`` empty."""
     sequences = [{'LN': 1575, 'SN': 'chr1'},
                  {'LN': 1584, 'SN': 'chr2'}]
     header = {'HD': {'VN': '1.0'}, 'SQ': sequences}

     with TempDirectory() as tmp_dir:
         destination = os.path.join(tmp_dir.path, "destination.bam")
         # No third argument: the writer is built without any tags.
         writer = samtools.AlignWriter(header, destination)
         writer.close()

     self.assertEqual([], writer._tags)
Beispiel #4
0
    def test_close_logs(self):
        """Check that ``close(log=...)`` logs one INFO line naming the BAM.

        A single alignment is written, the writer is closed with the mock
        logger, and exactly one INFO message containing the output file
        name is expected.
        """
        with TempDirectory() as tmp_dir:
            bam_path = os.path.join(tmp_dir.path, 'destination.bam')
            header = {'HD': {'VN': '1.0'},
                      'SQ': [{'LN': 1575, 'SN': 'chr1'},
                             {'LN': 1584, 'SN': 'chr2'}]}
            align1 = ConnorAlign(mock_align(query_name='align1',
                                            reference_start=100))

            writer = samtools.AlignWriter(header, bam_path, [])

            writer.write('familyA', None, align1)
            writer.close(log=self.mock_logger)

        info_log_lines = self.mock_logger._log_calls['INFO']
        self.assertEqual(1, len(info_log_lines))
        # Literal substring check: assertRegexpMatches is a deprecated alias
        # (removed in Python 3.12), and the old pattern's unescaped '.'
        # matched any character rather than a literal dot.
        self.assertIn('destination.bam', info_log_lines[0])
Beispiel #5
0
    def test_write_addsAlignTags(self):
        """Check that each tag's value function is applied per alignment.

        Three tags derive their value from the family, the pair and the
        alignment respectively; every alignment read back is expected to
        carry all three as "NAME:TYPE:VALUE".
        """
        sequences = [{'LN': 1575, 'SN': 'chr1'},
                     {'LN': 1584, 'SN': 'chr2'}]
        header = {'HD': {'VN': '1.0'}, 'SQ': sequences}
        family_tag = BamTag('X1', 'Z', 'desc',
                            get_value=lambda family, pair, align: family)
        pair_tag = BamTag('X2', 'Z', 'desc',
                          get_value=lambda family, pair, align: pair)
        name_tag = BamTag('X3', 'Z', 'desc',
                          get_value=lambda family, pair, align: align.query_name)

        with TempDirectory() as tmp_dir:
            bam_path = os.path.join(tmp_dir.path, 'destination.bam')
            writer = samtools.AlignWriter(header,
                                          bam_path,
                                          [family_tag, pair_tag, name_tag])

            records = [
                ('familyA', 'pair1', ConnorAlign(mock_align(query_name='align1'))),
                ('familyB', 'pair2', ConnorAlign(mock_align(query_name='align2'))),
                ('familyC', 'pair3', ConnorAlign(mock_align(query_name='align3'))),
            ]
            for family_name, pair_name, connor_align in records:
                writer.write(family_name, pair_name, connor_align)
            writer.close()

            bamfile = samtools.alignment_file(bam_path, 'rb')
            actual_aligns = list(bamfile.fetch())
            bamfile.close()

        # Map (query name, tag name) -> "NAME:TYPE:VALUE" for every tag found.
        fix_type = AlignWriterTest.fix_pysam_inconsistent_tag_type
        align_tags = {
            (aln.query_name, name): "{}:{}:{}".format(name,
                                                      fix_type(kind),
                                                      value)
            for aln in actual_aligns
            for name, value, kind in aln.get_tags(with_value_type=True)
        }

        self.assertEqual(3, len(actual_aligns))
        # Ordered so that the first failing expectation is reported first;
        # lookups are keyed, so extra tags on an alignment are tolerated.
        expectations = [
            (('align1', 'X1'), "X1:Z:familyA"),
            (('align2', 'X1'), "X1:Z:familyB"),
            (('align3', 'X1'), "X1:Z:familyC"),
            (('align1', 'X2'), "X2:Z:pair1"),
            (('align2', 'X2'), "X2:Z:pair2"),
            (('align3', 'X2'), "X2:Z:pair3"),
            (('align1', 'X3'), "X3:Z:align1"),
            (('align2', 'X3'), "X3:Z:align2"),
            (('align3', 'X3'), "X3:Z:align3"),
        ]
        for tag_key, expected_text in expectations:
            self.assertEqual(expected_text, align_tags[tag_key])
Beispiel #6
0
    def test_write(self):
        """Check that written alignments can be read back by query name.

        Three alignments are written with no family, no pair and no tags;
        the names fetched from the resulting file are expected to be
        align1, align2, align3 in that order.
        """
        sequences = [{'LN': 1575, 'SN': 'chr1'},
                     {'LN': 1584, 'SN': 'chr2'}]
        header = {'HD': {'VN': '1.0'}, 'SQ': sequences}
        family = None

        with TempDirectory() as tmp_dir:
            bam_path = os.path.join(tmp_dir.path, "destination.bam")
            connor_aligns = [ConnorAlign(mock_align(query_name="align1")),
                             ConnorAlign(mock_align(query_name="align2")),
                             ConnorAlign(mock_align(query_name="align3"))]

            writer = samtools.AlignWriter(header, bam_path)
            for connor_align in connor_aligns:
                writer.write(family, None, connor_align)
            writer.close()

            bamfile = samtools.alignment_file(bam_path, 'rb')
            actual_query_names = []
            for fetched in bamfile.fetch():
                actual_query_names.append(fetched.query_name)
            bamfile.close()

        self.assertEqual(['align1', 'align2', 'align3'], actual_query_names)
Beispiel #7
0
    def test_write_addsHeaderTags(self):
        """Check that writer tags are appended to the header comments.

        The header starts with two CO comments and the writer is given the
        tags as [X2, X1]; the comments read back are expected to be the two
        originals followed by one line per tag, X1 before X2.
        """
        header = {'HD': {'VN': '1.0'},
                  'SQ': [{'LN': 1575, 'SN': 'chr1'},
                         {'LN': 1584, 'SN': 'chr2'}],
                  'CO': ['comment1', 'comment2']}
        family_tag = BamTag('X1', 'Z', 'annotates family', get_value=None)
        alignment_tag = BamTag('X2', 'Z', 'annotates alignment', get_value=None)

        with TempDirectory() as tmp_dir:
            bam_path = os.path.join(tmp_dir.path, 'destination.bam')
            writer = samtools.AlignWriter(header,
                                          bam_path,
                                          [alignment_tag, family_tag])
            writer.close()

            bamfile = samtools.alignment_file(bam_path, 'rb')
            actual_header = dict(bamfile.header)
            bamfile.close()

        # NOTE(review): expected_header is prepared here but is not compared
        # against actual_header in the visible portion of this test.
        expected_header = deepcopy(header)
        del expected_header['CO']
        actual_comments = actual_header.pop('CO')

        self.assertEqual(['comment1',
                          'comment2',
                          'connor\tBAM tag\tX1: annotates family',
                          'connor\tBAM tag\tX2: annotates alignment'],
                         actual_comments)