Example #1
0
    def test_close_sortsAndIndexes(self):
        # Records are written out of coordinate order (300, 100, 200); after
        # close() a fetch() over the file must yield them ordered by start.
        # NOTE(review): fetch() presumably relies on the index that close()
        # is expected to build -- confirm against AlignWriter.close.
        with TempDirectory() as tmp_dir:
            bam_path = os.path.join(tmp_dir.path, 'destination.bam')
            header = {'HD': {'VN': '1.0'},
                      'SQ': [{'LN': 1575, 'SN': 'chr1'},
                             {'LN': 1584, 'SN': 'chr2'}]}
            align1 = ConnorAlign(mock_align(query_name='align1',
                                            reference_start=100))
            align2 = ConnorAlign(mock_align(query_name='align2',
                                            reference_start=200))
            align3 = ConnorAlign(mock_align(query_name='align3',
                                            reference_start=300))

            tag1 = BamTag('X1', 'Z', 'desc',
                          get_value=lambda family, pair, align: family)
            tag2 = BamTag('X2', 'Z', 'desc',
                          get_value=lambda family, pair, align: align.query_name)

            writer = samtools.AlignWriter(header, bam_path, [tag1, tag2])

            # Deliberately unsorted write order: 300, 100, 200.
            writer.write('familyC', None, align3)
            writer.write('familyA', None, align1)
            writer.write('familyB', None, align2)
            writer.close()

            bamfile = samtools.alignment_file(bam_path, 'rb')
            try:
                actual_aligns = list(bamfile.fetch())
            finally:
                # Always release the reader, even when fetch() raises, so a
                # failing read does not leave the BAM handle open.
                bamfile.close()

            self.assertEqual(3, len(actual_aligns))
            self.assertEqual('align1', actual_aligns[0].query_name)
            self.assertEqual('align2', actual_aligns[1].query_name)
            self.assertEqual('align3', actual_aligns[2].query_name)
Example #2
0
    def test_gettersPassthroughToPysamAlignSegment(self):
        # Every value read through the ConnorAlign wrapper is expected to
        # match what was placed on the wrapped pysam segment.
        segment_fields = dict(
            query_name="queryname_1",
            flag=99,
            reference_id=3,
            reference_start=142,
            mapping_quality=20,
            cigarstring="8M",
            next_reference_id=4,
            next_reference_start=242,
            template_length=100,
            query_sequence="ACGTACGT",
            query_qualities=[20]*8,
        )
        segment = mock_align(**segment_fields)
        segment.set_tag('X1', 'foo')
        wrapper = ConnorAlign(segment)

        # Plain attribute getters.
        self.assertEqual('queryname_1', wrapper.query_name)
        self.assertEqual(99, wrapper.flag)
        self.assertEqual(3, wrapper.reference_id)
        self.assertEqual(142, wrapper.reference_start)
        self.assertEqual(20, wrapper.mapping_quality)
        self.assertEqual('8M', wrapper.cigarstring)
        self.assertEqual(242, wrapper.next_reference_start)
        self.assertEqual(100, wrapper.template_length)

        # Sequence is normalised through the class helper before comparing.
        sequence_text = ConnorAlignTest.byte_array_to_string(
            wrapper.query_sequence)
        self.assertEqual('ACGTACGT', sequence_text)
        self.assertEqual([20] * 8, wrapper.query_qualities)

        # reference_end is expected at start + 8 for the "8M" cigar.
        self.assertEqual(150, wrapper.reference_end)

        # Tag accessors.
        self.assertEqual('foo', wrapper.get_tag('X1'))
        self.assertEqual([('X1', 'foo')], wrapper.get_tags())
Example #3
0
    def test_write_removesTagsWhenValueIsNone(self):
        # The alignment starts out carrying an X1 tag. The writer is given an
        # X1 BamTag whose value function returns None, and the record read
        # back from disk is expected to have no tags left.
        with TempDirectory() as tmp_dir:
            bam_path = os.path.join(tmp_dir.path, 'destination.bam')
            header = {'HD': {'VN': '1.0'},
                      'SQ': [{'LN': 1575, 'SN': 'chr1'},
                             {'LN': 1584, 'SN': 'chr2'}]}
            align1 = ConnorAlign(mock_align(query_name='align1'))
            align1.set_tag('X1', 'No', 'Z')

            tag1 = BamTag('X1', 'Z', 'desc',
                          get_value=lambda family, pair, align: None)

            writer = samtools.AlignWriter(header, bam_path, [tag1])

            writer.write('familyA', None, align1)
            writer.close()

            bamfile = samtools.alignment_file(bam_path, 'rb')
            try:
                actual_aligns = list(bamfile.fetch())
            finally:
                # Always release the reader, even when fetch() raises, so a
                # failing read does not leave the BAM handle open.
                bamfile.close()

        # Collect every tag found on the records read back, keyed by
        # (query name, tag name) and rendered as "NAME:TYPE:VALUE".
        align_tags = {}
        for actual_align in actual_aligns:
            for t_name, t_val, t_type in actual_align.get_tags(with_value_type=True):
                key = (actual_align.query_name, t_name)
                t_type = AlignWriterTest.fix_pysam_inconsistent_tag_type(t_type)
                align_tags[key] = "{}:{}:{}".format(t_name, t_type, t_val)

        self.assertEqual(1, len(actual_aligns))
        self.assertEqual(0, len(align_tags))
Example #4
0
    def test_set_tag(self):
        # set_tag is expected to store, under the tag's name, the string that
        # get_value builds from the (family, pair, align) it is handed.
        def get_value(family, pair, align):
            return ':'.join([family, pair, align.query_name])

        x9_tag = BamTag('X9', 'Z', 'foo description', get_value)
        target = ConnorAlign(mock_align())

        x9_tag.set_tag('family1', 'pair1', target)

        expected_tags = [('X9', 'family1:pair1:align1')]
        self.assertEqual(expected_tags, target.get_tags())
Example #5
0
    def test_set_tag_NoneReplacedWIthNullObject(self):
        # get_value reports the type names of its three arguments, letting
        # the assertion observe what set_tag passes when family and pair are
        # given as None.
        def get_value(family, pair, align):
            received = (family, pair, align)
            return ':'.join(type(arg).__name__ for arg in received)

        x9_tag = BamTag('X9', 'Z', 'foo description', get_value)
        target = ConnorAlign(mock_align(query_name='baz'))

        x9_tag.set_tag(None, None, target)

        expected_tags = [('X9', '_NullObject:_NullObject:ConnorAlign')]
        self.assertEqual(expected_tags, target.get_tags())
Example #6
0
    def test_write_addsAlignTags(self):
        # Three tags are configured, each echoing one of the arguments given
        # to write(): X1 <- family, X2 <- pair, X3 <- the alignment's query
        # name. Every record read back is expected to carry all three.
        with TempDirectory() as tmp_dir:
            bam_path = os.path.join(tmp_dir.path, 'destination.bam')
            header = {'HD': {'VN': '1.0'},
                      'SQ': [{'LN': 1575, 'SN': 'chr1'},
                             {'LN': 1584, 'SN': 'chr2'}]}
            align1 = ConnorAlign(mock_align(query_name='align1'))
            align2 = ConnorAlign(mock_align(query_name='align2'))
            align3 = ConnorAlign(mock_align(query_name='align3'))

            tag1 = BamTag('X1', 'Z', 'desc',
                          get_value=lambda family, pair, align: family)
            tag2 = BamTag('X2', 'Z', 'desc',
                          get_value=lambda family, pair, align: pair)
            tag3 = BamTag('X3', 'Z', 'desc',
                          get_value=lambda family, pair, align: align.query_name)

            writer = samtools.AlignWriter(header, bam_path, [tag1, tag2, tag3])

            writer.write('familyA', 'pair1', align1)
            writer.write('familyB', 'pair2', align2)
            writer.write('familyC', 'pair3', align3)
            writer.close()

            bamfile = samtools.alignment_file(bam_path, 'rb')
            try:
                actual_aligns = list(bamfile.fetch())
            finally:
                # Always release the reader, even when fetch() raises, so a
                # failing read does not leave the BAM handle open.
                bamfile.close()

        # Index the tags read back by (query name, tag name), rendered as
        # "NAME:TYPE:VALUE" for easy comparison.
        align_tags = {}
        for actual_align in actual_aligns:
            for t_name, t_val, t_type in actual_align.get_tags(with_value_type=True):
                key = (actual_align.query_name, t_name)
                t_type = AlignWriterTest.fix_pysam_inconsistent_tag_type(t_type)
                align_tags[key] = "{}:{}:{}".format(t_name, t_type, t_val)

        self.assertEqual(3, len(actual_aligns))
        self.assertEqual("X1:Z:familyA", align_tags[('align1', 'X1')])
        self.assertEqual("X1:Z:familyB", align_tags[('align2', 'X1')])
        self.assertEqual("X1:Z:familyC", align_tags[('align3', 'X1')])
        self.assertEqual("X2:Z:pair1", align_tags[('align1', 'X2')])
        self.assertEqual("X2:Z:pair2", align_tags[('align2', 'X2')])
        self.assertEqual("X2:Z:pair3", align_tags[('align3', 'X2')])
        self.assertEqual("X3:Z:align1", align_tags[('align1', 'X3')])
        self.assertEqual("X3:Z:align2", align_tags[('align2', 'X3')])
        self.assertEqual("X3:Z:align3", align_tags[('align3', 'X3')])
Example #7
0
    def test_filter(self):
        # filter_value is expected to be None on a freshly wrapped alignment
        # and to return whatever is assigned to it afterwards.
        segment_fields = dict(
            query_name="queryname_1",
            flag=99,
            reference_id=3,
            reference_start=142,
            mapping_quality=20,
            cigarstring="8M",
            next_reference_id=4,
            next_reference_start=242,
            template_length=100,
            query_sequence="ACGTACGT",
            query_qualities=[20]*8,
        )
        wrapper = ConnorAlign(mock_align(**segment_fields))

        self.assertEqual(None, wrapper.filter_value)

        wrapper.filter_value = 'foo'
        self.assertEqual('foo', wrapper.filter_value)
Example #8
0
    def test_filter_alignments_passthorughIncludedAligns(self):
        # A single alignment fed through filter_alignments is expected to
        # come back wrapped as a ConnorAlign, with nothing sent to the
        # excluded-alignment writer.
        included = mock_align(query_name="align1")
        rejects_writer = MockAlignWriter()

        surviving = list(filter_alignments([included], rejects_writer))

        self.assertEqual([ConnorAlign(included)], surviving)
        self.assertEqual(0, len(rejects_writer._write_calls))
Example #9
0
 def test_eq(self):
     # Equality cases: same object, same wrapped segment, and a separately
     # built segment with the same query name.
     segment = mock_align(query_name="align1")
     reference = ConnorAlign(segment)
     self.assertEqual(reference, reference)
     self.assertEqual(reference, ConnorAlign(segment))
     same_name_segment = mock_align(query_name="align1")
     self.assertEqual(reference, ConnorAlign(same_name_segment))

     # Inequality cases: a different query name, or the same segment with a
     # different filter_value.
     renamed = ConnorAlign(mock_align(query_name="align2"))
     self.assertNotEqual(reference, renamed)
     refiltered = ConnorAlign(segment)
     refiltered.filter_value = "foo; bar"
     self.assertNotEqual(reference, refiltered)
Example #10
0
    def test_write(self):
        # Three alignments written with no family, no pair and no tags are
        # expected to be read back from the BAM in the order written.
        with TempDirectory() as tmp_dir:
            bam_path = os.path.join(tmp_dir.path, "destination.bam")
            header = {'HD': {'VN': '1.0'},
                      'SQ': [{'LN': 1575, 'SN': 'chr1'},
                             {'LN': 1584, 'SN': 'chr2'}]}
            align1 = ConnorAlign(mock_align(query_name="align1"))
            align2 = ConnorAlign(mock_align(query_name="align2"))
            align3 = ConnorAlign(mock_align(query_name="align3"))
            family = None
            writer = samtools.AlignWriter(header, bam_path)

            writer.write(family, None, align1)
            writer.write(family, None, align2)
            writer.write(family, None, align3)
            writer.close()

            bamfile = samtools.alignment_file(bam_path, 'rb')
            try:
                actual_query_names = [align.query_name
                                      for align in bamfile.fetch()]
            finally:
                # Always release the reader, even when fetch() raises, so a
                # failing read does not leave the BAM handle open.
                bamfile.close()

        self.assertEqual(['align1', 'align2', 'align3'], actual_query_names)
Example #11
0
    def test_close_logs(self):
        # close(log=...) is expected to record exactly one INFO line on the
        # supplied logger, and that line should name the output file.
        with TempDirectory() as tmp_dir:
            bam_path = os.path.join(tmp_dir.path, 'destination.bam')
            header = {'HD': {'VN': '1.0'},
                      'SQ': [{'LN': 1575, 'SN': 'chr1'},
                             {'LN': 1584, 'SN': 'chr2'}]}
            align1 = ConnorAlign(mock_align(query_name='align1',
                                            reference_start=100))

            writer = samtools.AlignWriter(header, bam_path, [])

            writer.write('familyA', None, align1)
            writer.close(log=self.mock_logger)
        info_log_lines = self.mock_logger._log_calls['INFO']
        self.assertEqual(1, len(info_log_lines))
        # Literal substring check: assertRegexpMatches is a deprecated alias
        # (removed in Python 3.12) and, as a regex, the unescaped '.' would
        # match any character. assertIn is available on Python 2.7 and 3.
        self.assertIn('destination.bam', info_log_lines[0])
Example #12
0
    def test_filter_value(self):
        # Each case is (left filter, right filter, expected pair filter).
        # With no filter on either mate (None or '') the pair is expected to
        # report None; otherwise it should report the (left, right) tuple.
        cases = [
            (None, None, None),
            ('', '', None),
            ('foo', None, ('foo', None)),
            (None, 'bar', (None, 'bar')),
        ]
        for left_filter, right_filter, expected in cases:
            left = ConnorAlign(mock_align(), filter_value=left_filter)
            right = ConnorAlign(mock_align(), filter_value=right_filter)
            pair = samtools.PairedAlignment(left, right, tag_length=1)
            self.assertEqual(expected, pair.filter_value)
Example #13
0
def _mock_align_pair(query_name, filter_value=None):
    """Build a lightweight stand-in for a pair of alignments.

    Two ConnorAlign wrappers are created separately, each around its own
    mock alignment with the given query name and filter value, and returned
    on a MicroMock exposing ``query_name``, ``left`` and ``right``.
    """
    def _new_mate():
        return ConnorAlign(mock_align(query_name=query_name), filter_value)

    left_mate = _new_mate()
    right_mate = _new_mate()
    return MicroMock(query_name=query_name, left=left_mate, right=right_mate)
Example #14
0
 def test_orientation_sameIsNeither(self):
     # Identical reference_start and next_reference_start are expected to
     # give an orientation of 'neither'.
     segment = mock_align(flag=129,
                          reference_start=100,
                          next_reference_start=100)
     observed = ConnorAlign(segment).orientation
     self.assertEqual('neither', observed)
Example #15
0
 def test_orientation_right(self):
     # A reference_start greater than next_reference_start is expected to
     # give an orientation of 'right'.
     segment = mock_align(reference_start=200, next_reference_start=100)
     observed = ConnorAlign(segment).orientation
     self.assertEqual('right', observed)
Example #16
0
    def test_settersPassthroughToPysamAlignSegment(self):
        # Values assigned through the ConnorAlign wrapper are expected to
        # show up on the underlying pysam segment.
        original_fields = dict(
            query_name="queryname_1",
            flag=99,
            reference_id=3,
            reference_start=142,
            mapping_quality=20,
            cigarstring="8M",
            next_reference_id=4,
            next_reference_start=242,
            template_length=100,
            query_sequence="ACGTACGT",
            query_qualities=[20]*8,
        )
        segment = mock_align(**original_fields)
        wrapper = ConnorAlign(segment)

        # Applied strictly in the listed order; note that query_qualities is
        # assigned after query_sequence.
        updates = [
            ('query_name', 'queryname_11'),
            ('flag', 147),
            ('reference_id', 13),
            ('reference_start', 1142),
            ('mapping_quality', 120),
            ('cigarstring', "2S8M"),
            ('next_reference_id', 14),
            ('next_reference_start', 1242),
            ('template_length', 1100),
            ('query_sequence', "TTACGTACGT"),
            ('query_qualities', [20]*10),
        ]
        for attribute, new_value in updates:
            setattr(wrapper, attribute, new_value)
        wrapper.set_tag('X1', 'foo', 'Z')

        # All checks below read from the raw segment, not the wrapper.
        self.assertEqual('queryname_11', segment.query_name)
        self.assertEqual(147, segment.flag)
        self.assertEqual(13, segment.reference_id)
        self.assertEqual(1142, segment.reference_start)
        self.assertEqual(120, segment.mapping_quality)
        self.assertEqual('2S8M', segment.cigarstring)
        self.assertEqual(1242, segment.next_reference_start)
        self.assertEqual(1100, segment.template_length)
        sequence_text = ConnorAlignTest.byte_array_to_string(
            segment.query_sequence)
        self.assertEqual('TTACGTACGT', sequence_text)
        self.assertEqual([20] * 10, segment.query_qualities)
        self.assertEqual(1150, segment.reference_end)
        stored_tag = segment.get_tag('X1', with_value_type=True)
        self.assertEqual(('foo', 'Z'), stored_tag)