Ejemplo n.º 1
0
    def test_end_sortsAndIndexes(self):
        #pylint: disable=no-member
        # Hands two reads to the handler in descending reference_start order
        # and checks that, after end(), the output directory holds the BAM
        # plus its .bai and that the reads come back as read2 then read1.
        with TempDirectory() as input_dir, TempDirectory() as output_dir:
            input_bam_filename = os.path.join(input_dir.path, "input.bam")
            make_bam_file(input_bam_filename, [build_read()])
            output_bam_filename = os.path.join(output_dir.path, "output.bam")
            mock_log = MockLog()
            handler = readhandler.WriteReadHandler(input_bam_filename,
                                                   output_bam_filename,
                                                   log_method=mock_log.log)
            read1 = build_read(query_name="read1",
                               reference_id=0,
                               reference_start=20)
            read2 = build_read(query_name="read2",
                               reference_id=0,
                               reference_start=10)

            handler.begin()
            handler.handle(read1, None, None)
            handler.handle(read2, None, None)
            handler.end()

            actual_files = sorted(os.listdir(output_dir.path))
            self.assertEqual(["output.bam", "output.bam.bai"], actual_files)
            actual_bam = pysam.AlignmentFile(output_bam_filename, "rb")
            try:
                actual_reads = list(actual_bam.fetch())
            finally:
                # Close even when fetch() fails so no handle is left open
                # inside the temp directory while it is being cleaned up.
                actual_bam.close()

        self.assertEqual(2, len(actual_reads))
        self.assertEqual("read2", actual_reads[0].query_name)
        self.assertEqual("read1", actual_reads[1].query_name)
Ejemplo n.º 2
0
    def test_main(self):
        primer_file_content = \
'''Customer TargetID|Chr|Sense Start|Antisense Start|Sense Sequence|Antisense Sequence
primer1|1|101|200|AAGG|CCTT
primer2|2|501|600|CGCG|ATAT
'''.replace("|", "\t")
        CIGAR_10M = ((0, 10), )
        readA1 = build_read(query_name="readA",
                            query_sequence="AGCTTAGCTA",
                            flag=99,
                            reference_id=0,
                            reference_start=100,
                            cigar=CIGAR_10M,
                            next_reference_id=0,
                            next_reference_start=190,
                            template_length=80)
        readA2 = build_read(query_name="readA",
                            query_sequence="AGCTTAGCTA",
                            flag=147,
                            reference_id=0,
                            reference_start=190,
                            cigar=CIGAR_10M,
                            next_reference_id=0,
                            next_reference_start=100,
                            template_length=80)
        readB1 = build_read(query_name="readB",
                            query_sequence="AGCTTAGCTA",
                            flag=0,
                            reference_id=1,
                            reference_start=242,
                            cigar=CIGAR_10M,
                            next_reference_id=0,
                            next_reference_start=0,
                            template_length=0)

        with TempDirectory() as input_dir, TempDirectory() as output_dir:
            input_bam_filename = os.path.join(input_dir.path, "input.bam")
            output_bam_filename = os.path.join(output_dir.path, "output.bam")
            input_primers_filename = self._create_file(input_dir.path,
                                                       'primers.txt',
                                                       primer_file_content)
            make_bam_file(input_bam_filename, [readA1, readA2, readB1])

            clipper.main([
                "katana", input_primers_filename, input_bam_filename,
                output_bam_filename
            ])

            actual = self._bam_to_sam(output_bam_filename)

        self.assertRegexpMatches(actual[0], "readA.*chr1.*105.*4S6M.*191")
        self.assertRegexpMatches(actual[1], "readA.*chr1.*191.*6M4S.*105")
        self.assertEquals(2, len(actual))
Ejemplo n.º 3
0
    def test_check_input_bam_barcoded_okAtThreshold(self):
        sam_contents = \
'''@HD|VN:1.4|GO:none|SO:coordinate
@SQ|SN:chr10|LN:135534747
readNameA1|99|chr10|100|20|8M|=|300|200|NNNAAAAA|>>>>>>>>
readNameA1|147|chr10|100|20|8M|=|300|200|AAAAANNN|>>>>>>>>
readNameA2|99|chr10|100|20|3S5M|=|300|200|NNNAAAAA|>>>>>>>>
readNameA2|147|chr10|100|20|5M3S|=|300|200|AAAAANNN|>>>>>>>>
readNameA3|99|chr10|100|20|3S5M|=|300|200|NNNAAAAA|>>>>>>>>
readNameA3|147|chr10|100|20|5M3S|=|300|200|AAAAANNN|>>>>>>>>
readNameA4|99|chr10|100|20|3S5M|=|300|200|NNNAAAAA|>>>>>>>>
readNameA4|147|chr10|100|20|5M3S|=|300|200|AAAAANNN|>>>>>>>>
readNameA5|99|chr10|100|20|3S5M|=|300|200|NNNAAAAA|>>>>>>>>
readNameA5|147|chr10|100|20|5M3S|=|300|200|AAAAANNN|>>>>>>>>
'''
        sam_contents = sam_contents.replace("|", "\t")
        with TempDirectory() as tmp_dir:
            input_bam_path = samtools_test.create_bam(tmp_dir.path,
                                                      "input.sam",
                                                      sam_contents,
                                                      index=True)
            args = Namespace(input_bam=input_bam_path, force=False)
            validator._check_input_bam_barcoded(args, self.mock_logger)
        self.ok()
        self.assertEquals(0, len(self.mock_logger._log_calls))
Ejemplo n.º 4
0
    def test_close_sortsAndIndexes(self):
        # Alignments are written in the order align3, align1, align2; reading
        # the closed file back is expected to yield align1, align2, align3
        # (ascending reference_start).
        def family_value(family, pair, align):
            return family

        def name_value(family, pair, align):
            return align.query_name

        with TempDirectory() as tmp_dir:
            bam_path = os.path.join(tmp_dir.path, 'destination.bam')
            sequences = [{'LN': 1575, 'SN': 'chr1'},
                         {'LN': 1584, 'SN': 'chr2'}]
            header = {'HD': {'VN': '1.0'}, 'SQ': sequences}

            aligns = {}
            for name, start in (('align1', 100),
                                ('align2', 200),
                                ('align3', 300)):
                aligns[name] = ConnorAlign(mock_align(query_name=name,
                                                      reference_start=start))

            tags = [BamTag('X1', 'Z', 'desc', get_value=family_value),
                    BamTag('X2', 'Z', 'desc', get_value=name_value)]

            writer = samtools.AlignWriter(header, bam_path, tags)
            for family, name in (('familyC', 'align3'),
                                 ('familyA', 'align1'),
                                 ('familyB', 'align2')):
                writer.write(family, None, aligns[name])
            writer.close()

            bamfile = samtools.alignment_file(bam_path, 'rb')
            actual_aligns = list(bamfile.fetch())
            bamfile.close()

            self.assertEqual(3, len(actual_aligns))
            for position, expected_name in enumerate(('align1',
                                                      'align2',
                                                      'align3')):
                self.assertEqual(expected_name,
                                 actual_aligns[position].query_name)
Ejemplo n.º 5
0
    def test_write_removesTagsWhenValueIsNone(self):
        # align1 starts out carrying an X1 tag; the writer is given an X1
        # BamTag whose get_value always returns None. The alignment read back
        # is expected to have no tags at all.
        def no_value(family, pair, align):
            return None

        with TempDirectory() as tmp_dir:
            bam_path = os.path.join(tmp_dir.path, 'destination.bam')
            sequences = [{'LN': 1575, 'SN': 'chr1'},
                         {'LN': 1584, 'SN': 'chr2'}]
            header = {'HD': {'VN': '1.0'}, 'SQ': sequences}
            align1 = ConnorAlign(mock_align(query_name='align1'))
            align1.set_tag('X1', 'No', 'Z')

            x1_tag = BamTag('X1', 'Z', 'desc', get_value=no_value)

            writer = samtools.AlignWriter(header, bam_path, [x1_tag])
            writer.write('familyA', None, align1)
            writer.close()

            bamfile = samtools.alignment_file(bam_path, 'rb')
            actual_aligns = list(bamfile.fetch())
            bamfile.close()

        # Flatten every tag found into {(query_name, tag_name): "name:type:value"}.
        align_tags = {}
        for found in actual_aligns:
            for tag in found.get_tags(with_value_type=True):
                tag_name, tag_value, tag_type = tag
                tag_type = \
                    AlignWriterTest.fix_pysam_inconsistent_tag_type(tag_type)
                align_tags[(found.query_name, tag_name)] = \
                    "{}:{}:{}".format(tag_name, tag_type, tag_value)

        self.assertEqual(1, len(actual_aligns))
        self.assertEqual(0, len(align_tags))
Ejemplo n.º 6
0
    def test_check_input_bam_barcoded_rightUnbarcodedRaisesUsageError(self):
        sam_contents = \
'''@HD|VN:1.4|GO:none|SO:coordinate
@SQ|SN:chr10|LN:135534747
readNameA1|99|chr10|100|20|8M|=|300|200|NNNAAAAA|>>>>>>>>
readNameA1|147|chr10|100|20|8M|=|300|200|AAAAANNN|>>>>>>>>
readNameA2|99|chr10|100|20|3S5M|=|300|200|NNNAAAAA|>>>>>>>>
readNameA2|147|chr10|100|20|8M|=|300|200|AAAAANNN|>>>>>>>>
readNameA3|99|chr10|100|20|3S5M|=|300|200|NNNAAAAA|>>>>>>>>
readNameA3|147|chr10|100|20|5M3S|=|300|200|AAAAANNN|>>>>>>>>
readNameA4|99|chr10|100|20|3S5M|=|300|200|NNNAAAAA|>>>>>>>>
readNameA4|147|chr10|100|20|5M3S|=|300|200|AAAAANNN|>>>>>>>>
readNameA5|99|chr10|100|20|3S5M|=|300|200|NNNAAAAA|>>>>>>>>
readNameA5|147|chr10|100|20|5M3S|=|300|200|AAAAANNN|>>>>>>>>
'''
        sam_contents = sam_contents.replace("|", "\t")
        with TempDirectory() as tmp_dir:
            input_bam_path = samtools_test.create_bam(tmp_dir.path,
                                                      "input.sam",
                                                      sam_contents,
                                                      index=True)
            args = Namespace(input_bam=input_bam_path, force=False)
            regex = r'\[.*input.bam\] reads do not appear to have barcodes'
            self.assertRaisesRegexp(utils.UsageError, regex,
                                    validator._check_input_bam_barcoded, args)
Ejemplo n.º 7
0
 def test_check_input_bam_exists_raisesUsageError(self):
     # Nothing is written to the temp dir, so the path handed to the check
     # points at a file that is not there; a UsageError is expected.
     expected_msg = r'\[.*input.bam\] does not exist'
     with TempDirectory() as tmp_dir:
         missing_bam = os.path.join(tmp_dir.path, 'input.bam')
         args = Namespace(input_bam=missing_bam)
         with self.assertRaisesRegexp(utils.UsageError, expected_msg):
             validator._check_input_bam_exists(args)
Ejemplo n.º 8
0
 def test_check_input_bam_exists_ok(self):
     # A file named input.bam is present (its content is irrelevant here),
     # so the existence check should return without raising.
     with TempDirectory() as tmp_dir:
         tmp_dir.write('input.bam', b'foo')
         existing_bam = os.path.join(tmp_dir.path, 'input.bam')
         validator._check_input_bam_exists(Namespace(input_bam=existing_bam))
         self.ok()
Ejemplo n.º 9
0
    def test_check_input_bam_barcoded_warnIfForced(self):
        sam_contents = \
'''@HD|VN:1.4|GO:none|SO:coordinate
@SQ|SN:chr10|LN:135534747
readNameA1|99|chr10|100|20|8M|=|300|200|NNNAAAAA|>>>>>>>>
readNameA1|147|chr10|100|20|8M|=|300|200|AAAAANNN|>>>>>>>>
readNameA2|99|chr10|100|20|8M|=|300|200|NNNAAAAA|>>>>>>>>
readNameA2|147|chr10|100|20|8M|=|300|200|AAAAANNN|>>>>>>>>
readNameA3|99|chr10|100|20|3S5M|=|300|200|NNNAAAAA|>>>>>>>>
readNameA3|147|chr10|100|20|5M3S|=|300|200|AAAAANNN|>>>>>>>>
readNameA4|99|chr10|100|20|3S5M|=|300|200|NNNAAAAA|>>>>>>>>
readNameA4|147|chr10|100|20|5M3S|=|300|200|AAAAANNN|>>>>>>>>
readNameA5|99|chr10|100|20|3S5M|=|300|200|NNNAAAAA|>>>>>>>>
readNameA5|147|chr10|100|20|5M3S|=|300|200|AAAAANNN|>>>>>>>>
'''
        sam_contents = sam_contents.replace("|", "\t")
        with TempDirectory() as tmp_dir:
            input_bam_path = samtools_test.create_bam(tmp_dir.path,
                                                      "input.sam",
                                                      sam_contents,
                                                      index=True)
            args = Namespace(input_bam=input_bam_path, force=True)
            validator._check_input_bam_barcoded(args, self.mock_logger)
        warnings = self.mock_logger._log_calls['WARNING']
        self.assertEqual(1, len(warnings))
        regex = r'\[.*input.bam\] reads do not appear to have barcodes.*forcing'
        self.assertRegexpMatches(warnings[0], regex)
Ejemplo n.º 10
0
 def test_check_input_bam_valid_raisesUsageError(self):
     # input.bam exists but holds only the bytes b'foo'; the validity check
     # is expected to reject it with a UsageError.
     expected_msg = r'\[.*input.bam\] not a valid BAM'
     with TempDirectory() as tmp_dir:
         tmp_dir.write('input.bam', b'foo')
         bogus_bam = os.path.join(tmp_dir.path, 'input.bam')
         args = Namespace(input_bam=bogus_bam)
         with self.assertRaisesRegexp(utils.UsageError, expected_msg):
             validator._check_input_bam_valid(args)
Ejemplo n.º 11
0
 def test_init_defaultToNoTags(self):
     # An AlignWriter constructed without a tags argument is expected to
     # expose an empty _tags list.
     sequences = [{'LN': 1575, 'SN': 'chr1'},
                  {'LN': 1584, 'SN': 'chr2'}]
     header = {'HD': {'VN': '1.0'}, 'SQ': sequences}
     with TempDirectory() as tmp_dir:
         destination = os.path.join(tmp_dir.path, "destination.bam")
         writer = samtools.AlignWriter(header, destination)
         writer.close()
     self.assertEqual([], writer._tags)
Ejemplo n.º 12
0
 def test_check_overwrite_output_ok(self):
     # Neither output file exists in the fresh temp dir, so with force=False
     # the overwrite check should pass silently (nothing logged).
     with TempDirectory() as tmp_dir:
         args = Namespace(
             output_bam=os.path.join(tmp_dir.path, 'deduped.bam'),
             annotated_output_bam=os.path.join(tmp_dir.path, 'annotated.bam'),
             force=False)
         validator._check_overwrite_output(args, self.mock_logger)
     self.ok()
     self.assertEqual(0, len(self.mock_logger._log_calls))
Ejemplo n.º 13
0
    def test_sort_and_index_bam(self):
        sam_contents = \
'''@HD|VN:1.4|GO:none|SO:coordinate
@SQ|SN:chr10|LN:135534747
readNameB1|147|chr10|400|0|5M|=|200|100|CCCCC|>>>>>
readNameA1|147|chr10|300|0|5M|=|100|100|AAAAA|>>>>>
readNameA1|99|chr10|100|0|5M|=|300|200|AAAAA|>>>>>
readNameB1|99|chr10|200|0|5M|=|400|200|CCCCC|>>>>>
readNameA2|147|chr10|300|0|5M|=|100|100|AAAAA|>>>>>
readNameA2|99|chr10|100|0|5M|=|300|200|AAAAA|>>>>>
'''.replace("|", "\t")

        with TempDirectory() as tmp_dir:
            bam = create_bam(tmp_dir.path,
                             "input.sam",
                             sam_contents,
                              index=False)
            samtools.sort_and_index_bam(bam)
            alignments = samtools.alignment_file(bam, "rb").fetch()
            aligns = [(a.query_name, a.reference_start + 1) for a in alignments]
            self.assertEquals(6, len(aligns))
            self.assertEquals([("readNameA1", 100),
                               ("readNameA2", 100),
                               ("readNameB1", 200),
                               ("readNameA1", 300),
                               ("readNameA2", 300),
                               ("readNameB1", 400)],
                              aligns)

            original_dir = os.getcwd()
            try:
                os.chdir(tmp_dir.path)
                os.mkdir("tmp")
                bam = create_bam(os.path.join(tmp_dir.path, "tmp"),
                                 "input.sam",
                                 sam_contents,
                                 index=False)
                bam_filename = os.path.basename(bam)

                samtools.sort_and_index_bam(os.path.join("tmp", bam_filename))

                alignments = samtools.alignment_file(bam, "rb").fetch()
                aligns = [(a.query_name,
                           a.reference_start + 1) for a in alignments]
                self.assertEquals(6, len(aligns))
                self.assertEquals([("readNameA1", 100),
                                   ("readNameA2", 100),
                                   ("readNameB1", 200),
                                   ("readNameA1", 300),
                                   ("readNameA2", 300),
                                   ("readNameB1", 400)],
                                  aligns)
            finally:
                os.chdir(original_dir)
Ejemplo n.º 14
0
    def test_check_input_bam_indexed_ok(self):
        sam_contents = \
'''@HD|VN:1.4|GO:none|SO:coordinate
@SQ|SN:chr10|LN:135534747
readNameA1|99|chr10|100|20|5M|=|300|200|AAAAA|>>>>>'''.replace("|", "\t")
        with TempDirectory() as tmp_dir:
            input_bam_path = samtools_test.create_bam(tmp_dir.path,
                                                      "input.sam",
                                                      sam_contents,
                                                      index=True)
            args = Namespace(input_bam=input_bam_path)
            validator._check_input_bam_indexed(args)
            self.ok()
Ejemplo n.º 15
0
 def test_check_overwrite_output_raisesUsageErrorIfAnnotatedPresent(self):
     # Only annotated.bam pre-exists; with force=False the overwrite check
     # is expected to raise a UsageError and log nothing.
     expected_msg = r'\[.*annotated.bam\] exist.*force'
     with TempDirectory() as tmp_dir:
         tmp_dir.write('annotated.bam', b'foo')
         args = Namespace(
             output_bam=os.path.join(tmp_dir.path, 'deduped.bam'),
             annotated_output_bam=os.path.join(tmp_dir.path, 'annotated.bam'),
             force=False)
         with self.assertRaisesRegexp(utils.UsageError, expected_msg):
             validator._check_overwrite_output(args, self.mock_logger)
         self.assertEqual(0, len(self.mock_logger._log_calls))
Ejemplo n.º 16
0
 def test_check_overwrite_output_warnIfForced(self):
     # Both output files pre-exist; with force=True the overwrite check is
     # expected to log a single WARNING naming both files.
     expected_msg = r'\[.*deduped.bam, .*annotated.bam\] exist.*forcing'
     with TempDirectory() as tmp_dir:
         tmp_dir.write('deduped.bam', b'foo')
         tmp_dir.write('annotated.bam', b'bar')
         forced_args = Namespace(
             output_bam=os.path.join(tmp_dir.path, 'deduped.bam'),
             annotated_output_bam=os.path.join(tmp_dir.path, 'annotated.bam'),
             force=True)
         validator._check_overwrite_output(forced_args, self.mock_logger)
     logged_warnings = self.mock_logger._log_calls['WARNING']
     self.assertEqual(1, len(logged_warnings))
     self.assertRegexpMatches(logged_warnings[0], expected_msg)
Ejemplo n.º 17
0
    def test_check_input_bam_indexed_raisesUsageError(self):
        sam_contents = \
'''@HD|VN:1.4|GO:none|SO:coordinate
@SQ|SN:chr10|LN:135534747
readNameA1|99|chr10|100|20|5M|=|300|200|AAAAA|>>>>>'''.replace("|", "\t")
        with TempDirectory() as tmp_dir:
            input_bam_path = samtools_test.create_bam(tmp_dir.path,
                                                      "input.sam",
                                                      sam_contents,
                                                      index=False)
            args = Namespace(input_bam=input_bam_path)
            self.assertRaisesRegexp(utils.UsageError,
                                    r'\[.*input.bam\] is not indexed',
                                    validator._check_input_bam_indexed, args)
Ejemplo n.º 18
0
    def test_check_input_bam_not_empty_raiseUsageError(self):
        sam_contents = \
'''@HD|VN:1.4|GO:none|SO:coordinate
@SQ|SN:chr10|LN:135534747
'''.replace("|", "\t")
        with TempDirectory() as tmp_dir:
            input_bam_path = samtools_test.create_bam(tmp_dir.path,
                                                      "input.sam",
                                                      sam_contents,
                                                      index=True)
            args = Namespace(input_bam=input_bam_path, force=False)
            self.assertRaisesRegexp(utils.UsageError,
                                    r'\[.*input.bam\] is empty',
                                    validator._check_input_bam_not_empty, args)
Ejemplo n.º 19
0
    def test_check_input_bam_not_deduped_ok(self):
        sam_contents = \
'''@HD|VN:1.4|GO:none|SO:coordinate
@SQ|SN:chr10|LN:135534747
@PG|ID:foo|PN:bwa
readNameA1|99|chr10|100|20|5M|=|300|200|AAAAA|>>>>>'''.replace("|", "\t")
        with TempDirectory() as tmp_dir:
            input_bam_path = samtools_test.create_bam(tmp_dir.path,
                                                      "input.sam",
                                                      sam_contents,
                                                      index=True)
            args = Namespace(input_bam=input_bam_path, force=True)
            validator._check_input_bam_not_deduped(args, self.mock_logger)
        self.ok()
        self.assertEqual(0, len(self.mock_logger._log_calls))
Ejemplo n.º 20
0
    def test_check_input_bam_paired_raisesUsageError(self):
        sam_contents = \
'''@HD|VN:1.4|GO:none|SO:coordinate
@SQ|SN:chr10|LN:135534747
readNameA1|{flag}|chr10|100|20|5M|=|300|200|AAAAA|>>>>>'''
        sam_contents = sam_contents.format(flag='16').replace("|", "\t")
        with TempDirectory() as tmp_dir:
            input_bam_path = samtools_test.create_bam(tmp_dir.path,
                                                      "input.sam",
                                                      sam_contents,
                                                      index=True)
            args = Namespace(input_bam=input_bam_path, force=False)
            regex = r'\[.*input.bam\] does not appear to contain paired reads'
            self.assertRaisesRegexp(utils.UsageError, regex,
                                    validator._check_input_bam_paired, args)
Ejemplo n.º 21
0
    def test_close_logs(self):
        # Closing the writer with a logger is expected to produce exactly one
        # INFO line, and that line should mention the destination file name.
        sequences = [{'LN': 1575, 'SN': 'chr1'},
                     {'LN': 1584, 'SN': 'chr2'}]
        header = {'HD': {'VN': '1.0'}, 'SQ': sequences}
        with TempDirectory() as tmp_dir:
            bam_path = os.path.join(tmp_dir.path, 'destination.bam')
            only_align = ConnorAlign(mock_align(query_name='align1',
                                                reference_start=100))

            writer = samtools.AlignWriter(header, bam_path, [])
            writer.write('familyA', None, only_align)
            writer.close(log=self.mock_logger)
        info_lines = self.mock_logger._log_calls['INFO']
        self.assertEqual(1, len(info_lines))
        self.assertRegexpMatches(info_lines[0], 'destination.bam')
Ejemplo n.º 22
0
    def test_check_input_bam_not_deduped_raisesUsageError(self):
        sam_contents = \
'''@HD|VN:1.4|GO:none|SO:coordinate
@SQ|SN:chr10|LN:135534747
@PG|ID:foo|PN:connor
readNameA1|99|chr10|100|20|5M|=|300|200|AAAAA|>>>>>'''.replace("|", "\t")
        with TempDirectory() as tmp_dir:
            input_bam_path = samtools_test.create_bam(tmp_dir.path,
                                                      "input.sam",
                                                      sam_contents,
                                                      index=True)
            args = Namespace(input_bam=input_bam_path, force=False)
            regex = (r'\[.*input.bam\] has already been processed with Connor'
                     r'.*Are you sure.*force')
            self.assertRaisesRegexp(utils.UsageError, regex,
                                    validator._check_input_bam_not_deduped,
                                    args)
Ejemplo n.º 23
0
    def test_check_input_bam_paired_ok(self):
        sam_contents = \
'''@HD|VN:1.4|GO:none|SO:coordinate
@SQ|SN:chr10|LN:135534747
readNameA1|{unpaired_flag}|chr10|100|20|5M|=|300|200|AAAAA|>>>>>
readNameA1|{paired_flag}|chr10|100|20|5M|=|300|200|AAAAA|>>>>>'''
        sam_contents = sam_contents.format(unpaired_flag='16', paired_flag='99')
        sam_contents = sam_contents.replace("|", "\t")
        with TempDirectory() as tmp_dir:
            input_bam_path = samtools_test.create_bam(tmp_dir.path,
                                                      "input.sam",
                                                      sam_contents,
                                                      index=True)
            args = Namespace(input_bam=input_bam_path, force=True)
            validator._check_input_bam_paired(args, self.mock_logger)
        self.ok()
        self.assertEqual(0, len(self.mock_logger._log_calls))
Ejemplo n.º 24
0
    def test_write_addsAlignTags(self):
        # Three tags draw their values from the family, pair and align
        # arguments of write() respectively; every alignment read back is
        # expected to carry all three with the matching values.
        def family_value(family, pair, align):
            return family

        def pair_value(family, pair, align):
            return pair

        def name_value(family, pair, align):
            return align.query_name

        with TempDirectory() as tmp_dir:
            bam_path = os.path.join(tmp_dir.path, 'destination.bam')
            sequences = [{'LN': 1575, 'SN': 'chr1'},
                         {'LN': 1584, 'SN': 'chr2'}]
            header = {'HD': {'VN': '1.0'}, 'SQ': sequences}
            aligns = [ConnorAlign(mock_align(query_name=name))
                      for name in ('align1', 'align2', 'align3')]

            tags = [BamTag('X1', 'Z', 'desc', get_value=family_value),
                    BamTag('X2', 'Z', 'desc', get_value=pair_value),
                    BamTag('X3', 'Z', 'desc', get_value=name_value)]

            writer = samtools.AlignWriter(header, bam_path, tags)
            written = zip(('familyA', 'familyB', 'familyC'),
                          ('pair1', 'pair2', 'pair3'),
                          aligns)
            for family, pair, align in written:
                writer.write(family, pair, align)
            writer.close()

            bamfile = samtools.alignment_file(bam_path, 'rb')
            actual_aligns = list(bamfile.fetch())
            bamfile.close()

        # Flatten every tag found into {(query_name, tag_name): "name:type:value"}.
        align_tags = {}
        for found in actual_aligns:
            for tag in found.get_tags(with_value_type=True):
                tag_name, tag_value, tag_type = tag
                tag_type = \
                    AlignWriterTest.fix_pysam_inconsistent_tag_type(tag_type)
                align_tags[(found.query_name, tag_name)] = \
                    "{}:{}:{}".format(tag_name, tag_type, tag_value)

        self.assertEqual(3, len(actual_aligns))
        expected_tags = (
            (('align1', 'X1'), "X1:Z:familyA"),
            (('align2', 'X1'), "X1:Z:familyB"),
            (('align3', 'X1'), "X1:Z:familyC"),
            (('align1', 'X2'), "X2:Z:pair1"),
            (('align2', 'X2'), "X2:Z:pair2"),
            (('align3', 'X2'), "X2:Z:pair3"),
            (('align1', 'X3'), "X3:Z:align1"),
            (('align2', 'X3'), "X3:Z:align2"),
            (('align3', 'X3'), "X3:Z:align3"),
        )
        for key, expected in expected_tags:
            self.assertEqual(expected, align_tags[key])
Ejemplo n.º 25
0
    def test_check_input_bam_not_deduped_warnIfForced(self):
        sam_contents = \
'''@HD|VN:1.4|GO:none|SO:coordinate
@SQ|SN:chr10|LN:135534747
@PG|ID:foo|PN:connor
readNameA1|99|chr10|100|20|5M|=|300|200|AAAAA|>>>>>'''.replace("|", "\t")
        with TempDirectory() as tmp_dir:
            input_bam_path = samtools_test.create_bam(tmp_dir.path,
                                                      "input.sam",
                                                      sam_contents,
                                                      index=True)
            args = Namespace(input_bam=input_bam_path, force=True)
            validator._check_input_bam_not_deduped(args, log=self.mock_logger)
            warnings = self.mock_logger._log_calls['WARNING']
            self.assertEqual(1, len(warnings))
            regex = (r'\[.*input.bam\] has already been processed with Connor'
                     r'.*forcing')
            self.assertRegexpMatches(warnings[0], regex)
Ejemplo n.º 26
0
    def test_check_input_bam_paired_warnIfForced(self):
        sam_contents = \
'''@HD|VN:1.4|GO:none|SO:coordinate
@SQ|SN:chr10|LN:135534747
readNameA1|{flag}|chr10|100|20|5M|=|300|200|AAAAA|>>>>>'''
        sam_contents = sam_contents.format(flag='16').replace("|", "\t")
        with TempDirectory() as tmp_dir:
            input_bam_path = samtools_test.create_bam(tmp_dir.path,
                                                      "input.sam",
                                                      sam_contents,
                                                      index=True)
            args = Namespace(input_bam=input_bam_path, force=True)
            validator._check_input_bam_paired(args, self.mock_logger)
        warnings = self.mock_logger._log_calls['WARNING']
        self.assertEqual(1, len(warnings))
        regex = (r'\[.*input.bam\] does not appear to contain paired '
                 r'reads.*forcing')
        self.assertRegexpMatches(warnings[0], regex)
Ejemplo n.º 27
0
    def test_check_input_bam_consistent_length_warnIfForced(self):
        sam_contents = \
'''@HD|VN:1.4|GO:none|SO:coordinate
@SQ|SN:chr10|LN:135534747
readNameA1|99|chr10|100|20|10M|=|300|200|AAAAANNNNN|>>>>>!!!!!
readNameA2|99|chr10|100|20|8M|=|300|200|AAAAANNN|>>>>>!!!
readNameA3|99|chr10|100|20|5M|=|300|200|AAAAA|>>>>>
readNameA4|99|chr10|100|20|5M|=|300|200|AAAAA|>>>>>
readNameA5|99|chr10|100|20|5M|=|300|200|AAAAA|>>>>>
readNameA6|99|chr10|100|20|5M|=|300|200|AAAAA|>>>>>
readNameA7|99|chr10|100|20|5M|=|300|200|AAAAA|>>>>>
readNameA8|99|chr10|100|20|5M|=|300|200|AAAAA|>>>>>
readNameA9|99|chr10|100|20|5M|=|300|200|AAAAA|>>>>>
readNameA0|99|chr10|100|20|5M|=|300|200|AAAAA|>>>>>
readNameA1|147|chr10|100|20|10M|=|300|200|AAAAANNNNN|>>>>>!!!!!
readNameA2|147|chr10|100|20|5M|=|300|200|AAANN|>>>!!
readNameA3|147|chr10|100|20|3M|=|300|200|AAA|>>>
readNameA4|147|chr10|100|20|3M|=|300|200|AAA|>>>
readNameA5|147|chr10|100|20|3M|=|300|200|AAA|>>>
readNameA6|147|chr10|100|20|3M|=|300|200|AAA|>>>
readNameA7|147|chr10|100|20|3M|=|300|200|AAA|>>>
readNameA8|147|chr10|100|20|3M|=|300|200|AAA|>>>
readNameA9|147|chr10|100|20|3M|=|300|200|AAA|>>>
readNameA0|147|chr10|100|20|3M|=|300|200|AAA|>>>
'''
        sam_contents = sam_contents.replace("|", "\t")
        with TempDirectory() as tmp_dir:
            input_bam_path = samtools_test.create_bam(tmp_dir.path,
                                                      "input.sam",
                                                      sam_contents,
                                                      index=True)
            args = Namespace(input_bam=input_bam_path, force=True)
            validator._check_input_bam_consistent_length(
                args, self.mock_logger)
        self.ok()
        warnings = self.mock_logger._log_calls['WARNING']
        self.assertEqual(1, len(warnings))
        regex = (r'\[.*input.bam\] reads appear to have inconsistent '
                 r'sequence lengths\..*forcing')
        self.assertRegexpMatches(warnings[0], regex)
Ejemplo n.º 28
0
    def test_total_align_count(self):
        self.check_sysout_safe()
        sam_contents = \
'''@HD|VN:1.4|GO:none|SO:coordinate
@SQ|SN:chr10|LN:135534747
readNameB1|147|chr10|400|0|5M|=|200|100|CCCCC|>>>>>
readNameA1|147|chr10|300|0|5M|=|100|100|AAAAA|>>>>>
readNameA1|99|chr10|100|0|5M|=|300|200|AAAAA|>>>>>
readNameB1|99|chr10|200|0|5M|=|400|200|CCCCC|>>>>>
readNameC1|12|chr10|400|0|*|=|200|100|CCCCC|>>>>>
readNameC1|12|chr10|400|0|*|=|200|100|CCCCC|>>>>>
readNameZ1|77|*|0|0|*|*|0|0|TTTTT|>>>>>
readNameZ1|141|*|0|0|*|*|0|0|GGGGG|>>>>>
'''.replace("|", "\t")
        with TempDirectory() as tmp_dir:
            input_bam = create_bam(tmp_dir.path,
                                   'input.sam',
                                   sam_contents,
                                   index=False)
            samtools.sort_and_index_bam(input_bam)
            actual_count = samtools.total_align_count(input_bam)
            self.assertEqual(6, actual_count)
Ejemplo n.º 29
0
    def test_write(self):
        # Three alignments written with no family, no pair and no tags are
        # expected to be read back in the order they were written.
        query_names = ("align1", "align2", "align3")
        with TempDirectory() as tmp_dir:
            bam_path = os.path.join(tmp_dir.path, "destination.bam")
            sequences = [{'LN': 1575, 'SN': 'chr1'},
                         {'LN': 1584, 'SN': 'chr2'}]
            header = {'HD': {'VN': '1.0'}, 'SQ': sequences}
            aligns = [ConnorAlign(mock_align(query_name=name))
                      for name in query_names]
            writer = samtools.AlignWriter(header, bam_path)

            for align in aligns:
                writer.write(None, None, align)
            writer.close()

            bamfile = samtools.alignment_file(bam_path, 'rb')
            actual_query_names = []
            for found in bamfile.fetch():
                actual_query_names.append(found.query_name)
            bamfile.close()

        self.assertEqual(['align1', 'align2', 'align3'], actual_query_names)
Ejemplo n.º 30
0
    def test_check_input_bam_consistent_length_negRaisesUsageError(self):
        sam_contents = \
'''@HD|VN:1.4|GO:none|SO:coordinate
@SQ|SN:chr10|LN:135534747
readNameA1|99|chr10|100|20|10M|=|300|200|AAAAANNNNN|>>>>>!!!!!
readNameA2|99|chr10|100|20|5M|=|300|200|AAAAA|>>>>>
readNameA3|99|chr10|100|20|5M|=|300|200|AAAAA|>>>>>
readNameA4|99|chr10|100|20|5M|=|300|200|AAAAA|>>>>>
readNameA5|99|chr10|100|20|5M|=|300|200|AAAAA|>>>>>
readNameA6|99|chr10|100|20|5M|=|300|200|AAAAA|>>>>>
readNameA7|99|chr10|100|20|5M|=|300|200|AAAAA|>>>>>
readNameA8|99|chr10|100|20|5M|=|300|200|AAAAA|>>>>>
readNameA9|99|chr10|100|20|5M|=|300|200|AAAAA|>>>>>
readNameA0|99|chr10|100|20|5M|=|300|200|AAAAA|>>>>>
readNameA1|147|chr10|100|20|10M|=|300|200|AAAAANNNNN|>>>>>!!!!!
readNameA2|147|chr10|100|20|5M|=|300|200|AAANN|>>>!!
readNameA3|147|chr10|100|20|3M|=|300|200|AAA|>>>
readNameA4|147|chr10|100|20|3M|=|300|200|AAA|>>>
readNameA5|147|chr10|100|20|3M|=|300|200|AAA|>>>
readNameA6|147|chr10|100|20|3M|=|300|200|AAA|>>>
readNameA7|147|chr10|100|20|3M|=|300|200|AAA|>>>
readNameA8|147|chr10|100|20|3M|=|300|200|AAA|>>>
readNameA9|147|chr10|100|20|3M|=|300|200|AAA|>>>
readNameA0|147|chr10|100|20|3M|=|300|200|AAA|>>>
'''
        sam_contents = sam_contents.replace("|", "\t")
        with TempDirectory() as tmp_dir:
            input_bam_path = samtools_test.create_bam(tmp_dir.path,
                                                      "input.sam",
                                                      sam_contents,
                                                      index=True)
            args = Namespace(input_bam=input_bam_path, force=False)
            regex = (r'\[.*input.bam\] reads appear to have inconsistent '
                     r'sequence lengths\..*force')
            self.assertRaisesRegexp(UsageError,
                                    regex,
                                    validator._check_input_bam_consistent_length,
                                    args)