Esempio n. 1
0
    def test_011_lorenz_03(self):
        # End-to-end check: SAM fixture -> sorted BAM -> depth histogram ->
        # Lorenz curve statistics.
        #
        # Expected read layout of the blc_011 fixture (two overlapping
        # 5-base reads on a 14-base reference):
        #             x x x x x
        #       x x x x x
        # - - - - - - - - - - - - - -

        fixture_stem = "test_" + 'blc_011'
        sam_path = TEST_DIR + fixture_stem + ".sam"
        bam_path = T_TEST_DIR + fixture_stem + ".bam"

        sam_to_sorted_bam(sam_path, bam_path)

        analyser = bamlorenzcoverage()
        depth_histogram, reference_size = analyser.bam_file_to_idx(bam_path)
        curves = analyser.estimate_lorenz_curves(depth_histogram)

        # depth -> number of positions: 6 uncovered, 6 covered once and
        # 2 covered twice; the uncovered count is derived from the sequence
        # sizes declared in the SAM header
        self.assertDictEqual(depth_histogram, {0: 6, 1: 6, 2: 2})

        # the 2 doubly-covered positions (2 of the 8 covered ones) account
        # for 4 of the 10 sequenced bases
        self.assertListEqual(curves['fraction_genome'], [0.0, 1.0 * 2 / 8, 1.0])
        self.assertListEqual(curves['fraction_reads'], [0.0, 1.0 * 4 / 10, 1.0])

        # additional stats
        self.assertEqual(reference_size, 14)  # reference size per the SAM header
        # 'roc' is a computed float and 0.425 has no exact binary
        # representation, so compare with a tolerance instead of bit-exactly
        self.assertAlmostEqual(curves['roc'], 0.425)
        self.assertEqual(curves['total_sequenced_bases'], 10)
        self.assertEqual(curves['total_covered_positions_of_genome'], 8)
Esempio n. 2
0
    def test_005_deletion(self):
        # Converts the blc_005 SAM fixture to a sorted BAM and checks the
        # depth histogram (depth -> number of positions) derived from it.
        fixture_stem = "test_" + 'blc_005'
        sam_path = TEST_DIR + fixture_stem + ".sam"
        bam_path = T_TEST_DIR + fixture_stem + ".bam"

        sam_to_sorted_bam(sam_path, bam_path)

        # the second element of the returned pair is not checked in this test
        depth_histogram, _ = bamlorenzcoverage().bam_file_to_idx(bam_path)

        # the uncovered count is derived from the sequence sizes declared in
        # the SAM header
        self.assertDictEqual(depth_histogram, {0: 392, 1: 108})
Esempio n. 3
0
    def test_008_lorenz_01(self):
        # Lorenz curves for a hand-written depth histogram: four positions
        # covered exactly once, six positions uncovered.
        #     x x x x
        # - - - - - - - - - -

        analyser = bamlorenzcoverage()
        curves = analyser.estimate_lorenz_curves({0: 6, 1: 4})

        # with a single non-zero depth, both curves consist of only their
        # two end points
        end_points = [0.0, 1.0]
        self.assertListEqual(curves['fraction_genome'], end_points)
        self.assertListEqual(curves['fraction_reads'], end_points)

        # additional stats
        self.assertEqual(curves['roc'], 0.5)
Esempio n. 4
0
    def test_007_stacking(self):
        # Converts the blc_007 SAM fixture to a sorted BAM and checks both
        # the depth histogram and the reported reference size.
        fixture_stem = "test_" + 'blc_007'
        sam_path = TEST_DIR + fixture_stem + ".sam"
        bam_path = T_TEST_DIR + fixture_stem + ".bam"

        sam_to_sorted_bam(sam_path, bam_path)

        analyser = bamlorenzcoverage()
        depth_histogram, reference_size = analyser.bam_file_to_idx(bam_path)

        # depth -> number of positions; the uncovered count is derived from
        # the sequence sizes declared in the SAM header
        self.assertDictEqual(depth_histogram, {0: 372, 1: 48, 2: 80})

        # additional stats: 372 + 48 + 80 positions in total
        self.assertEqual(reference_size, 500)
Esempio n. 5
0
    def test_010_lorenz_03(self):
        # Lorenz curves for a hand-written depth histogram with two
        # overlapping 5-base reads: 6 positions covered once, 2 covered twice.
        #         x x x x x
        #   x x x x x
        # - - - - - - - - - -

        depth_histogram = {0: 6, 1: 6, 2: 2}

        analyser = bamlorenzcoverage()
        curves = analyser.estimate_lorenz_curves(depth_histogram)

        # the 2 doubly-covered positions (2 of the 8 covered ones) account
        # for 4 of the 10 sequenced bases
        self.assertListEqual(curves['fraction_genome'], [0.0, 1.0 * 2 / 8, 1.0])
        self.assertListEqual(curves['fraction_reads'], [0.0, 1.0 * 4 / 10, 1.0])

        # additional stats: 'roc' is a computed float and 0.425 has no exact
        # binary representation, so compare with a tolerance
        self.assertAlmostEqual(curves['roc'], 0.425)
Esempio n. 6
0
    def test_013_bed(self):
        # Checks the size reported when a BED file is passed as the third
        # argument of bam_file_to_idx (the second argument is left as None).
        #
        # Expected read layout of the blc_013 fixture:
        #             x x x x x
        #       x x x x x
        # - - - - - - - - - - - - - -

        fixture_stem = "test_" + 'blc_013'
        sam_path = TEST_DIR + fixture_stem + ".sam"
        bed_path = TEST_DIR + fixture_stem + ".bed"
        bam_path = T_TEST_DIR + fixture_stem + ".bam"

        sam_to_sorted_bam(sam_path, bam_path)

        analyser = bamlorenzcoverage()
        # the depth histogram itself is not checked in this test
        _, considered_size = analyser.bam_file_to_idx(bam_path, None, bed_path)

        self.assertEqual(considered_size, 12)
Esempio n. 7
0
    def test_012_region(self):
        # Checks the size reported when bam_file_to_idx is restricted to the
        # region string 'chr1:2-14'.
        #
        # Expected read layout of the blc_012 fixture:
        #             x x x x x
        #       x x x x x
        # - - - - - - - - - - - - - -

        fixture_stem = "test_" + 'blc_012'
        sam_path = TEST_DIR + fixture_stem + ".sam"
        bam_path = T_TEST_DIR + fixture_stem + ".bam"

        sam_to_sorted_bam(sam_path, bam_path)

        analyser = bamlorenzcoverage()
        # the depth histogram itself is not checked in this test
        _, considered_size = analyser.bam_file_to_idx(bam_path, 'chr1:2-14')

        # the SAM header declares a reference of size 14, but the region
        # starts at the 2nd position, leaving 13 positions
        self.assertEqual(considered_size, 13)
Esempio n. 8
0
    def test_002_estimate_idx_from_bam(self):
        # Cumulative coverage for a hand-written depth histogram: three
        # identical 5-base reads stacked on a 15-position reference.
        #           x x x x x
        #           x x x x x
        #           x x x x x
        # - - - - - - - - - - - - - - -
        #
        # share of positions covered by at least d reads:
        #   d = 0 : 15 / 15 = 100.0%
        #   d = 1 :  5 / 15 =  33.3%
        #   d = 2 :  5 / 15 =  33.3%
        #   d = 3 :  5 / 15 =  33.3%

        depth_histogram = {0: 10, 3: 5}
        curves = bamlorenzcoverage().estimate_cumulative_coverage_curves(depth_histogram)

        # only the depths present in the histogram (0 and 3) are expected
        expected = {
            'minimum_coverage_depth': [0, 3],
            'percentage_genome_covered': [100.0, 100.0 * 1.0 / 3.0],
        }
        self.assertDictEqual(curves, expected)