Esempio n. 1
0
def test_calc_coverage_4():
    """If uniquely_aligned_only is True, skip any mapping of a read
    that is aligned to more than one location.
    """
    coverage_calculator = CoverageCalculator(uniquely_aligned_only=True)
    bam_file_4 = generate_bam_file(ccd.sam_content_3, ccd.sam_bam_prefix)
    # Size the coverage lists to the length of the first reference sequence.
    coverage_calculator._init_coverage_list(bam_file_4.lengths[0])
    coverage_calculator._calc_coverage("chrom", bam_file_4)
    # Compare element-wise. The previous form, `a.all() == b.all()`, reduced
    # each array to a single bool first; the expected array contains zeros,
    # so its `.all()` is always False and the check merely tested whether
    # the actual slice contained a zero somewhere.
    np.testing.assert_array_equal(
        coverage_calculator._coverages["forward"][0:15],
        np.array([3.0] * 10 + [0.0] * 5),
    )
Esempio n. 2
0
def test_calc_coverage_3():
    """If read_count_splitting is set to False, every mapping is
    counted as one at each of the matching positions, independent of
    how often its read is mapped in total.
    """
    coverage_calculator = CoverageCalculator(read_count_splitting=False)
    bam_file_3 = generate_bam_file(ccd.sam_content_2, ccd.sam_bam_prefix)
    # Size the coverage lists to the length of the first reference sequence.
    coverage_calculator._init_coverage_list(bam_file_3.lengths[0])
    coverage_calculator._calc_coverage("chrom", bam_file_3)
    # Compare element-wise. The previous form, `a.all() == b.all()`, reduced
    # each array to a single bool first; the expected array contains zeros,
    # so its `.all()` is always False and the check merely tested whether
    # the actual slice contained a zero somewhere.
    np.testing.assert_array_equal(
        coverage_calculator._coverages["forward"][0:15],
        np.array([1.0] * 10 + [0.0] * 5),
    )
Esempio n. 3
0
def test_calc_coverage_5():
    """With coverage_style "first_base_only", only the first nucleotide
    of a mapping is considered.
    """
    coverage_calculator = CoverageCalculator(coverage_style="first_base_only")
    bam_file_5 = generate_bam_file(ccd.sam_content_1, ccd.sam_bam_prefix)
    # Size the coverage lists to the length of the first reference sequence.
    coverage_calculator._init_coverage_list(bam_file_5.lengths[0])
    coverage_calculator._calc_coverage("chrom", bam_file_5)
    # Compare element-wise. The previous form, `a.all() == b.all()`, reduced
    # each array to a single bool first; both expected arrays contain zeros,
    # so their `.all()` is always False and the checks merely tested whether
    # the actual slices contained a zero somewhere.
    np.testing.assert_array_equal(
        coverage_calculator._coverages["forward"][0:15],
        np.array([5.0] + [0.0] * 14),
    )
    # The expected reverse-strand value is negative and sits at index 9.
    np.testing.assert_array_equal(
        coverage_calculator._coverages["reverse"][0:15],
        np.array([0.0] * 9 + [-5.0] + [0.0] * 5),
    )
class TestCoverageCalculator(unittest.TestCase):
    """Tests for CoverageCalculator using small SAM/BAM fixtures.

    Each test writes a SAM file from ExampleData, converts and indexes it
    with pysam, runs the calculator on reference "chrom" and checks the
    first 15 positions of the resulting coverage lists.
    """

    def setUp(self):
        """Create a default calculator, the example data and the file prefix."""
        self.coverage_calculator = CoverageCalculator()
        self.example_data = ExampleData()
        self._sam_bam_prefix = "dummy"
        # Set by _generate_bam_file; tracked so tearDown can close it.
        self._bam = None

    def tearDown(self):
        """Close the BAM handle (if any) and remove the generated files."""
        if self._bam is not None:
            # Release the handle before deleting the files underneath it.
            self._bam.close()
            self._bam = None
        for suffix in [".sam", ".bam", ".bam.bai"]:
            path = self._sam_bam_prefix + suffix
            if os.path.exists(path):
                os.remove(path)

    def _generate_bam_file(self, sam_content, file_prefix):
        """Write `sam_content` to <file_prefix>.sam, convert it to an
        indexed <file_prefix>.bam and open the result as `self._bam`.
        """
        sam_file = "%s.sam" % file_prefix
        bam_file = "%s.bam" % file_prefix
        # Context manager guarantees the SAM file is flushed and closed
        # before pysam reads it, even if the write raises.
        with open(sam_file, "w") as sam_fh:
            sam_fh.write(sam_content)
        pysam.view("-Sb", "-o%s" % bam_file, sam_file)
        pysam.index(bam_file)
        self._bam = pysam.Samfile(bam_file)

    def test_init_coverage_list(self):
        """Initialisation creates zero-filled forward and reverse lists."""
        self.coverage_calculator._init_coverage_list(10)
        self.assertListEqual(
            sorted(self.coverage_calculator._coverages.keys()),
            ["forward", "reverse"])
        self.assertListEqual(
            self.coverage_calculator._coverages["forward"], [0.0] * 10)
        self.assertListEqual(
            self.coverage_calculator._coverages["reverse"], [0.0] * 10)

    def test_calc_coverage_1(self):
        """Check correct start at first list element"""
        self._generate_bam_file(
            self.example_data.sam_content_1, self._sam_bam_prefix)
        self.coverage_calculator._init_coverage_list(self._bam.lengths[0])
        self.coverage_calculator._calc_coverage("chrom", self._bam)
        self.assertEqual(
            len(self.coverage_calculator._coverages["forward"]), 1500)
        self.assertListEqual(
            self.coverage_calculator._coverages["forward"][0:15],
            [5.0] * 10 + [0.0] * 5)
        # Reverse-strand coverage is expected as negative values.
        self.assertListEqual(
            self.coverage_calculator._coverages["reverse"][0:15],
            [-5.0] * 10 + [0.0] * 5)

    def test_calc_coverage_2(self):
        """Consider how often a read is mapped. Mappings of reads that
        are aligned to several locations contribute only fractions to
        the counting.
        """
        self._generate_bam_file(
            self.example_data.sam_content_2, self._sam_bam_prefix)
        self.coverage_calculator._init_coverage_list(self._bam.lengths[0])
        self.coverage_calculator._calc_coverage("chrom", self._bam)
        self.assertListEqual(
            self.coverage_calculator._coverages["forward"][0:15],
            [0.5] * 10 + [0.0] * 5)

    def test_calc_coverage_3(self):
        """If read_count_splitting is set to False, every mapping is
        counted as one at each of the matching positions, independent of
        how often its read is mapped in total.
        """
        self.coverage_calculator = CoverageCalculator(
            read_count_splitting=False)
        self._generate_bam_file(
            self.example_data.sam_content_2, self._sam_bam_prefix)
        self.coverage_calculator._init_coverage_list(self._bam.lengths[0])
        self.coverage_calculator._calc_coverage("chrom", self._bam)
        self.assertListEqual(
            self.coverage_calculator._coverages["forward"][0:15],
            [1.0] * 10 + [0.0] * 5)

    def test_calc_coverage_4(self):
        """If uniqueley_aligned_only is True, skip any mapping of a read
        that is aligned to more than one location.
        """
        # NOTE(review): the keyword is spelled "uniqueley"; it must match
        # the CoverageCalculator signature in use - confirm before renaming.
        self.coverage_calculator = CoverageCalculator(
            uniqueley_aligned_only=True)
        self._generate_bam_file(
            self.example_data.sam_content_3, self._sam_bam_prefix)
        self.coverage_calculator._init_coverage_list(self._bam.lengths[0])
        self.coverage_calculator._calc_coverage("chrom", self._bam)
        self.assertListEqual(
            self.coverage_calculator._coverages["forward"][0:15],
            [3.0] * 10 + [0.0] * 5)

    def test_calc_coverage_5(self):
        """If first_base_only is True only the first nucleotide of a
        mapping is considered.
        """
        self.coverage_calculator = CoverageCalculator(first_base_only=True)
        self._generate_bam_file(
            self.example_data.sam_content_1, self._sam_bam_prefix)
        self.coverage_calculator._init_coverage_list(self._bam.lengths[0])
        self.coverage_calculator._calc_coverage("chrom", self._bam)
        self.assertListEqual(
            self.coverage_calculator._coverages["forward"][0:15],
            [5.0] + [0.0] * 14)
        # The expected reverse-strand value is negative and sits at index 9.
        self.assertListEqual(
            self.coverage_calculator._coverages["reverse"][0:15],
            [0.0] * 9 + [-5.0] + [0.0] * 5)
class TestCoverageCalculator(unittest.TestCase):
    """Tests for CoverageCalculator using small SAM/BAM fixtures.

    Each test writes a SAM file from ExampleData, converts and indexes it
    with pysam, runs the calculator on reference "chrom" and checks the
    first 15 positions of the resulting coverage arrays (converted to
    plain lists via ``tolist()`` for comparison).
    """

    def setUp(self):
        """Create a default calculator, the example data and the file prefix."""
        self.coverage_calculator = CoverageCalculator()
        self.example_data = ExampleData()
        self._sam_bam_prefix = "dummy"
        # Set by _generate_bam_file; tracked so tearDown can close it.
        self._bam = None

    def tearDown(self):
        """Close the BAM handle (if any) and remove the generated files."""
        if self._bam is not None:
            # Release the handle before deleting the files underneath it.
            self._bam.close()
            self._bam = None
        for suffix in [".sam", ".bam", ".bam.bai"]:
            path = self._sam_bam_prefix + suffix
            if os.path.exists(path):
                os.remove(path)

    def _generate_bam_file(self, sam_content, file_prefix):
        """Write `sam_content` to <file_prefix>.sam, convert it to an
        indexed <file_prefix>.bam and open the result as `self._bam`.
        """
        sam_file = "{}.sam".format(file_prefix)
        bam_file = "{}.bam".format(file_prefix)
        # Context manager guarantees the SAM file is flushed and closed
        # before pysam reads it, even if the write raises.
        with open(sam_file, "w") as sam_fh:
            sam_fh.write(sam_content)
        pysam.view("-Sb",
                   "-o{}".format(bam_file),
                   sam_file,
                   catch_stdout=False)
        pysam.index(bam_file)
        self._bam = pysam.Samfile(bam_file)

    def _coverage_head(self, strand):
        """Return the first 15 coverage values of `strand` as a list."""
        return self.coverage_calculator._coverages[strand][0:15].tolist()

    def test_init_coverage_list(self):
        """Initialisation creates zero-filled forward and reverse arrays."""
        self.coverage_calculator._init_coverage_list(10)
        self.assertListEqual(
            sorted(self.coverage_calculator._coverages.keys()),
            ["forward", "reverse"],
        )
        self.assertListEqual(
            self.coverage_calculator._coverages["forward"].tolist(),
            [0.0] * 10)
        self.assertListEqual(
            self.coverage_calculator._coverages["reverse"].tolist(),
            [0.0] * 10)

    def test_calc_coverage_1(self):
        """Check correct start at first list element"""
        self._generate_bam_file(self.example_data.sam_content_1,
                                self._sam_bam_prefix)
        self.coverage_calculator._init_coverage_list(self._bam.lengths[0])
        self.coverage_calculator._calc_coverage("chrom", self._bam)
        self.assertEqual(len(self.coverage_calculator._coverages["forward"]),
                         1500)
        self.assertListEqual(
            self._coverage_head("forward"), [5.0] * 10 + [0.0] * 5)
        # Reverse-strand coverage is expected as negative values.
        self.assertListEqual(
            self._coverage_head("reverse"), [-5.0] * 10 + [0.0] * 5)

    def test_calc_coverage_2(self):
        """Consider how often a read is mapped. Mappings of reads that
        are aligned to several locations contribute only fractions to
        the counting.
        """
        self._generate_bam_file(self.example_data.sam_content_2,
                                self._sam_bam_prefix)
        self.coverage_calculator._init_coverage_list(self._bam.lengths[0])
        self.coverage_calculator._calc_coverage("chrom", self._bam)
        self.assertListEqual(
            self._coverage_head("forward"), [0.5] * 10 + [0.0] * 5)

    def test_calc_coverage_3(self):
        """If read_count_splitting is set to False, every mapping is
        counted as one at each of the matching positions, independent of
        how often its read is mapped in total.
        """
        self.coverage_calculator = CoverageCalculator(
            read_count_splitting=False)
        self._generate_bam_file(self.example_data.sam_content_2,
                                self._sam_bam_prefix)
        self.coverage_calculator._init_coverage_list(self._bam.lengths[0])
        self.coverage_calculator._calc_coverage("chrom", self._bam)
        self.assertListEqual(
            self._coverage_head("forward"), [1.0] * 10 + [0.0] * 5)

    def test_calc_coverage_4(self):
        """If uniquely_aligned_only is True, skip any mapping of a read
        that is aligned to more than one location.
        """
        self.coverage_calculator = CoverageCalculator(
            uniquely_aligned_only=True)
        self._generate_bam_file(self.example_data.sam_content_3,
                                self._sam_bam_prefix)
        self.coverage_calculator._init_coverage_list(self._bam.lengths[0])
        self.coverage_calculator._calc_coverage("chrom", self._bam)
        self.assertListEqual(
            self._coverage_head("forward"), [3.0] * 10 + [0.0] * 5)

    def test_calc_coverage_5(self):
        """With coverage_style "first_base_only", only the first
        nucleotide of a mapping is considered.
        """
        self.coverage_calculator = CoverageCalculator(
            coverage_style="first_base_only")
        self._generate_bam_file(self.example_data.sam_content_1,
                                self._sam_bam_prefix)
        self.coverage_calculator._init_coverage_list(self._bam.lengths[0])
        self.coverage_calculator._calc_coverage("chrom", self._bam)
        self.assertListEqual(
            self._coverage_head("forward"), [5.0] + [0.0] * 14)
        # The expected reverse-strand value is negative and sits at index 9.
        self.assertListEqual(
            self._coverage_head("reverse"),
            [0.0] * 9 + [-5.0] + [0.0] * 5)