예제 #1
0
    def test_from_existing_bam(self):
        """Build an AlignedSample on top of a FileInvariant for an existing BAM.

        Verifies the stored attributes, the two jobs returned by ``load()``,
        that a second sample with the same name raises ``ValueError``, BAM
        access, the read counts, and that every job from ``get_qc_jobs()``
        is flagged as pruned.
        """
        source_bam = get_sample_data(Path("mbf_align/ex2.bam"))
        invariant = ppg.FileInvariant(source_bam)
        placeholder_genome = object()
        sample = mbf_align.AlignedSample(
            "test_lane",
            invariant,
            placeholder_genome,
            is_paired=False,
            vid="AA123",
        )

        # Constructor arguments are exposed unchanged.
        assert sample.name == "test_lane"
        assert sample.load()[0] is invariant
        assert isinstance(sample.load()[1], ppg.FileInvariant)
        assert sample.genome is placeholder_genome
        assert not sample.is_paired
        assert sample.vid == "AA123"

        # The name "test_lane" is already taken, so this must fail ...
        with pytest.raises(ValueError):
            mbf_align.AlignedSample(
                "test_lane",
                invariant,
                placeholder_genome,
                is_paired=False,
                vid="AA123",
            )
        # ... while a new name on the same job is accepted.
        paired_sample = mbf_align.AlignedSample(
            "test_lane2",
            invariant,
            placeholder_genome,
            is_paired=True,
            vid="AA123",
        )
        assert paired_sample.is_paired

        plain_bam = sample.get_bam()
        assert isinstance(plain_bam, pysam.Samfile)
        unique_bam = sample.get_unique_aligned_bam()
        assert isinstance(unique_bam, pysam.Samfile)
        assert sample.get_bam_names()[0] == source_bam
        assert sample.get_bam_names()[1] == source_bam + ".bai"

        assert sample.mapped_reads() == 8
        assert sample.unmapped_reads() == 0
        assert all(qc_job._pruned for qc_job in get_qc_jobs())
예제 #2
0
    def test_subtraction_by_read(self):
        """Exercise ``SubtractOtherLane`` post-processing on three lane pairs.

        * ``lane`` minus ``lane2`` (both built on the same BAM file) must
          leave no mapped reads.
        * ``lane`` minus ``lane3`` (a different BAM file) must leave all of
          ``lane``'s mapped reads.
        * ``lane3`` minus ``lane3_subset`` must leave exactly the difference
          of the two mapped-read counts, and honours ``result_dir``.

        The QC plots of the last two post-processed lanes are kept un-pruned
        and each one is compared against its own reference image.
        """
        from mbf_sampledata import get_human_22_fake_genome

        genome = get_human_22_fake_genome()
        lane = mbf_align.AlignedSample(
            "test_lane",
            get_sample_data(Path("mbf_align/rnaseq_spliced_chr22.bam")),
            genome,
            False,
            "AA123",
        )  # index creation is automatic
        lane2 = mbf_align.AlignedSample(
            "test_lane2",
            get_sample_data(Path("mbf_align/rnaseq_spliced_chr22.bam")),
            genome,
            False,
            "AA124",
        )  # index creation is automatic
        lane3 = mbf_align.AlignedSample(
            "test_lane3",
            get_sample_data(Path("mbf_align/chipseq_chr22.bam")),
            genome,
            False,
            "AA123",
        )  # index creation is automatic
        lane3_subset = mbf_align.AlignedSample(
            "test_lane3_subset",
            get_sample_data(Path("mbf_align/chipseq_chr22_subset.bam")),
            genome,
            False,
            "AA123",
        )  # index creation is automatic

        lane_empty = lane.post_process(
            mbf_align.post_process.SubtractOtherLane(lane2), new_name="empty")
        lane_full = lane.post_process(
            mbf_align.post_process.SubtractOtherLane(lane3), new_name="full")
        lane_some = lane3.post_process(
            mbf_align.post_process.SubtractOtherLane(lane3_subset),
            result_dir="results/aligned/shu",
        )
        # Only these two QC jobs are kept; every other QC job gets pruned.
        qc_jobs = [
            lane_some.post_processor_qc_jobs, lane_full.post_processor_qc_jobs
        ]
        prune_qc(lambda job: job in qc_jobs)
        ppg.run_pipegraph()

        # Index files (second entry of get_bam_names()) were written.
        assert Path(lane_empty.get_bam_names()[1]).exists()
        assert Path(lane_full.get_bam_names()[1]).exists()

        assert lane_empty.mapped_reads() == 0
        assert lane_full.mapped_reads() == lane.mapped_reads()
        assert lane.mapped_reads() != 0
        assert (lane_some.mapped_reads() == lane3.mapped_reads() -
                lane3_subset.mapped_reads())
        assert lane3_subset.mapped_reads(
        )  # make sure there was something to subtract
        assert "shu" in lane_some.get_bam_names()[0]

        # One image check per un-pruned QC job. The second check used to
        # repeat qc_jobs[0], so lane_full's plot was never verified.
        # NOTE(review): if the suffix-less reference image was generated from
        # the old assertion it has to be regenerated for lane_full's plot.
        assert_image_equal(qc_jobs[0].filenames[0], "_result_dir")
        assert_image_equal(qc_jobs[1].filenames[0])
예제 #3
0
    def test_to_fastq(self):
        bam_path = get_sample_data(Path("mbf_align/ex2.bam"))
        bam_job = ppg.FileInvariant(bam_path)
        genome = object()
        lane = mbf_align.AlignedSample("test_lane", bam_job, genome, False,
                                       "AA123")
        fastq_path = "out.fastq"
        lane.to_fastq(fastq_path)
        ppg.run_pipegraph()
        assert Path(fastq_path).exists()
        assert (Path(fastq_path).read_text() == """@read_28833_29006_6945
AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG
+
<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<
@read_28701_28881_323b
TGCAAGGCCGCATCGGCCAAGGCCAAGATATAGGT
+
<<<<7<<<<<<<<<<<<;6<<<:;7<<<<;<<<<<
@read_28701_28881_323c
TGCAAGGCCGCATCGGCCAAGGCCAAGATATAGGT
+
<<<<7<<<<<<<<<<<<;6<<<:;7<<<<;<<<<<
@read_28701_28881_324a
TGCAAGGCCGCATCGGCCAAGGCCAAGATATAGGT
+
<<<<7<<<<<<<<<<<<;6<<<:;7<<<<;<<<<<
@read_28701_28881_324b
TGCAAGGCCGCATCGGCCAAGGCCAAGATATAGGT
+
<<<<7<<<<<<<<<<<<;6<<<:;7<<<<;<<<<<
@read_28701_28881_324c
TGCAAGGCCGCATCGGCCAAGGCCAAGATATAGGT
+
<<<<7<<<<<<<<<<<<;6<<<:;7<<<<;<<<<<
@test_clipped1
AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG
+
<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<
@test_clipped1
AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG
+
<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<
""")
        lane2 = mbf_align.AlignedSample("test_lane2",
                                        bam_job,
                                        genome,
                                        is_paired=True,
                                        vid="AA123")
        with pytest.raises(ValueError):
            lane2.to_fastq(
                "nope.fastq")  # no support for paired end data at this point
예제 #4
0
    def prep_lane(self):
        """Return a non-paired ``test_lane`` sample on the fake chr22 genome."""
        from mbf_sampledata import get_human_22_fake_genome

        # straight from chr22 of the human genome
        fake_genome = get_human_22_fake_genome()
        spliced_bam = get_sample_data(
            Path("mbf_align/rnaseq_spliced_chr22.bam"))
        return mbf_align.AlignedSample(
            "test_lane",
            spliced_bam,
            fake_genome,
            is_paired=False,
            vid="AA123",
        )
예제 #5
0
    def test_chromosome_mapping(self):
        """Create an AlignedSample with a ``DummyGenome`` and check its basics.

        NOTE(review): this snippet appears to be cut off -- it ends on a bare
        ``b`` expression, so whatever chromosome-mapping checks were meant to
        follow are not visible here.
        """
        bam_path = get_sample_data(Path("mbf_align/ex2.bam"))
        bam_job = ppg.FileInvariant(bam_path)
        genome = DummyGenome()
        lane = mbf_align.AlignedSample("test_lane", bam_job, genome, False,
                                       "AA123")
        # Constructor arguments are exposed unchanged.
        assert lane.name == "test_lane"
        assert lane.load()[0] is bam_job
        assert isinstance(lane.load()[1], ppg.FileInvariant)
        assert lane.genome is genome
        assert not lane.is_paired
        assert lane.vid == "AA123"

        # A second sample named "test_lane" must raise ValueError ...
        with pytest.raises(ValueError):
            mbf_align.AlignedSample("test_lane", bam_job, genome, False,
                                    "AA123")
        # ... while a different name on the same job is accepted.
        lane2 = mbf_align.AlignedSample("test_lane2", bam_job, genome, True,
                                        "AA123")
        assert lane2.is_paired

        b = lane.get_bam()
        assert isinstance(b, pysam.Samfile)
        # Bare expression with no effect; presumably the start of a truncated
        # statement -- TODO confirm against the full test.
        b
예제 #6
0
def gatk_test_lanes():
    """Return the GATK test samples as a list of one-element lists.

    Both samples are non-paired, have ``vid=None`` and share one
    ``mbf_genomes.EnsemblGenome("Homo_sapiens", 96)`` instance.
    """
    genome_human = mbf_genomes.EnsemblGenome("Homo_sapiens", 96)
    # (sample name, BAM path) pairs, in creation order.
    sample_specs = (
        (
            "Test1GATK",
            "/project/code/mvariants/data/base_raw_test_hg36_Subread_gatk_rg.bam",
        ),
        (
            "Test2GATK",
            "data/base_raw_test_hg3612_Subread_gatk_rg.bam",
        ),
    )
    return [
        [
            mbf_align.AlignedSample(
                sample_name,
                bam_file,
                genome_human,
                is_paired=False,
                vid=None,
            )
        ]
        for sample_name, bam_file in sample_specs
    ]
예제 #7
0
 def test_missing_index_file(self):
     """A BAM given as a plain path without an index gets an index job.

     Before the pipegraph has run, ``mapped_reads()`` raises
     ``FileNotFoundError``; afterwards it reports 8 reads.
     """
     source_bam = get_sample_data(Path("mbf_align/ex2.bam"))
     copied_bam = "noindex.bam"
     shutil.copy(source_bam, copied_bam)
     placeholder_genome = object()
     sample = mbf_align.AlignedSample(
         "test_lane",
         copied_bam,
         placeholder_genome,
         is_paired=False,
         vid="AA123",
     )

     # The BAM is tracked as an invariant; its index has to be generated.
     assert isinstance(sample.load()[0], ppg.FileInvariant)
     assert isinstance(sample.load()[1], ppg.FileGeneratingJob)
     # The index job's id is not simply the BAM name plus ".bai".
     assert sample.load()[1].job_id != "noindex.bam.bai"
     assert sample.load()[0] in sample.load()[1].prerequisites

     with pytest.raises(FileNotFoundError):
         sample.mapped_reads()
     ppg.run_pipegraph()
     assert sample.mapped_reads() == 8
예제 #8
0
    def test_creating_index_for_fg_job(self):
        """A BAM produced by a FileGeneratingJob gets an index job depending on it.

        After running the pipegraph both ``sample.bam`` and
        ``sample.bam.bai`` exist.
        """

        # Job callback: copies the example BAM to "sample.bam".
        def gen():
            shutil.copy(get_sample_data(Path("mbf_align/ex2.bam")),
                        "sample.bam")

        ppg.util.global_pipegraph.quiet = False

        bam_producer = ppg.FileGeneratingJob("sample.bam", gen)
        placeholder_genome = object()
        sample = mbf_align.AlignedSample(
            "test_lane",
            bam_producer,
            placeholder_genome,
            is_paired=False,
            vid="AA123",
        )
        assert isinstance(sample.load()[1], ppg.FileGeneratingJob)
        assert sample.load()[0] in sample.load()[1].prerequisites

        ppg.run_pipegraph()
        assert Path("sample.bam").exists()
        assert Path("sample.bam.bai").exists()
예제 #9
0
 def test_lane_invariants_on_string(self):
     """A BAM passed as a plain path is wrapped in a ``ppg.FileInvariant``."""
     sample = mbf_align.AlignedSample(
         "test_lane",
         get_sample_data(Path("mbf_align/ex2.bam")),
         object(),
         is_paired=False,
         vid="AA123",
     )
     first_job = sample.load()[0]
     assert isinstance(first_job, ppg.FileInvariant)
예제 #10
0
 def test_lane_raises_on_multifilegeneratingJobWithNoBAM(self):
     """A MultiFileGeneratingJob whose only output is ``a.sam`` is rejected."""
     sam_only_job = ppg.MultiFileGeneratingJob(["a.sam"], lambda: 5)
     placeholder_genome = object()
     with pytest.raises(ValueError):
         mbf_align.AlignedSample(
             "test_lane",
             sam_only_job,
             placeholder_genome,
             is_paired=False,
             vid="AA123",
         )
예제 #11
0
 def test_lane_invariants_on_non_accepted_value(self):
     """Passing the integer 123 as the alignment input raises ``ValueError``."""
     placeholder_genome = object()
     with pytest.raises(ValueError):
         mbf_align.AlignedSample(
             "test_lane",
             123,
             placeholder_genome,
             is_paired=False,
             vid="AA123",
         )
예제 #12
0
    def test_alignment_stats(self):
        """Check where ``get_alignment_stats()`` takes its numbers from.

        Three cases, each with ``get_bam`` replaced by a counting stub:

        * no aligner: stats come from the BAM (one ``get_bam`` call),
        * aligner without ``get_alignment_stats``: same fallback,
        * aligner with ``get_alignment_stats``: its result is returned and
          ``get_bam`` is not called.
        """
        from mbf_sampledata import get_human_22_fake_genome

        spliced_bam = "mbf_align/rnaseq_spliced_chr22.bam"
        bam_stats = {"Mapped": 5, "Unmapped": 10}
        bam_opened = 0

        class DummySam:
            # Stand-in for an opened BAM: fixed counts, usable in `with`.
            mapped = 5
            unmapped = 10

            def __enter__(self):
                return self

            def __exit__(self, *exc_info):
                pass

        def counting_get_bam():
            # Record every access so the assertions can tell who was asked.
            nonlocal bam_opened
            bam_opened += 1
            return DummySam()

        genome = get_human_22_fake_genome()

        # Case 1: no aligner given.
        sample = mbf_align.AlignedSample(
            "test_lane",
            get_sample_data(Path(spliced_bam)),
            genome,
            is_paired=False,
            vid="AA123",
        )  # index creation is automatic
        sample.get_bam = counting_get_bam
        assert sample.get_alignment_stats() == bam_stats
        assert bam_opened == 1

        # Case 2: the aligner offers no get_alignment_stats().
        class DummyAlignerWithout:
            pass

        sample = mbf_align.AlignedSample(
            "test_lane2",
            get_sample_data(Path(spliced_bam)),
            genome,
            is_paired=False,
            vid="AA123",
            aligner=DummyAlignerWithout(),
        )  # index creation is automatic
        sample.get_bam = counting_get_bam
        assert bam_opened == 1
        assert sample.get_alignment_stats() == bam_stats
        assert bam_opened == 2

        # Case 3: the aligner supplies the stats itself.
        class DummyAlignerWith:
            def get_alignment_stats(self, bam_filename):
                expected = get_sample_path(spliced_bam).resolve()
                assert Path(bam_filename).resolve() == expected
                return {"Hello": 23}

        sample = mbf_align.AlignedSample(
            "test_lane3",
            get_sample_data(spliced_bam),
            genome,
            is_paired=False,
            vid="AA123",
            aligner=DummyAlignerWith(),
        )  # index creation is automatic
        sample.get_bam = counting_get_bam
        assert bam_opened == 2
        assert sample.get_alignment_stats() == {"Hello": 23}
        assert bam_opened == 2