def test_pruning_plotjob(self, new_pipegraph):
    """A registered PlotJob and its helper jobs are all pruned by prune_qc().

    The PlotJob starts out unpruned; after an argument-less ``prune_qc()``
    the plot job itself as well as its ``cache_job`` and ``table_job`` must
    carry the ``_pruned`` flag.
    """
    plot_job = register_qc(ppg.PlotJob("c.png", lambda: None, lambda: None))
    assert not plot_job._pruned

    prune_qc()

    assert plot_job._pruned
    assert plot_job.cache_job._pruned
    assert plot_job.table_job._pruned
def test_registration_and_pruning(self, new_pipegraph):
    """Walk through QC job registration and predicate-based pruning.

    Covers, in order: rejection of a non-job argument, registration of a
    plain job, pruning with a predicate (which the assertions show also
    applies to jobs registered afterwards), and finally an argument-less
    ``prune_qc()`` that prunes every registered QC job.
    """
    # Registering something that is not a job must fail.
    with pytest.raises(TypeError):
        register_qc("shu")

    job_a = ppg.FileGeneratingJob("a", lambda: Path("a").write_text("hello"))
    register_qc(job_a)
    print(list(get_qc_jobs()))
    assert job_a in list(get_qc_jobs())
    assert not job_a._pruned

    job_c_lower = register_qc(
        ppg.FileGeneratingJob("c", lambda: Path("b").write_text("hello"))
    )

    def id_ends_with_c(job):
        # Case-insensitive check on the last character of the job id.
        return job.job_id.lower()[-1] == "c"

    prune_qc(id_ends_with_c)
    assert job_c_lower in list(get_qc_jobs())
    assert not job_c_lower._pruned

    # Jobs registered after prune_qc(predicate) are still judged by it:
    # "b" does not match and is pruned, "C" matches and is kept.
    job_b = register_qc(
        ppg.FileGeneratingJob("b", lambda: Path("b").write_text("hello"))
    )
    assert job_b in list(get_qc_jobs())
    assert job_b._pruned

    job_c_upper = register_qc(
        ppg.FileGeneratingJob("C", lambda: Path("b").write_text("hello"))
    )
    assert not job_c_upper._pruned
    assert len(list(get_qc_jobs())) == 4

    # Without a predicate everything gets pruned.
    prune_qc()
    assert job_a._pruned
    assert job_b._pruned
    assert job_c_lower._pruned
    assert job_c_upper._pruned
    for registered in get_qc_jobs():
        assert registered._pruned
def test_volcano_plot(self):
    """Run a treated-vs-untreated TTest comparison and check its volcano plot.

    All QC jobs except those with "volcano" in their job id are pruned;
    exactly one job must remain, and its first output file is handed to
    ``assert_image_equal``.
    """
    ppg.util.global_pipegraph.quiet = False
    import mbf_sampledata

    counts_path = mbf_sampledata.get_sample_path(
        "mbf_comparisons/pasillaCount_deseq2.tsv.gz"
    )
    counts = pd.read_csv(counts_path, sep=" ")
    # Column labels are normalised to str before the prefix matching below.
    counts.columns = [str(label) for label in counts.columns]
    treated = [label for label in counts.columns if label.startswith("treated")]
    untreated = [
        label for label in counts.columns if label.startswith("untreated")
    ]

    ddf = DelayedDataFrame("pasilla", counts)
    groups = {"treated": treated, "untreated": untreated}
    comp = Comparisons(ddf, groups).a_vs_b("treated", "untreated", TTest())
    comp.filter([("log2FC", "|>=", 2.0), ("FDR", "<=", 0.05)])

    prune_qc(lambda job: "volcano" in job.job_id)
    run_pipegraph()

    remaining = [job for job in get_qc_jobs() if not job._pruned]
    print(remaining)
    assert len(remaining) == 1
    assert_image_equal(remaining[0].filenames[0])
def test_align_and_extract_umis(new_pipegraph):
    """Annotate aligned reads with barcode tags taken from the raw sample.

    Runs once per sample folder (the names suggest a plain and a gzipped
    variant). After the pipegraph has run, the first read of the
    post-processed BAM must carry the expected ``XC`` and ``XM`` tags.
    """
    from mbf_align.post_process import AnnotateFastqBarcodes

    sample_folders = [
        get_sample_path(Path("mbf_align/sample_extract_barcodes")),
        get_sample_path(Path("mbf_align/sample_extract_barcodes_gz")),
    ]
    for folder in sample_folders:
        # Fresh graph for every folder so the two runs do not share jobs.
        new_pipegraph.new_pipegraph()
        genome = get_human_22_fake_genome()
        # The predicate is False for every job.
        mbf_qualitycontrol.prune_qc(lambda _: False)

        raw_sample = Sample(
            "test", str(folder), False, pairing="only_second", vid="AA123"
        )
        aligned = AlignedSample(
            "test", str(folder / "test.bam"), genome, False, "AA123"
        )
        tag_ranges = {"XC": [0, 4], "XM": [7, 7 + 4]}
        annotated = aligned.post_process(
            AnnotateFastqBarcodes(raw_sample, tag_ranges)
        )

        ppg.run_pipegraph()

        bam = annotated.get_bam()
        first_read = next(bam.fetch())
        print(first_read.tags)
        assert first_read.get_tag("XC") == "AGTC"
        assert first_read.get_tag("XM") == "TGAC"
def test_subtraction_by_read(self):
    """Exercise SubtractOtherLane post-processing and verify both QC plots.

    Three subtractions are set up:

    * ``lane`` minus ``lane2`` (same BAM file) -> no mapped reads remain,
    * ``lane`` minus ``lane3`` (a different BAM file) -> all reads remain,
    * ``lane3`` minus ``lane3_subset``, written to a custom ``result_dir``.

    Only the post-processor QC jobs of the last two are kept; each of them
    is then checked with ``assert_image_equal``.
    """
    from mbf_sampledata import get_human_22_fake_genome

    genome = get_human_22_fake_genome()
    # index creation is automatic for all of the AlignedSamples below
    lane = mbf_align.AlignedSample(
        "test_lane",
        get_sample_data(Path("mbf_align/rnaseq_spliced_chr22.bam")),
        genome,
        False,
        "AA123",
    )
    lane2 = mbf_align.AlignedSample(
        "test_lane2",
        get_sample_data(Path("mbf_align/rnaseq_spliced_chr22.bam")),
        genome,
        False,
        "AA124",
    )
    lane3 = mbf_align.AlignedSample(
        "test_lane3",
        get_sample_data(Path("mbf_align/chipseq_chr22.bam")),
        genome,
        False,
        "AA123",
    )
    lane3_subset = mbf_align.AlignedSample(
        "test_lane3_subset",
        get_sample_data(Path("mbf_align/chipseq_chr22_subset.bam")),
        genome,
        False,
        "AA123",
    )

    lane_empty = lane.post_process(
        mbf_align.post_process.SubtractOtherLane(lane2), new_name="empty"
    )
    lane_full = lane.post_process(
        mbf_align.post_process.SubtractOtherLane(lane3), new_name="full"
    )
    lane_some = lane3.post_process(
        mbf_align.post_process.SubtractOtherLane(lane3_subset),
        result_dir="results/aligned/shu",
    )

    # Index 0 -> lane_some (custom result_dir), index 1 -> lane_full.
    qc_jobs = [
        lane_some.post_processor_qc_jobs,
        lane_full.post_processor_qc_jobs,
    ]
    prune_qc(lambda job: job in qc_jobs)
    ppg.run_pipegraph()

    assert Path(lane_empty.get_bam_names()[1]).exists()
    assert Path(lane_full.get_bam_names()[1]).exists()
    assert lane_empty.mapped_reads() == 0
    assert lane_full.mapped_reads() == lane.mapped_reads()
    assert lane.mapped_reads() != 0
    assert (
        lane_some.mapped_reads()
        == lane3.mapped_reads() - lane3_subset.mapped_reads()
    )
    # make sure there was something to subtract
    assert lane3_subset.mapped_reads()
    assert "shu" in lane_some.get_bam_names()[0]

    # Check each kept QC plot once. Previously qc_jobs[0] was checked twice
    # and the plot of lane_full (qc_jobs[1]) was never verified.
    assert_image_equal(qc_jobs[0].filenames[0], "_result_dir")
    assert_image_equal(qc_jobs[1].filenames[0])
def _test_qc_plots(self, filename, remaining_job_count, chdir="."):
    """Shared driver for the per-plot QC tests.

    Prunes every QC job whose job id does not contain ``filename``, checks
    that ``remaining_job_count`` jobs are left, runs the pipegraph and hands
    the resulting plot to ``assert_image_equal``.

    Parameters
    ----------
    filename
        Plot file name; also used as the pruning key and image suffix.
    remaining_job_count
        Expected number of unpruned QC jobs after pruning.
    chdir
        Path component appended to ``lane.result_dir``. With ``".."`` the
        plot is looked up as ``filename``; otherwise the file name is
        prefixed with the lane name.
    """
    lane = self.prep_lane()
    prune_qc(lambda job: filename in job.job_id)

    unpruned = [job for job in get_qc_jobs() if not job._pruned]
    # plot cache, plot_table, plot
    assert len(unpruned) == remaining_job_count

    ppg.run_pipegraph()

    plot_dir = lane.result_dir / chdir
    plot_name = filename if chdir == ".." else f"{lane.name}_{filename}"
    assert_image_equal(plot_dir / plot_name, suffix="_" + filename)
def test_ma_plot(self):
    """Run a TTest comparison on the pasilla subset and check its MA plot.

    Only QC jobs with "ma_plot" in their job id survive pruning; exactly one
    must remain, and its first output file is handed to
    ``assert_image_equal``.
    """
    import numpy

    ppg.util.global_pipegraph.quiet = False
    pasilla_data, treated, untreated = get_pasilla_data_subset()
    # Fixed seed, presumably so the rendered plot is reproducible.
    numpy.random.seed(500)

    groups = {"treated": treated, "untreated": untreated}
    comp = Comparisons(pasilla_data, groups).a_vs_b(
        "treated", "untreated", TTest(), laplace_offset=1
    )
    # Only the fold-change criterion is applied here.
    comp.filter([("log2FC", "|>=", 2.0)])

    prune_qc(lambda job: "ma_plot" in job.job_id)
    run_pipegraph()

    remaining = [job for job in get_qc_jobs() if not job._pruned]
    assert len(remaining) == 1
    assert_image_equal(remaining[0].filenames[0])
def test_correlation(self):
    """Check the correlation QC plot created by constructing Comparisons.

    No comparison is run; the ``Comparisons`` object is only constructed.
    All QC jobs except those with "correlation" in their job id are pruned;
    exactly one must remain, and its first output file is handed to
    ``assert_image_equal``.
    """
    ppg.util.global_pipegraph.quiet = False
    import mbf_sampledata

    counts_path = mbf_sampledata.get_sample_path(
        "mbf_comparisons/pasillaCount_deseq2.tsv.gz"
    )
    counts = pd.read_csv(counts_path, sep=" ")
    # Column labels are normalised to str before the prefix matching below.
    counts.columns = [str(label) for label in counts.columns]
    treated = [label for label in counts.columns if label.startswith("treated")]
    untreated = [
        label for label in counts.columns if label.startswith("untreated")
    ]

    ddf = DelayedDataFrame("pasilla", counts)
    # Constructed for its side effects only; the result is not used.
    Comparisons(ddf, {"treated": treated, "untreated": untreated})

    prune_qc(lambda job: "correlation" in job.job_id)
    run_pipegraph()

    remaining = [job for job in get_qc_jobs() if not job._pruned]
    print(remaining)
    assert len(remaining) == 1
    assert_image_equal(remaining[0].filenames[0])