def test_expected_outputs_standardSE(self):
    """expected_outputs: standard single-end, no strandedness

    Creates a mock project holding a single R1 Fastq and checks that
    ``expected_outputs`` for the ``standardSE`` protocol yields exactly
    the reference file names below, all located in the project's ``qc``
    directory.
    """
    # Make mock analysis project
    mock_project = MockAnalysisProject("PJB",
                                       ("PJB1_S1_R1_001.fastq.gz", ),
                                       metadata={'Organism': 'Human'})
    mock_project.create(top_dir=self.wd)
    # Reference outputs
    reference_outputs = (
        "PJB1_S1_R1_001_fastqc",
        "PJB1_S1_R1_001_fastqc.html",
        "PJB1_S1_R1_001_fastqc.zip",
        "PJB1_S1_R1_001_model_organisms_screen.png",
        "PJB1_S1_R1_001_model_organisms_screen.txt",
        "PJB1_S1_R1_001_other_organisms_screen.png",
        "PJB1_S1_R1_001_other_organisms_screen.txt",
        "PJB1_S1_R1_001_rRNA_screen.png",
        "PJB1_S1_R1_001_rRNA_screen.txt",
    )
    analysis_project = AnalysisProject(
        mock_project.name, os.path.join(self.wd, mock_project.name))
    expected = expected_outputs(analysis_project,
                                "qc",
                                qc_protocol="standardSE")
    # Each predicted path must sit in the QC dir and be a known name
    for predicted in expected:
        self.assertEqual(os.path.dirname(predicted),
                         os.path.join(self.wd, mock_project.name, "qc"))
        self.assertIn(os.path.basename(predicted), reference_outputs)
    # Each reference name must have been predicted
    for name in reference_outputs:
        self.assertIn(
            os.path.join(self.wd, mock_project.name, "qc", name), expected)
def test_qcpipeline(self):
    """QCPipeline: standard QC run

    Puts mock ``illumina_qc.sh`` and ``multiqc`` executables at the
    front of ``PATH``, runs the pipeline on a four-Fastq mock project
    and checks the exit status plus the presence of the QC directory
    and report files.
    """
    # Make mock illumina_qc.sh and multiqc
    MockIlluminaQcSh.create(os.path.join(self.bin, "illumina_qc.sh"))
    MockMultiQC.create(os.path.join(self.bin, "multiqc"))
    os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
    # Make mock analysis project
    fastqs = ("PJB1_S1_R1_001.fastq.gz", "PJB1_S1_R2_001.fastq.gz",
              "PJB2_S2_R1_001.fastq.gz", "PJB2_S2_R2_001.fastq.gz")
    MockAnalysisProject("PJB", fastqs).create(top_dir=self.wd)
    project_dir = os.path.join(self.wd, "PJB")
    # Set up and run the QC
    pipeline = QCPipeline()
    pipeline.add_project(AnalysisProject("PJB", project_dir), multiqc=True)
    job_runners = {
        'default': SimpleJobRunner(),
    }
    exit_status = pipeline.run(poll_interval=0.5,
                               max_jobs=1,
                               runners=job_runners)
    # Check output and reports
    self.assertEqual(exit_status, 0)
    products = ("qc", "qc_report.html",
                "qc_report.PJB.%s.zip" % os.path.basename(self.wd),
                "multiqc_report.html")
    for product in products:
        self.assertTrue(os.path.exists(os.path.join(project_dir, product)),
                        "Missing %s" % product)
def test_check_illumina_qc_outputs_singlecell_some_missing(self):
    """check_illumina_qc_outputs: some illumina_qc.sh outputs missing (singlecell)

    Populates a paired-Fastq mock project with QC outputs, deletes two
    of the R2 outputs and checks that only the R2 Fastq is reported.
    """
    # Make mock analysis project
    mock_project = MockAnalysisProject("PJB", (
        "PJB1_S1_R1_001.fastq.gz",
        "PJB1_S1_R2_001.fastq.gz",
    ), metadata={'Organism': 'Human'})
    mock_project.create(top_dir=self.wd)
    # Add QC artefacts
    project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
    UpdateAnalysisProject(project).add_qc_outputs(
        include_fastq_strand=False, include_multiqc=False)
    # Remove some outputs
    outputs_to_remove = (
        "PJB1_S1_R2_001_fastqc.html",
        "PJB1_S1_R2_001_model_organisms_screen.txt",
    )
    for name in outputs_to_remove:
        os.remove(os.path.join(project.qc_dir, name))
    # Check
    reported = check_illumina_qc_outputs(project,
                                         qc_dir="qc",
                                         qc_protocol="singlecell")
    self.assertEqual(
        reported,
        [os.path.join(project.fastq_dir, "PJB1_S1_R2_001.fastq.gz")])
def test_qcpipeline_with_strandedness(self):
    """QCPipeline: standard QC run with strandedness determination

    Same as the standard run but also installs a mock
    ``fastq_strand.py``, sets the project organism and passes a
    ``fastq_strand_indexes`` mapping to the pipeline.
    """
    # Make mock illumina_qc.sh, multiqc and fastq_strand.py
    MockIlluminaQcSh.create(os.path.join(self.bin, "illumina_qc.sh"))
    MockMultiQC.create(os.path.join(self.bin, "multiqc"))
    MockFastqStrandPy.create(os.path.join(self.bin, "fastq_strand.py"))
    os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
    # Make mock analysis project
    fastqs = ("PJB1_S1_R1_001.fastq.gz", "PJB1_S1_R2_001.fastq.gz",
              "PJB2_S2_R1_001.fastq.gz", "PJB2_S2_R2_001.fastq.gz")
    mock_project = MockAnalysisProject("PJB",
                                       fastqs,
                                       metadata={'Organism': 'Human'})
    mock_project.create(top_dir=self.wd)
    project_dir = os.path.join(self.wd, "PJB")
    # Set up and run the QC
    pipeline = QCPipeline()
    pipeline.add_project(AnalysisProject("PJB", project_dir), multiqc=True)
    exit_status = pipeline.run(
        fastq_strand_indexes={'human': '/data/hg38/star_index'},
        poll_interval=0.5,
        max_jobs=1,
        runners={
            'default': SimpleJobRunner(),
        })
    # Check output and reports
    self.assertEqual(exit_status, 0)
    products = ("qc", "qc_report.html",
                "qc_report.PJB.%s.zip" % os.path.basename(self.wd),
                "multiqc_report.html")
    for product in products:
        self.assertTrue(os.path.exists(os.path.join(project_dir, product)),
                        "Missing %s" % product)
def test_qcpipeline_with_batching_fails_for_missing_outputs(self):
    """QCPipeline: standard QC run with batching fails for missing outputs

    The mock ``illumina_qc.sh`` is created with ``fastqc=False`` and
    ``exit_code=1``; the batched pipeline run is expected to return 1,
    leave the ``qc`` directory in place and produce no reports.
    """
    # Make mock illumina_qc.sh (configured to fail) and multiqc
    MockIlluminaQcSh.create(os.path.join(self.bin, "illumina_qc.sh"),
                            fastqc=False,
                            exit_code=1)
    MockMultiQC.create(os.path.join(self.bin, "multiqc"))
    os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
    # Make mock analysis project
    fastqs = ("PJB1_S1_R1_001.fastq.gz", "PJB1_S1_R2_001.fastq.gz",
              "PJB2_S2_R1_001.fastq.gz", "PJB2_S2_R2_001.fastq.gz")
    MockAnalysisProject("PJB", fastqs).create(top_dir=self.wd)
    project_dir = os.path.join(self.wd, "PJB")
    # Set up and run the QC
    pipeline = QCPipeline()
    pipeline.add_project(AnalysisProject("PJB", project_dir), multiqc=True)
    exit_status = pipeline.run(poll_interval=0.5,
                               max_jobs=1,
                               batch_size=3,
                               runners={
                                   'default': SimpleJobRunner(),
                               })
    # Check output and reports
    self.assertEqual(exit_status, 1)
    self.assertTrue(os.path.exists(os.path.join(project_dir, "qc")),
                    "Missing 'qc'")
    unwanted = ("qc_report.html",
                "qc_report.PJB.%s.zip" % os.path.basename(self.wd),
                "multiqc_report.html")
    for name in unwanted:
        self.assertFalse(os.path.exists(os.path.join(project_dir, name)),
                         "Found %s, shouldn't be present" % name)
def test_check_fastq_strand_outputs_singlecell_missing(self):
    """check_fastq_strand_outputs: fastq_strand.py output missing (singlecell)

    With an empty ``fastq_strand.conf`` and no QC outputs, the check is
    expected to report a one-element tuple holding the R2 Fastq.
    """
    # Make mock analysis project
    mock_project = MockAnalysisProject("PJB", (
        "PJB1_S1_R1_001.fastq.gz",
        "PJB1_S1_R2_001.fastq.gz",
    ), metadata={'Organism': 'Human'})
    mock_project.create(top_dir=self.wd)
    project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
    # Make (empty) fastq_strand.conf
    conf_file = os.path.join(project.dirn, "fastq_strand.conf")
    with open(conf_file, 'w') as fp:
        fp.write("")
    # Check the outputs
    reported = check_fastq_strand_outputs(project,
                                          "qc",
                                          conf_file,
                                          qc_protocol="singlecell")
    self.assertEqual(reported, [
        (os.path.join(project.fastq_dir, "PJB1_S1_R2_001.fastq.gz"), ),
    ])
def _make_analysis_project(self, paired_end=True):
    """Create a mock 'PJB' project with fake QC products.

    Arguments:
      paired_end: if True (the default) generate R1 and R2 Fastq
        names for each sample, otherwise R1 only.

    Returns:
      Path to the mock project directory inside the working dir.
    """
    self._make_working_dir()
    # Generate names for fastq files to add
    reads = (1, 2) if paired_end else (1, )
    fastq_names = [
        "%s_S%d_R%d_001.fastq.gz" % (sample, index, read)
        for index, sample in enumerate(('PJB1', 'PJB2'), start=1)
        for read in reads
    ]
    # Create the mock directory
    self.analysis_dir = MockAnalysisProject('PJB', fastq_names)
    self.analysis_dir.create(top_dir=self.wd)
    # Populate with fake QC products
    qc_dir = os.path.join(self.wd, self.analysis_dir.name, 'qc')
    os.mkdir(qc_dir)
    os.mkdir(os.path.join(qc_dir, 'logs'))
    for fastq in fastq_names:
        # FastQC
        MockQCOutputs.fastqc_v0_11_2(fastq, qc_dir)
        # Fastq_screen
        for screen in ('model_organisms', 'other_organisms', 'rRNA'):
            MockQCOutputs.fastq_screen_v0_9_2(fastq, qc_dir, screen)
    return os.path.join(self.wd, self.analysis_dir.name)
def test_determine_qc_protocol_standardPE(self):
    """determine_qc_protocol: standard paired-end run

    A project with R1/R2 Fastq pairs and no extra metadata is expected
    to be assigned the ``standardPE`` protocol.
    """
    # Make mock analysis project
    fastqs = ("PJB1_S1_R1_001.fastq.gz", "PJB1_S1_R2_001.fastq.gz",
              "PJB2_S2_R1_001.fastq.gz", "PJB2_S2_R2_001.fastq.gz")
    MockAnalysisProject("PJB", fastqs).create(top_dir=self.wd)
    project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
    protocol = determine_qc_protocol(project)
    self.assertEqual(protocol, "standardPE")
def test_verify_qc_no_outputs(self):
    """verify_qc: project with no QC outputs

    No QC outputs are added to the mock project, so verification is
    expected to fail.
    """
    # Make mock analysis project
    fastqs = ("PJB1_S1_R1_001.fastq.gz", "PJB1_S1_R2_001.fastq.gz",
              "PJB2_S2_R1_001.fastq.gz", "PJB2_S2_R2_001.fastq.gz")
    MockAnalysisProject("PJB", fastqs).create(top_dir=self.wd)
    project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
    # Do verification
    verified = verify_qc(project)
    self.assertFalse(verified)
def test_determine_qc_protocol_10xchromium3v3(self):
    """determine_qc_protocol: single-cell run (10xGenomics Chromium 3'v3)

    With only the single cell platform set in the metadata, the
    ``singlecell`` protocol is expected.
    """
    # Make mock analysis project
    fastqs = ("PJB1_S1_R1_001.fastq.gz", "PJB1_S1_R2_001.fastq.gz",
              "PJB2_S2_R1_001.fastq.gz", "PJB2_S2_R2_001.fastq.gz")
    project_metadata = {'Single cell platform': "10xGenomics Chromium 3'v3"}
    mock_project = MockAnalysisProject("PJB",
                                       fastqs,
                                       metadata=project_metadata)
    mock_project.create(top_dir=self.wd)
    project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
    protocol = determine_qc_protocol(project)
    self.assertEqual(protocol, "singlecell")
def test_verify_qc_all_outputs(self):
    """verify_qc: project with all QC outputs present

    QC outputs are added with ``UpdateAnalysisProject.add_qc_outputs``
    using its defaults; verification is then expected to succeed.
    """
    # Make mock analysis project
    fastqs = ("PJB1_S1_R1_001.fastq.gz", "PJB1_S1_R2_001.fastq.gz",
              "PJB2_S2_R1_001.fastq.gz", "PJB2_S2_R2_001.fastq.gz")
    MockAnalysisProject("PJB", fastqs).create(top_dir=self.wd)
    # Add QC outputs
    project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
    updater = UpdateAnalysisProject(project)
    updater.add_qc_outputs()
    # Do verification
    verified = verify_qc(project)
    self.assertTrue(verified)
def setUp(self):
    """Create a temp dir containing a mock 'PJB' project.

    Stores the temp dir path in ``self.dirn`` and an ``AnalysisProject``
    for the mock project in ``self.project``.
    """
    # Create a temp working dir
    self.dirn = tempfile.mkdtemp(suffix='TestCellrangerMulti')
    # Make mock analysis project
    fastqs = ("PJB1_GEX_S1_R1_001.fastq.gz",
              "PJB1_GEX_S1_R2_001.fastq.gz",
              "PJB2_MC_S2_R1_001.fastq.gz",
              "PJB2_MC_S2_R2_001.fastq.gz",)
    project_metadata = {
        'Organism': 'Human',
        'Single cell platform': "10xGenomics Chromium 3'v3"
    }
    mock_project = MockAnalysisProject("PJB",
                                       fastqs,
                                       metadata=project_metadata)
    mock_project.create(top_dir=self.dirn)
    self.project = AnalysisProject("PJB", os.path.join(self.dirn, "PJB"))
def test_determine_qc_protocol_10xchromium3v2_atac_seq(self):
    """determine_qc_protocol: single-cell ATAC-seq (10xGenomics Single Cell ATAC)

    Platform "10xGenomics Single Cell ATAC" with library type
    "scATAC-seq" is expected to give the ``10x_scATAC`` protocol.
    """
    # Make mock analysis project
    fastqs = ("PJB1_S1_R1_001.fastq.gz", "PJB1_S1_R2_001.fastq.gz",
              "PJB2_S2_R1_001.fastq.gz", "PJB2_S2_R2_001.fastq.gz")
    project_metadata = {
        'Single cell platform': "10xGenomics Single Cell ATAC",
        'Library type': "scATAC-seq"
    }
    mock_project = MockAnalysisProject("PJB",
                                       fastqs,
                                       metadata=project_metadata)
    mock_project.create(top_dir=self.wd)
    project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
    protocol = determine_qc_protocol(project)
    self.assertEqual(protocol, "10x_scATAC")
def _make_mock_analysis_project(self, single_cell_platform, library_type):
    """Create a mock 'PJB' project in the working dir.

    Arguments:
      single_cell_platform: value stored under the project's
        'Single cell platform' metadata item.
      library_type: value stored under the project's 'Library type'
        metadata item.

    Returns:
      Path to the mock project directory.
    """
    project_metadata = {
        'Single cell platform': single_cell_platform,
        'Library type': library_type,
    }
    fastqs = (
        "PJB1_S1_L001_R1_001.fastq.gz",
        "PJB1_S1_L001_R2_001.fastq.gz",
    )
    mock_project = MockAnalysisProject('PJB',
                                       fastq_names=fastqs,
                                       metadata=project_metadata)
    mock_project.create(top_dir=self.wd)
    return os.path.join(self.wd, 'PJB')
def test_expected_outputs_standardPE_with_strand(self):
    """expected_outputs: standard paired-end with strandedness

    With an (empty) ``fastq_strand.conf`` supplied, the ``standardPE``
    expected outputs must match the reference names below exactly,
    including the single ``fastq_strand.txt`` file named after R1.
    """
    # Make mock analysis project
    mock_project = MockAnalysisProject("PJB", (
        "PJB1_S1_R1_001.fastq.gz",
        "PJB1_S1_R2_001.fastq.gz",
    ), metadata={'Organism': 'Human'})
    mock_project.create(top_dir=self.wd)
    # Make mock fastq_strand conf file
    strand_conf = os.path.join(self.wd, mock_project.name,
                               "fastq_strand.conf")
    with open(strand_conf, 'w') as fp:
        fp.write("")
    # Reference outputs
    reference_outputs = (
        "PJB1_S1_R1_001_fastqc",
        "PJB1_S1_R1_001_fastqc.html",
        "PJB1_S1_R1_001_fastqc.zip",
        "PJB1_S1_R1_001_model_organisms_screen.png",
        "PJB1_S1_R1_001_model_organisms_screen.txt",
        "PJB1_S1_R1_001_other_organisms_screen.png",
        "PJB1_S1_R1_001_other_organisms_screen.txt",
        "PJB1_S1_R1_001_rRNA_screen.png",
        "PJB1_S1_R1_001_rRNA_screen.txt",
        "PJB1_S1_R2_001_fastqc",
        "PJB1_S1_R2_001_fastqc.html",
        "PJB1_S1_R2_001_fastqc.zip",
        "PJB1_S1_R2_001_model_organisms_screen.png",
        "PJB1_S1_R2_001_model_organisms_screen.txt",
        "PJB1_S1_R2_001_other_organisms_screen.png",
        "PJB1_S1_R2_001_other_organisms_screen.txt",
        "PJB1_S1_R2_001_rRNA_screen.png",
        "PJB1_S1_R2_001_rRNA_screen.txt",
        "PJB1_S1_R1_001_fastq_strand.txt",
    )
    analysis_project = AnalysisProject(
        mock_project.name, os.path.join(self.wd, mock_project.name))
    expected = expected_outputs(analysis_project,
                                "qc",
                                fastq_strand_conf=strand_conf,
                                qc_protocol="standardPE")
    # Each predicted path must sit in the QC dir and be a known name
    for predicted in expected:
        self.assertEqual(os.path.dirname(predicted),
                         os.path.join(self.wd, mock_project.name, "qc"))
        self.assertIn(os.path.basename(predicted), reference_outputs)
    # Each reference name must have been predicted
    for name in reference_outputs:
        self.assertIn(
            os.path.join(self.wd, mock_project.name, "qc", name), expected)
def _make_analysis_project(self, paired_end=True):
    """Set up a mock "PJB" project populated with fake QC outputs.

    Arguments:
      paired_end: when True (default) each sample gets R1 and R2
        Fastq names; when False only R1.

    Returns:
      Path of the mock project directory under the working dir.
    """
    # Create a working directory if there isn't one yet
    self._make_working_dir()
    # Generate names for fastq files to add
    if not paired_end:
        read_numbers = (1,)
    else:
        read_numbers = (1, 2)
    fastq_names = []
    for sample_index, sample_name in enumerate(("PJB1", "PJB2"), start=1):
        fastq_names.extend(
            "%s_S%d_R%d_001.fastq.gz" % (sample_name, sample_index, read)
            for read in read_numbers)
    # Create the mock directory
    self.analysis_dir = MockAnalysisProject("PJB", fastq_names)
    self.analysis_dir.create(top_dir=self.wd)
    # Populate with fake QC products
    qc_dir = os.path.join(self.wd, self.analysis_dir.name, "qc")
    os.mkdir(qc_dir)
    os.mkdir(os.path.join(qc_dir, "logs"))
    screen_names = ("model_organisms", "other_organisms", "rRNA")
    for fq in fastq_names:
        # FastQC
        MockQCOutputs.fastqc_v0_11_2(fq, qc_dir)
        # Fastq_screen
        for screen_name in screen_names:
            MockQCOutputs.fastq_screen_v0_9_2(fq, qc_dir, screen_name)
    return os.path.join(self.wd, self.analysis_dir.name)
def test_check_illumina_qc_outputs_standardSE_all_missing(self):
    """check_illumina_qc_outputs: all illumina_qc.sh outputs missing (standardSE)

    No QC outputs are created, so every Fastq in the project is
    expected to be reported.
    """
    # Make mock analysis project
    mock_project = MockAnalysisProject("PJB",
                                       ("PJB1_S1_R1_001.fastq.gz", ),
                                       metadata={'Organism': 'Human'})
    mock_project.create(top_dir=self.wd)
    # Load the project
    project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
    # Check
    reported = check_illumina_qc_outputs(project,
                                         qc_dir="qc",
                                         qc_protocol="standardSE")
    self.assertEqual(reported, project.fastqs)
def test_verify_qc_incomplete_outputs(self):
    """verify_qc: project with some QC outputs missing

    All QC outputs are added and then one FastQC HTML file is deleted;
    verification is expected to fail.
    """
    # Make mock analysis project
    fastqs = ("PJB1_S1_R1_001.fastq.gz", "PJB1_S1_R2_001.fastq.gz",
              "PJB2_S2_R1_001.fastq.gz", "PJB2_S2_R2_001.fastq.gz")
    MockAnalysisProject("PJB", fastqs).create(top_dir=self.wd)
    # Add QC outputs
    project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
    UpdateAnalysisProject(project).add_qc_outputs()
    # Remove an output
    removed_output = os.path.join(self.wd, "PJB", "qc",
                                  "PJB1_S1_R1_001_fastqc.html")
    os.remove(removed_output)
    # Do verification
    verified = verify_qc(project)
    self.assertFalse(verified)
def test_report_qc_no_outputs(self):
    """report_qc: project with no QC outputs

    No QC outputs are added to the mock project; ``report_qc`` is
    expected to return 1 and to leave no report files behind.
    """
    # Make mock analysis project
    p = MockAnalysisProject(
        "PJB", ("PJB1_S1_R1_001.fastq.gz", "PJB1_S1_R2_001.fastq.gz",
                "PJB2_S2_R1_001.fastq.gz", "PJB2_S2_R2_001.fastq.gz"))
    p.create(top_dir=self.wd)
    project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
    # Do reporting
    self.assertEqual(report_qc(project), 1)
    # Check output and reports
    # NB the ZIP name includes the basename of the working directory
    # (the form asserted by test_report_qc_incomplete_outputs); checking
    # for a bare "qc_report.PJB.zip" could never fail
    for f in ("qc_report.html",
              "qc_report.PJB.%s.zip" % os.path.basename(self.wd),
              "multiqc_report.html"):
        self.assertFalse(os.path.exists(os.path.join(self.wd, "PJB", f)),
                         "Found %s (should be missing)" % f)
def test_determine_qc_protocol_10x_visium(self):
    """determine_qc_protocol: spatial RNA-seq run (10xGenomics Visium)

    Platform "10xGenomics Visium" with a spatial RNA-seq library is
    expected to give the ``10x_Visium`` protocol.
    """
    # Make mock analysis project
    # NOTE(review): the library type was previously "scATAC-seq", which
    # contradicted the scenario described above (looks like a copy/paste
    # from the ATAC test) - confirm "spatial RNA-seq" is the exact
    # library type string used for Visium projects
    p = MockAnalysisProject("PJB",
                            ("PJB1_S1_R1_001.fastq.gz",
                             "PJB1_S1_R2_001.fastq.gz",
                             "PJB2_S2_R1_001.fastq.gz",
                             "PJB2_S2_R2_001.fastq.gz"),
                            metadata={'Single cell platform':
                                      "10xGenomics Visium",
                                      'Library type':
                                      "spatial RNA-seq"})
    p.create(top_dir=self.wd)
    project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
    self.assertEqual(determine_qc_protocol(project), "10x_Visium")
def test_determine_qc_protocol_10xchromium3v3_cellplex(self):
    """determine_qc_protocol: cell multiplexing CellPlex (10xGenomics Chromium 3'v3)

    Platform "10xGenomics Chromium 3'v3" with library type "CellPlex"
    is expected to give the ``10x_CellPlex`` protocol.
    """
    # Make mock analysis project
    fastqs = ("PJB1_S1_R1_001.fastq.gz",
              "PJB1_S1_R2_001.fastq.gz",
              "PJB2_S2_R1_001.fastq.gz",
              "PJB2_S2_R2_001.fastq.gz")
    project_metadata = {'Single cell platform': "10xGenomics Chromium 3'v3",
                        'Library type': "CellPlex"}
    mock_project = MockAnalysisProject("PJB",
                                       fastqs,
                                       metadata=project_metadata)
    mock_project.create(top_dir=self.wd)
    project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
    protocol = determine_qc_protocol(project)
    self.assertEqual(protocol, "10x_CellPlex")
def test_check_illumina_qc_outputs_standardSE_all_present(self):
    """check_illumina_qc_outputs: all illumina_qc.sh outputs present (standardSE)

    QC outputs are added (without fastq_strand or MultiQC products),
    so the check is expected to report nothing.
    """
    # Make mock analysis project
    mock_project = MockAnalysisProject("PJB",
                                       ("PJB1_S1_R1_001.fastq.gz", ),
                                       metadata={'Organism': 'Human'})
    mock_project.create(top_dir=self.wd)
    # Add QC artefacts
    project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
    UpdateAnalysisProject(project).add_qc_outputs(
        include_fastq_strand=False, include_multiqc=False)
    # Check
    reported = check_illumina_qc_outputs(project,
                                         qc_dir="qc",
                                         qc_protocol="standardSE")
    self.assertEqual(reported, [])
def test_report_qc_all_outputs(self):
    """report_qc: project with all QC outputs present

    QC outputs are added with ``UpdateAnalysisProject.add_qc_outputs``;
    ``report_qc`` is expected to return 0 and the HTML report, ZIP
    archive and MultiQC report are expected to exist afterwards.
    """
    # Make mock analysis project
    p = MockAnalysisProject(
        "PJB", ("PJB1_S1_R1_001.fastq.gz", "PJB1_S1_R2_001.fastq.gz",
                "PJB2_S2_R1_001.fastq.gz", "PJB2_S2_R2_001.fastq.gz"))
    p.create(top_dir=self.wd)
    # Add QC outputs
    project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
    UpdateAnalysisProject(project).add_qc_outputs()
    # Do reporting
    self.assertEqual(report_qc(project), 0)
    # Check output and reports
    # NB the ZIP name includes the basename of the working directory,
    # the form asserted for report_qc by
    # test_report_qc_incomplete_outputs
    for f in ("qc_report.html",
              "qc_report.PJB.%s.zip" % os.path.basename(self.wd),
              "multiqc_report.html"):
        self.assertTrue(os.path.exists(os.path.join(self.wd, "PJB", f)),
                        "Missing %s" % f)
def test_determine_qc_protocol_parse_evercode(self):
    """determine_qc_protocol: Parse Evercode single cell RNA-seq run

    Platform "Parse Evercode" with library type "scRNA-seq" is expected
    to give the ``ParseEvercode`` protocol; the mock project also holds
    I1/I2 Fastqs for the first sample.
    """
    # Make mock analysis project
    fastqs = ("PJB1_S1_R1_001.fastq.gz",
              "PJB1_S1_R2_001.fastq.gz",
              "PJB2_S2_R1_001.fastq.gz",
              "PJB2_S2_R2_001.fastq.gz",
              "PJB1_S1_I1_001.fastq.gz",
              "PJB1_S1_I2_001.fastq.gz")
    project_metadata = {'Single cell platform': "Parse Evercode",
                        'Library type': "scRNA-seq"}
    mock_project = MockAnalysisProject("PJB",
                                       fastqs,
                                       metadata=project_metadata)
    mock_project.create(top_dir=self.wd)
    project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
    protocol = determine_qc_protocol(project)
    self.assertEqual(protocol, "ParseEvercode")
def test_determine_qc_protocol_10x_multiome_gex(self):
    """determine_qc_protocol: single cell multiome GEX run (10xGenomics Multiome GEX)

    Platform "10xGenomics Single Cell Multiome" with library type "GEX"
    is expected to give the ``10x_Multiome_GEX`` protocol; the mock
    project also holds I1/I2 Fastqs for the first sample.
    """
    # Make mock analysis project
    fastqs = ("PJB1_S1_R1_001.fastq.gz",
              "PJB1_S1_R2_001.fastq.gz",
              "PJB2_S2_R1_001.fastq.gz",
              "PJB2_S2_R2_001.fastq.gz",
              "PJB1_S1_I1_001.fastq.gz",
              "PJB1_S1_I2_001.fastq.gz")
    project_metadata = {
        'Single cell platform': "10xGenomics Single Cell Multiome",
        'Library type': "GEX"
    }
    mock_project = MockAnalysisProject("PJB",
                                       fastqs,
                                       metadata=project_metadata)
    mock_project.create(top_dir=self.wd)
    project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
    protocol = determine_qc_protocol(project)
    self.assertEqual(protocol, "10x_Multiome_GEX")
def test_check_fastq_strand_outputs_standardSE_present(self):
    """check_fastq_strand_outputs: fastq_strand.py output present (standardSE)

    QC outputs including fastq_strand products are added for the
    ``standardSE`` protocol, so the check is expected to report nothing.
    """
    # Make mock analysis project
    mock_project = MockAnalysisProject("PJB",
                                       ("PJB1_S1_R1_001.fastq.gz", ),
                                       metadata={'Organism': 'Human'})
    mock_project.create(top_dir=self.wd)
    project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
    # Add QC artefacts (including fastq_strand outputs)
    updater = UpdateAnalysisProject(project)
    updater.add_qc_outputs(protocol="standardSE",
                           include_fastq_strand=True,
                           include_multiqc=False)
    conf_file = os.path.join(project.dirn, "fastq_strand.conf")
    # Check the outputs
    reported = check_fastq_strand_outputs(project,
                                          "qc",
                                          conf_file,
                                          qc_protocol="standardSE")
    self.assertEqual(reported, [])
def test_report_qc_incomplete_outputs(self):
    """report_qc: project with some QC outputs missing

    One FastQC HTML file is deleted after adding all QC outputs;
    ``report_qc`` is expected to return 1 while the report files are
    still expected to exist.
    """
    # Make mock analysis project
    fastqs = ("PJB1_S1_R1_001.fastq.gz", "PJB1_S1_R2_001.fastq.gz",
              "PJB2_S2_R1_001.fastq.gz", "PJB2_S2_R2_001.fastq.gz")
    MockAnalysisProject("PJB", fastqs).create(top_dir=self.wd)
    # Add QC outputs
    project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
    UpdateAnalysisProject(project).add_qc_outputs()
    # Remove an output
    removed_output = os.path.join(self.wd, "PJB", "qc",
                                  "PJB1_S1_R1_001_fastqc.html")
    os.remove(removed_output)
    # Do reporting
    exit_status = report_qc(project)
    self.assertEqual(exit_status, 1)
    # Check output and reports
    reports = ("qc_report.html",
               "qc_report.PJB.%s.zip" % os.path.basename(self.wd),
               "multiqc_report.html")
    for report in reports:
        self.assertTrue(
            os.path.exists(os.path.join(self.wd, "PJB", report)),
            "Missing %s" % report)
def test_qcpipeline_non_default_log_dir(self):
    """QCPipeline: standard QC run using non-default log dir

    Passes an explicit ``log_dir`` (which must not exist beforehand)
    when adding the project, then checks the run succeeds, the QC
    products exist and the log directory is present afterwards.
    """
    # Make mock illumina_qc.sh and multiqc
    MockIlluminaQcSh.create(os.path.join(self.bin, "illumina_qc.sh"))
    MockMultiQC.create(os.path.join(self.bin, "multiqc"))
    os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
    # Make mock analysis project
    mock_project = MockAnalysisProject(
        "PJB", ("PJB1_S1_R1_001.fastq.gz", "PJB1_S1_R2_001.fastq.gz"))
    mock_project.create(top_dir=self.wd)
    project_dir = os.path.join(self.wd, "PJB")
    # Non-default log dir
    log_dir = os.path.join(self.wd, "logs")
    self.assertFalse(os.path.exists(log_dir),
                     "Log dir '%s' already exists" % log_dir)
    # Set up and run the QC
    pipeline = QCPipeline()
    pipeline.add_project(AnalysisProject("PJB", project_dir),
                         multiqc=True,
                         log_dir=log_dir)
    exit_status = pipeline.run(poll_interval=0.5,
                               max_jobs=1,
                               runners={
                                   'default': SimpleJobRunner(),
                               })
    # Check output and reports
    self.assertEqual(exit_status, 0)
    self.assertTrue(os.path.isdir(os.path.join(project_dir, "qc")),
                    "'qc' directory doesn't exist, but should")
    reports = ("qc_report.html",
               "qc_report.PJB.%s.zip" % os.path.basename(self.wd),
               "multiqc_report.html")
    for report in reports:
        self.assertTrue(os.path.exists(os.path.join(project_dir, report)),
                        "Missing %s" % report)
    # Check log directory
    self.assertTrue(os.path.exists(log_dir),
                    "Log dir '%s' not found" % log_dir)
def test_update_project_metadata_file_dont_uncomment_missing_project_when_dir_is_present(
        self):
    """AutoProcess.update_project_metadata_file: don't uncomment 'missing' project when dir is present

    Starts from a ``projects.info`` file that lists project 'FG' as a
    commented-out line, creates a directory for 'FG', runs the update
    and checks that the 'FG' line is still commented out (with its
    original comment text) in the rewritten file.
    """
    # Make an auto-process directory
    mockdir = MockAnalysisDirFactory.bcl2fastq2(
        '160621_K00879_0087_000000000-AGEW9',
        'hiseq',
        metadata={
            "run_number": 87,
            "source": "local"
        },
        top_dir=self.dirn)
    mockdir.create(no_project_dirs=True)
    projects_info = os.path.join(mockdir.dirn, "projects.info")
    # Create projects.info file with one (commented-out) project
    # already listed
    with open(projects_info, 'wt') as fp:
        fp.write(
            "#Project\tSamples\tUser\tLibrary\tSC_Platform\tOrganism\tPI\tComments\n#FG\tFG5,FG6\t.\t.\t.\t.\t.\tKeep me"
        )
    # Create the corresponding project
    project = MockAnalysisProject(
        'FG', ('FG5_S1_R1_001.fastq.gz', 'FG6_S1_R1_001.fastq.gz'))
    project.create(top_dir=mockdir.dirn)
    # Update the projects.info file
    AutoProcess(mockdir.dirn).update_project_metadata_file()
    # Check output - commented-out project must stay commented out
    # even though its directory now exists
    expected_contents = (
        "#Project\tSamples\tUser\tLibrary\tSC_Platform\tOrganism\tPI\tComments\n"
        "AB\tAB1,AB2\t.\t.\t.\t.\t.\t.\n"
        "CDE\tCDE3,CDE4\t.\t.\t.\t.\t.\t.\n"
        "#FG\tFG5,FG6\t.\t.\t.\t.\t.\tKeep me\n")
    with open(projects_info, 'rt') as fp:
        self.assertEqual(fp.read(), expected_contents)
class TestQCReporter(unittest.TestCase):
    """Tests of QCReporter against mock paired- and single-end projects."""

    def setUp(self):
        # Temporary working dir is only created on demand
        self.wd = None

    def tearDown(self):
        # Remove temporary working dir (if one was made)
        if self.wd is None:
            return
        if os.path.isdir(self.wd):
            shutil.rmtree(self.wd)

    def _make_working_dir(self):
        """Create the temporary working directory unless already set."""
        if self.wd is not None:
            return
        self.wd = tempfile.mkdtemp(suffix='.test_QCReporter')

    def _make_analysis_project(self, paired_end=True):
        """Create a mock 'PJB' project with fake QC products.

        Arguments:
          paired_end: if True (the default) generate R1 and R2 Fastq
            names for each sample, otherwise R1 only.

        Returns:
          Path to the mock project directory inside the working dir.
        """
        self._make_working_dir()
        # Generate names for fastq files to add
        reads = (1, 2) if paired_end else (1, )
        fastq_names = [
            "%s_S%d_R%d_001.fastq.gz" % (sample, index, read)
            for index, sample in enumerate(('PJB1', 'PJB2'), start=1)
            for read in reads
        ]
        # Create the mock directory
        self.analysis_dir = MockAnalysisProject('PJB', fastq_names)
        self.analysis_dir.create(top_dir=self.wd)
        # Populate with fake QC products
        qc_dir = os.path.join(self.wd, self.analysis_dir.name, 'qc')
        os.mkdir(qc_dir)
        os.mkdir(os.path.join(qc_dir, 'logs'))
        for fastq in fastq_names:
            # FastQC
            MockQCOutputs.fastqc_v0_11_2(fastq, qc_dir)
            # Fastq_screen
            for screen in ('model_organisms', 'other_organisms', 'rRNA'):
                MockQCOutputs.fastq_screen_v0_9_2(fastq, qc_dir, screen)
        return os.path.join(self.wd, self.analysis_dir.name)

    def test_qcreporter_paired_end(self):
        # Paired-end project: reporter verifies and writes a report
        project_dir = self._make_analysis_project(paired_end=True)
        reporter = QCReporter(AnalysisProject('PJB', project_dir))
        self.assertEqual(reporter.name, 'PJB')
        self.assertTrue(reporter.paired_end)
        self.assertTrue(reporter.verify())
        reporter.report(filename=os.path.join(self.wd, 'report.PE.html'))
        report_exists = os.path.exists(
            os.path.join(self.wd, 'report.PE.html'))
        self.assertTrue(report_exists)

    def test_qcreporter_single_end(self):
        # Single-end project: reporter verifies and writes a report
        project_dir = self._make_analysis_project(paired_end=False)
        reporter = QCReporter(AnalysisProject('PJB', project_dir))
        self.assertEqual(reporter.name, 'PJB')
        self.assertFalse(reporter.paired_end)
        self.assertTrue(reporter.verify())
        reporter.report(filename=os.path.join(self.wd, 'report.SE.html'))
        report_exists = os.path.exists(
            os.path.join(self.wd, 'report.SE.html'))
        self.assertTrue(report_exists)
class TestQCReporter(unittest.TestCase):
    """Exercise QCReporter on mock projects holding fake QC outputs."""

    def setUp(self):
        # No working directory until a test asks for one
        self.wd = None

    def tearDown(self):
        # Clean up the working directory when one exists on disk
        have_working_dir = self.wd is not None and os.path.isdir(self.wd)
        if have_working_dir:
            shutil.rmtree(self.wd)

    def _make_working_dir(self):
        """Make a temporary working directory on first use."""
        if self.wd is None:
            self.wd = tempfile.mkdtemp(suffix=".test_QCReporter")

    def _make_analysis_project(self, paired_end=True):
        """Set up a mock "PJB" project populated with fake QC outputs.

        Arguments:
          paired_end: when True (default) each sample gets R1 and R2
            Fastq names; when False only R1.

        Returns:
          Path of the mock project directory under the working dir.
        """
        self._make_working_dir()
        # Generate names for fastq files to add
        if not paired_end:
            read_numbers = (1,)
        else:
            read_numbers = (1, 2)
        fastq_names = []
        for sample_index, sample_name in enumerate(("PJB1", "PJB2"),
                                                   start=1):
            fastq_names.extend(
                "%s_S%d_R%d_001.fastq.gz" % (sample_name, sample_index, read)
                for read in read_numbers)
        # Create the mock directory
        self.analysis_dir = MockAnalysisProject("PJB", fastq_names)
        self.analysis_dir.create(top_dir=self.wd)
        # Populate with fake QC products
        qc_dir = os.path.join(self.wd, self.analysis_dir.name, "qc")
        os.mkdir(qc_dir)
        os.mkdir(os.path.join(qc_dir, "logs"))
        screen_names = ("model_organisms", "other_organisms", "rRNA")
        for fq in fastq_names:
            # FastQC
            MockQCOutputs.fastqc_v0_11_2(fq, qc_dir)
            # Fastq_screen
            for screen_name in screen_names:
                MockQCOutputs.fastq_screen_v0_9_2(fq, qc_dir, screen_name)
        return os.path.join(self.wd, self.analysis_dir.name)

    def test_qcreporter_paired_end(self):
        # Reporter on a paired-end project
        analysis_project = AnalysisProject(
            "PJB", self._make_analysis_project(paired_end=True))
        qc_reporter = QCReporter(analysis_project)
        self.assertEqual(qc_reporter.name, "PJB")
        self.assertTrue(qc_reporter.paired_end)
        self.assertTrue(qc_reporter.verify())
        qc_reporter.report(filename=os.path.join(self.wd, "report.PE.html"))
        self.assertTrue(
            os.path.exists(os.path.join(self.wd, "report.PE.html")))

    def test_qcreporter_single_end(self):
        # Reporter on a single-end project
        analysis_project = AnalysisProject(
            "PJB", self._make_analysis_project(paired_end=False))
        qc_reporter = QCReporter(analysis_project)
        self.assertEqual(qc_reporter.name, "PJB")
        self.assertFalse(qc_reporter.paired_end)
        self.assertTrue(qc_reporter.verify())
        qc_reporter.report(filename=os.path.join(self.wd, "report.SE.html"))
        self.assertTrue(
            os.path.exists(os.path.join(self.wd, "report.SE.html")))