def test_qcpipeline_with_strandedness(self):
    """QCPipeline: standard QC run with strandedness determination

    Creates mock 'illumina_qc.sh', 'multiqc' and 'fastq_strand.py'
    executables under ``self.bin`` (prepended to PATH), builds a mock
    'PJB' project with four Fastq files and 'Organism' metadata, then
    runs the pipeline with a 'fastq_strand_indexes' mapping. The test
    passes when the run returns zero and the 'qc' directory, the QC
    report, the report zip archive and the MultiQC report all exist.
    """
    # Create the mock executables and make them discoverable via PATH
    for mock_cls, exe_name in ((MockIlluminaQcSh, "illumina_qc.sh"),
                               (MockMultiQC, "multiqc"),
                               (MockFastqStrandPy, "fastq_strand.py")):
        mock_cls.create(os.path.join(self.bin, exe_name))
    os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
    # Build the mock analysis project on disk
    fastq_names = ("PJB1_S1_R1_001.fastq.gz",
                   "PJB1_S1_R2_001.fastq.gz",
                   "PJB2_S2_R1_001.fastq.gz",
                   "PJB2_S2_R2_001.fastq.gz")
    mock_project = MockAnalysisProject("PJB",
                                       fastq_names,
                                       metadata={'Organism': 'Human'})
    mock_project.create(top_dir=self.wd)
    # Configure the pipeline for the project and run it
    project_dir = os.path.join(self.wd, "PJB")
    pipeline = QCPipeline()
    pipeline.add_project(AnalysisProject("PJB", project_dir),
                         multiqc=True)
    # NOTE(review): the project organism is 'Human' but the index key
    # is 'human' - presumably matched case-insensitively; confirm
    exit_status = pipeline.run(
        fastq_strand_indexes={'human': '/data/hg38/star_index'},
        poll_interval=0.5,
        max_jobs=1,
        runners={'default': SimpleJobRunner()})
    # Verify the exit status and the expected outputs
    self.assertEqual(exit_status, 0)
    expected_outputs = ("qc",
                        "qc_report.html",
                        "qc_report.PJB.%s.zip" % os.path.basename(self.wd),
                        "multiqc_report.html")
    for name in expected_outputs:
        self.assertTrue(os.path.exists(os.path.join(project_dir, name)),
                        "Missing %s" % name)
def test_qcpipeline(self):
    """QCPipeline: standard QC run

    Creates mock 'illumina_qc.sh' and 'multiqc' executables under
    ``self.bin`` (prepended to PATH), builds a mock 'PJB' project
    with four Fastq files and runs the pipeline with MultiQC enabled.
    The test passes when the run returns zero and the 'qc' directory,
    the QC report, the report zip archive and the MultiQC report all
    exist in the project directory.
    """
    # Mock executables, made discoverable via PATH
    illumina_qc_exe = os.path.join(self.bin, "illumina_qc.sh")
    multiqc_exe = os.path.join(self.bin, "multiqc")
    MockIlluminaQcSh.create(illumina_qc_exe)
    MockMultiQC.create(multiqc_exe)
    os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
    # Mock analysis project
    fastq_names = ("PJB1_S1_R1_001.fastq.gz",
                   "PJB1_S1_R2_001.fastq.gz",
                   "PJB2_S2_R1_001.fastq.gz",
                   "PJB2_S2_R2_001.fastq.gz")
    mock_project = MockAnalysisProject("PJB", fastq_names)
    mock_project.create(top_dir=self.wd)
    # Set up the pipeline and run the QC
    project_dir = os.path.join(self.wd, "PJB")
    pipeline = QCPipeline()
    pipeline.add_project(AnalysisProject("PJB", project_dir),
                         multiqc=True)
    job_runners = {'default': SimpleJobRunner()}
    exit_status = pipeline.run(poll_interval=0.5,
                               max_jobs=1,
                               runners=job_runners)
    # Check the exit status, then each expected output in turn
    self.assertEqual(exit_status, 0)
    zip_name = "qc_report.PJB.%s.zip" % os.path.basename(self.wd)
    for name in ("qc", "qc_report.html", zip_name, "multiqc_report.html"):
        output_path = os.path.join(project_dir, name)
        self.assertTrue(os.path.exists(output_path),
                        "Missing %s" % name)
def test_qcpipeline_with_batching_fails_for_missing_outputs(self):
    """QCPipeline: standard QC run with batching fails for missing outputs

    The mock 'illumina_qc.sh' is created with ``fastqc=False`` and
    ``exit_code=1``, and the pipeline is run with ``batch_size=3``.
    The test passes when the run returns 1, the 'qc' directory
    exists, and none of the QC report, the report zip archive or the
    MultiQC report have been produced.
    """
    # Mock executables: illumina_qc.sh is configured via 'fastqc' and
    # 'exit_code' (presumably to omit FastQC outputs and exit
    # non-zero - confirm against MockIlluminaQcSh)
    MockIlluminaQcSh.create(os.path.join(self.bin, "illumina_qc.sh"),
                            fastqc=False,
                            exit_code=1)
    MockMultiQC.create(os.path.join(self.bin, "multiqc"))
    os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
    # Mock analysis project
    fastq_names = ("PJB1_S1_R1_001.fastq.gz",
                   "PJB1_S1_R2_001.fastq.gz",
                   "PJB2_S2_R1_001.fastq.gz",
                   "PJB2_S2_R2_001.fastq.gz")
    mock_project = MockAnalysisProject("PJB", fastq_names)
    mock_project.create(top_dir=self.wd)
    # Set up the pipeline and run the QC in batches
    project_dir = os.path.join(self.wd, "PJB")
    pipeline = QCPipeline()
    pipeline.add_project(AnalysisProject("PJB", project_dir),
                         multiqc=True)
    exit_status = pipeline.run(poll_interval=0.5,
                               max_jobs=1,
                               batch_size=3,
                               runners={'default': SimpleJobRunner()})
    # The run must fail, leaving the QC directory but no reports
    self.assertEqual(exit_status, 1)
    self.assertTrue(os.path.exists(os.path.join(project_dir, "qc")),
                    "Missing 'qc'")
    unexpected_outputs = ("qc_report.html",
                          "qc_report.PJB.%s.zip" % os.path.basename(self.wd),
                          "multiqc_report.html")
    for name in unexpected_outputs:
        self.assertFalse(os.path.exists(os.path.join(project_dir, name)),
                         "Found %s, shouldn't be present" % name)
def test_run_qc(self):
    """run_qc: standard QC run

    Creates mock 'illumina_qc.sh' and 'multiqc' executables under
    ``self.bin`` (prepended to PATH), builds a mock bcl2fastq2
    analysis directory under ``self.dirn`` and a settings file that
    sets the polling interval, then calls ``run_qc`` with MultiQC
    enabled. The test passes when ``run_qc`` returns zero and, for
    each of the 'AB', 'CDE' and 'undetermined' projects, the 'qc'
    directory and reports exist and the report zip archive contains
    the MultiQC report.
    """
    # Mock executables, made discoverable via PATH
    MockIlluminaQcSh.create(os.path.join(self.bin, "illumina_qc.sh"))
    MockMultiQC.create(os.path.join(self.bin, "multiqc"))
    os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
    # Mock analysis directory
    run_name = '170901_M00879_0087_000000000-AGEW9'
    mockdir = MockAnalysisDirFactory.bcl2fastq2(
        run_name,
        'miseq',
        metadata={"instrument_datestamp": "170901"},
        top_dir=self.dirn)
    mockdir.create()
    # Settings file with polling interval
    settings_ini = os.path.join(self.dirn, "settings.ini")
    settings_text = ("[general]\n"
                     "poll_interval = 0.5\n")
    with open(settings_ini, 'w') as fp:
        fp.write(settings_text)
    # Autoprocess instance for the mock directory
    ap = AutoProcess(analysis_dir=mockdir.dirn,
                     settings=Settings(settings_ini))
    # Run the QC
    status = run_qc(ap, run_multiqc=True, max_jobs=1)
    self.assertEqual(status, 0)
    # Check outputs and reports project by project
    for project in ("AB", "CDE", "undetermined"):
        report_base = "qc_report.%s.%s_analysis" % (project, run_name)
        zip_name = "%s.zip" % report_base
        for name in ("qc",
                     "qc_report.html",
                     zip_name,
                     "multiqc_report.html"):
            self.assertTrue(
                os.path.exists(os.path.join(mockdir.dirn, project, name)),
                "Missing %s in project '%s'" % (name, project))
        # The zip archive must also include the MultiQC report
        zip_file = os.path.join(mockdir.dirn, project, zip_name)
        with zipfile.ZipFile(zip_file) as z:
            multiqc = os.path.join(report_base, "multiqc_report.html")
            self.assertTrue(multiqc in z.namelist())
def test_qcpipeline_non_default_log_dir(self):
    """QCPipeline: standard QC run using non-default log dir

    Adds the mock 'PJB' project to the pipeline with an explicit
    ``log_dir`` ('logs' under ``self.wd``) which must not exist
    beforehand. The test passes when the run returns zero, the 'qc'
    directory and the reports exist in the project directory, and
    the log directory exists after the run.
    """
    # Mock executables, made discoverable via PATH
    MockIlluminaQcSh.create(os.path.join(self.bin, "illumina_qc.sh"))
    MockMultiQC.create(os.path.join(self.bin, "multiqc"))
    os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
    # Mock analysis project
    mock_project = MockAnalysisProject("PJB",
                                       ("PJB1_S1_R1_001.fastq.gz",
                                        "PJB1_S1_R2_001.fastq.gz"))
    mock_project.create(top_dir=self.wd)
    # The non-default log dir must be absent before the run
    log_dir = os.path.join(self.wd, "logs")
    self.assertFalse(os.path.exists(log_dir),
                     "Log dir '%s' already exists" % log_dir)
    # Set up the pipeline with the custom log dir and run the QC
    project_dir = os.path.join(self.wd, "PJB")
    pipeline = QCPipeline()
    pipeline.add_project(AnalysisProject("PJB", project_dir),
                         multiqc=True,
                         log_dir=log_dir)
    exit_status = pipeline.run(poll_interval=0.5,
                               max_jobs=1,
                               runners={'default': SimpleJobRunner()})
    # Check exit status, QC directory and reports
    self.assertEqual(exit_status, 0)
    self.assertTrue(os.path.isdir(os.path.join(project_dir, "qc")),
                    "'qc' directory doesn't exist, but should")
    report_names = ("qc_report.html",
                    "qc_report.PJB.%s.zip" % os.path.basename(self.wd),
                    "multiqc_report.html")
    for name in report_names:
        self.assertTrue(os.path.exists(os.path.join(project_dir, name)),
                        "Missing %s" % name)
    # The log directory must exist after the run
    self.assertTrue(os.path.exists(log_dir),
                    "Log dir '%s' not found" % log_dir)
def test_run_qc_with_strandedness(self):
    """run_qc: standard QC run with strandedness determination

    Creates mock 'illumina_qc.sh', 'fastq_strand.py' and 'multiqc'
    executables under ``self.bin`` (prepended to PATH), builds a mock
    bcl2fastq2 analysis directory whose 'AB' and 'CDE' projects have
    'Organism' metadata, and writes a settings file that defines the
    polling interval and 'fastq_strand_indexes' for human and mouse.
    The test passes when ``run_qc`` returns zero; 'fastq_strand.conf'
    and at least one '*fastq_strand.txt' file exist in the 'qc'
    directory of 'AB' and 'CDE'; and, for 'AB', 'CDE' and
    'undetermined', the 'qc' directory and reports exist and the
    report zip archive contains the MultiQC report.
    """
    # Make mock illumina_qc.sh, fastq_strand.py and multiqc
    MockIlluminaQcSh.create(os.path.join(self.bin, "illumina_qc.sh"))
    MockFastqStrandPy.create(os.path.join(self.bin, "fastq_strand.py"))
    MockMultiQC.create(os.path.join(self.bin, "multiqc"))
    os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
    # Make mock analysis directory
    run_name = '170901_M00879_0087_000000000-AGEW9'
    mockdir = MockAnalysisDirFactory.bcl2fastq2(
        run_name,
        'miseq',
        metadata={"instrument_datestamp": "170901"},
        project_metadata={
            "AB": {"Organism": "human", },
            "CDE": {"Organism": "mouse", }
        },
        top_dir=self.dirn)
    mockdir.create()
    # Settings file with fastq_strand indexes and
    # polling interval
    settings_ini = os.path.join(self.dirn, "settings.ini")
    with open(settings_ini, 'w') as s:
        s.write("""[general]
poll_interval = 0.5

[fastq_strand_indexes]
human = /data/genomeIndexes/hg38/STAR
mouse = /data/genomeIndexes/mm10/STAR
""")
    # Make autoprocess instance
    ap = AutoProcess(analysis_dir=mockdir.dirn,
                     settings=Settings(settings_ini))
    # Run the QC
    status = run_qc(ap, run_multiqc=True, max_jobs=1)
    self.assertEqual(status, 0)
    # Check the fastq_strand_conf files were created
    for p in ("AB", "CDE"):
        self.assertTrue(
            os.path.exists(
                os.path.join(mockdir.dirn, p, "qc", "fastq_strand.conf")))
    # Check fastq_strand outputs are present
    for p in ("AB", "CDE"):
        # Build a real list: on Python 3 'filter' returns an iterator,
        # which has no len() and so cannot be counted directly
        fastq_strand_outputs = [
            f for f in os.listdir(os.path.join(mockdir.dirn, p, "qc"))
            if f.endswith("fastq_strand.txt")]
        self.assertTrue(len(fastq_strand_outputs) > 0,
                        "No fastq_strand outputs in project '%s'" % p)
    # Check output and reports
    for p in ("AB", "CDE", "undetermined"):
        report_base = "qc_report.%s.%s_analysis" % (p, run_name)
        zip_name = "%s.zip" % report_base
        for f in ("qc",
                  "qc_report.html",
                  zip_name,
                  "multiqc_report.html"):
            self.assertTrue(
                os.path.exists(os.path.join(mockdir.dirn, p, f)),
                "Missing %s in project '%s'" % (f, p))
        # Check zip file has MultiQC report
        zip_file = os.path.join(mockdir.dirn, p, zip_name)
        with zipfile.ZipFile(zip_file) as z:
            multiqc = os.path.join(report_base, "multiqc_report.html")
            # assertIn reports the archive contents on failure
            self.assertIn(multiqc, z.namelist())