def test_qcpipeline_with_strandedness(self):
    """QCPipeline: standard QC run with strandedness determination

    Creates mock 'illumina_qc.sh', 'multiqc' and 'fastq_strand.py'
    executables, builds a mock project with four Fastq files and
    'Organism' metadata, then runs the pipeline with a
    'fastq_strand_indexes' mapping. The run is expected to return
    status 0 and to leave the 'qc' directory, the HTML and ZIP QC
    reports and the MultiQC report in the project directory.
    """
    # Install the mock executables and put them first on the PATH
    for mock_cls, exe_name in ((MockIlluminaQcSh, "illumina_qc.sh"),
                               (MockMultiQC, "multiqc"),
                               (MockFastqStrandPy, "fastq_strand.py")):
        mock_cls.create(os.path.join(self.bin, exe_name))
    os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
    # Build the mock analysis project on disk
    fastqs = ("PJB1_S1_R1_001.fastq.gz",
              "PJB1_S1_R2_001.fastq.gz",
              "PJB2_S2_R1_001.fastq.gz",
              "PJB2_S2_R2_001.fastq.gz")
    mock_project = MockAnalysisProject("PJB",
                                       fastqs,
                                       metadata={'Organism': 'Human'})
    mock_project.create(top_dir=self.wd)
    project_dir = os.path.join(self.wd, "PJB")
    # Configure the pipeline and run it
    pipeline = QCPipeline()
    pipeline.add_project(AnalysisProject("PJB", project_dir),
                         multiqc=True)
    exit_status = pipeline.run(
        fastq_strand_indexes={'human': '/data/hg38/star_index'},
        poll_interval=0.5,
        max_jobs=1,
        runners={'default': SimpleJobRunner()})
    # Verify the exit status and the expected outputs
    self.assertEqual(exit_status, 0)
    expected = ("qc",
                "qc_report.html",
                "qc_report.PJB.%s.zip" % os.path.basename(self.wd),
                "multiqc_report.html")
    for name in expected:
        self.assertTrue(os.path.exists(os.path.join(project_dir, name)),
                        "Missing %s" % name)
def test_qcpipeline(self):
    """QCPipeline: standard QC run

    Creates mock 'illumina_qc.sh' and 'multiqc' executables, builds
    a mock project with four Fastq files and runs the pipeline with
    MultiQC enabled. The run is expected to return status 0 and to
    leave the 'qc' directory, the HTML and ZIP QC reports and the
    MultiQC report in the project directory.
    """
    # Mock executables go in self.bin, which is prepended to the PATH
    illumina_qc_exe = os.path.join(self.bin, "illumina_qc.sh")
    multiqc_exe = os.path.join(self.bin, "multiqc")
    MockIlluminaQcSh.create(illumina_qc_exe)
    MockMultiQC.create(multiqc_exe)
    os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
    # Mock analysis project
    mock_project = MockAnalysisProject(
        "PJB",
        ("PJB1_S1_R1_001.fastq.gz",
         "PJB1_S1_R2_001.fastq.gz",
         "PJB2_S2_R1_001.fastq.gz",
         "PJB2_S2_R2_001.fastq.gz"))
    mock_project.create(top_dir=self.wd)
    project_dir = os.path.join(self.wd, "PJB")
    # Set up the pipeline and run the QC
    pipeline = QCPipeline()
    pipeline.add_project(AnalysisProject("PJB", project_dir),
                         multiqc=True)
    runners = {'default': SimpleJobRunner()}
    exit_status = pipeline.run(poll_interval=0.5,
                               max_jobs=1,
                               runners=runners)
    # Check the exit status, outputs and reports
    self.assertEqual(exit_status, 0)
    zip_name = "qc_report.PJB.%s.zip" % os.path.basename(self.wd)
    for name in ("qc", "qc_report.html", zip_name, "multiqc_report.html"):
        target = os.path.join(project_dir, name)
        self.assertTrue(os.path.exists(target), "Missing %s" % name)
def test_qcpipeline_with_batching_fails_for_missing_outputs(self):
    """QCPipeline: standard QC run with batching fails for missing outputs

    The mock 'illumina_qc.sh' is created with 'fastqc=False' and
    'exit_code=1', and the pipeline is run with 'batch_size=3'. The
    run is expected to return status 1, to create the 'qc' directory,
    and not to produce any of the QC or MultiQC reports.
    """
    # Mock executables: the illumina_qc.sh mock is set up to fail
    MockIlluminaQcSh.create(os.path.join(self.bin, "illumina_qc.sh"),
                            fastqc=False,
                            exit_code=1)
    MockMultiQC.create(os.path.join(self.bin, "multiqc"))
    os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
    # Mock analysis project
    fastqs = ("PJB1_S1_R1_001.fastq.gz",
              "PJB1_S1_R2_001.fastq.gz",
              "PJB2_S2_R1_001.fastq.gz",
              "PJB2_S2_R2_001.fastq.gz")
    MockAnalysisProject("PJB", fastqs).create(top_dir=self.wd)
    project_dir = os.path.join(self.wd, "PJB")
    # Set up the pipeline and run the QC with batching
    pipeline = QCPipeline()
    pipeline.add_project(AnalysisProject("PJB", project_dir),
                         multiqc=True)
    exit_status = pipeline.run(poll_interval=0.5,
                               max_jobs=1,
                               batch_size=3,
                               runners={'default': SimpleJobRunner()})
    # The run should fail, leaving the qc dir but no reports
    self.assertEqual(exit_status, 1)
    qc_dir = os.path.join(project_dir, "qc")
    self.assertTrue(os.path.exists(qc_dir), "Missing 'qc'")
    absent = ("qc_report.html",
              "qc_report.PJB.%s.zip" % os.path.basename(self.wd),
              "multiqc_report.html")
    for name in absent:
        self.assertFalse(os.path.exists(os.path.join(project_dir, name)),
                         "Found %s, shouldn't be present" % name)
def test_publish_qc_with_projects_no_reports(self):
    """publish_qc: projects with all QC outputs but no reports

    Builds a mock auto-process directory, adds a processing report
    and QC outputs for every project, then deletes the QC report
    files (ZIP, HTML and MultiQC) from each project. 'publish_qc' is
    then run against a fresh publication area, and the index, the
    processing report and the per-project QC outputs are expected
    to exist there.
    """
    run_name = '160621_K00879_0087_000000000-AGEW9'
    # Create the mock auto-process directory
    mockdir = MockAnalysisDirFactory.bcl2fastq2(
        run_name,
        'hiseq',
        metadata={"run_number": 87,
                  "source": "local",
                  "instrument_datestamp": "160621"},
        top_dir=self.dirn)
    mockdir.create()
    ap = AutoProcess(mockdir.dirn, settings=self.settings)
    # Populate with a processing report and per-project QC outputs
    UpdateAnalysisDir(ap).add_processing_report()
    for project in ap.get_analysis_projects():
        UpdateAnalysisProject(project).add_qc_outputs()
    # Delete the QC reports from each project
    for project in ap.get_analysis_projects():
        report_files = (
            "qc_report.%s.%s.zip" % (project.name,
                                     os.path.basename(ap.analysis_dir)),
            "qc_report.html",
            "multiqc_report.html",
        )
        for name in report_files:
            os.remove(os.path.join(project.dirn, name))
    # Mock multiqc executable on the PATH
    MockMultiQC.create(os.path.join(self.bin, "multiqc"))
    os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
    # Mock publication area
    publication_dir = os.path.join(self.dirn, 'QC')
    os.mkdir(publication_dir)
    # Publish the QC
    publish_qc(ap, location=publication_dir)
    # Assemble the list of items expected in the published directory
    expected = ["index.html", "processing_qc.html"]
    for project in ap.get_analysis_projects():
        # Standard QC outputs for this project
        project_qc = "qc_report.%s.%s" % (
            project.name,
            os.path.basename(ap.analysis_dir))
        expected.extend((project_qc,
                         "%s.zip" % project_qc,
                         os.path.join(project_qc, "qc_report.html"),
                         os.path.join(project_qc, "qc")))
    published_dir = os.path.join(publication_dir,
                                 "%s_analysis" % run_name)
    for item in expected:
        path = os.path.join(published_dir, item)
        self.assertTrue(os.path.exists(path), "Missing %s" % path)
def test_import_project_with_qc(self):
    """import_project: check project with QC outputs is imported

    Adds QC outputs (with 'include_multiqc=False') to the project in
    'self.new_project_dir', checks that 'NewProj' is not yet part of
    the mock analysis directory, imports it with 'import_project',
    and then checks that it is present both via the original
    AutoProcess instance and via a freshly created one. Finally the
    QC report, MultiQC report and QC ZIP file are expected to exist
    in the imported project.

    Membership checks use assertIn/assertNotIn so that a failure
    reports the actual list of project names.
    """
    # Make mock multiqc
    MockMultiQC.create(os.path.join(self.bin, "multiqc"))
    os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
    # Make an auto-process directory
    mockdir = MockAnalysisDirFactory.bcl2fastq2(
        '160621_M00879_0087_000000000-AGEW9',
        'miseq',
        top_dir=self.dirn)
    mockdir.create()
    # Add QC outputs to the project to be imported
    UpdateAnalysisProject(
        AnalysisProject('NewProj', self.new_project_dir)
    ).add_qc_outputs(include_multiqc=False)
    # Diagnostic listing of the project before import
    print(os.listdir(os.path.join(self.dirn, 'NewProj')))
    # Check that the project is not currently present
    ap = AutoProcess(mockdir.dirn)
    self.assertNotIn(
        'NewProj',
        [p.name for p in ap.get_analysis_projects()])
    self.assertNotIn(
        'NewProj',
        [p.name for p in ap.get_analysis_projects_from_dirs()])
    self.assertFalse(
        os.path.exists(os.path.join(ap.analysis_dir, 'NewProj')))
    # Import the project
    import_project(ap, self.new_project_dir)
    self.assertIn(
        'NewProj',
        [p.name for p in ap.get_analysis_projects()])
    self.assertIn(
        'NewProj',
        [p.name for p in ap.get_analysis_projects_from_dirs()])
    self.assertTrue(
        os.path.exists(os.path.join(ap.analysis_dir, 'NewProj')))
    # Verify via fresh AutoProcess object
    ap2 = AutoProcess(mockdir.dirn)
    self.assertIn(
        'NewProj',
        [p.name for p in ap2.get_analysis_projects()])
    self.assertIn(
        'NewProj',
        [p.name for p in ap2.get_analysis_projects_from_dirs()])
    self.assertTrue(
        os.path.exists(os.path.join(ap2.analysis_dir, 'NewProj')))
    # Check for QC report and ZIP file
    # Diagnostic listing of the project after import
    print(os.listdir(os.path.join(ap2.analysis_dir, 'NewProj')))
    for name in (
            "qc_report.html",
            "multiqc_report.html",
            "qc_report.NewProj.160621_M00879_0087_000000000-AGEW9.zip",
    ):
        path = os.path.join(ap2.analysis_dir, 'NewProj', name)
        self.assertTrue(os.path.exists(path), "Missing %s" % path)
def test_run_qc(self):
    """run_qc: standard QC run

    Builds a mock analysis directory and a settings file that sets
    the polling interval, then runs 'run_qc' with MultiQC enabled.
    The run is expected to return status 0. For each of the projects
    'AB', 'CDE' and 'undetermined' the 'qc' directory and the QC and
    MultiQC reports must exist, and the QC ZIP archive must contain
    the MultiQC report.
    """
    run_id = '170901_M00879_0087_000000000-AGEW9'
    # Mock illumina_qc.sh and multiqc executables on the PATH
    MockIlluminaQcSh.create(os.path.join(self.bin, "illumina_qc.sh"))
    MockMultiQC.create(os.path.join(self.bin, "multiqc"))
    os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
    # Mock analysis directory
    mockdir = MockAnalysisDirFactory.bcl2fastq2(
        run_id,
        'miseq',
        metadata={"instrument_datestamp": "170901"},
        top_dir=self.dirn)
    mockdir.create()
    # Settings file with polling interval
    settings_ini = os.path.join(self.dirn, "settings.ini")
    with open(settings_ini, 'w') as s:
        s.write("[general]\n"
                "poll_interval = 0.5\n")
    # Autoprocess instance using those settings
    ap = AutoProcess(analysis_dir=mockdir.dirn,
                     settings=Settings(settings_ini))
    # Run the QC
    status = run_qc(ap, run_multiqc=True, max_jobs=1)
    self.assertEqual(status, 0)
    # Check outputs and reports for every project
    for project in ("AB", "CDE", "undetermined"):
        report_name = "qc_report.%s.%s_analysis" % (project, run_id)
        zip_name = "%s.zip" % report_name
        project_dir = os.path.join(mockdir.dirn, project)
        for name in ("qc",
                     "qc_report.html",
                     zip_name,
                     "multiqc_report.html"):
            self.assertTrue(
                os.path.exists(os.path.join(project_dir, name)),
                "Missing %s in project '%s'" % (name, project))
        # The ZIP archive must include the MultiQC report
        with zipfile.ZipFile(os.path.join(project_dir, zip_name)) as z:
            multiqc = os.path.join(report_name, "multiqc_report.html")
            self.assertTrue(multiqc in z.namelist())
def test_qcpipeline_non_default_log_dir(self):
    """QCPipeline: standard QC run using non-default log dir

    Adds a mock project to the pipeline with an explicit 'log_dir'
    which does not exist beforehand. The run is expected to return
    status 0, to produce the 'qc' directory and the reports in the
    project directory, and to leave the log directory in place.
    """
    # Mock illumina_qc.sh and multiqc executables on the PATH
    for mock_cls, exe_name in ((MockIlluminaQcSh, "illumina_qc.sh"),
                               (MockMultiQC, "multiqc")):
        mock_cls.create(os.path.join(self.bin, exe_name))
    os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
    # Mock analysis project with a single Fastq pair
    mock_project = MockAnalysisProject("PJB",
                                       ("PJB1_S1_R1_001.fastq.gz",
                                        "PJB1_S1_R2_001.fastq.gz"))
    mock_project.create(top_dir=self.wd)
    project_dir = os.path.join(self.wd, "PJB")
    # Non-default log dir: must not exist before the run
    log_dir = os.path.join(self.wd, "logs")
    self.assertFalse(os.path.exists(log_dir),
                     "Log dir '%s' already exists" % log_dir)
    # Set up the pipeline and run the QC
    pipeline = QCPipeline()
    pipeline.add_project(AnalysisProject("PJB", project_dir),
                         multiqc=True,
                         log_dir=log_dir)
    exit_status = pipeline.run(poll_interval=0.5,
                               max_jobs=1,
                               runners={'default': SimpleJobRunner()})
    # Check the exit status, outputs and reports
    self.assertEqual(exit_status, 0)
    self.assertTrue(os.path.isdir(os.path.join(project_dir, "qc")),
                    "'qc' directory doesn't exist, but should")
    reports = ("qc_report.html",
               "qc_report.PJB.%s.zip" % os.path.basename(self.wd),
               "multiqc_report.html")
    for name in reports:
        self.assertTrue(os.path.exists(os.path.join(project_dir, name)),
                        "Missing %s" % name)
    # Check the log directory
    self.assertTrue(os.path.exists(log_dir),
                    "Log dir '%s' not found" % log_dir)
def test_run_qc_with_strandedness(self):
    """run_qc: standard QC run with strandedness determination

    Builds a mock analysis directory whose 'AB' and 'CDE' projects
    have 'Organism' metadata, and a settings file that sets the
    polling interval and defines 'fastq_strand_indexes' for human
    and mouse. 'run_qc' is expected to return status 0. For 'AB'
    and 'CDE' a 'fastq_strand.conf' file and at least one
    '*fastq_strand.txt' output must exist in the 'qc' directory.
    For all projects the 'qc' directory and the QC and MultiQC
    reports must exist, and the QC ZIP archive must contain the
    MultiQC report.
    """
    # Make mock illumina_qc.sh, fastq_strand.py and multiqc
    MockIlluminaQcSh.create(os.path.join(self.bin, "illumina_qc.sh"))
    MockFastqStrandPy.create(os.path.join(self.bin, "fastq_strand.py"))
    MockMultiQC.create(os.path.join(self.bin, "multiqc"))
    os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
    # Make mock analysis directory
    mockdir = MockAnalysisDirFactory.bcl2fastq2(
        '170901_M00879_0087_000000000-AGEW9',
        'miseq',
        metadata={"instrument_datestamp": "170901"},
        project_metadata={
            "AB": {"Organism": "human", },
            "CDE": {"Organism": "mouse", }
        },
        top_dir=self.dirn)
    mockdir.create()
    # Settings file with fastq_strand indexes and
    # polling interval
    settings_ini = os.path.join(self.dirn, "settings.ini")
    with open(settings_ini, 'w') as s:
        s.write("""[general]
poll_interval = 0.5

[fastq_strand_indexes]
human = /data/genomeIndexes/hg38/STAR
mouse = /data/genomeIndexes/mm10/STAR
""")
    # Make autoprocess instance
    ap = AutoProcess(analysis_dir=mockdir.dirn,
                     settings=Settings(settings_ini))
    # Run the QC
    status = run_qc(ap, run_multiqc=True, max_jobs=1)
    self.assertEqual(status, 0)
    # Check the fastq_strand_conf files were created
    for p in ("AB", "CDE"):
        self.assertTrue(
            os.path.exists(
                os.path.join(mockdir.dirn, p, "qc", "fastq_strand.conf")))
    # Check fastq_strand outputs are present
    for p in ("AB", "CDE"):
        # Build a real list: on Python 3 'filter' returns an iterator,
        # which has no len() and would raise TypeError here
        fastq_strand_outputs = [
            f for f in os.listdir(os.path.join(mockdir.dirn, p, "qc"))
            if f.endswith("fastq_strand.txt")]
        self.assertTrue(
            len(fastq_strand_outputs) > 0,
            "No fastq_strand outputs found in project '%s'" % p)
    # Check output and reports
    for p in ("AB", "CDE", "undetermined"):
        for f in ("qc",
                  "qc_report.html",
                  "qc_report.%s.%s_analysis.zip" % (
                      p,
                      '170901_M00879_0087_000000000-AGEW9'),
                  "multiqc_report.html"):
            self.assertTrue(
                os.path.exists(os.path.join(mockdir.dirn, p, f)),
                "Missing %s in project '%s'" % (f, p))
        # Check zip file has MultiQC report
        zip_file = os.path.join(
            mockdir.dirn,
            p,
            "qc_report.%s.%s_analysis.zip" % (
                p,
                '170901_M00879_0087_000000000-AGEW9'))
        with zipfile.ZipFile(zip_file) as z:
            multiqc = os.path.join(
                "qc_report.%s.%s_analysis" % (
                    p,
                    '170901_M00879_0087_000000000-AGEW9'),
                "multiqc_report.html")
            self.assertTrue(multiqc in z.namelist())