def test_qcpipeline_with_strandedness(self):
    """QCPipeline: standard QC run with strandedness determination

    Creates mock 'illumina_qc.sh', 'multiqc' and 'fastq_strand.py'
    executables under 'self.bin', builds a mock project 'PJB' with
    'Organism' metadata set to 'Human', and runs the pipeline with a
    'fastq_strand_indexes' mapping for 'human'. The run must return
    status 0 and leave the QC outputs and reports in the project
    directory.
    """
    # Install the mock executables and put them at the front of PATH
    mock_executables = (
        (MockIlluminaQcSh, "illumina_qc.sh"),
        (MockMultiQC, "multiqc"),
        (MockFastqStrandPy, "fastq_strand.py"),
    )
    for mock_cls, exe_name in mock_executables:
        mock_cls.create(os.path.join(self.bin, exe_name))
    os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
    # Build the mock analysis project on disk
    fastq_names = ("PJB1_S1_R1_001.fastq.gz",
                   "PJB1_S1_R2_001.fastq.gz",
                   "PJB2_S2_R1_001.fastq.gz",
                   "PJB2_S2_R2_001.fastq.gz")
    mock_project = MockAnalysisProject("PJB",
                                       fastq_names,
                                       metadata={'Organism': 'Human'})
    mock_project.create(top_dir=self.wd)
    project_dir = os.path.join(self.wd, "PJB")
    # Configure the pipeline and run it
    pipeline = QCPipeline()
    pipeline.add_project(AnalysisProject("PJB", project_dir),
                         multiqc=True)
    job_runners = {'default': SimpleJobRunner(), }
    exit_status = pipeline.run(
        fastq_strand_indexes={'human': '/data/hg38/star_index'},
        poll_interval=0.5,
        max_jobs=1,
        runners=job_runners)
    # Verify the exit status and the expected outputs
    self.assertEqual(exit_status, 0)
    expected_outputs = (
        "qc",
        "qc_report.html",
        "qc_report.PJB.%s.zip" % os.path.basename(self.wd),
        "multiqc_report.html",
    )
    for output_name in expected_outputs:
        output_path = os.path.join(project_dir, output_name)
        self.assertTrue(os.path.exists(output_path),
                        "Missing %s" % output_name)
def test_qcpipeline(self):
    """QCPipeline: standard QC run

    Creates mock 'illumina_qc.sh' and 'multiqc' executables under
    'self.bin', builds a mock project 'PJB' with four Fastq names,
    and runs the pipeline with MultiQC enabled. The run must return
    status 0 and leave the QC outputs and reports in the project
    directory.
    """
    # Install the mock executables and put them at the front of PATH
    for mock_cls, exe_name in ((MockIlluminaQcSh, "illumina_qc.sh"),
                               (MockMultiQC, "multiqc")):
        mock_cls.create(os.path.join(self.bin, exe_name))
    os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
    # Build the mock analysis project on disk
    fastq_names = ("PJB1_S1_R1_001.fastq.gz",
                   "PJB1_S1_R2_001.fastq.gz",
                   "PJB2_S2_R1_001.fastq.gz",
                   "PJB2_S2_R2_001.fastq.gz")
    mock_project = MockAnalysisProject("PJB", fastq_names)
    mock_project.create(top_dir=self.wd)
    project_dir = os.path.join(self.wd, "PJB")
    # Configure the pipeline and run it
    pipeline = QCPipeline()
    pipeline.add_project(AnalysisProject("PJB", project_dir),
                         multiqc=True)
    job_runners = {'default': SimpleJobRunner(), }
    exit_status = pipeline.run(poll_interval=0.5,
                               max_jobs=1,
                               runners=job_runners)
    # Verify the exit status and the expected outputs
    self.assertEqual(exit_status, 0)
    expected_outputs = (
        "qc",
        "qc_report.html",
        "qc_report.PJB.%s.zip" % os.path.basename(self.wd),
        "multiqc_report.html",
    )
    for output_name in expected_outputs:
        output_path = os.path.join(project_dir, output_name)
        self.assertTrue(os.path.exists(output_path),
                        "Missing %s" % output_name)
def test_qcpipeline_with_batching_fails_for_missing_outputs(self):
    """QCPipeline: standard QC run with batching fails for missing outputs

    The mock 'illumina_qc.sh' is created with 'fastqc=False' and
    'exit_code=1', and the pipeline is run with 'batch_size=3'. The
    run must return status 1, the 'qc' directory must exist, and none
    of the report files may be present.
    """
    # Install a mock illumina_qc.sh configured with fastqc=False and
    # exit_code=1, plus a normal mock multiqc
    MockIlluminaQcSh.create(os.path.join(self.bin, "illumina_qc.sh"),
                            fastqc=False,
                            exit_code=1)
    MockMultiQC.create(os.path.join(self.bin, "multiqc"))
    os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
    # Build the mock analysis project on disk
    fastq_names = ("PJB1_S1_R1_001.fastq.gz",
                   "PJB1_S1_R2_001.fastq.gz",
                   "PJB2_S2_R1_001.fastq.gz",
                   "PJB2_S2_R2_001.fastq.gz")
    mock_project = MockAnalysisProject("PJB", fastq_names)
    mock_project.create(top_dir=self.wd)
    project_dir = os.path.join(self.wd, "PJB")
    # Configure the pipeline and run it with batching switched on
    pipeline = QCPipeline()
    pipeline.add_project(AnalysisProject("PJB", project_dir),
                         multiqc=True)
    job_runners = {'default': SimpleJobRunner(), }
    exit_status = pipeline.run(poll_interval=0.5,
                               max_jobs=1,
                               batch_size=3,
                               runners=job_runners)
    # The run must report failure
    self.assertEqual(exit_status, 1)
    # The QC directory is still expected to exist...
    self.assertTrue(os.path.exists(os.path.join(project_dir, "qc")),
                    "Missing 'qc'")
    # ...but none of the reports should be present
    unexpected_outputs = (
        "qc_report.html",
        "qc_report.PJB.%s.zip" % os.path.basename(self.wd),
        "multiqc_report.html",
    )
    for output_name in unexpected_outputs:
        output_path = os.path.join(project_dir, output_name)
        self.assertFalse(os.path.exists(output_path),
                         "Found %s, shouldn't be present" % output_name)
def test_qcpipeline_non_default_log_dir(self):
    """QCPipeline: standard QC run using non-default log dir

    Adds the project with 'log_dir' pointing at a 'logs' directory
    under 'self.wd' that must not exist beforehand. After a run
    returning status 0, the 'qc' directory, the reports and the log
    directory must all exist.
    """
    # Install the mock executables and put them at the front of PATH
    for mock_cls, exe_name in ((MockIlluminaQcSh, "illumina_qc.sh"),
                               (MockMultiQC, "multiqc")):
        mock_cls.create(os.path.join(self.bin, exe_name))
    os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
    # Build the mock analysis project on disk
    fastq_names = ("PJB1_S1_R1_001.fastq.gz",
                   "PJB1_S1_R2_001.fastq.gz")
    mock_project = MockAnalysisProject("PJB", fastq_names)
    mock_project.create(top_dir=self.wd)
    project_dir = os.path.join(self.wd, "PJB")
    # The custom log directory must not exist before the run
    log_dir = os.path.join(self.wd, "logs")
    self.assertFalse(os.path.exists(log_dir),
                     "Log dir '%s' already exists" % log_dir)
    # Configure the pipeline with the custom log directory and run it
    pipeline = QCPipeline()
    pipeline.add_project(AnalysisProject("PJB", project_dir),
                         multiqc=True,
                         log_dir=log_dir)
    job_runners = {'default': SimpleJobRunner(), }
    exit_status = pipeline.run(poll_interval=0.5,
                               max_jobs=1,
                               runners=job_runners)
    # Verify the exit status and the expected outputs
    self.assertEqual(exit_status, 0)
    self.assertTrue(os.path.isdir(os.path.join(project_dir, "qc")),
                    "'qc' directory doesn't exist, but should")
    expected_reports = (
        "qc_report.html",
        "qc_report.PJB.%s.zip" % os.path.basename(self.wd),
        "multiqc_report.html",
    )
    for report_name in expected_reports:
        report_path = os.path.join(project_dir, report_name)
        self.assertTrue(os.path.exists(report_path),
                        "Missing %s" % report_name)
    # The log directory must exist after the run
    self.assertTrue(os.path.exists(log_dir),
                    "Log dir '%s' not found" % log_dir)
# Execute the pipelines print "Running the final pipeline" exit_status = ppl.run(log_dir=log_dir, scripts_dir=scripts_dir, default_runner=default_runner, runners=runners, max_jobs=max_jobs, verbose=args.verbose) if exit_status != 0: # Finished with error logger.critical("Pipeline failed: exit status %s" % exit_status) sys.exit(exit_status) # Run the QC print "Running the QC" runqc = QCPipeline() runqc.add_project(analysis_project, qc_protocol="singlecell", fastq_dir="fastqs.samples", qc_dir="qc.samples", multiqc=True) runqc.add_project(analysis_project, qc_protocol="singlecell", fastq_dir="fastqs.barcodes", qc_dir="qc.barcodes", multiqc=False) exit_status = runqc.run( max_jobs=max_jobs, batch_size=25, runners={ 'qc_runner': runners['qc'],
print("Registering temporary project directory for " "deletion on pipeline completion") atexit.register(cleanup_atexit,project_dir) # Load the project project = AnalysisProject(project_dir) print("Loaded project '%s'" % project.name) # Set working directory for pipeline working_dir = args.working_dir if not working_dir: working_dir = os.path.join(project_dir,'__run_qc') # Set up and run the QC pipeline announce("Running QC pipeline") runqc = QCPipeline() runqc.add_project(project, qc_dir=qc_dir, qc_protocol=args.qc_protocol, report_html=out_file, multiqc=(not args.no_multiqc)) status = runqc.run(nthreads=nthreads, fastq_screens=fastq_screens, fastq_subset=args.fastq_screen_subset, star_indexes=star_indexes, cellranger_chemistry=\ args.cellranger_chemistry, cellranger_transcriptomes=cellranger_transcriptomes, cellranger_premrna_references=\ cellranger_premrna_references, cellranger_atac_references=cellranger_atac_references,
announce("Loading project data") project_dir = os.path.abspath(args.project_dir) project_name = os.path.basename(project_dir) project = AnalysisProject(project_name,project_dir) # Output file name if args.filename is None: out_file = None else: out_file = args.filename if not os.path.isabs(out_file): out_file = os.path.join(project.dirn,out_file) # Set up and run the QC pipeline announce("Running QC") runqc = QCPipeline() runqc.add_project(project, qc_dir=args.qc_dir, fastq_dir=args.fastq_dir, organism=args.organism, qc_protocol=args.qc_protocol) status = runqc.run(nthreads=args.nthreads, fastq_subset=args.fastq_screen_subset, fastq_strand_indexes= __settings.fastq_strand_indexes, max_jobs=args.max_jobs, batch_size=args.batch_size, runners={ 'qc_runner': qc_runner, 'verify_runner': verify_runner, 'report_runner': report_runner,