def test_qcpipeline_with_strandedness(self):
    """QCPipeline: standard QC run with strandedness determination

    Creates mock 'illumina_qc.sh', 'multiqc' and 'fastq_strand.py'
    executables in the test 'bin' directory and prepends it to PATH,
    builds a mock project 'PJB' holding four Fastq files with the
    'Organism' metadata item set to 'Human', then runs the pipeline
    with a 'human' entry in 'fastq_strand_indexes'. The run must
    return status 0 and leave the QC directory and reports in the
    project directory.
    """
    # Install the mock external programs and expose them via PATH
    MockIlluminaQcSh.create(os.path.join(self.bin, "illumina_qc.sh"))
    MockMultiQC.create(os.path.join(self.bin, "multiqc"))
    MockFastqStrandPy.create(os.path.join(self.bin, "fastq_strand.py"))
    os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
    # Build the mock analysis project on disk
    fastq_names = ("PJB1_S1_R1_001.fastq.gz",
                   "PJB1_S1_R2_001.fastq.gz",
                   "PJB2_S2_R1_001.fastq.gz",
                   "PJB2_S2_R2_001.fastq.gz")
    mock_project = MockAnalysisProject("PJB",
                                       fastq_names,
                                       metadata={'Organism': 'Human'})
    mock_project.create(top_dir=self.wd)
    project_dir = os.path.join(self.wd, "PJB")
    # Configure the pipeline and run it
    pipeline = QCPipeline()
    pipeline.add_project(AnalysisProject("PJB", project_dir),
                         multiqc=True)
    exit_code = pipeline.run(
        fastq_strand_indexes={'human': '/data/hg38/star_index'},
        poll_interval=0.5,
        max_jobs=1,
        runners={
            'default': SimpleJobRunner(),
        })
    # The run must succeed and produce every expected output
    self.assertEqual(exit_code, 0)
    expected_outputs = (
        "qc",
        "qc_report.html",
        "qc_report.PJB.%s.zip" % os.path.basename(self.wd),
        "multiqc_report.html",
    )
    for name in expected_outputs:
        self.assertTrue(os.path.exists(os.path.join(project_dir, name)),
                        "Missing %s" % name)
def test_qcpipeline(self):
    """QCPipeline: standard QC run

    Creates mock 'illumina_qc.sh' and 'multiqc' executables in the
    test 'bin' directory and prepends it to PATH, builds a mock
    project 'PJB' holding four Fastq files, then runs the pipeline
    with MultiQC enabled. The run must return status 0 and leave the
    QC directory and reports in the project directory.
    """
    # Install the mock external programs and expose them via PATH
    MockIlluminaQcSh.create(os.path.join(self.bin, "illumina_qc.sh"))
    MockMultiQC.create(os.path.join(self.bin, "multiqc"))
    os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
    # Build the mock analysis project on disk
    fastq_names = ("PJB1_S1_R1_001.fastq.gz",
                   "PJB1_S1_R2_001.fastq.gz",
                   "PJB2_S2_R1_001.fastq.gz",
                   "PJB2_S2_R2_001.fastq.gz")
    mock_project = MockAnalysisProject("PJB", fastq_names)
    mock_project.create(top_dir=self.wd)
    project_dir = os.path.join(self.wd, "PJB")
    # Configure the pipeline and run it
    pipeline = QCPipeline()
    pipeline.add_project(AnalysisProject("PJB", project_dir),
                         multiqc=True)
    exit_code = pipeline.run(poll_interval=0.5,
                             max_jobs=1,
                             runners={
                                 'default': SimpleJobRunner(),
                             })
    # The run must succeed and produce every expected output
    self.assertEqual(exit_code, 0)
    expected_outputs = (
        "qc",
        "qc_report.html",
        "qc_report.PJB.%s.zip" % os.path.basename(self.wd),
        "multiqc_report.html",
    )
    for name in expected_outputs:
        self.assertTrue(os.path.exists(os.path.join(project_dir, name)),
                        "Missing %s" % name)
def test_qcpipeline_with_batching_fails_for_missing_outputs(self):
    """QCPipeline: standard QC run with batching fails for missing outputs

    The mock 'illumina_qc.sh' is created with 'fastqc=False' and
    'exit_code=1', and the pipeline is run with 'batch_size=3' on a
    mock project 'PJB' holding four Fastq files. The run must return
    status 1; the 'qc' directory must exist but none of the report
    files may be present.
    """
    # Install the mock external programs (with a failing QC script)
    # and expose them via PATH
    MockIlluminaQcSh.create(os.path.join(self.bin, "illumina_qc.sh"),
                            fastqc=False,
                            exit_code=1)
    MockMultiQC.create(os.path.join(self.bin, "multiqc"))
    os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
    # Build the mock analysis project on disk
    fastq_names = ("PJB1_S1_R1_001.fastq.gz",
                   "PJB1_S1_R2_001.fastq.gz",
                   "PJB2_S2_R1_001.fastq.gz",
                   "PJB2_S2_R2_001.fastq.gz")
    mock_project = MockAnalysisProject("PJB", fastq_names)
    mock_project.create(top_dir=self.wd)
    project_dir = os.path.join(self.wd, "PJB")
    # Configure the pipeline and run it in batches
    pipeline = QCPipeline()
    pipeline.add_project(AnalysisProject("PJB", project_dir),
                         multiqc=True)
    exit_code = pipeline.run(poll_interval=0.5,
                             max_jobs=1,
                             batch_size=3,
                             runners={
                                 'default': SimpleJobRunner(),
                             })
    # The run must fail, leaving the QC directory but no reports
    self.assertEqual(exit_code, 1)
    self.assertTrue(os.path.exists(os.path.join(project_dir, "qc")),
                    "Missing 'qc'")
    unexpected_outputs = (
        "qc_report.html",
        "qc_report.PJB.%s.zip" % os.path.basename(self.wd),
        "multiqc_report.html",
    )
    for name in unexpected_outputs:
        self.assertFalse(os.path.exists(os.path.join(project_dir, name)),
                         "Found %s, shouldn't be present" % name)
def test_qcpipeline_non_default_log_dir(self):
    """QCPipeline: standard QC run using non-default log dir

    Runs the pipeline on a mock project 'PJB' holding two Fastq
    files, passing a 'logs' directory under the working directory
    as 'log_dir' when the project is added. The log directory must
    not exist beforehand; after the run the status must be 0 and the
    'qc' directory, the reports and the log directory must all exist.
    """
    # Install the mock external programs and expose them via PATH
    MockIlluminaQcSh.create(os.path.join(self.bin, "illumina_qc.sh"))
    MockMultiQC.create(os.path.join(self.bin, "multiqc"))
    os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
    # Build the mock analysis project on disk
    fastq_names = ("PJB1_S1_R1_001.fastq.gz",
                   "PJB1_S1_R2_001.fastq.gz")
    mock_project = MockAnalysisProject("PJB", fastq_names)
    mock_project.create(top_dir=self.wd)
    project_dir = os.path.join(self.wd, "PJB")
    # Choose a log directory which doesn't exist yet
    log_dir = os.path.join(self.wd, "logs")
    self.assertFalse(os.path.exists(log_dir),
                     "Log dir '%s' already exists" % log_dir)
    # Configure the pipeline with the custom log directory and run it
    pipeline = QCPipeline()
    pipeline.add_project(AnalysisProject("PJB", project_dir),
                         multiqc=True,
                         log_dir=log_dir)
    exit_code = pipeline.run(poll_interval=0.5,
                             max_jobs=1,
                             runners={
                                 'default': SimpleJobRunner(),
                             })
    # The run must succeed and produce the QC directory and reports
    self.assertEqual(exit_code, 0)
    self.assertTrue(os.path.isdir(os.path.join(project_dir, "qc")),
                    "'qc' directory doesn't exist, but should")
    expected_outputs = (
        "qc_report.html",
        "qc_report.PJB.%s.zip" % os.path.basename(self.wd),
        "multiqc_report.html",
    )
    for name in expected_outputs:
        self.assertTrue(os.path.exists(os.path.join(project_dir, name)),
                        "Missing %s" % name)
    # The custom log directory must have been created
    self.assertTrue(os.path.exists(log_dir),
                    "Log dir '%s' not found" % log_dir)
# Queue QC for two Fastq subdirectories of the same project, each with
# its own QC output directory; only the 'samples' run requests MultiQC
runqc.add_project(analysis_project,
                  qc_protocol="singlecell",
                  fastq_dir="fastqs.samples",
                  qc_dir="qc.samples",
                  multiqc=True)
runqc.add_project(analysis_project,
                  qc_protocol="singlecell",
                  fastq_dir="fastqs.barcodes",
                  qc_dir="qc.barcodes",
                  multiqc=False)
# Run the pipeline: QC jobs use the dedicated 'qc' runner, reporting
# and verification use the default runner
exit_status = runqc.run(
    max_jobs=max_jobs,
    batch_size=25,
    runners={
        'qc_runner': runners['qc'],
        'report_runner': default_runner,
        'verify_runner': default_runner
    },
    fastq_strand_indexes=__settings.fastq_strand_indexes,
    nthreads=nprocessors['qc'],
    default_runner=default_runner,
    verbose=args.verbose)
if exit_status != 0:
    # Finished with error: report it and use the pipeline's status
    # as the process exit code
    logger.critical("QC failed: exit status %s" % exit_status)
    sys.exit(exit_status)
# Finish
# The parenthesised single-argument form prints the same text under
# Python 2 and is also valid syntax under Python 3 (the bare 'print'
# statement is a SyntaxError there)
print("All pipelines completed ok")
sys.exit(0)
# Launch the QC pipeline with the full set of run-time options; the
# resulting status is kept in 'status' for the caller to act on
status = runqc.run(
    # General QC settings
    nthreads=nthreads,
    fastq_screens=fastq_screens,
    fastq_subset=args.fastq_screen_subset,
    star_indexes=star_indexes,
    # Cellranger reference data and chemistry
    cellranger_chemistry=args.cellranger_chemistry,
    cellranger_transcriptomes=cellranger_transcriptomes,
    cellranger_premrna_references=cellranger_premrna_references,
    cellranger_atac_references=cellranger_atac_references,
    cellranger_arc_references=cellranger_multiome_references,
    # Cellranger job control and resources
    cellranger_jobmode=cellranger_jobmode,
    cellranger_maxjobs=max_jobs,
    cellranger_mempercore=cellranger_mempercore,
    cellranger_jobinterval=cellranger_jobinterval,
    cellranger_localcores=cellranger_localcores,
    cellranger_localmem=cellranger_localmem,
    cellranger_exe=args.cellranger_exe,
    cellranger_reference_dataset=cellranger_reference_dataset,
    cellranger_out_dir=out_dir,
    # Job scheduling and batching
    max_jobs=max_jobs,
    max_slots=max_cores,
    batch_size=args.batch_size,
    batch_limit=args.max_batches,
    runners=runners,
    default_runner=default_runner,
    # Execution environment
    envmodules=envmodules,
    enable_conda=enable_conda,
    conda_env_dir=args.conda_env_dir,
    working_dir=working_dir,
    legacy_screens=use_legacy_screen_names,
    verbose=args.verbose,
)
# Output file name: left as None when no name was given, otherwise a
# relative name is placed under the project directory
out_file = args.filename
if out_file is not None and not os.path.isabs(out_file):
    out_file = os.path.join(project.dirn, out_file)
# Set up and run the QC pipeline
announce("Running QC")
runqc = QCPipeline()
runqc.add_project(
    project,
    qc_dir=args.qc_dir,
    fastq_dir=args.fastq_dir,
    organism=args.organism,
    qc_protocol=args.qc_protocol,
)
status = runqc.run(
    nthreads=args.nthreads,
    fastq_subset=args.fastq_screen_subset,
    fastq_strand_indexes=__settings.fastq_strand_indexes,
    max_jobs=args.max_jobs,
    batch_size=args.batch_size,
    runners={
        'qc_runner': qc_runner,
        'verify_runner': verify_runner,
        'report_runner': report_runner,
    },
)
# Any non-zero (truthy) status is treated as failure and becomes the
# process exit code
if status:
    logger.critical("QC failed (see warnings above)")
    sys.exit(status)