Esempio n. 1
0
 def test_qcpipeline_with_strandedness(self):
     """QCPipeline: standard QC run with strandedness determination
     """
     # Make mock illumina_qc.sh and multiqc
     MockIlluminaQcSh.create(os.path.join(self.bin, "illumina_qc.sh"))
     MockMultiQC.create(os.path.join(self.bin, "multiqc"))
     MockFastqStrandPy.create(os.path.join(self.bin, "fastq_strand.py"))
     os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
     # Make mock analysis project
     p = MockAnalysisProject(
         "PJB", ("PJB1_S1_R1_001.fastq.gz", "PJB1_S1_R2_001.fastq.gz",
                 "PJB2_S2_R1_001.fastq.gz", "PJB2_S2_R2_001.fastq.gz"),
         metadata={'Organism': 'Human'})
     p.create(top_dir=self.wd)
     # Set up and run the QC
     runqc = QCPipeline()
     runqc.add_project(AnalysisProject("PJB", os.path.join(self.wd, "PJB")),
                       multiqc=True)
     status = runqc.run(
         fastq_strand_indexes={'human': '/data/hg38/star_index'},
         poll_interval=0.5,
         max_jobs=1,
         runners={
             'default': SimpleJobRunner(),
         })
     # Check output and reports
     self.assertEqual(status, 0)
     for f in ("qc", "qc_report.html",
               "qc_report.PJB.%s.zip" % os.path.basename(self.wd),
               "multiqc_report.html"):
         self.assertTrue(os.path.exists(os.path.join(self.wd, "PJB", f)),
                         "Missing %s" % f)
Esempio n. 2
0
 def test_qcpipeline(self):
     """QCPipeline: standard QC run
     """
     # Make mock illumina_qc.sh and multiqc
     MockIlluminaQcSh.create(os.path.join(self.bin, "illumina_qc.sh"))
     MockMultiQC.create(os.path.join(self.bin, "multiqc"))
     os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
     # Make mock analysis project
     p = MockAnalysisProject(
         "PJB", ("PJB1_S1_R1_001.fastq.gz", "PJB1_S1_R2_001.fastq.gz",
                 "PJB2_S2_R1_001.fastq.gz", "PJB2_S2_R2_001.fastq.gz"))
     p.create(top_dir=self.wd)
     # Set up and run the QC
     runqc = QCPipeline()
     runqc.add_project(AnalysisProject("PJB", os.path.join(self.wd, "PJB")),
                       multiqc=True)
     status = runqc.run(poll_interval=0.5,
                        max_jobs=1,
                        runners={
                            'default': SimpleJobRunner(),
                        })
     # Check output and reports
     self.assertEqual(status, 0)
     for f in ("qc", "qc_report.html",
               "qc_report.PJB.%s.zip" % os.path.basename(self.wd),
               "multiqc_report.html"):
         self.assertTrue(os.path.exists(os.path.join(self.wd, "PJB", f)),
                         "Missing %s" % f)
Esempio n. 3
0
 def test_qcpipeline_with_batching_fails_for_missing_outputs(self):
     """QCPipeline: standard QC run with batching fails for missing outputs
     """
     # Make mock illumina_qc.sh and multiqc
     MockIlluminaQcSh.create(os.path.join(self.bin, "illumina_qc.sh"),
                             fastqc=False,
                             exit_code=1)
     MockMultiQC.create(os.path.join(self.bin, "multiqc"))
     os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
     # Make mock analysis project
     p = MockAnalysisProject(
         "PJB", ("PJB1_S1_R1_001.fastq.gz", "PJB1_S1_R2_001.fastq.gz",
                 "PJB2_S2_R1_001.fastq.gz", "PJB2_S2_R2_001.fastq.gz"))
     p.create(top_dir=self.wd)
     # Set up and run the QC
     runqc = QCPipeline()
     runqc.add_project(AnalysisProject("PJB", os.path.join(self.wd, "PJB")),
                       multiqc=True)
     status = runqc.run(poll_interval=0.5,
                        max_jobs=1,
                        batch_size=3,
                        runners={
                            'default': SimpleJobRunner(),
                        })
     # Check output and reports
     self.assertEqual(status, 1)
     self.assertTrue(os.path.exists(os.path.join(self.wd, "PJB", "qc")),
                     "Missing 'qc'")
     for f in ("qc_report.html",
               "qc_report.PJB.%s.zip" % os.path.basename(self.wd),
               "multiqc_report.html"):
         self.assertFalse(os.path.exists(os.path.join(self.wd, "PJB", f)),
                          "Found %s, shouldn't be present" % f)
Esempio n. 4
0
 def test_qcpipeline_non_default_log_dir(self):
     """QCPipeline: standard QC run using non-default log dir
     """
     # Make mock illumina_qc.sh and multiqc
     MockIlluminaQcSh.create(os.path.join(self.bin, "illumina_qc.sh"))
     MockMultiQC.create(os.path.join(self.bin, "multiqc"))
     os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
     # Make mock analysis project
     p = MockAnalysisProject(
         "PJB", ("PJB1_S1_R1_001.fastq.gz", "PJB1_S1_R2_001.fastq.gz"))
     p.create(top_dir=self.wd)
     # Non-default log dir
     log_dir = os.path.join(self.wd, "logs")
     self.assertFalse(os.path.exists(log_dir),
                      "Log dir '%s' already exists" % log_dir)
     # Set up and run the QC
     runqc = QCPipeline()
     runqc.add_project(AnalysisProject("PJB", os.path.join(self.wd, "PJB")),
                       multiqc=True,
                       log_dir=log_dir)
     status = runqc.run(poll_interval=0.5,
                        max_jobs=1,
                        runners={
                            'default': SimpleJobRunner(),
                        })
     # Check output and reports
     self.assertEqual(status, 0)
     self.assertTrue(os.path.isdir(os.path.join(self.wd, "PJB", "qc")),
                     "'qc' directory doesn't exist, but should")
     for f in ("qc_report.html",
               "qc_report.PJB.%s.zip" % os.path.basename(self.wd),
               "multiqc_report.html"):
         self.assertTrue(os.path.exists(os.path.join(self.wd, "PJB", f)),
                         "Missing %s" % f)
     # Check log directory
     self.assertTrue(os.path.exists(log_dir),
                     "Log dir '%s' not found" % log_dir)
Esempio n. 5
0
    runqc.add_project(analysis_project,
                      qc_protocol="singlecell",
                      fastq_dir="fastqs.samples",
                      qc_dir="qc.samples",
                      multiqc=True)
    runqc.add_project(analysis_project,
                      qc_protocol="singlecell",
                      fastq_dir="fastqs.barcodes",
                      qc_dir="qc.barcodes",
                      multiqc=False)
    exit_status = runqc.run(
        max_jobs=max_jobs,
        batch_size=25,
        runners={
            'qc_runner': runners['qc'],
            'report_runner': default_runner,
            'verify_runner': default_runner
        },
        fastq_strand_indexes=__settings.fastq_strand_indexes,
        nthreads=nprocessors['qc'],
        default_runner=default_runner,
        verbose=args.verbose)
    if exit_status != 0:
        # Finished with error
        logger.critical("QC failed: exit status %s" % exit_status)
        sys.exit(exit_status)

    # Finish
    print "All pipelines completed ok"
    sys.exit(0)
Esempio n. 6
0
 status = runqc.run(nthreads=nthreads,
                    fastq_screens=fastq_screens,
                    fastq_subset=args.fastq_screen_subset,
                    star_indexes=star_indexes,
                    cellranger_chemistry=\
                    args.cellranger_chemistry,
                    cellranger_transcriptomes=cellranger_transcriptomes,
                    cellranger_premrna_references=\
                    cellranger_premrna_references,
                    cellranger_atac_references=cellranger_atac_references,
                    cellranger_arc_references=cellranger_multiome_references,
                    cellranger_jobmode=cellranger_jobmode,
                    cellranger_maxjobs=max_jobs,
                    cellranger_mempercore=cellranger_mempercore,
                    cellranger_jobinterval=cellranger_jobinterval,
                    cellranger_localcores=cellranger_localcores,
                    cellranger_localmem=cellranger_localmem,
                    cellranger_exe=args.cellranger_exe,
                    cellranger_reference_dataset=\
                    cellranger_reference_dataset,
                    cellranger_out_dir=out_dir,
                    max_jobs=max_jobs,
                    max_slots=max_cores,
                    batch_size=args.batch_size,
                    batch_limit=args.max_batches,
                    runners=runners,
                    default_runner=default_runner,
                    envmodules=envmodules,
                    enable_conda=enable_conda,
                    conda_env_dir=args.conda_env_dir,
                    working_dir=working_dir,
                    legacy_screens=use_legacy_screen_names,
                    verbose=args.verbose)
Esempio n. 7
0
    # Output file name
    if args.filename is None:
        out_file = None
    else:
        out_file = args.filename
        if not os.path.isabs(out_file):
            out_file = os.path.join(project.dirn,out_file)

    # Set up and run the QC pipeline
    announce("Running QC")
    runqc = QCPipeline()
    runqc.add_project(project,
                      qc_dir=args.qc_dir,
                      fastq_dir=args.fastq_dir,
                      organism=args.organism,
                      qc_protocol=args.qc_protocol)
    status = runqc.run(nthreads=args.nthreads,
                       fastq_subset=args.fastq_screen_subset,
                       fastq_strand_indexes=
                       __settings.fastq_strand_indexes,
                       max_jobs=args.max_jobs,
                       batch_size=args.batch_size,
                       runners={
                           'qc_runner': qc_runner,
                           'verify_runner': verify_runner,
                           'report_runner': report_runner,
                       })
    if status:
        logger.critical("QC failed (see warnings above)")
    sys.exit(status)