Python QCPipeline Exemples, auto_process_ngs.qc.pipeline.QCPipeline Python Exemples

Exemple #1

0

Afficher le fichier

 def test_qcpipeline_with_strandedness(self):
     """QCPipeline: standard QC run with strandedness determination

     Creates mock 'illumina_qc.sh', 'multiqc' and 'fastq_strand.py'
     under ``self.bin`` and prepends that directory to PATH, builds a
     mock project "PJB" (two paired Fastq sets, 'Organism' metadata set
     to 'Human') under ``self.wd``, then runs the pipeline with a
     'human' entry in ``fastq_strand_indexes``.

     The run must return status 0 and the project directory must
     contain 'qc', 'qc_report.html', the QC report zip archive and
     'multiqc_report.html'.
     """
     # Create the three mock programs, in the same order every time
     mock_programs = (
         (MockIlluminaQcSh, "illumina_qc.sh"),
         (MockMultiQC, "multiqc"),
         (MockFastqStrandPy, "fastq_strand.py"),
     )
     for mock_cls, program in mock_programs:
         mock_cls.create(os.path.join(self.bin, program))
     # Put the mocks ahead of anything already on PATH
     os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
     # Build the mock analysis project on disk
     fastqs = (
         "PJB1_S1_R1_001.fastq.gz", "PJB1_S1_R2_001.fastq.gz",
         "PJB2_S2_R1_001.fastq.gz", "PJB2_S2_R2_001.fastq.gz",
     )
     mock_project = MockAnalysisProject(
         "PJB", fastqs, metadata={'Organism': 'Human'})
     mock_project.create(top_dir=self.wd)
     # Configure the pipeline for the project and run it
     pipeline = QCPipeline()
     project_dir = os.path.join(self.wd, "PJB")
     pipeline.add_project(AnalysisProject("PJB", project_dir),
                          multiqc=True)
     exit_status = pipeline.run(
         fastq_strand_indexes={'human': '/data/hg38/star_index'},
         poll_interval=0.5,
         max_jobs=1,
         runners={'default': SimpleJobRunner()})
     # The run must succeed ...
     self.assertEqual(exit_status, 0)
     # ... and leave all of the expected outputs behind
     expected_outputs = (
         "qc",
         "qc_report.html",
         "qc_report.PJB.%s.zip" % os.path.basename(self.wd),
         "multiqc_report.html",
     )
     for name in expected_outputs:
         output_path = os.path.join(project_dir, name)
         self.assertTrue(os.path.exists(output_path),
                         "Missing %s" % name)

Exemple #2

0

Afficher le fichier

 def test_qcpipeline(self):
     """QCPipeline: standard QC run

     Creates mock 'illumina_qc.sh' and 'multiqc' under ``self.bin`` and
     prepends that directory to PATH, builds a mock project "PJB" with
     two paired Fastq sets under ``self.wd``, and runs the pipeline
     with MultiQC enabled.

     The run must return status 0 and the project directory must
     contain 'qc', 'qc_report.html', the QC report zip archive and
     'multiqc_report.html'.
     """
     # Create the mock programs and put them ahead of the real PATH
     illumina_qc_path = os.path.join(self.bin, "illumina_qc.sh")
     MockIlluminaQcSh.create(illumina_qc_path)
     multiqc_path = os.path.join(self.bin, "multiqc")
     MockMultiQC.create(multiqc_path)
     os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
     # Build the mock analysis project on disk
     fastqs = (
         "PJB1_S1_R1_001.fastq.gz", "PJB1_S1_R2_001.fastq.gz",
         "PJB2_S2_R1_001.fastq.gz", "PJB2_S2_R2_001.fastq.gz",
     )
     MockAnalysisProject("PJB", fastqs).create(top_dir=self.wd)
     # Configure the pipeline for the project and run it
     pipeline = QCPipeline()
     project_dir = os.path.join(self.wd, "PJB")
     pipeline.add_project(AnalysisProject("PJB", project_dir),
                          multiqc=True)
     exit_status = pipeline.run(
         poll_interval=0.5,
         max_jobs=1,
         runners={'default': SimpleJobRunner()})
     # The run must succeed and produce every expected output
     self.assertEqual(exit_status, 0)
     report_zip = "qc_report.PJB.%s.zip" % os.path.basename(self.wd)
     for name in ("qc", "qc_report.html", report_zip,
                  "multiqc_report.html"):
         self.assertTrue(
             os.path.exists(os.path.join(project_dir, name)),
             "Missing %s" % name)

Exemple #3

0

Afficher le fichier

 def test_qcpipeline_with_batching_fails_for_missing_outputs(self):
     """QCPipeline: standard QC run with batching fails for missing outputs

     The mock 'illumina_qc.sh' is created with ``fastqc=False`` and
     ``exit_code=1`` (presumably so that it omits FastQC outputs and
     exits non-zero -- confirm against MockIlluminaQcSh), and the
     pipeline is run with ``batch_size=3``.

     The run must return status 1; the 'qc' entry must exist in the
     project directory, but none of 'qc_report.html', the QC report
     zip archive or 'multiqc_report.html' may be present.
     """
     # Create a deliberately failing mock illumina_qc.sh plus multiqc
     MockIlluminaQcSh.create(
         os.path.join(self.bin, "illumina_qc.sh"),
         fastqc=False,
         exit_code=1)
     MockMultiQC.create(os.path.join(self.bin, "multiqc"))
     # Put the mocks ahead of anything already on PATH
     os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
     # Build the mock analysis project on disk
     fastqs = (
         "PJB1_S1_R1_001.fastq.gz", "PJB1_S1_R2_001.fastq.gz",
         "PJB2_S2_R1_001.fastq.gz", "PJB2_S2_R2_001.fastq.gz",
     )
     MockAnalysisProject("PJB", fastqs).create(top_dir=self.wd)
     # Configure the pipeline for the project and run it in batches
     pipeline = QCPipeline()
     project_dir = os.path.join(self.wd, "PJB")
     pipeline.add_project(AnalysisProject("PJB", project_dir),
                          multiqc=True)
     exit_status = pipeline.run(
         poll_interval=0.5,
         max_jobs=1,
         batch_size=3,
         runners={'default': SimpleJobRunner()})
     # The run must report failure
     self.assertEqual(exit_status, 1)
     # The QC directory is still expected to be there ...
     self.assertTrue(os.path.exists(os.path.join(project_dir, "qc")),
                     "Missing 'qc'")
     # ... but no reports should have been generated
     unwanted_outputs = (
         "qc_report.html",
         "qc_report.PJB.%s.zip" % os.path.basename(self.wd),
         "multiqc_report.html",
     )
     for name in unwanted_outputs:
         output_path = os.path.join(project_dir, name)
         self.assertFalse(os.path.exists(output_path),
                          "Found %s, shouldn't be present" % name)

Exemple #4

0

Afficher le fichier

 def test_qcpipeline_non_default_log_dir(self):
     """QCPipeline: standard QC run using non-default log dir

     Adds a single-sample paired-end mock project "PJB" to the pipeline
     with ``log_dir`` pointing at a 'logs' directory under ``self.wd``
     that must not exist beforehand.

     The run must return status 0, 'qc' must be a directory in the
     project, the HTML, zip and MultiQC reports must exist, and the
     requested log directory must exist afterwards.
     """
     # Create the mock programs and put them ahead of the real PATH
     for mock_cls, program in ((MockIlluminaQcSh, "illumina_qc.sh"),
                               (MockMultiQC, "multiqc")):
         mock_cls.create(os.path.join(self.bin, program))
     os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
     # Build the mock analysis project on disk
     fastqs = ("PJB1_S1_R1_001.fastq.gz", "PJB1_S1_R2_001.fastq.gz")
     MockAnalysisProject("PJB", fastqs).create(top_dir=self.wd)
     # The custom log directory must not be there before the run
     log_dir = os.path.join(self.wd, "logs")
     self.assertFalse(os.path.exists(log_dir),
                      "Log dir '%s' already exists" % log_dir)
     # Configure the pipeline with the custom log dir and run it
     pipeline = QCPipeline()
     project_dir = os.path.join(self.wd, "PJB")
     pipeline.add_project(AnalysisProject("PJB", project_dir),
                          multiqc=True,
                          log_dir=log_dir)
     exit_status = pipeline.run(
         poll_interval=0.5,
         max_jobs=1,
         runners={'default': SimpleJobRunner()})
     # The run must succeed and create the QC directory
     self.assertEqual(exit_status, 0)
     self.assertTrue(os.path.isdir(os.path.join(project_dir, "qc")),
                     "'qc' directory doesn't exist, but should")
     # All of the reports must have been written
     report_zip = "qc_report.PJB.%s.zip" % os.path.basename(self.wd)
     for name in ("qc_report.html", report_zip, "multiqc_report.html"):
         self.assertTrue(
             os.path.exists(os.path.join(project_dir, name)),
             "Missing %s" % name)
     # The custom log directory must exist after the run
     self.assertTrue(os.path.exists(log_dir),
                     "Log dir '%s' not found" % log_dir)

Exemple #5

0

Afficher le fichier

    # Execute the pipelines
    print "Running the final pipeline"
    exit_status = ppl.run(log_dir=log_dir,
                          scripts_dir=scripts_dir,
                          default_runner=default_runner,
                          runners=runners,
                          max_jobs=max_jobs,
                          verbose=args.verbose)
    if exit_status != 0:
        # Finished with error
        logger.critical("Pipeline failed: exit status %s" % exit_status)
        sys.exit(exit_status)

    # Run the QC
    print "Running the QC"
    runqc = QCPipeline()
    runqc.add_project(analysis_project,
                      qc_protocol="singlecell",
                      fastq_dir="fastqs.samples",
                      qc_dir="qc.samples",
                      multiqc=True)
    runqc.add_project(analysis_project,
                      qc_protocol="singlecell",
                      fastq_dir="fastqs.barcodes",
                      qc_dir="qc.barcodes",
                      multiqc=False)
    exit_status = runqc.run(
        max_jobs=max_jobs,
        batch_size=25,
        runners={
            'qc_runner': runners['qc'],

Exemple #6

0

Afficher le fichier

        print("Registering temporary project directory for "
              "deletion on pipeline completion")
        atexit.register(cleanup_atexit,project_dir)

    # Load the project
    project = AnalysisProject(project_dir)
    print("Loaded project '%s'" % project.name)

    # Set working directory for pipeline
    working_dir = args.working_dir
    if not working_dir:
        working_dir = os.path.join(project_dir,'__run_qc')

    # Set up and run the QC pipeline
    announce("Running QC pipeline")
    runqc = QCPipeline()
    runqc.add_project(project,
                      qc_dir=qc_dir,
                      qc_protocol=args.qc_protocol,
                      report_html=out_file,
                      multiqc=(not args.no_multiqc))
    status = runqc.run(nthreads=nthreads,
                       fastq_screens=fastq_screens,
                       fastq_subset=args.fastq_screen_subset,
                       star_indexes=star_indexes,
                       cellranger_chemistry=\
                       args.cellranger_chemistry,
                       cellranger_transcriptomes=cellranger_transcriptomes,
                       cellranger_premrna_references=\
                       cellranger_premrna_references,
                       cellranger_atac_references=cellranger_atac_references,

Exemple #7

0

Afficher le fichier

    announce("Loading project data")
    project_dir = os.path.abspath(args.project_dir)
    project_name = os.path.basename(project_dir)
    project = AnalysisProject(project_name,project_dir)

    # Output file name
    if args.filename is None:
        out_file = None
    else:
        out_file = args.filename
        if not os.path.isabs(out_file):
            out_file = os.path.join(project.dirn,out_file)

    # Set up and run the QC pipeline
    announce("Running QC")
    runqc = QCPipeline()
    runqc.add_project(project,
                      qc_dir=args.qc_dir,
                      fastq_dir=args.fastq_dir,
                      organism=args.organism,
                      qc_protocol=args.qc_protocol)
    status = runqc.run(nthreads=args.nthreads,
                       fastq_subset=args.fastq_screen_subset,
                       fastq_strand_indexes=
                       __settings.fastq_strand_indexes,
                       max_jobs=args.max_jobs,
                       batch_size=args.batch_size,
                       runners={
                           'qc_runner': qc_runner,
                           'verify_runner': verify_runner,
                           'report_runner': report_runner,