Beispiel #1
0
 def test_qcpipeline_with_strandedness(self):
     """QCPipeline: standard QC run with strandedness determination

     Creates mock 'illumina_qc.sh', 'multiqc' and 'fastq_strand.py'
     executables, runs the pipeline on a mock project with a
     'fastq_strand_indexes' mapping supplied, then checks the exit
     status and that the QC outputs and reports exist.
     """
     # Create the mock executables and prepend their directory to PATH
     MockIlluminaQcSh.create(os.path.join(self.bin, "illumina_qc.sh"))
     MockMultiQC.create(os.path.join(self.bin, "multiqc"))
     MockFastqStrandPy.create(os.path.join(self.bin, "fastq_strand.py"))
     os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
     # Build the mock analysis project (two R1/R2 Fastq pairs)
     fastq_names = ("PJB1_S1_R1_001.fastq.gz",
                    "PJB1_S1_R2_001.fastq.gz",
                    "PJB2_S2_R1_001.fastq.gz",
                    "PJB2_S2_R2_001.fastq.gz")
     mock_project = MockAnalysisProject("PJB",
                                        fastq_names,
                                        metadata={'Organism': 'Human'})
     mock_project.create(top_dir=self.wd)
     project_dir = os.path.join(self.wd, "PJB")
     # Configure the pipeline and run it
     pipeline = QCPipeline()
     pipeline.add_project(AnalysisProject("PJB", project_dir),
                          multiqc=True)
     strand_indexes = {'human': '/data/hg38/star_index'}
     job_runners = {'default': SimpleJobRunner()}
     status = pipeline.run(fastq_strand_indexes=strand_indexes,
                           poll_interval=0.5,
                           max_jobs=1,
                           runners=job_runners)
     # The run should succeed and produce all outputs and reports
     self.assertEqual(status, 0)
     zip_name = "qc_report.PJB.%s.zip" % os.path.basename(self.wd)
     expected = ("qc", "qc_report.html", zip_name, "multiqc_report.html")
     for name in expected:
         target = os.path.join(project_dir, name)
         self.assertTrue(os.path.exists(target), "Missing %s" % name)
Beispiel #2
0
 def test_qcpipeline(self):
     """QCPipeline: standard QC run

     Creates mock 'illumina_qc.sh' and 'multiqc' executables, runs
     the pipeline on a mock project, then checks the exit status and
     that the QC outputs and reports exist.
     """
     # Create the mock executables and prepend their directory to PATH
     MockIlluminaQcSh.create(os.path.join(self.bin, "illumina_qc.sh"))
     MockMultiQC.create(os.path.join(self.bin, "multiqc"))
     os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
     # Build the mock analysis project (two R1/R2 Fastq pairs)
     fastq_names = ("PJB1_S1_R1_001.fastq.gz",
                    "PJB1_S1_R2_001.fastq.gz",
                    "PJB2_S2_R1_001.fastq.gz",
                    "PJB2_S2_R2_001.fastq.gz")
     mock_project = MockAnalysisProject("PJB", fastq_names)
     mock_project.create(top_dir=self.wd)
     project_dir = os.path.join(self.wd, "PJB")
     # Configure the pipeline and run it
     pipeline = QCPipeline()
     pipeline.add_project(AnalysisProject("PJB", project_dir),
                          multiqc=True)
     job_runners = {'default': SimpleJobRunner()}
     status = pipeline.run(poll_interval=0.5,
                           max_jobs=1,
                           runners=job_runners)
     # The run should succeed and produce all outputs and reports
     self.assertEqual(status, 0)
     zip_name = "qc_report.PJB.%s.zip" % os.path.basename(self.wd)
     expected = ("qc", "qc_report.html", zip_name, "multiqc_report.html")
     for name in expected:
         target = os.path.join(project_dir, name)
         self.assertTrue(os.path.exists(target), "Missing %s" % name)
Beispiel #3
0
 def test_qcpipeline_with_batching_fails_for_missing_outputs(self):
     """QCPipeline: standard QC run with batching fails for missing outputs

     Creates a mock 'illumina_qc.sh' configured with 'fastqc=False'
     and 'exit_code=1', runs the pipeline with 'batch_size=3', then
     checks that the run returns status 1, that the 'qc' directory
     exists and that none of the reports were written.
     """
     # Create the mock executables and prepend their directory to PATH
     MockIlluminaQcSh.create(os.path.join(self.bin, "illumina_qc.sh"),
                             fastqc=False,
                             exit_code=1)
     MockMultiQC.create(os.path.join(self.bin, "multiqc"))
     os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
     # Build the mock analysis project (two R1/R2 Fastq pairs)
     fastq_names = ("PJB1_S1_R1_001.fastq.gz",
                    "PJB1_S1_R2_001.fastq.gz",
                    "PJB2_S2_R1_001.fastq.gz",
                    "PJB2_S2_R2_001.fastq.gz")
     mock_project = MockAnalysisProject("PJB", fastq_names)
     mock_project.create(top_dir=self.wd)
     project_dir = os.path.join(self.wd, "PJB")
     # Configure the pipeline and run it with batching enabled
     pipeline = QCPipeline()
     pipeline.add_project(AnalysisProject("PJB", project_dir),
                          multiqc=True)
     job_runners = {'default': SimpleJobRunner()}
     status = pipeline.run(poll_interval=0.5,
                           max_jobs=1,
                           batch_size=3,
                           runners=job_runners)
     # The run should fail: 'qc' dir present but no reports
     self.assertEqual(status, 1)
     self.assertTrue(os.path.exists(os.path.join(project_dir, "qc")),
                     "Missing 'qc'")
     zip_name = "qc_report.PJB.%s.zip" % os.path.basename(self.wd)
     unexpected = ("qc_report.html", zip_name, "multiqc_report.html")
     for name in unexpected:
         target = os.path.join(project_dir, name)
         self.assertFalse(os.path.exists(target),
                          "Found %s, shouldn't be present" % name)
Beispiel #4
0
    def test_run_qc(self):
        """run_qc: standard QC run

        Creates mock 'illumina_qc.sh' and 'multiqc' executables and a
        mock bcl2fastq2 analysis directory, runs 'run_qc' with MultiQC
        enabled, then checks for each project that the QC outputs and
        reports exist and that the report zip archive contains the
        MultiQC report.
        """
        # Create the mock executables and prepend their directory to PATH
        MockIlluminaQcSh.create(os.path.join(self.bin, "illumina_qc.sh"))
        MockMultiQC.create(os.path.join(self.bin, "multiqc"))
        os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
        # Build the mock analysis directory
        run_name = '170901_M00879_0087_000000000-AGEW9'
        mockdir = MockAnalysisDirFactory.bcl2fastq2(
            run_name,
            'miseq',
            metadata={"instrument_datestamp": "170901"},
            top_dir=self.dirn)
        mockdir.create()
        # Write a settings file that sets the polling interval
        settings_ini = os.path.join(self.dirn, "settings.ini")
        with open(settings_ini, 'w') as fp:
            fp.write("[general]\npoll_interval = 0.5\n")
        # Create the autoprocess instance
        ap = AutoProcess(analysis_dir=mockdir.dirn,
                         settings=Settings(settings_ini))
        # Run the QC
        status = run_qc(ap, run_multiqc=True, max_jobs=1)
        self.assertEqual(status, 0)
        # Check outputs and reports for every project
        for project in ("AB", "CDE", "undetermined"):
            project_dir = os.path.join(mockdir.dirn, project)
            report_base = "qc_report.%s.%s_analysis" % (project, run_name)
            zip_name = "qc_report.%s.%s_analysis.zip" % (project, run_name)
            expected = ("qc",
                        "qc_report.html",
                        zip_name,
                        "multiqc_report.html")
            for name in expected:
                self.assertTrue(
                    os.path.exists(os.path.join(project_dir, name)),
                    "Missing %s in project '%s'" % (name, project))
            # The zip archive should include the MultiQC report
            multiqc = os.path.join(report_base, "multiqc_report.html")
            with zipfile.ZipFile(os.path.join(project_dir, zip_name)) as z:
                self.assertTrue(multiqc in z.namelist())
Beispiel #5
0
 def test_qcpipeline_non_default_log_dir(self):
     """QCPipeline: standard QC run using non-default log dir

     Runs the pipeline on a mock project with an explicit 'log_dir'
     passed to 'add_project', then checks the exit status, the QC
     outputs and reports, and that the log directory exists
     afterwards (it is asserted not to exist beforehand).
     """
     # Create the mock executables and prepend their directory to PATH
     MockIlluminaQcSh.create(os.path.join(self.bin, "illumina_qc.sh"))
     MockMultiQC.create(os.path.join(self.bin, "multiqc"))
     os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
     # Build the mock analysis project (single R1/R2 Fastq pair)
     fastq_names = ("PJB1_S1_R1_001.fastq.gz",
                    "PJB1_S1_R2_001.fastq.gz")
     mock_project = MockAnalysisProject("PJB", fastq_names)
     mock_project.create(top_dir=self.wd)
     project_dir = os.path.join(self.wd, "PJB")
     # The non-default log dir must not exist before the run
     log_dir = os.path.join(self.wd, "logs")
     self.assertFalse(os.path.exists(log_dir),
                      "Log dir '%s' already exists" % log_dir)
     # Configure the pipeline and run it
     pipeline = QCPipeline()
     pipeline.add_project(AnalysisProject("PJB", project_dir),
                          multiqc=True,
                          log_dir=log_dir)
     job_runners = {'default': SimpleJobRunner()}
     status = pipeline.run(poll_interval=0.5,
                           max_jobs=1,
                           runners=job_runners)
     # The run should succeed and produce all outputs and reports
     self.assertEqual(status, 0)
     self.assertTrue(os.path.isdir(os.path.join(project_dir, "qc")),
                     "'qc' directory doesn't exist, but should")
     zip_name = "qc_report.PJB.%s.zip" % os.path.basename(self.wd)
     expected = ("qc_report.html", zip_name, "multiqc_report.html")
     for name in expected:
         target = os.path.join(project_dir, name)
         self.assertTrue(os.path.exists(target), "Missing %s" % name)
     # The log directory should now exist
     self.assertTrue(os.path.exists(log_dir),
                     "Log dir '%s' not found" % log_dir)
Beispiel #6
0
    def test_run_qc_with_strandedness(self):
        """run_qc: standard QC run with strandedness determination
        """
        # Make mock illumina_qc.sh and multiqc
        MockIlluminaQcSh.create(os.path.join(self.bin, "illumina_qc.sh"))
        MockFastqStrandPy.create(os.path.join(self.bin, "fastq_strand.py"))
        MockMultiQC.create(os.path.join(self.bin, "multiqc"))
        os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
        # Make mock analysis directory
        mockdir = MockAnalysisDirFactory.bcl2fastq2(
            '170901_M00879_0087_000000000-AGEW9',
            'miseq',
            metadata={"instrument_datestamp": "170901"},
            project_metadata={
                "AB": {
                    "Organism": "human",
                },
                "CDE": {
                    "Organism": "mouse",
                }
            },
            top_dir=self.dirn)
        mockdir.create()
        # Settings file with fastq_strand indexes and
        # polling interval
        settings_ini = os.path.join(self.dirn, "settings.ini")
        with open(settings_ini, 'w') as s:
            s.write("""[general]
poll_interval = 0.5

[fastq_strand_indexes]
human = /data/genomeIndexes/hg38/STAR
mouse = /data/genomeIndexes/mm10/STAR
""")
        # Make autoprocess instance
        ap = AutoProcess(analysis_dir=mockdir.dirn,
                         settings=Settings(settings_ini))
        # Run the QC
        status = run_qc(ap, run_multiqc=True, max_jobs=1)
        self.assertEqual(status, 0)
        # Check the fastq_strand_conf files were created
        for p in ("AB", "CDE"):
            self.assertTrue(
                os.path.exists(
                    os.path.join(mockdir.dirn, p, "qc", "fastq_strand.conf")))
        # Check fastq_strand outputs are present
        for p in ("AB", "CDE"):
            fastq_strand_outputs = filter(
                lambda f: f.endswith("fastq_strand.txt"),
                os.listdir(os.path.join(mockdir.dirn, p, "qc")))
            self.assertTrue(len(fastq_strand_outputs) > 0)
        # Check output and reports
        for p in ("AB", "CDE", "undetermined"):
            for f in ("qc", "qc_report.html", "qc_report.%s.%s_analysis.zip" %
                      (p, '170901_M00879_0087_000000000-AGEW9'),
                      "multiqc_report.html"):
                self.assertTrue(
                    os.path.exists(os.path.join(mockdir.dirn, p, f)),
                    "Missing %s in project '%s'" % (f, p))
            # Check zip file has MultiQC report
            zip_file = os.path.join(
                mockdir.dirn, p, "qc_report.%s.%s_analysis.zip" %
                (p, '170901_M00879_0087_000000000-AGEW9'))
            with zipfile.ZipFile(zip_file) as z:
                multiqc = os.path.join(
                    "qc_report.%s.%s_analysis" %
                    (p, '170901_M00879_0087_000000000-AGEW9'),
                    "multiqc_report.html")
                self.assertTrue(multiqc in z.namelist())