Ejemplo n.º 1
0
 def test_qcpipeline_with_strandedness(self):
     """QCPipeline: standard QC run with strandedness determination
     """
     # Make mock illumina_qc.sh and multiqc
     MockIlluminaQcSh.create(os.path.join(self.bin, "illumina_qc.sh"))
     MockMultiQC.create(os.path.join(self.bin, "multiqc"))
     MockFastqStrandPy.create(os.path.join(self.bin, "fastq_strand.py"))
     os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
     # Make mock analysis project
     p = MockAnalysisProject(
         "PJB", ("PJB1_S1_R1_001.fastq.gz", "PJB1_S1_R2_001.fastq.gz",
                 "PJB2_S2_R1_001.fastq.gz", "PJB2_S2_R2_001.fastq.gz"),
         metadata={'Organism': 'Human'})
     p.create(top_dir=self.wd)
     # Set up and run the QC
     runqc = QCPipeline()
     runqc.add_project(AnalysisProject("PJB", os.path.join(self.wd, "PJB")),
                       multiqc=True)
     status = runqc.run(
         fastq_strand_indexes={'human': '/data/hg38/star_index'},
         poll_interval=0.5,
         max_jobs=1,
         runners={
             'default': SimpleJobRunner(),
         })
     # Check output and reports
     self.assertEqual(status, 0)
     for f in ("qc", "qc_report.html",
               "qc_report.PJB.%s.zip" % os.path.basename(self.wd),
               "multiqc_report.html"):
         self.assertTrue(os.path.exists(os.path.join(self.wd, "PJB", f)),
                         "Missing %s" % f)
Ejemplo n.º 2
0
 def test_qcpipeline(self):
     """QCPipeline: standard QC run
     """
     # Make mock illumina_qc.sh and multiqc
     MockIlluminaQcSh.create(os.path.join(self.bin, "illumina_qc.sh"))
     MockMultiQC.create(os.path.join(self.bin, "multiqc"))
     os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
     # Make mock analysis project
     p = MockAnalysisProject(
         "PJB", ("PJB1_S1_R1_001.fastq.gz", "PJB1_S1_R2_001.fastq.gz",
                 "PJB2_S2_R1_001.fastq.gz", "PJB2_S2_R2_001.fastq.gz"))
     p.create(top_dir=self.wd)
     # Set up and run the QC
     runqc = QCPipeline()
     runqc.add_project(AnalysisProject("PJB", os.path.join(self.wd, "PJB")),
                       multiqc=True)
     status = runqc.run(poll_interval=0.5,
                        max_jobs=1,
                        runners={
                            'default': SimpleJobRunner(),
                        })
     # Check output and reports
     self.assertEqual(status, 0)
     for f in ("qc", "qc_report.html",
               "qc_report.PJB.%s.zip" % os.path.basename(self.wd),
               "multiqc_report.html"):
         self.assertTrue(os.path.exists(os.path.join(self.wd, "PJB", f)),
                         "Missing %s" % f)
Ejemplo n.º 3
0
 def test_qcpipeline_with_batching_fails_for_missing_outputs(self):
     """QCPipeline: standard QC run with batching fails for missing outputs
     """
     # Make mock illumina_qc.sh and multiqc
     MockIlluminaQcSh.create(os.path.join(self.bin, "illumina_qc.sh"),
                             fastqc=False,
                             exit_code=1)
     MockMultiQC.create(os.path.join(self.bin, "multiqc"))
     os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
     # Make mock analysis project
     p = MockAnalysisProject(
         "PJB", ("PJB1_S1_R1_001.fastq.gz", "PJB1_S1_R2_001.fastq.gz",
                 "PJB2_S2_R1_001.fastq.gz", "PJB2_S2_R2_001.fastq.gz"))
     p.create(top_dir=self.wd)
     # Set up and run the QC
     runqc = QCPipeline()
     runqc.add_project(AnalysisProject("PJB", os.path.join(self.wd, "PJB")),
                       multiqc=True)
     status = runqc.run(poll_interval=0.5,
                        max_jobs=1,
                        batch_size=3,
                        runners={
                            'default': SimpleJobRunner(),
                        })
     # Check output and reports
     self.assertEqual(status, 1)
     self.assertTrue(os.path.exists(os.path.join(self.wd, "PJB", "qc")),
                     "Missing 'qc'")
     for f in ("qc_report.html",
               "qc_report.PJB.%s.zip" % os.path.basename(self.wd),
               "multiqc_report.html"):
         self.assertFalse(os.path.exists(os.path.join(self.wd, "PJB", f)),
                          "Found %s, shouldn't be present" % f)
Ejemplo n.º 4
0
 def test_publish_qc_with_projects_no_reports(self):
     """publish_qc: projects with all QC outputs but no reports
     """
     # Make an auto-process directory
     mockdir = MockAnalysisDirFactory.bcl2fastq2(
         '160621_K00879_0087_000000000-AGEW9',
         'hiseq',
         metadata={
             "run_number": 87,
             "source": "local",
             "instrument_datestamp": "160621"
         },
         top_dir=self.dirn)
     mockdir.create()
     ap = AutoProcess(mockdir.dirn, settings=self.settings)
     # Add processing report and QC outputs
     UpdateAnalysisDir(ap).add_processing_report()
     for project in ap.get_analysis_projects():
         UpdateAnalysisProject(project).add_qc_outputs()
     # Remove the QC reports
     for project in ap.get_analysis_projects():
         qc_reports = []
         qc_reports.append(
             "qc_report.%s.%s.zip" %
             (project.name, os.path.basename(ap.analysis_dir)))
         qc_reports.append("qc_report.html")
         qc_reports.append("multiqc_report.html")
         for f in qc_reports:
             os.remove(os.path.join(project.dirn, f))
     # Make a mock multiqc
     MockMultiQC.create(os.path.join(self.bin, "multiqc"))
     os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
     # Make a mock publication area
     publication_dir = os.path.join(self.dirn, 'QC')
     os.mkdir(publication_dir)
     # Publish
     publish_qc(ap, location=publication_dir)
     # Check outputs
     outputs = ["index.html", "processing_qc.html"]
     for project in ap.get_analysis_projects():
         # Standard QC outputs
         project_qc = "qc_report.%s.%s" % (
             project.name, os.path.basename(ap.analysis_dir))
         outputs.append(project_qc)
         outputs.append("%s.zip" % project_qc)
         outputs.append(os.path.join(project_qc, "qc_report.html"))
         outputs.append(os.path.join(project_qc, "qc"))
     for item in outputs:
         f = os.path.join(publication_dir,
                          "160621_K00879_0087_000000000-AGEW9_analysis",
                          item)
         self.assertTrue(os.path.exists(f), "Missing %s" % f)
 def test_import_project_with_qc(self):
     """import_project: check project with QC outputs is imported
     """
     # Make mock multiqc
     MockMultiQC.create(os.path.join(self.bin, "multiqc"))
     os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
     # Make an auto-process directory
     mockdir = MockAnalysisDirFactory.bcl2fastq2(
         '160621_M00879_0087_000000000-AGEW9', 'miseq', top_dir=self.dirn)
     mockdir.create()
     # Add QC outputs to the project to be imported
     UpdateAnalysisProject(AnalysisProject(
         'NewProj',
         self.new_project_dir)).add_qc_outputs(include_multiqc=False)
     print(os.listdir(os.path.join(self.dirn, 'NewProj')))
     # Check that the project is not currently present
     ap = AutoProcess(mockdir.dirn)
     self.assertFalse(
         'NewProj' in [p.name for p in ap.get_analysis_projects()])
     self.assertFalse(
         'NewProj' in
         [p.name for p in ap.get_analysis_projects_from_dirs()])
     self.assertFalse(
         os.path.exists(os.path.join(ap.analysis_dir, 'NewProj')))
     # Import the project
     import_project(ap, self.new_project_dir)
     self.assertTrue(
         'NewProj' in [p.name for p in ap.get_analysis_projects()])
     self.assertTrue('NewProj' in
                     [p.name for p in ap.get_analysis_projects_from_dirs()])
     self.assertTrue(
         os.path.exists(os.path.join(ap.analysis_dir, 'NewProj')))
     # Verify via fresh AutoProcess object
     ap2 = AutoProcess(mockdir.dirn)
     self.assertTrue(
         'NewProj' in [p.name for p in ap2.get_analysis_projects()])
     self.assertTrue(
         'NewProj' in
         [p.name for p in ap2.get_analysis_projects_from_dirs()])
     self.assertTrue(
         os.path.exists(os.path.join(ap2.analysis_dir, 'NewProj')))
     # Check for QC report and ZIP file
     print(os.listdir(os.path.join(ap2.analysis_dir, 'NewProj')))
     for f in (
             "qc_report.html",
             "multiqc_report.html",
             "qc_report.NewProj.160621_M00879_0087_000000000-AGEW9.zip",
     ):
         f = os.path.join(ap2.analysis_dir, 'NewProj', f)
         self.assertTrue(os.path.exists(f), "Missing %s" % f)
Ejemplo n.º 6
0
    def test_run_qc(self):
        """run_qc: standard QC run
        """
        # Make mock illumina_qc.sh and multiqc
        MockIlluminaQcSh.create(os.path.join(self.bin, "illumina_qc.sh"))
        MockMultiQC.create(os.path.join(self.bin, "multiqc"))
        os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
        # Make mock analysis directory
        mockdir = MockAnalysisDirFactory.bcl2fastq2(
            '170901_M00879_0087_000000000-AGEW9',
            'miseq',
            metadata={"instrument_datestamp": "170901"},
            top_dir=self.dirn)
        mockdir.create()
        # Settings file with polling interval
        settings_ini = os.path.join(self.dirn, "settings.ini")
        with open(settings_ini, 'w') as s:
            s.write("""[general]
poll_interval = 0.5
""")
        # Make autoprocess instance
        ap = AutoProcess(analysis_dir=mockdir.dirn,
                         settings=Settings(settings_ini))
        # Run the QC
        status = run_qc(ap, run_multiqc=True, max_jobs=1)
        self.assertEqual(status, 0)
        # Check output and reports
        for p in ("AB", "CDE", "undetermined"):
            for f in ("qc", "qc_report.html", "qc_report.%s.%s_analysis.zip" %
                      (p, '170901_M00879_0087_000000000-AGEW9'),
                      "multiqc_report.html"):
                self.assertTrue(
                    os.path.exists(os.path.join(mockdir.dirn, p, f)),
                    "Missing %s in project '%s'" % (f, p))
            # Check zip file has MultiQC report
            zip_file = os.path.join(
                mockdir.dirn, p, "qc_report.%s.%s_analysis.zip" %
                (p, '170901_M00879_0087_000000000-AGEW9'))
            with zipfile.ZipFile(zip_file) as z:
                multiqc = os.path.join(
                    "qc_report.%s.%s_analysis" %
                    (p, '170901_M00879_0087_000000000-AGEW9'),
                    "multiqc_report.html")
                self.assertTrue(multiqc in z.namelist())
Ejemplo n.º 7
0
 def test_qcpipeline_non_default_log_dir(self):
     """QCPipeline: standard QC run using non-default log dir
     """
     # Make mock illumina_qc.sh and multiqc
     MockIlluminaQcSh.create(os.path.join(self.bin, "illumina_qc.sh"))
     MockMultiQC.create(os.path.join(self.bin, "multiqc"))
     os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
     # Make mock analysis project
     p = MockAnalysisProject(
         "PJB", ("PJB1_S1_R1_001.fastq.gz", "PJB1_S1_R2_001.fastq.gz"))
     p.create(top_dir=self.wd)
     # Non-default log dir
     log_dir = os.path.join(self.wd, "logs")
     self.assertFalse(os.path.exists(log_dir),
                      "Log dir '%s' already exists" % log_dir)
     # Set up and run the QC
     runqc = QCPipeline()
     runqc.add_project(AnalysisProject("PJB", os.path.join(self.wd, "PJB")),
                       multiqc=True,
                       log_dir=log_dir)
     status = runqc.run(poll_interval=0.5,
                        max_jobs=1,
                        runners={
                            'default': SimpleJobRunner(),
                        })
     # Check output and reports
     self.assertEqual(status, 0)
     self.assertTrue(os.path.isdir(os.path.join(self.wd, "PJB", "qc")),
                     "'qc' directory doesn't exist, but should")
     for f in ("qc_report.html",
               "qc_report.PJB.%s.zip" % os.path.basename(self.wd),
               "multiqc_report.html"):
         self.assertTrue(os.path.exists(os.path.join(self.wd, "PJB", f)),
                         "Missing %s" % f)
     # Check log directory
     self.assertTrue(os.path.exists(log_dir),
                     "Log dir '%s' not found" % log_dir)
Ejemplo n.º 8
0
    def test_run_qc_with_strandedness(self):
        """run_qc: standard QC run with strandedness determination
        """
        # Make mock illumina_qc.sh and multiqc
        MockIlluminaQcSh.create(os.path.join(self.bin, "illumina_qc.sh"))
        MockFastqStrandPy.create(os.path.join(self.bin, "fastq_strand.py"))
        MockMultiQC.create(os.path.join(self.bin, "multiqc"))
        os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
        # Make mock analysis directory
        mockdir = MockAnalysisDirFactory.bcl2fastq2(
            '170901_M00879_0087_000000000-AGEW9',
            'miseq',
            metadata={"instrument_datestamp": "170901"},
            project_metadata={
                "AB": {
                    "Organism": "human",
                },
                "CDE": {
                    "Organism": "mouse",
                }
            },
            top_dir=self.dirn)
        mockdir.create()
        # Settings file with fastq_strand indexes and
        # polling interval
        settings_ini = os.path.join(self.dirn, "settings.ini")
        with open(settings_ini, 'w') as s:
            s.write("""[general]
poll_interval = 0.5

[fastq_strand_indexes]
human = /data/genomeIndexes/hg38/STAR
mouse = /data/genomeIndexes/mm10/STAR
""")
        # Make autoprocess instance
        ap = AutoProcess(analysis_dir=mockdir.dirn,
                         settings=Settings(settings_ini))
        # Run the QC
        status = run_qc(ap, run_multiqc=True, max_jobs=1)
        self.assertEqual(status, 0)
        # Check the fastq_strand_conf files were created
        for p in ("AB", "CDE"):
            self.assertTrue(
                os.path.exists(
                    os.path.join(mockdir.dirn, p, "qc", "fastq_strand.conf")))
        # Check fastq_strand outputs are present
        for p in ("AB", "CDE"):
            fastq_strand_outputs = filter(
                lambda f: f.endswith("fastq_strand.txt"),
                os.listdir(os.path.join(mockdir.dirn, p, "qc")))
            self.assertTrue(len(fastq_strand_outputs) > 0)
        # Check output and reports
        for p in ("AB", "CDE", "undetermined"):
            for f in ("qc", "qc_report.html", "qc_report.%s.%s_analysis.zip" %
                      (p, '170901_M00879_0087_000000000-AGEW9'),
                      "multiqc_report.html"):
                self.assertTrue(
                    os.path.exists(os.path.join(mockdir.dirn, p, f)),
                    "Missing %s in project '%s'" % (f, p))
            # Check zip file has MultiQC report
            zip_file = os.path.join(
                mockdir.dirn, p, "qc_report.%s.%s_analysis.zip" %
                (p, '170901_M00879_0087_000000000-AGEW9'))
            with zipfile.ZipFile(zip_file) as z:
                multiqc = os.path.join(
                    "qc_report.%s.%s_analysis" %
                    (p, '170901_M00879_0087_000000000-AGEW9'),
                    "multiqc_report.html")
                self.assertTrue(multiqc in z.namelist())