Beispiel #1
0
 def test_expected_outputs_standardSE(self):
     """
     expected_outputs: standard single-end, no strandedness

     Builds a single-Fastq mock project and checks that the paths
     returned by ``expected_outputs`` for the "standardSE" protocol
     all sit in the project's "qc" directory and correspond exactly
     to the reference file names (nothing extra, nothing missing).
     """
     # Make mock analysis project
     p = MockAnalysisProject("PJB", ("PJB1_S1_R1_001.fastq.gz", ),
                             metadata={'Organism': 'Human'})
     p.create(top_dir=self.wd)
     project_dir = os.path.join(self.wd, p.name)
     qc_dir = os.path.join(project_dir, "qc")
     # Reference outputs
     reference_outputs = (
         "PJB1_S1_R1_001_fastqc",
         "PJB1_S1_R1_001_fastqc.html",
         "PJB1_S1_R1_001_fastqc.zip",
         "PJB1_S1_R1_001_model_organisms_screen.png",
         "PJB1_S1_R1_001_model_organisms_screen.txt",
         "PJB1_S1_R1_001_other_organisms_screen.png",
         "PJB1_S1_R1_001_other_organisms_screen.txt",
         "PJB1_S1_R1_001_rRNA_screen.png",
         "PJB1_S1_R1_001_rRNA_screen.txt",
     )
     expected = expected_outputs(AnalysisProject(p.name, project_dir),
                                 "qc",
                                 qc_protocol="standardSE")
     # Each reported output must be in the QC dir and have a known
     # name (assertIn shows the offending value if the check fails)
     for e in expected:
         self.assertEqual(os.path.dirname(e), qc_dir)
         self.assertIn(os.path.basename(e), reference_outputs)
     # Each reference name must have been reported
     for r in reference_outputs:
         self.assertIn(os.path.join(qc_dir, r), expected)
Beispiel #2
0
 def test_qcpipeline(self):
     """QCPipeline: standard QC run

     Runs the pipeline with MultiQC enabled on a mock project with
     two paired-end samples, then checks for a zero exit status and
     the presence of the QC directory and the report files.
     """
     # Create mock illumina_qc.sh and multiqc under self.bin and
     # prepend that directory to PATH
     MockIlluminaQcSh.create(os.path.join(self.bin, "illumina_qc.sh"))
     MockMultiQC.create(os.path.join(self.bin, "multiqc"))
     os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
     # Build the mock analysis project on disk
     fastqs = ("PJB1_S1_R1_001.fastq.gz", "PJB1_S1_R2_001.fastq.gz",
               "PJB2_S2_R1_001.fastq.gz", "PJB2_S2_R2_001.fastq.gz")
     MockAnalysisProject("PJB", fastqs).create(top_dir=self.wd)
     project_dir = os.path.join(self.wd, "PJB")
     # Configure the pipeline and run it
     pipeline = QCPipeline()
     pipeline.add_project(AnalysisProject("PJB", project_dir),
                          multiqc=True)
     job_runners = {'default': SimpleJobRunner()}
     status = pipeline.run(poll_interval=0.5,
                           max_jobs=1,
                           runners=job_runners)
     # Verify exit status and outputs
     self.assertEqual(status, 0)
     zip_name = "qc_report.PJB.%s.zip" % os.path.basename(self.wd)
     for name in ("qc", "qc_report.html", zip_name,
                  "multiqc_report.html"):
         self.assertTrue(os.path.exists(os.path.join(project_dir, name)),
                         "Missing %s" % name)
Beispiel #3
0
 def test_check_illumina_qc_outputs_singlecell_some_missing(self):
     """
     check_illumina_qc_outputs: some illumina_qc.sh outputs missing (singlecell)

     Two of the R2 QC outputs are deleted after the QC artefacts have
     been added; the check is expected to return just the R2 Fastq.
     """
     # Build the mock analysis project on disk
     fastqs = (
         "PJB1_S1_R1_001.fastq.gz",
         "PJB1_S1_R2_001.fastq.gz",
     )
     mock_project = MockAnalysisProject("PJB", fastqs,
                                        metadata={'Organism': 'Human'})
     mock_project.create(top_dir=self.wd)
     # Populate the project with QC artefacts
     project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
     updater = UpdateAnalysisProject(project)
     updater.add_qc_outputs(include_fastq_strand=False,
                            include_multiqc=False)
     # Delete a couple of the R2 outputs
     removed = ("PJB1_S1_R2_001_fastqc.html",
                "PJB1_S1_R2_001_model_organisms_screen.txt")
     for name in removed:
         os.remove(os.path.join(project.qc_dir, name))
     # Only the R2 Fastq should be reported
     missing = check_illumina_qc_outputs(project,
                                         qc_dir="qc",
                                         qc_protocol="singlecell")
     r2_fastq = os.path.join(project.fastq_dir, "PJB1_S1_R2_001.fastq.gz")
     self.assertEqual(missing, [r2_fastq])
Beispiel #4
0
 def test_qcpipeline_with_strandedness(self):
     """QCPipeline: standard QC run with strandedness determination

     A mock fastq_strand.py is made available alongside the other
     mock tools and ``fastq_strand_indexes`` is passed to the run;
     the run is expected to succeed and produce the QC directory and
     the report files.
     """
     # Create mock illumina_qc.sh, multiqc and fastq_strand.py under
     # self.bin and prepend that directory to PATH
     MockIlluminaQcSh.create(os.path.join(self.bin, "illumina_qc.sh"))
     MockMultiQC.create(os.path.join(self.bin, "multiqc"))
     MockFastqStrandPy.create(os.path.join(self.bin, "fastq_strand.py"))
     os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
     # Build the mock analysis project on disk
     fastqs = ("PJB1_S1_R1_001.fastq.gz", "PJB1_S1_R2_001.fastq.gz",
               "PJB2_S2_R1_001.fastq.gz", "PJB2_S2_R2_001.fastq.gz")
     mock_project = MockAnalysisProject("PJB", fastqs,
                                        metadata={'Organism': 'Human'})
     mock_project.create(top_dir=self.wd)
     project_dir = os.path.join(self.wd, "PJB")
     # Configure the pipeline and run it
     pipeline = QCPipeline()
     pipeline.add_project(AnalysisProject("PJB", project_dir),
                          multiqc=True)
     strand_indexes = {'human': '/data/hg38/star_index'}
     job_runners = {'default': SimpleJobRunner()}
     status = pipeline.run(fastq_strand_indexes=strand_indexes,
                           poll_interval=0.5,
                           max_jobs=1,
                           runners=job_runners)
     # Verify exit status and outputs
     self.assertEqual(status, 0)
     zip_name = "qc_report.PJB.%s.zip" % os.path.basename(self.wd)
     for name in ("qc", "qc_report.html", zip_name,
                  "multiqc_report.html"):
         self.assertTrue(os.path.exists(os.path.join(project_dir, name)),
                         "Missing %s" % name)
Beispiel #5
0
 def test_qcpipeline_with_batching_fails_for_missing_outputs(self):
     """QCPipeline: standard QC run with batching fails for missing outputs

     The mock illumina_qc.sh is created with ``fastqc=False`` and
     ``exit_code=1`` and the run uses ``batch_size=3``. The run is
     expected to return status 1, leave a "qc" directory behind and
     produce none of the report files.
     """
     # Create the mock illumina_qc.sh and multiqc under self.bin and
     # prepend that directory to PATH
     MockIlluminaQcSh.create(os.path.join(self.bin, "illumina_qc.sh"),
                             fastqc=False,
                             exit_code=1)
     MockMultiQC.create(os.path.join(self.bin, "multiqc"))
     os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
     # Build the mock analysis project on disk
     fastqs = ("PJB1_S1_R1_001.fastq.gz", "PJB1_S1_R2_001.fastq.gz",
               "PJB2_S2_R1_001.fastq.gz", "PJB2_S2_R2_001.fastq.gz")
     MockAnalysisProject("PJB", fastqs).create(top_dir=self.wd)
     project_dir = os.path.join(self.wd, "PJB")
     # Configure the pipeline and run it in batches
     pipeline = QCPipeline()
     pipeline.add_project(AnalysisProject("PJB", project_dir),
                          multiqc=True)
     job_runners = {'default': SimpleJobRunner()}
     status = pipeline.run(poll_interval=0.5,
                           max_jobs=1,
                           batch_size=3,
                           runners=job_runners)
     # The run should fail but still leave the QC directory
     self.assertEqual(status, 1)
     self.assertTrue(os.path.exists(os.path.join(project_dir, "qc")),
                     "Missing 'qc'")
     # None of the reports should have been written
     zip_name = "qc_report.PJB.%s.zip" % os.path.basename(self.wd)
     for name in ("qc_report.html", zip_name, "multiqc_report.html"):
         self.assertFalse(os.path.exists(os.path.join(project_dir, name)),
                          "Found %s, shouldn't be present" % name)
Beispiel #6
0
 def test_check_fastq_strand_outputs_singlecell_missing(self):
     """
     check_fastq_strand_outputs: fastq_strand.py output missing (singlecell)

     With an empty fastq_strand.conf and no QC outputs added, the
     check is expected to return a list holding a single one-element
     tuple containing the R2 Fastq.
     """
     # Build the mock analysis project on disk and load it
     fastqs = (
         "PJB1_S1_R1_001.fastq.gz",
         "PJB1_S1_R2_001.fastq.gz",
     )
     mock_project = MockAnalysisProject("PJB", fastqs,
                                        metadata={'Organism': 'Human'})
     mock_project.create(top_dir=self.wd)
     project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
     # Write an empty fastq_strand.conf in the project directory
     fastq_strand_conf = os.path.join(project.dirn, "fastq_strand.conf")
     with open(fastq_strand_conf, 'w') as conf:
         conf.write("")
     # Check the outputs
     missing = check_fastq_strand_outputs(project,
                                          "qc",
                                          fastq_strand_conf,
                                          qc_protocol="singlecell")
     r2_fastq = os.path.join(project.fastq_dir, "PJB1_S1_R2_001.fastq.gz")
     self.assertEqual(missing, [(r2_fastq, )])
 def _make_analysis_project(self, paired_end=True):
     """Create a mock analysis project populated with fake QC outputs.

     Arguments:
       paired_end (bool): if True then R1 and R2 Fastq names are
         generated for each sample, otherwise R1 only

     Returns:
       String: path to the mock project directory.
     """
     self._make_working_dir()
     # Fastq names for samples PJB1 and PJB2
     read_numbers = (1, 2) if paired_end else (1, )
     fastq_names = ["%s_S%d_R%d_001.fastq.gz" % (sample, index, read)
                    for index, sample in enumerate(('PJB1', 'PJB2'),
                                                   start=1)
                    for read in read_numbers]
     # Create the mock project directory
     self.analysis_dir = MockAnalysisProject('PJB', fastq_names)
     self.analysis_dir.create(top_dir=self.wd)
     # Add the QC directory and its logs subdirectory
     qc_dir = os.path.join(self.wd, self.analysis_dir.name, 'qc')
     os.mkdir(qc_dir)
     os.mkdir(os.path.join(qc_dir, 'logs'))
     # Fake FastQC and fastq_screen products for every Fastq
     for fq in fastq_names:
         MockQCOutputs.fastqc_v0_11_2(fq, qc_dir)
         for screen in ('model_organisms', 'other_organisms', 'rRNA'):
             MockQCOutputs.fastq_screen_v0_9_2(fq, qc_dir, screen)
     return os.path.join(self.wd, self.analysis_dir.name)
 def test_determine_qc_protocol_standardPE(self):
     """determine_qc_protocol: standard paired-end run

     A project with R1/R2 Fastq pairs and no metadata supplied is
     expected to be assigned the "standardPE" protocol.
     """
     # Build the mock analysis project on disk
     fastqs = ("PJB1_S1_R1_001.fastq.gz", "PJB1_S1_R2_001.fastq.gz",
               "PJB2_S2_R1_001.fastq.gz", "PJB2_S2_R2_001.fastq.gz")
     MockAnalysisProject("PJB", fastqs).create(top_dir=self.wd)
     # Load it and check the protocol
     project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
     protocol = determine_qc_protocol(project)
     self.assertEqual(protocol, "standardPE")
 def test_verify_qc_no_outputs(self):
     """verify_qc: project with no QC outputs

     No QC outputs are added to the mock project, so verification is
     expected to give a false result.
     """
     # Build the mock analysis project on disk
     fastqs = ("PJB1_S1_R1_001.fastq.gz", "PJB1_S1_R2_001.fastq.gz",
               "PJB2_S2_R1_001.fastq.gz", "PJB2_S2_R2_001.fastq.gz")
     MockAnalysisProject("PJB", fastqs).create(top_dir=self.wd)
     # Load it and verify
     project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
     verified = verify_qc(project)
     self.assertFalse(verified)
Beispiel #10
0
 def test_determine_qc_protocol_10xchromium3v3(self):
     """determine_qc_protocol: single-cell run (10xGenomics Chromium 3'v3)

     A project whose 'Single cell platform' metadata is
     "10xGenomics Chromium 3'v3" is expected to be assigned the
     "singlecell" protocol.
     """
     # Build the mock analysis project on disk
     fastqs = ("PJB1_S1_R1_001.fastq.gz", "PJB1_S1_R2_001.fastq.gz",
               "PJB2_S2_R1_001.fastq.gz", "PJB2_S2_R2_001.fastq.gz")
     metadata = {'Single cell platform': "10xGenomics Chromium 3'v3"}
     MockAnalysisProject("PJB", fastqs,
                         metadata=metadata).create(top_dir=self.wd)
     # Load it and check the protocol
     project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
     protocol = determine_qc_protocol(project)
     self.assertEqual(protocol, "singlecell")
Beispiel #11
0
 def test_verify_qc_all_outputs(self):
     """verify_qc: project with all QC outputs present

     QC outputs are added to the mock project with the default
     ``add_qc_outputs`` settings, after which verification is
     expected to give a true result.
     """
     # Build the mock analysis project on disk
     fastqs = ("PJB1_S1_R1_001.fastq.gz", "PJB1_S1_R2_001.fastq.gz",
               "PJB2_S2_R1_001.fastq.gz", "PJB2_S2_R2_001.fastq.gz")
     MockAnalysisProject("PJB", fastqs).create(top_dir=self.wd)
     # Load it and add the QC outputs
     project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
     updater = UpdateAnalysisProject(project)
     updater.add_qc_outputs()
     # Verify
     verified = verify_qc(project)
     self.assertTrue(verified)
Beispiel #12
0
 def setUp(self):
     """Create a temporary directory holding a mock "PJB" project.

     The mock project has two Fastq pairs (PJB1_GEX and PJB2_MC) and
     metadata for organism 'Human' and single cell platform
     "10xGenomics Chromium 3'v3". The temp directory is stored in
     ``self.dirn`` and the loaded project in ``self.project``.
     """
     # Temporary working directory
     self.dirn = tempfile.mkdtemp(suffix='TestCellrangerMulti')
     # Build the mock analysis project on disk
     fastqs = (
         "PJB1_GEX_S1_R1_001.fastq.gz",
         "PJB1_GEX_S1_R2_001.fastq.gz",
         "PJB2_MC_S2_R1_001.fastq.gz",
         "PJB2_MC_S2_R2_001.fastq.gz",
     )
     metadata = {
         'Organism': 'Human',
         'Single cell platform': "10xGenomics Chromium 3'v3",
     }
     mock_project = MockAnalysisProject("PJB", fastqs, metadata=metadata)
     mock_project.create(top_dir=self.dirn)
     # Load the project for use by the tests
     self.project = AnalysisProject("PJB", os.path.join(self.dirn, "PJB"))
Beispiel #13
0
 def test_determine_qc_protocol_10xchromium3v2_atac_seq(self):
     """determine_qc_protocol: single-cell ATAC-seq (10xGenomics Single Cell ATAC)

     A project with platform "10xGenomics Single Cell ATAC" and
     library type "scATAC-seq" is expected to be assigned the
     "10x_scATAC" protocol.
     """
     # Build the mock analysis project on disk
     fastqs = ("PJB1_S1_R1_001.fastq.gz", "PJB1_S1_R2_001.fastq.gz",
               "PJB2_S2_R1_001.fastq.gz", "PJB2_S2_R2_001.fastq.gz")
     metadata = {
         'Single cell platform': "10xGenomics Single Cell ATAC",
         'Library type': "scATAC-seq",
     }
     MockAnalysisProject("PJB", fastqs,
                         metadata=metadata).create(top_dir=self.wd)
     # Load it and check the protocol
     project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
     protocol = determine_qc_protocol(project)
     self.assertEqual(protocol, "10x_scATAC")
Beispiel #14
0
 def _make_mock_analysis_project(self, single_cell_platform, library_type):
     """Create a mock "PJB" project with the supplied metadata.

     Arguments:
       single_cell_platform: value passed for the
         'Single cell platform' metadata item
       library_type: value passed for the 'Library type'
         metadata item

     Returns:
       String: path to the "PJB" directory under ``self.wd``.
     """
     fastqs = (
         "PJB1_S1_L001_R1_001.fastq.gz",
         "PJB1_S1_L001_R2_001.fastq.gz",
     )
     metadata = {
         'Single cell platform': single_cell_platform,
         'Library type': library_type,
     }
     mock_project = MockAnalysisProject('PJB',
                                        fastq_names=fastqs,
                                        metadata=metadata)
     mock_project.create(top_dir=self.wd)
     return os.path.join(self.wd, 'PJB')
Beispiel #15
0
 def test_expected_outputs_standardPE_with_strand(self):
     """
     expected_outputs: standard paired-end with strandedness

     Builds a mock project with one Fastq pair plus an empty
     fastq_strand.conf, and checks that the paths returned by
     ``expected_outputs`` for the "standardPE" protocol all sit in
     the project's "qc" directory and correspond exactly to the
     reference file names (nothing extra, nothing missing).
     """
     # Make mock analysis project
     p = MockAnalysisProject("PJB", (
         "PJB1_S1_R1_001.fastq.gz",
         "PJB1_S1_R2_001.fastq.gz",
     ),
                             metadata={'Organism': 'Human'})
     p.create(top_dir=self.wd)
     project_dir = os.path.join(self.wd, p.name)
     qc_dir = os.path.join(project_dir, "qc")
     # Make mock fastq_strand
     mock_fastq_strand_conf = os.path.join(project_dir,
                                           "fastq_strand.conf")
     with open(mock_fastq_strand_conf, 'w') as fp:
         fp.write("")
     # Reference outputs
     reference_outputs = (
         "PJB1_S1_R1_001_fastqc",
         "PJB1_S1_R1_001_fastqc.html",
         "PJB1_S1_R1_001_fastqc.zip",
         "PJB1_S1_R1_001_model_organisms_screen.png",
         "PJB1_S1_R1_001_model_organisms_screen.txt",
         "PJB1_S1_R1_001_other_organisms_screen.png",
         "PJB1_S1_R1_001_other_organisms_screen.txt",
         "PJB1_S1_R1_001_rRNA_screen.png",
         "PJB1_S1_R1_001_rRNA_screen.txt",
         "PJB1_S1_R2_001_fastqc",
         "PJB1_S1_R2_001_fastqc.html",
         "PJB1_S1_R2_001_fastqc.zip",
         "PJB1_S1_R2_001_model_organisms_screen.png",
         "PJB1_S1_R2_001_model_organisms_screen.txt",
         "PJB1_S1_R2_001_other_organisms_screen.png",
         "PJB1_S1_R2_001_other_organisms_screen.txt",
         "PJB1_S1_R2_001_rRNA_screen.png",
         "PJB1_S1_R2_001_rRNA_screen.txt",
         "PJB1_S1_R1_001_fastq_strand.txt",
     )
     expected = expected_outputs(AnalysisProject(p.name, project_dir),
                                 "qc",
                                 fastq_strand_conf=mock_fastq_strand_conf,
                                 qc_protocol="standardPE")
     # Each reported output must be in the QC dir and have a known
     # name (assertIn shows the offending value if the check fails)
     for e in expected:
         self.assertEqual(os.path.dirname(e), qc_dir)
         self.assertIn(os.path.basename(e), reference_outputs)
     # Each reference name must have been reported
     for r in reference_outputs:
         self.assertIn(os.path.join(qc_dir, r), expected)
 def _make_analysis_project(self, paired_end=True):
     """Build a mock analysis project containing fake QC products.

     Arguments:
       paired_end (bool): when True both R1 and R2 Fastq names are
         generated per sample; when False only R1 names are made

     Returns:
       String: path to the mock project directory.
     """
     # Ensure there is a working directory to build in
     self._make_working_dir()
     # Work out the Fastq names for samples PJB1 and PJB2
     read_ids = [1, 2] if paired_end else [1]
     fastq_names = []
     for sample_number, sample in enumerate(("PJB1", "PJB2"), start=1):
         fastq_names.extend(
             "%s_S%d_R%d_001.fastq.gz" % (sample, sample_number, read_id)
             for read_id in read_ids)
     # Make the mock project directory
     self.analysis_dir = MockAnalysisProject("PJB", fastq_names)
     self.analysis_dir.create(top_dir=self.wd)
     # Make the QC directory and its logs subdirectory
     qc_dir = os.path.join(self.wd, self.analysis_dir.name, "qc")
     os.mkdir(qc_dir)
     os.mkdir(os.path.join(qc_dir, "logs"))
     # Add fake FastQC and fastq_screen products for each Fastq
     screen_names = ("model_organisms", "other_organisms", "rRNA")
     for fastq in fastq_names:
         MockQCOutputs.fastqc_v0_11_2(fastq, qc_dir)
         for screen_name in screen_names:
             MockQCOutputs.fastq_screen_v0_9_2(fastq, qc_dir, screen_name)
     return os.path.join(self.wd, self.analysis_dir.name)
Beispiel #17
0
 def test_check_illumina_qc_outputs_standardSE_all_missing(self):
     """
     check_illumina_qc_outputs: all illumina_qc.sh outputs missing (standardSE)

     No QC outputs are added, so the check is expected to return
     all of the project's Fastqs.
     """
     # Build the mock analysis project on disk
     mock_project = MockAnalysisProject("PJB",
                                        ("PJB1_S1_R1_001.fastq.gz", ),
                                        metadata={'Organism': 'Human'})
     mock_project.create(top_dir=self.wd)
     # Load the project
     project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
     # Every Fastq should be reported
     missing = check_illumina_qc_outputs(project,
                                         qc_dir="qc",
                                         qc_protocol="standardSE")
     self.assertEqual(missing, project.fastqs)
Beispiel #18
0
 def test_verify_qc_incomplete_outputs(self):
     """verify_qc: project with some QC outputs missing

     One FastQC HTML file is deleted after the QC outputs have been
     added, so verification is expected to give a false result.
     """
     # Build the mock analysis project on disk
     fastqs = ("PJB1_S1_R1_001.fastq.gz", "PJB1_S1_R2_001.fastq.gz",
               "PJB2_S2_R1_001.fastq.gz", "PJB2_S2_R2_001.fastq.gz")
     MockAnalysisProject("PJB", fastqs).create(top_dir=self.wd)
     project_dir = os.path.join(self.wd, "PJB")
     # Load it and add the QC outputs
     project = AnalysisProject("PJB", project_dir)
     updater = UpdateAnalysisProject(project)
     updater.add_qc_outputs()
     # Delete one of the outputs
     os.remove(os.path.join(project_dir, "qc",
                            "PJB1_S1_R1_001_fastqc.html"))
     # Verify
     verified = verify_qc(project)
     self.assertFalse(verified)
Beispiel #19
0
 def test_report_qc_no_outputs(self):
     """report_qc: project with no QC outputs

     No QC outputs are added to the mock project, so ``report_qc``
     is expected to return 1 and none of the report files should be
     present in the project directory afterwards.
     """
     # Make mock analysis project
     p = MockAnalysisProject(
         "PJB", ("PJB1_S1_R1_001.fastq.gz", "PJB1_S1_R2_001.fastq.gz",
                 "PJB2_S2_R1_001.fastq.gz", "PJB2_S2_R2_001.fastq.gz"))
     p.create(top_dir=self.wd)
     project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
     # Do reporting
     self.assertEqual(report_qc(project), 1)
     # Check output and reports
     # Both ZIP naming schemes are checked (the plain name, and the
     # working-directory-qualified name used by the sibling report
     # tests) so the absence check cannot pass vacuously because of
     # a mismatched file name
     for f in ("qc_report.html",
               "qc_report.PJB.zip",
               "qc_report.PJB.%s.zip" % os.path.basename(self.wd),
               "multiqc_report.html"):
         self.assertFalse(os.path.exists(os.path.join(self.wd, "PJB", f)),
                          "Found %s (should be missing)" % f)
 def test_determine_qc_protocol_10x_visium(self):
     """determine_qc_protocol: spatial RNA-seq run (10xGenomics Visium)

     A project with platform "10xGenomics Visium" and a spatial
     RNA-seq library type is expected to be assigned the
     "10x_Visium" protocol.
     """
     # Make mock analysis project
     # The library type matches the spatial RNA-seq scenario named
     # in the test description (previously "scATAC-seq", which
     # contradicted it)
     p = MockAnalysisProject("PJB",("PJB1_S1_R1_001.fastq.gz",
                                    "PJB1_S1_R2_001.fastq.gz",
                                    "PJB2_S2_R1_001.fastq.gz",
                                    "PJB2_S2_R2_001.fastq.gz"),
                             metadata={'Single cell platform':
                                       "10xGenomics Visium",
                                       'Library type':
                                       "spatial RNA-seq"})
     p.create(top_dir=self.wd)
     project = AnalysisProject("PJB",
                               os.path.join(self.wd,"PJB"))
     self.assertEqual(determine_qc_protocol(project),
                      "10x_Visium")
 def test_determine_qc_protocol_10xchromium3v3_cellplex(self):
     """determine_qc_protocol: cell multiplexing CellPlex (10xGenomics Chromium 3'v3)

     A project with platform "10xGenomics Chromium 3'v3" and library
     type "CellPlex" is expected to be assigned the "10x_CellPlex"
     protocol.
     """
     # Build the mock analysis project on disk
     fastqs = ("PJB1_S1_R1_001.fastq.gz",
               "PJB1_S1_R2_001.fastq.gz",
               "PJB2_S2_R1_001.fastq.gz",
               "PJB2_S2_R2_001.fastq.gz")
     metadata = {
         'Single cell platform': "10xGenomics Chromium 3'v3",
         'Library type': "CellPlex",
     }
     MockAnalysisProject("PJB", fastqs,
                         metadata=metadata).create(top_dir=self.wd)
     # Load it and check the protocol
     project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
     protocol = determine_qc_protocol(project)
     self.assertEqual(protocol, "10x_CellPlex")
Beispiel #22
0
 def test_check_illumina_qc_outputs_standardSE_all_present(self):
     """
     check_illumina_qc_outputs: all illumina_qc.sh outputs present (standardSE)

     QC artefacts are added (without fastq_strand or MultiQC
     outputs), so the check is expected to return an empty list.
     """
     # Build the mock analysis project on disk
     mock_project = MockAnalysisProject("PJB",
                                        ("PJB1_S1_R1_001.fastq.gz", ),
                                        metadata={'Organism': 'Human'})
     mock_project.create(top_dir=self.wd)
     # Load it and populate with QC artefacts
     project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
     updater = UpdateAnalysisProject(project)
     updater.add_qc_outputs(include_fastq_strand=False,
                            include_multiqc=False)
     # Nothing should be reported
     missing = check_illumina_qc_outputs(project,
                                         qc_dir="qc",
                                         qc_protocol="standardSE")
     self.assertEqual(missing, [])
Beispiel #23
0
 def test_report_qc_all_outputs(self):
     """report_qc: project with all QC outputs present

     QC outputs are added with the default ``add_qc_outputs``
     settings; ``report_qc`` is then expected to return 0 and the
     report files are expected to exist in the project directory.
     """
     # Build the mock analysis project on disk
     fastqs = ("PJB1_S1_R1_001.fastq.gz", "PJB1_S1_R2_001.fastq.gz",
               "PJB2_S2_R1_001.fastq.gz", "PJB2_S2_R2_001.fastq.gz")
     MockAnalysisProject("PJB", fastqs).create(top_dir=self.wd)
     project_dir = os.path.join(self.wd, "PJB")
     # Load it and add the QC outputs
     project = AnalysisProject("PJB", project_dir)
     updater = UpdateAnalysisProject(project)
     updater.add_qc_outputs()
     # Generate the report
     status = report_qc(project)
     self.assertEqual(status, 0)
     # Verify the report files
     # NOTE(review): other tests in this file look for a ZIP named
     # "qc_report.PJB.<working dir name>.zip" - confirm which naming
     # scheme applies to the report_qc version under test
     report_files = ("qc_report.html", "qc_report.PJB.zip",
                     "multiqc_report.html")
     for name in report_files:
         self.assertTrue(os.path.exists(os.path.join(project_dir, name)),
                         "Missing %s" % name)
 def test_determine_qc_protocol_parse_evercode(self):
     """determine_qc_protocol: Parse Evercode single cell RNA-seq run

     A project with platform "Parse Evercode" and library type
     "scRNA-seq" is expected to be assigned the "ParseEvercode"
     protocol. The mock project also includes I1/I2 Fastqs for
     sample PJB1.
     """
     # Build the mock analysis project on disk
     fastqs = ("PJB1_S1_R1_001.fastq.gz",
               "PJB1_S1_R2_001.fastq.gz",
               "PJB2_S2_R1_001.fastq.gz",
               "PJB2_S2_R2_001.fastq.gz",
               "PJB1_S1_I1_001.fastq.gz",
               "PJB1_S1_I2_001.fastq.gz")
     metadata = {
         'Single cell platform': "Parse Evercode",
         'Library type': "scRNA-seq",
     }
     MockAnalysisProject("PJB", fastqs,
                         metadata=metadata).create(top_dir=self.wd)
     # Load it and check the protocol
     project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
     protocol = determine_qc_protocol(project)
     self.assertEqual(protocol, "ParseEvercode")
 def test_determine_qc_protocol_10x_multiome_gex(self):
     """determine_qc_protocol: single cell multiome GEX run (10xGenomics Multiome GEX)

     A project with platform "10xGenomics Single Cell Multiome" and
     library type "GEX" is expected to be assigned the
     "10x_Multiome_GEX" protocol. The mock project also includes
     I1/I2 Fastqs for sample PJB1.
     """
     # Build the mock analysis project on disk
     fastqs = ("PJB1_S1_R1_001.fastq.gz",
               "PJB1_S1_R2_001.fastq.gz",
               "PJB2_S2_R1_001.fastq.gz",
               "PJB2_S2_R2_001.fastq.gz",
               "PJB1_S1_I1_001.fastq.gz",
               "PJB1_S1_I2_001.fastq.gz")
     metadata = {
         'Single cell platform': "10xGenomics Single Cell Multiome",
         'Library type': "GEX",
     }
     MockAnalysisProject("PJB", fastqs,
                         metadata=metadata).create(top_dir=self.wd)
     # Load it and check the protocol
     project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
     protocol = determine_qc_protocol(project)
     self.assertEqual(protocol, "10x_Multiome_GEX")
Beispiel #26
0
 def test_check_fastq_strand_outputs_standardSE_present(self):
     """
     check_fastq_strand_outputs: fastq_strand.py output present (standardSE)

     QC outputs are added for the "standardSE" protocol with
     fastq_strand outputs included, so the check is expected to
     return an empty list.
     """
     # Build the mock analysis project on disk and load it
     mock_project = MockAnalysisProject("PJB",
                                        ("PJB1_S1_R1_001.fastq.gz", ),
                                        metadata={'Organism': 'Human'})
     mock_project.create(top_dir=self.wd)
     project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
     # Add QC outputs, including those for fastq_strand
     updater = UpdateAnalysisProject(project)
     updater.add_qc_outputs(protocol="standardSE",
                            include_fastq_strand=True,
                            include_multiqc=False)
     fastq_strand_conf = os.path.join(project.dirn, "fastq_strand.conf")
     # Nothing should be reported
     missing = check_fastq_strand_outputs(project,
                                          "qc",
                                          fastq_strand_conf,
                                          qc_protocol="standardSE")
     self.assertEqual(missing, [])
Beispiel #27
0
 def test_report_qc_incomplete_outputs(self):
     """report_qc: project with some QC outputs missing

     One FastQC HTML file is deleted after the QC outputs have been
     added; ``report_qc`` is expected to return 1, while the report
     files are still expected to exist in the project directory.
     """
     # Build the mock analysis project on disk
     fastqs = ("PJB1_S1_R1_001.fastq.gz", "PJB1_S1_R2_001.fastq.gz",
               "PJB2_S2_R1_001.fastq.gz", "PJB2_S2_R2_001.fastq.gz")
     MockAnalysisProject("PJB", fastqs).create(top_dir=self.wd)
     project_dir = os.path.join(self.wd, "PJB")
     # Load it and add the QC outputs
     project = AnalysisProject("PJB", project_dir)
     updater = UpdateAnalysisProject(project)
     updater.add_qc_outputs()
     # Delete one of the outputs
     os.remove(os.path.join(project_dir, "qc",
                            "PJB1_S1_R1_001_fastqc.html"))
     # Generate the report
     status = report_qc(project)
     self.assertEqual(status, 1)
     # Verify the report files
     zip_name = "qc_report.PJB.%s.zip" % os.path.basename(self.wd)
     for name in ("qc_report.html", zip_name, "multiqc_report.html"):
         self.assertTrue(os.path.exists(os.path.join(project_dir, name)),
                         "Missing %s" % name)
Beispiel #28
0
 def test_qcpipeline_non_default_log_dir(self):
     """QCPipeline: standard QC run using non-default log dir

     The project is added to the pipeline with ``log_dir`` set to a
     "logs" directory under the working directory, which must not
     exist beforehand. After a successful run the QC directory, the
     report files and the log directory are all expected to exist.
     """
     # Create mock illumina_qc.sh and multiqc under self.bin and
     # prepend that directory to PATH
     MockIlluminaQcSh.create(os.path.join(self.bin, "illumina_qc.sh"))
     MockMultiQC.create(os.path.join(self.bin, "multiqc"))
     os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
     # Build the mock analysis project on disk
     fastqs = ("PJB1_S1_R1_001.fastq.gz", "PJB1_S1_R2_001.fastq.gz")
     MockAnalysisProject("PJB", fastqs).create(top_dir=self.wd)
     project_dir = os.path.join(self.wd, "PJB")
     # The non-default log dir must not exist yet
     log_dir = os.path.join(self.wd, "logs")
     self.assertFalse(os.path.exists(log_dir),
                      "Log dir '%s' already exists" % log_dir)
     # Configure the pipeline and run it
     pipeline = QCPipeline()
     pipeline.add_project(AnalysisProject("PJB", project_dir),
                          multiqc=True,
                          log_dir=log_dir)
     job_runners = {'default': SimpleJobRunner()}
     status = pipeline.run(poll_interval=0.5,
                           max_jobs=1,
                           runners=job_runners)
     # Verify exit status and outputs
     self.assertEqual(status, 0)
     self.assertTrue(os.path.isdir(os.path.join(project_dir, "qc")),
                     "'qc' directory doesn't exist, but should")
     zip_name = "qc_report.PJB.%s.zip" % os.path.basename(self.wd)
     for name in ("qc_report.html", zip_name, "multiqc_report.html"):
         self.assertTrue(os.path.exists(os.path.join(project_dir, name)),
                         "Missing %s" % name)
     # Verify the log directory
     self.assertTrue(os.path.exists(log_dir),
                     "Log dir '%s' not found" % log_dir)
Beispiel #29
0
    def test_update_project_metadata_file_dont_uncomment_missing_project_when_dir_is_present(
            self):
        """
        AutoProcess.update_project_metadata_file: don't uncomment 'missing' project when dir is present
        """
        # Make an auto-process directory
        mockdir = MockAnalysisDirFactory.bcl2fastq2(
            '160621_K00879_0087_000000000-AGEW9',
            'hiseq',
            metadata={
                "run_number": 87,
                "source": "local"
            },
            top_dir=self.dirn)
        mockdir.create(no_project_dirs=True)
        # Create projects.info file with one project already listed
        with open(os.path.join(mockdir.dirn, "projects.info"), 'wt') as fp:
            fp.write(
                "#Project\tSamples\tUser\tLibrary\tSC_Platform\tOrganism\tPI\tComments\n#FG\tFG5,FG6\t.\t.\t.\t.\t.\tKeep me"
            )
        # Create the corresponding project
        project = MockAnalysisProject(
            'FG', ('FG5_S1_R1_001.fastq.gz', 'FG6_S1_R1_001.fastq.gz'))
        project.create(top_dir=mockdir.dirn)
        # Update the projects.info file
        AutoProcess(mockdir.dirn).update_project_metadata_file()
        # Check output - missing project kept but commented out
        with open(os.path.join(mockdir.dirn, "projects.info"), 'rt') as fp:
            print(fp.read())
        with open(os.path.join(mockdir.dirn, "projects.info"), 'rt') as fp:
            self.assertEqual(
                fp.read(),
                """#Project\tSamples\tUser\tLibrary\tSC_Platform\tOrganism\tPI\tComments
AB\tAB1,AB2\t.\t.\t.\t.\t.\t.
CDE\tCDE3,CDE4\t.\t.\t.\t.\t.\t.
#FG\tFG5,FG6\t.\t.\t.\t.\t.\tKeep me
""")
class TestQCReporter(unittest.TestCase):
    """Tests of QCReporter using mock projects with fake QC outputs."""

    def setUp(self):
        """Start each test without a working directory."""
        # The working dir is only created if a test asks for it
        self.wd = None

    def tearDown(self):
        """Remove the temporary working directory, if one was made."""
        if self.wd is None:
            return
        if os.path.isdir(self.wd):
            shutil.rmtree(self.wd)

    def _make_working_dir(self):
        """Create the temporary working directory on first use."""
        if self.wd is not None:
            return
        self.wd = tempfile.mkdtemp(suffix='.test_QCReporter')

    def _make_analysis_project(self, paired_end=True):
        """Create a mock analysis project populated with fake QC outputs.

        Arguments:
          paired_end (bool): if True then R1 and R2 Fastq names are
            generated for each sample, otherwise R1 only

        Returns:
          String: path to the mock project directory.
        """
        self._make_working_dir()
        # Fastq names for samples PJB1 and PJB2
        read_numbers = (1, 2) if paired_end else (1, )
        fastq_names = ["%s_S%d_R%d_001.fastq.gz" % (sample, index, read)
                       for index, sample in enumerate(('PJB1', 'PJB2'),
                                                      start=1)
                       for read in read_numbers]
        # Create the mock project directory
        self.analysis_dir = MockAnalysisProject('PJB', fastq_names)
        self.analysis_dir.create(top_dir=self.wd)
        # Add the QC directory and its logs subdirectory
        qc_dir = os.path.join(self.wd, self.analysis_dir.name, 'qc')
        os.mkdir(qc_dir)
        os.mkdir(os.path.join(qc_dir, 'logs'))
        # Fake FastQC and fastq_screen products for every Fastq
        for fq in fastq_names:
            MockQCOutputs.fastqc_v0_11_2(fq, qc_dir)
            for screen in ('model_organisms', 'other_organisms', 'rRNA'):
                MockQCOutputs.fastq_screen_v0_9_2(fq, qc_dir, screen)
        return os.path.join(self.wd, self.analysis_dir.name)

    def test_qcreporter_paired_end(self):
        """QCReporter: paired-end project verifies and writes a report"""
        project_dir = self._make_analysis_project(paired_end=True)
        reporter = QCReporter(AnalysisProject('PJB', project_dir))
        self.assertEqual(reporter.name, 'PJB')
        self.assertTrue(reporter.paired_end)
        self.assertTrue(reporter.verify())
        # Write the report and check that the file was created
        report_file = os.path.join(self.wd, 'report.PE.html')
        reporter.report(filename=report_file)
        self.assertTrue(os.path.exists(report_file))

    def test_qcreporter_single_end(self):
        """QCReporter: single-end project verifies and writes a report"""
        project_dir = self._make_analysis_project(paired_end=False)
        reporter = QCReporter(AnalysisProject('PJB', project_dir))
        self.assertEqual(reporter.name, 'PJB')
        self.assertFalse(reporter.paired_end)
        self.assertTrue(reporter.verify())
        # Write the report and check that the file was created
        report_file = os.path.join(self.wd, 'report.SE.html')
        reporter.report(filename=report_file)
        self.assertTrue(os.path.exists(report_file))
class TestQCReporter(unittest.TestCase):
    """Exercise QCReporter on mock paired-end and single-end projects."""

    def setUp(self):
        """Reset the working directory marker before each test."""
        # Created lazily by _make_working_dir
        self.wd = None

    def tearDown(self):
        """Delete the temporary working directory if it exists."""
        working_dir = self.wd
        if working_dir is not None and os.path.isdir(working_dir):
            shutil.rmtree(working_dir)

    def _make_working_dir(self):
        """Make a temporary working directory unless one is already set."""
        if self.wd is None:
            self.wd = tempfile.mkdtemp(suffix=".test_QCReporter")

    def _make_analysis_project(self, paired_end=True):
        """Build a mock analysis project containing fake QC products.

        Arguments:
          paired_end (bool): when True both R1 and R2 Fastq names are
            generated per sample; when False only R1 names are made

        Returns:
          String: path to the mock project directory.
        """
        # Ensure there is a working directory to build in
        self._make_working_dir()
        # Work out the Fastq names for samples PJB1 and PJB2
        read_ids = [1, 2] if paired_end else [1]
        fastq_names = []
        for sample_number, sample in enumerate(("PJB1", "PJB2"), start=1):
            fastq_names.extend(
                "%s_S%d_R%d_001.fastq.gz" % (sample, sample_number, read_id)
                for read_id in read_ids)
        # Make the mock project directory
        self.analysis_dir = MockAnalysisProject("PJB", fastq_names)
        self.analysis_dir.create(top_dir=self.wd)
        # Make the QC directory and its logs subdirectory
        qc_dir = os.path.join(self.wd, self.analysis_dir.name, "qc")
        os.mkdir(qc_dir)
        os.mkdir(os.path.join(qc_dir, "logs"))
        # Add fake FastQC and fastq_screen products for each Fastq
        screen_names = ("model_organisms", "other_organisms", "rRNA")
        for fastq in fastq_names:
            MockQCOutputs.fastqc_v0_11_2(fastq, qc_dir)
            for screen_name in screen_names:
                MockQCOutputs.fastq_screen_v0_9_2(fastq, qc_dir, screen_name)
        return os.path.join(self.wd, self.analysis_dir.name)

    def test_qcreporter_paired_end(self):
        """QCReporter: report generated for a paired-end project"""
        project = AnalysisProject(
            "PJB", self._make_analysis_project(paired_end=True))
        reporter = QCReporter(project)
        self.assertEqual(reporter.name, "PJB")
        self.assertTrue(reporter.paired_end)
        self.assertTrue(reporter.verify())
        # Generate the HTML report and confirm that it exists
        html_report = os.path.join(self.wd, "report.PE.html")
        reporter.report(filename=html_report)
        self.assertTrue(os.path.exists(html_report))

    def test_qcreporter_single_end(self):
        """QCReporter: report generated for a single-end project"""
        project = AnalysisProject(
            "PJB", self._make_analysis_project(paired_end=False))
        reporter = QCReporter(project)
        self.assertEqual(reporter.name, "PJB")
        self.assertFalse(reporter.paired_end)
        self.assertTrue(reporter.verify())
        # Generate the HTML report and confirm that it exists
        html_report = os.path.join(self.wd, "report.SE.html")
        reporter.report(filename=html_report)
        self.assertTrue(os.path.exists(html_report))