Beispiel #1
0
    def test_set_cell_count_fails_for_project_with_no_metadata(self):
        """
        set_cell_count_for_project: raises exception for project with no metadata

        The mock project is created with both helper arguments set to
        None. A 'metrics_summary.csv' file and a 'qc.info' file are
        added, then the update is expected to raise NotImplementedError
        and to leave the stored cell count as None.
        """
        # Mock project with neither metadata value supplied
        project_dir = self._make_mock_analysis_project(None, None)
        qc_dir = os.path.join(project_dir, "qc")
        # Mock 'outs' directory containing metrics_summary.csv
        outs_dir = os.path.join(qc_dir, "cellranger_count", "5.0.1",
                                "refdata-gex-GRCh38-2020-A", "PJB1", "outs")
        mkdirs(outs_dir)
        with open(os.path.join(outs_dir, "metrics_summary.csv"), 'wt') as fp:
            fp.write(METRICS_SUMMARY)
        # QC info file: one tab-separated key/value pair per line
        qc_info = ("Cellranger reference datasets"
                   "\t/data/refdata-gex-GRCh38-2020-A\n"
                   "Cellranger version\t5.0.1\n")
        with open(os.path.join(qc_dir, "qc.info"), 'wt') as fp:
            fp.write(qc_info)
        # No cell count should be stored before the update is attempted
        print("Checking number of cells")
        initial_count = AnalysisProject("PJB1",
                                        project_dir).info.number_of_cells
        self.assertEqual(initial_count, None)
        # The update itself must fail with NotImplementedError
        with self.assertRaises(NotImplementedError):
            set_cell_count_for_project(project_dir)
        # ...and the stored cell count must be left untouched
        final_count = AnalysisProject("PJB1",
                                      project_dir).info.number_of_cells
        self.assertEqual(final_count, None)
Beispiel #2
0
    def test_set_cell_count_for_multiome_gex_project(self):
        """
        set_cell_count_for_project: test for single cell multiome GEX

        Writes a mock 'summary.csv' (from MULTIOME_SUMMARY) and a
        'qc.info' file into the project's 'qc' directory, then checks
        that the stored cell count goes from None to 744.
        """
        # Mock project for the multiome platform with 'GEX' library
        project_dir = self._make_mock_analysis_project(
            "10xGenomics Single Cell Multiome", "GEX")
        qc_dir = os.path.join(project_dir, "qc")
        # Mock 'outs' directory containing summary.csv
        outs_dir = os.path.join(qc_dir, "cellranger_count", "1.0.0",
                                "refdata-cellranger-arc-GRCh38-2020-A",
                                "PJB1", "outs")
        mkdirs(outs_dir)
        with open(os.path.join(outs_dir, "summary.csv"), 'w') as fp:
            fp.write(MULTIOME_SUMMARY)
        # QC info file: one tab-separated key/value pair per line
        qc_info = ("Cellranger reference datasets"
                   "\t/data/refdata-cellranger-arc-GRCh38-2020-A\n"
                   "Cellranger version\t1.0.0\n")
        with open(os.path.join(qc_dir, "qc.info"), 'wt') as fp:
            fp.write(qc_info)
        # No cell count should be stored before the update
        print("Checking number of cells")
        count_before = AnalysisProject("PJB1",
                                       project_dir).info.number_of_cells
        self.assertEqual(count_before, None)
        # Run the update
        print("Updating number of cells")
        set_cell_count_for_project(project_dir)
        # Reload the project and check the stored value
        count_after = AnalysisProject("PJB1",
                                      project_dir).info.number_of_cells
        self.assertEqual(count_after, 744)
Beispiel #3
0
    def test_set_cell_count_project_missing_library_type(self):
        """
        set_cell_count_for_project: test for scRNA-seq when library not set

        The mock project is created with the second helper argument set
        to None. After adding 'metrics_summary.csv' and 'qc.info', the
        stored cell count is expected to go from None to 2272.
        """
        # Mock project with the library type left unset
        project_dir = self._make_mock_analysis_project(
            "10xGenomics Chromium 3'v3", None)
        qc_dir = os.path.join(project_dir, "qc")
        # Mock 'outs' directory containing metrics_summary.csv
        outs_dir = os.path.join(qc_dir, "cellranger_count", "5.0.1",
                                "refdata-gex-GRCh38-2020-A", "PJB1", "outs")
        mkdirs(outs_dir)
        with open(os.path.join(outs_dir, "metrics_summary.csv"), 'w') as fp:
            fp.write(METRICS_SUMMARY)
        # QC info file: one tab-separated key/value pair per line
        qc_info = ("Cellranger reference datasets"
                   "\t/data/refdata-gex-GRCh38-2020-A\n"
                   "Cellranger version\t5.0.1\n")
        with open(os.path.join(qc_dir, "qc.info"), 'wt') as fp:
            fp.write(qc_info)
        # No cell count should be stored before the update
        print("Checking number of cells")
        count_before = AnalysisProject("PJB1",
                                       project_dir).info.number_of_cells
        self.assertEqual(count_before, None)
        # Run the update
        print("Updating number of cells")
        set_cell_count_for_project(project_dir)
        # Reload the project and check the stored value
        count_after = AnalysisProject("PJB1",
                                      project_dir).info.number_of_cells
        self.assertEqual(count_after, 2272)
Beispiel #4
0
 def test_report_single_end_multiple_projects_with_zip_file_duplicated_names_no_data_dir(
         self):
     """
     report: single-end data: fails with two projects in one report (duplicated names/ZIP file/no data directory)

     Requests a combined report with ZIP archive for two single-end
     projects and expects an exception. Afterwards the HTML report
     is expected to exist but the ZIP archive is not.
     """
     # Make both project directories first, then load them
     project_names = ("PJB", "PJB2")
     project_dirs = [self._make_analysis_project(name=name,
                                                 paired_end=False)
                     for name in project_names]
     projects = tuple(AnalysisProject(name, dirn)
                      for name, dirn in zip(project_names, project_dirs))
     html_report = os.path.join(self.top_dir, 'PJB',
                                'report.multiple_projects.html')
     # Generating the report with a ZIP archive should raise
     with self.assertRaises(Exception):
         report(projects,
                title="QC report: PJB & PJB2",
                filename=html_report,
                make_zip=True)
     # HTML report is still written...
     html_path = os.path.join(self.top_dir, 'PJB',
                              'report.multiple_projects.html')
     self.assertTrue(os.path.exists(html_path))
     # ...but no ZIP archive is produced
     zip_path = os.path.join(self.top_dir, 'PJB',
                             'report.multiple_projects.PJB.zip')
     self.assertFalse(os.path.exists(zip_path))
Beispiel #5
0
    def test_set_cell_count_for_atac_project_2_0_0(self):
        """
        set_cell_count_for_project: test for scATAC-seq (Cellranger ATAC 2.0.0)

        Writes a mock 'summary.csv' (from ATAC_SUMMARY_2_0_0) and a
        'qc.info' file into the project's 'qc' directory, then checks
        that the stored cell count goes from None to 3582.
        """
        # Mock project for the single cell ATAC platform
        project_dir = self._make_mock_analysis_project(
            "10xGenomics Single Cell ATAC", "scATAC-seq")
        qc_dir = os.path.join(project_dir, "qc")
        # Mock 'outs' directory containing summary.csv
        outs_dir = os.path.join(
            qc_dir, "cellranger_count", "2.0.0",
            "refdata-cellranger-atac-GRCh38-2020-A-2.0.0", "PJB1", "outs")
        mkdirs(outs_dir)
        with open(os.path.join(outs_dir, "summary.csv"), 'w') as fp:
            fp.write(ATAC_SUMMARY_2_0_0)
        # QC info file: one tab-separated key/value pair per line
        qc_info = ("Cellranger reference datasets"
                   "\t/data/refdata-cellranger-atac-GRCh38-2020-A-2.0.0\n"
                   "Cellranger version\t2.0.0\n")
        with open(os.path.join(qc_dir, "qc.info"), 'wt') as fp:
            fp.write(qc_info)
        # No cell count should be stored before the update
        print("Checking number of cells")
        count_before = AnalysisProject("PJB1",
                                       project_dir).info.number_of_cells
        self.assertEqual(count_before, None)
        # Run the update
        print("Updating number of cells")
        set_cell_count_for_project(project_dir)
        # Reload the project and check the stored value
        count_after = AnalysisProject("PJB1",
                                      project_dir).info.number_of_cells
        self.assertEqual(count_after, 3582)
Beispiel #6
0
 def test_check_illumina_qc_outputs_singlecell_some_missing(self):
     """
     check_illumina_qc_outputs: some illumina_qc.sh outputs missing (singlecell)

     QC outputs are added to a mock project and two of the R2
     outputs are then deleted; the check is expected to return
     only the R2 Fastq.
     """
     # Build the mock project on disk
     fastq_names = (
         "PJB1_S1_R1_001.fastq.gz",
         "PJB1_S1_R2_001.fastq.gz",
     )
     mock_project = MockAnalysisProject("PJB",
                                        fastq_names,
                                        metadata={'Organism': 'Human'})
     mock_project.create(top_dir=self.wd)
     # Populate the QC directory (no strandedness or MultiQC outputs)
     project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
     UpdateAnalysisProject(project).add_qc_outputs(
         include_fastq_strand=False, include_multiqc=False)
     # Delete a subset of the R2 outputs
     outputs_to_remove = (
         "PJB1_S1_R2_001_fastqc.html",
         "PJB1_S1_R2_001_model_organisms_screen.txt",
     )
     for name in outputs_to_remove:
         os.remove(os.path.join(project.qc_dir, name))
     # Only the R2 Fastq should be returned
     returned_fastqs = check_illumina_qc_outputs(project,
                                                 qc_dir="qc",
                                                 qc_protocol="singlecell")
     r2_fastq = os.path.join(project.fastq_dir, "PJB1_S1_R2_001.fastq.gz")
     self.assertEqual(returned_fastqs, [r2_fastq])
Beispiel #7
0
 def test_qcpipeline_with_strandedness(self):
     """QCPipeline: standard QC run with strandedness determination

     Puts mock 'illumina_qc.sh', 'multiqc' and 'fastq_strand.py'
     at the front of PATH, runs the pipeline on a mock project with
     a 'human' fastq_strand index and checks for a zero status and
     the expected outputs in the project directory.
     """
     # Create the mock executables and prepend their directory to PATH
     MockIlluminaQcSh.create(os.path.join(self.bin, "illumina_qc.sh"))
     MockMultiQC.create(os.path.join(self.bin, "multiqc"))
     MockFastqStrandPy.create(os.path.join(self.bin, "fastq_strand.py"))
     os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
     # Build the mock project on disk
     fastq_names = (
         "PJB1_S1_R1_001.fastq.gz",
         "PJB1_S1_R2_001.fastq.gz",
         "PJB2_S2_R1_001.fastq.gz",
         "PJB2_S2_R2_001.fastq.gz",
     )
     mock_project = MockAnalysisProject("PJB",
                                        fastq_names,
                                        metadata={'Organism': 'Human'})
     mock_project.create(top_dir=self.wd)
     # Configure the pipeline and run it
     pipeline = QCPipeline()
     project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
     pipeline.add_project(project, multiqc=True)
     strand_indexes = {'human': '/data/hg38/star_index'}
     job_runners = {'default': SimpleJobRunner()}
     status = pipeline.run(fastq_strand_indexes=strand_indexes,
                           poll_interval=0.5,
                           max_jobs=1,
                           runners=job_runners)
     # Pipeline should succeed and produce the QC dir and reports
     self.assertEqual(status, 0)
     zip_name = "qc_report.PJB.%s.zip" % os.path.basename(self.wd)
     expected_names = ("qc", "qc_report.html", zip_name,
                       "multiqc_report.html")
     for f in expected_names:
         output_path = os.path.join(self.wd, "PJB", f)
         self.assertTrue(os.path.exists(output_path), "Missing %s" % f)
Beispiel #8
0
 def test_qcpipeline(self):
     """QCPipeline: standard QC run

     Puts mock 'illumina_qc.sh' and 'multiqc' at the front of PATH,
     runs the pipeline on a mock project and checks for a zero
     status and the expected outputs in the project directory.
     """
     # Create the mock executables and prepend their directory to PATH
     MockIlluminaQcSh.create(os.path.join(self.bin, "illumina_qc.sh"))
     MockMultiQC.create(os.path.join(self.bin, "multiqc"))
     os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
     # Build the mock project on disk
     fastq_names = (
         "PJB1_S1_R1_001.fastq.gz",
         "PJB1_S1_R2_001.fastq.gz",
         "PJB2_S2_R1_001.fastq.gz",
         "PJB2_S2_R2_001.fastq.gz",
     )
     mock_project = MockAnalysisProject("PJB", fastq_names)
     mock_project.create(top_dir=self.wd)
     # Configure the pipeline and run it
     pipeline = QCPipeline()
     project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
     pipeline.add_project(project, multiqc=True)
     job_runners = {'default': SimpleJobRunner()}
     status = pipeline.run(poll_interval=0.5,
                           max_jobs=1,
                           runners=job_runners)
     # Pipeline should succeed and produce the QC dir and reports
     self.assertEqual(status, 0)
     zip_name = "qc_report.PJB.%s.zip" % os.path.basename(self.wd)
     expected_names = ("qc", "qc_report.html", zip_name,
                       "multiqc_report.html")
     for f in expected_names:
         output_path = os.path.join(self.wd, "PJB", f)
         self.assertTrue(os.path.exists(output_path), "Missing %s" % f)
Beispiel #9
0
 def test_qcpipeline_with_batching_fails_for_missing_outputs(self):
     """QCPipeline: standard QC run with batching fails for missing outputs

     The mock 'illumina_qc.sh' is created with 'fastqc=False' and
     'exit_code=1'. The pipeline is run with a batch size of 3 and
     is expected to return status 1, leaving the 'qc' directory in
     place but none of the report files.
     """
     # Create the mock executables and prepend their directory to PATH
     MockIlluminaQcSh.create(os.path.join(self.bin, "illumina_qc.sh"),
                             fastqc=False,
                             exit_code=1)
     MockMultiQC.create(os.path.join(self.bin, "multiqc"))
     os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
     # Build the mock project on disk
     fastq_names = (
         "PJB1_S1_R1_001.fastq.gz",
         "PJB1_S1_R2_001.fastq.gz",
         "PJB2_S2_R1_001.fastq.gz",
         "PJB2_S2_R2_001.fastq.gz",
     )
     mock_project = MockAnalysisProject("PJB", fastq_names)
     mock_project.create(top_dir=self.wd)
     # Configure the pipeline and run it with batching
     pipeline = QCPipeline()
     project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
     pipeline.add_project(project, multiqc=True)
     job_runners = {'default': SimpleJobRunner()}
     status = pipeline.run(poll_interval=0.5,
                           max_jobs=1,
                           batch_size=3,
                           runners=job_runners)
     # Pipeline should report failure
     self.assertEqual(status, 1)
     # The QC directory is still expected...
     qc_path = os.path.join(self.wd, "PJB", "qc")
     self.assertTrue(os.path.exists(qc_path), "Missing 'qc'")
     # ...but none of the reports should have been written
     zip_name = "qc_report.PJB.%s.zip" % os.path.basename(self.wd)
     unexpected_names = ("qc_report.html", zip_name, "multiqc_report.html")
     for f in unexpected_names:
         output_path = os.path.join(self.wd, "PJB", f)
         self.assertFalse(os.path.exists(output_path),
                          "Found %s, shouldn't be present" % f)
Beispiel #10
0
 def test_check_fastq_strand_outputs_singlecell_missing(self):
     """
     check_fastq_strand_outputs: fastq_strand.py output missing (singlecell)

     No QC outputs are added to the mock project; the check is
     expected to return a single one-element tuple holding the R2
     Fastq.
     """
     # Build the mock project on disk and load it
     fastq_names = (
         "PJB1_S1_R1_001.fastq.gz",
         "PJB1_S1_R2_001.fastq.gz",
     )
     mock_project = MockAnalysisProject("PJB",
                                        fastq_names,
                                        metadata={'Organism': 'Human'})
     mock_project.create(top_dir=self.wd)
     project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
     # Write an empty fastq_strand.conf into the project directory
     conf_file = os.path.join(project.dirn, "fastq_strand.conf")
     with open(conf_file, 'w') as fp:
         fp.write("")
     # Only the R2 Fastq should be returned
     returned = check_fastq_strand_outputs(project,
                                           "qc",
                                           conf_file,
                                           qc_protocol="singlecell")
     r2_fastq = os.path.join(project.fastq_dir, "PJB1_S1_R2_001.fastq.gz")
     self.assertEqual(returned, [(r2_fastq, )])
Beispiel #11
0
 def test_expected_outputs_standardSE(self):
     """
     expected_outputs: standard single-end, no strandedness

     Checks in both directions: every returned path must sit in the
     project's 'qc' directory with a known basename, and every
     reference name must appear in the returned collection.
     """
     # Build the mock project on disk
     mock_project = MockAnalysisProject("PJB",
                                        ("PJB1_S1_R1_001.fastq.gz", ),
                                        metadata={'Organism': 'Human'})
     mock_project.create(top_dir=self.wd)
     # Reference output names, built from the Fastq base name
     output_suffixes = (
         "_fastqc",
         "_fastqc.html",
         "_fastqc.zip",
         "_model_organisms_screen.png",
         "_model_organisms_screen.txt",
         "_other_organisms_screen.png",
         "_other_organisms_screen.txt",
         "_rRNA_screen.png",
         "_rRNA_screen.txt",
     )
     reference_outputs = tuple("PJB1_S1_R1_001%s" % suffix
                               for suffix in output_suffixes)
     # Get the expected outputs for the project
     project_dir = os.path.join(self.wd, mock_project.name)
     project = AnalysisProject(mock_project.name, project_dir)
     expected = expected_outputs(project, "qc", qc_protocol="standardSE")
     qc_path = os.path.join(self.wd, mock_project.name, "qc")
     # Every returned path is a known output in the QC directory
     for path in expected:
         self.assertEqual(os.path.dirname(path), qc_path)
         self.assertTrue(os.path.basename(path) in reference_outputs)
     # Every reference output was returned
     for name in reference_outputs:
         self.assertTrue(os.path.join(qc_path, name) in expected)
Beispiel #12
0
 def test_report_single_end(self):
     """
     report: single-end data

     Generates a report for one project made with 'paired_end=False'
     and checks that the HTML file is written.
     """
     project_dir = self._make_analysis_project(paired_end=False)
     projects = (AnalysisProject('PJB', project_dir), )
     report_file = os.path.join(self.top_dir, 'report.SE.html')
     report(projects, filename=report_file)
     # The report file should now exist
     self.assertTrue(
         os.path.exists(os.path.join(self.top_dir, 'report.SE.html')))
Beispiel #13
0
 def test_determine_qc_protocol_standardPE(self):
     """determine_qc_protocol: standard paired-end run

     A mock project with R1/R2 Fastqs for two samples and no
     metadata is expected to be assigned the "standardPE" protocol.
     """
     # Build the mock project on disk
     fastq_names = (
         "PJB1_S1_R1_001.fastq.gz",
         "PJB1_S1_R2_001.fastq.gz",
         "PJB2_S2_R1_001.fastq.gz",
         "PJB2_S2_R2_001.fastq.gz",
     )
     mock_project = MockAnalysisProject("PJB", fastq_names)
     mock_project.create(top_dir=self.wd)
     # Load it and check the protocol
     project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
     protocol = determine_qc_protocol(project)
     self.assertEqual(protocol, "standardPE")
Beispiel #14
0
 def test_report_single_end_multiple_projects(self):
     """
     report: single-end data: two projects in one report

     Generates one combined report for projects 'PJB' and 'PJB2'
     and checks that the HTML file is written.
     """
     # Make both project directories first, then load them
     first_dir = self._make_analysis_project(name="PJB",
                                             paired_end=False)
     second_dir = self._make_analysis_project(name="PJB2",
                                              paired_end=False)
     projects = (
         AnalysisProject('PJB', first_dir),
         AnalysisProject('PJB2', second_dir),
     )
     # Generate the combined report
     report_file = os.path.join(self.top_dir,
                                'report.multiple_projects.html')
     report(projects,
            title="QC report: PJB & PJB2",
            filename=report_file)
     # The report file should now exist
     written_report = os.path.join(self.top_dir,
                                   'report.multiple_projects.html')
     self.assertTrue(os.path.exists(written_report))
Beispiel #15
0
 def test_report_single_end_no_seq_lens(self):
     """
     report: single-end data: no sequence lengths

     The project is made with 'protocol="standardSE"' and
     'include_seqlens=False'; the report is still expected to be
     written.
     """
     project_dir = self._make_analysis_project(protocol='standardSE',
                                               include_seqlens=False)
     projects = (AnalysisProject(project_dir), )
     report_file = os.path.join(self.top_dir, 'report.SE.html')
     report(projects, filename=report_file)
     # The report file should now exist
     self.assertTrue(
         os.path.exists(os.path.join(self.top_dir, 'report.SE.html')))
Beispiel #16
0
 def test_determine_qc_protocol_10xchromium3v3(self):
     """determine_qc_protocol: single-cell run (10xGenomics Chromium 3'v3)

     A mock project whose 'Single cell platform' metadata is set to
     "10xGenomics Chromium 3'v3" is expected to be assigned the
     "singlecell" protocol.
     """
     # Build the mock project on disk
     fastq_names = (
         "PJB1_S1_R1_001.fastq.gz",
         "PJB1_S1_R2_001.fastq.gz",
         "PJB2_S2_R1_001.fastq.gz",
         "PJB2_S2_R2_001.fastq.gz",
     )
     project_metadata = {
         'Single cell platform': "10xGenomics Chromium 3'v3",
     }
     mock_project = MockAnalysisProject("PJB",
                                        fastq_names,
                                        metadata=project_metadata)
     mock_project.create(top_dir=self.wd)
     # Load it and check the protocol
     project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
     protocol = determine_qc_protocol(project)
     self.assertEqual(protocol, "singlecell")
Beispiel #17
0
 def test_report_paired_end_with_no_fastq_dir(self):
     """
     report: paired-end data with no fastq dir

     The project is made with 'fastq_dir="."'; the report is still
     expected to be written.
     """
     project_dir = self._make_analysis_project(paired_end=True,
                                               fastq_dir=".")
     projects = (AnalysisProject('PJB', project_dir), )
     report_file = os.path.join(self.top_dir, 'report.PE.html')
     report(projects, filename=report_file)
     # The report file should now exist
     self.assertTrue(
         os.path.exists(os.path.join(self.top_dir, 'report.PE.html')))
Beispiel #18
0
 def test_verify_qc_no_outputs(self):
     """verify_qc: project with no QC outputs

     No QC outputs are added to the mock project, so verification
     is expected to return a false value.
     """
     # Build the mock project on disk
     fastq_names = (
         "PJB1_S1_R1_001.fastq.gz",
         "PJB1_S1_R2_001.fastq.gz",
         "PJB2_S2_R1_001.fastq.gz",
         "PJB2_S2_R2_001.fastq.gz",
     )
     mock_project = MockAnalysisProject("PJB", fastq_names)
     mock_project.create(top_dir=self.wd)
     # Load it and run the verification
     project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
     verified = verify_qc(project)
     self.assertFalse(verified)
Beispiel #19
0
 def test_report_paired_end_with_legacy_screens(self):
     """
     report: paired-end data with legacy screen names

     The project is made with 'legacy_screens=True'; the report is
     still expected to be written.
     """
     project_dir = self._make_analysis_project(paired_end=True,
                                               legacy_screens=True)
     projects = (AnalysisProject('PJB', project_dir), )
     report_file = os.path.join(self.top_dir, 'report.PE.html')
     report(projects, filename=report_file)
     # The report file should now exist
     self.assertTrue(
         os.path.exists(os.path.join(self.top_dir, 'report.PE.html')))
Beispiel #20
0
    def test_set_cell_count_for_cellplex_project(self):
        """
        set_cell_count_for_project: test for multiplexed data (CellPlex)

        Builds a mock 'cellranger_multi' output tree with per-sample
        'metrics_summary.csv' and 'web_summary.html' files for samples
        'PBA' and 'PBB', adds a 'qc.info' file, and checks that the
        stored cell count goes from None to 10350.
        """
        # Mock project with 'CellPlex' library type
        project_dir = self._make_mock_analysis_project(
            "10xGenomics Chromium 3'v3", "CellPlex")
        qc_dir = os.path.join(project_dir, "qc")
        # Top-level 'outs' directory for cellranger multi
        outs_dir = os.path.join(qc_dir, "cellranger_multi", "6.0.0",
                                "refdata-cellranger-gex-GRCh38-2020-A",
                                "outs")
        mkdirs(outs_dir)
        # Files written for each sample, as (name, content) in write order
        per_sample_files = (
            ("metrics_summary.csv", CELLPLEX_METRICS_SUMMARY),
            ("web_summary.html", "Placeholder for web_summary.html\n"),
        )
        for sample_name in ("PBA", "PBB"):
            sample_dir = os.path.join(outs_dir, "per_sample_outs",
                                      sample_name)
            mkdirs(sample_dir)
            for file_name, content in per_sample_files:
                with open(os.path.join(sample_dir, file_name), 'wt') as fp:
                    fp.write(content)
        # QC info file: one tab-separated key/value pair per line
        qc_info = ("Cellranger reference datasets"
                   "\t/data/refdata-cellranger-gex-GRCh38-2020-A\n"
                   "Cellranger version\t6.0.0\n")
        with open(os.path.join(qc_dir, "qc.info"), 'wt') as fp:
            fp.write(qc_info)
        # No cell count should be stored before the update
        print("Checking number of cells")
        count_before = AnalysisProject("PJB1",
                                       project_dir).info.number_of_cells
        self.assertEqual(count_before, None)
        # Run the update
        print("Updating number of cells")
        set_cell_count_for_project(project_dir)
        # Reload the project and check the stored value
        count_after = AnalysisProject("PJB1",
                                      project_dir).info.number_of_cells
        self.assertEqual(count_after, 10350)
Beispiel #21
0
 def test_qcreporter_paired_end(self):
     """QCReporter: paired-end data

     Checks the reporter's name, paired-end flag and verification
     result, then that a report file is written.
     """
     project_dir = self._make_analysis_project(paired_end=True)
     qc_reporter = QCReporter(AnalysisProject('PJB', project_dir))
     # Reporter properties
     self.assertEqual(qc_reporter.name, 'PJB')
     self.assertTrue(qc_reporter.paired_end)
     self.assertTrue(qc_reporter.verify())
     # Report generation
     report_file = os.path.join(self.wd, 'report.PE.html')
     qc_reporter.report(filename=report_file)
     self.assertTrue(
         os.path.exists(os.path.join(self.wd, 'report.PE.html')))
Beispiel #22
0
 def test_report_paired_end_with_non_default_qc_dir(self):
     """
     report: paired-end data with non-default QC dir

     The same QC directory name ("qc.non_default") is used both to
     make the project and to generate the report.
     """
     project_dir = self._make_analysis_project(paired_end=True,
                                               qc_dir="qc.non_default")
     projects = (AnalysisProject('PJB', project_dir), )
     report_file = os.path.join(self.top_dir, 'report.PE.html')
     report(projects, filename=report_file, qc_dir="qc.non_default")
     # The report file should now exist
     self.assertTrue(
         os.path.exists(os.path.join(self.top_dir, 'report.PE.html')))
Beispiel #23
0
 def test_verify_qc_all_outputs(self):
     """verify_qc: project with all QC outputs present

     QC outputs are added to the mock project with default options,
     so verification is expected to return a true value.
     """
     # Build the mock project on disk
     fastq_names = (
         "PJB1_S1_R1_001.fastq.gz",
         "PJB1_S1_R2_001.fastq.gz",
         "PJB2_S2_R1_001.fastq.gz",
         "PJB2_S2_R2_001.fastq.gz",
     )
     mock_project = MockAnalysisProject("PJB", fastq_names)
     mock_project.create(top_dir=self.wd)
     # Load it and populate the QC outputs
     project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
     updater = UpdateAnalysisProject(project)
     updater.add_qc_outputs()
     # Run the verification
     verified = verify_qc(project)
     self.assertTrue(verified)
Beispiel #24
0
 def setUp(self):
     """Create a temporary directory holding a mock 'PJB' project.

     Sets 'self.dirn' to the temporary directory and 'self.project'
     to the AnalysisProject loaded from the 'PJB' subdirectory.
     """
     # Temporary working directory for this test
     self.dirn = tempfile.mkdtemp(suffix='TestCellrangerMulti')
     # Fastqs and metadata for the mock project
     fastq_names = (
         "PJB1_GEX_S1_R1_001.fastq.gz",
         "PJB1_GEX_S1_R2_001.fastq.gz",
         "PJB2_MC_S2_R1_001.fastq.gz",
         "PJB2_MC_S2_R2_001.fastq.gz",
     )
     project_metadata = {
         'Organism': 'Human',
         'Single cell platform': "10xGenomics Chromium 3'v3",
     }
     # Write the mock project to disk and load it
     mock_project = MockAnalysisProject("PJB",
                                        fastq_names,
                                        metadata=project_metadata)
     mock_project.create(top_dir=self.dirn)
     project_dir = os.path.join(self.dirn, "PJB")
     self.project = AnalysisProject("PJB", project_dir)
Beispiel #25
0
 def test_qcreporter_paired_end_with_non_default_fastq_dir(self):
     """QCReporter: paired-end data with non-default fastq dir

     The project is made with 'fastq_dir="fastqs.non_default"'.
     Checks the reporter's name, paired-end flag and verification
     result, then that a report file is written.
     """
     project_dir = self._make_analysis_project(
         paired_end=True, fastq_dir="fastqs.non_default")
     qc_reporter = QCReporter(AnalysisProject('PJB', project_dir))
     # Reporter properties
     self.assertEqual(qc_reporter.name, 'PJB')
     self.assertTrue(qc_reporter.paired_end)
     self.assertTrue(qc_reporter.verify())
     # Report generation
     report_file = os.path.join(self.wd, 'report.PE.html')
     qc_reporter.report(filename=report_file)
     self.assertTrue(
         os.path.exists(os.path.join(self.wd, 'report.PE.html')))
 def test_import_project_with_qc(self):
     """import_project: check project with QC outputs is imported

     Adds QC outputs (without MultiQC) to the project at
     'self.new_project_dir', confirms 'NewProj' is absent from the
     mock analysis directory, imports it, and then confirms it is
     present (also via a fresh AutoProcess instance) along with the
     QC report, MultiQC report and ZIP file.
     """
     def check_newproj(autoprocess, check):
         # Apply 'check' (assertTrue or assertFalse) to each of the
         # three indicators that 'NewProj' is present
         check('NewProj' in
               [p.name for p in autoprocess.get_analysis_projects()])
         check('NewProj' in
               [p.name
                for p in autoprocess.get_analysis_projects_from_dirs()])
         check(os.path.exists(
             os.path.join(autoprocess.analysis_dir, 'NewProj')))

     # Create mock multiqc and prepend its directory to PATH
     MockMultiQC.create(os.path.join(self.bin, "multiqc"))
     os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
     # Create the mock auto-process directory
     mockdir = MockAnalysisDirFactory.bcl2fastq2(
         '160621_M00879_0087_000000000-AGEW9', 'miseq', top_dir=self.dirn)
     mockdir.create()
     # Add QC outputs to the project that will be imported
     new_project = AnalysisProject('NewProj', self.new_project_dir)
     UpdateAnalysisProject(new_project).add_qc_outputs(
         include_multiqc=False)
     print(os.listdir(os.path.join(self.dirn, 'NewProj')))
     # Project should not be present before the import
     ap = AutoProcess(mockdir.dirn)
     check_newproj(ap, self.assertFalse)
     # Do the import and confirm the project is now present
     import_project(ap, self.new_project_dir)
     check_newproj(ap, self.assertTrue)
     # Confirm again using a freshly loaded AutoProcess object
     ap2 = AutoProcess(mockdir.dirn)
     check_newproj(ap2, self.assertTrue)
     # QC report, MultiQC report and ZIP file should all exist
     imported_dir = os.path.join(ap2.analysis_dir, 'NewProj')
     print(os.listdir(imported_dir))
     report_names = (
         "qc_report.html",
         "multiqc_report.html",
         "qc_report.NewProj.160621_M00879_0087_000000000-AGEW9.zip",
     )
     for name in report_names:
         report_path = os.path.join(ap2.analysis_dir, 'NewProj', name)
         self.assertTrue(os.path.exists(report_path),
                         "Missing %s" % report_path)
Beispiel #27
0
 def test_determine_qc_protocol_10xchromium3v2_atac_seq(self):
     """determine_qc_protocol: single-cell ATAC-seq (10xGenomics Single Cell ATAC)

     A mock project with 'Single cell platform' set to
     "10xGenomics Single Cell ATAC" and 'Library type' set to
     "scATAC-seq" is expected to be assigned the "10x_scATAC"
     protocol.
     """
     # Build the mock project on disk
     fastq_names = (
         "PJB1_S1_R1_001.fastq.gz",
         "PJB1_S1_R2_001.fastq.gz",
         "PJB2_S2_R1_001.fastq.gz",
         "PJB2_S2_R2_001.fastq.gz",
     )
     project_metadata = {
         'Single cell platform': "10xGenomics Single Cell ATAC",
         'Library type': "scATAC-seq",
     }
     mock_project = MockAnalysisProject("PJB",
                                        fastq_names,
                                        metadata=project_metadata)
     mock_project.create(top_dir=self.wd)
     # Load it and check the protocol
     project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
     protocol = determine_qc_protocol(project)
     self.assertEqual(protocol, "10x_scATAC")
Beispiel #28
0
 def test_expected_outputs_standardPE_with_strand(self):
     """
     expected_outputs: standard paired-end with strandedness

     An empty 'fastq_strand.conf' is written into the project and
     passed to 'expected_outputs'. Checks in both directions: every
     returned path must sit in the project's 'qc' directory with a
     known basename, and every reference name must appear in the
     returned collection.
     """
     # Build the mock project on disk
     fastq_names = (
         "PJB1_S1_R1_001.fastq.gz",
         "PJB1_S1_R2_001.fastq.gz",
     )
     mock_project = MockAnalysisProject("PJB",
                                        fastq_names,
                                        metadata={'Organism': 'Human'})
     mock_project.create(top_dir=self.wd)
     # Write an empty fastq_strand.conf into the project directory
     conf_file = os.path.join(self.wd, mock_project.name,
                              "fastq_strand.conf")
     with open(conf_file, 'w') as fp:
         fp.write("")
     # Reference output names: per-Fastq outputs for R1 then R2,
     # followed by the single strandedness output
     output_suffixes = (
         "_fastqc",
         "_fastqc.html",
         "_fastqc.zip",
         "_model_organisms_screen.png",
         "_model_organisms_screen.txt",
         "_other_organisms_screen.png",
         "_other_organisms_screen.txt",
         "_rRNA_screen.png",
         "_rRNA_screen.txt",
     )
     per_fastq_outputs = tuple(
         "%s%s" % (fastq_base, suffix)
         for fastq_base in ("PJB1_S1_R1_001", "PJB1_S1_R2_001")
         for suffix in output_suffixes)
     reference_outputs = per_fastq_outputs + (
         "PJB1_S1_R1_001_fastq_strand.txt", )
     # Get the expected outputs for the project
     project_dir = os.path.join(self.wd, mock_project.name)
     project = AnalysisProject(mock_project.name, project_dir)
     expected = expected_outputs(project,
                                 "qc",
                                 fastq_strand_conf=conf_file,
                                 qc_protocol="standardPE")
     qc_path = os.path.join(self.wd, mock_project.name, "qc")
     # Every returned path is a known output in the QC directory
     for path in expected:
         self.assertEqual(os.path.dirname(path), qc_path)
         self.assertTrue(os.path.basename(path) in reference_outputs)
     # Every reference output was returned
     for name in reference_outputs:
         self.assertTrue(os.path.join(qc_path, name) in expected)
Beispiel #29
0
 def test_set_cell_count_project_missing_library_type_no_subdirs(self):
     """
     set_cell_count_for_project: test for scRNA-seq when library not set (old-style output)

     Here 'metrics_summary.csv' sits directly under
     'qc/cellranger_count/PJB1/outs' and no 'qc.info' file is
     written. The stored cell count is expected to go from None to
     2272.
     """
     # Mock project with the library type left unset
     project_dir = self._make_mock_analysis_project(
         "10xGenomics Chromium 3'v3", None)
     # Mock 'outs' directory containing metrics_summary.csv
     outs_dir = os.path.join(project_dir, "qc", "cellranger_count",
                             "PJB1", "outs")
     mkdirs(outs_dir)
     with open(os.path.join(outs_dir, "metrics_summary.csv"), 'w') as fp:
         fp.write(METRICS_SUMMARY)
     # No cell count should be stored before the update
     print("Checking number of cells")
     count_before = AnalysisProject("PJB1",
                                    project_dir).info.number_of_cells
     self.assertEqual(count_before, None)
     # Run the update
     print("Updating number of cells")
     set_cell_count_for_project(project_dir)
     # Reload the project and check the stored value
     count_after = AnalysisProject("PJB1",
                                   project_dir).info.number_of_cells
     self.assertEqual(count_after, 2272)
Beispiel #30
0
 def test_verify_qc_incomplete_outputs(self):
     """verify_qc: project with some QC outputs missing

     QC outputs are added to the mock project and one FastQC HTML
     file is then deleted, so verification is expected to return a
     false value.
     """
     # Build the mock project on disk
     fastq_names = (
         "PJB1_S1_R1_001.fastq.gz",
         "PJB1_S1_R2_001.fastq.gz",
         "PJB2_S2_R1_001.fastq.gz",
         "PJB2_S2_R2_001.fastq.gz",
     )
     mock_project = MockAnalysisProject("PJB", fastq_names)
     mock_project.create(top_dir=self.wd)
     # Load it and populate the QC outputs
     project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
     updater = UpdateAnalysisProject(project)
     updater.add_qc_outputs()
     # Delete one of the outputs
     removed_output = os.path.join(self.wd, "PJB", "qc",
                                   "PJB1_S1_R1_001_fastqc.html")
     os.remove(removed_output)
     # Run the verification
     verified = verify_qc(project)
     self.assertFalse(verified)