def test_set_cell_count_fails_for_project_with_no_metadata(self):
    """
    set_cell_count_for_project: raises exception for project with no metadata

    The mock project is created with both metadata arguments set to
    None. Cellranger 'count' metrics and a QC info file are added, and
    the update is expected to raise NotImplementedError and to leave
    the stored number of cells as None.
    """
    # Mock project with both metadata arguments unset
    mock_project_dir = self._make_mock_analysis_project(None, None)
    # Cellranger 'count' outputs holding metrics_summary.csv
    outs_dir = os.path.join(mock_project_dir,
                            "qc",
                            "cellranger_count",
                            "5.0.1",
                            "refdata-gex-GRCh38-2020-A",
                            "PJB1",
                            "outs")
    mkdirs(outs_dir)
    with open(os.path.join(outs_dir, "metrics_summary.csv"), 'wt') as fp:
        fp.write(METRICS_SUMMARY)
    # QC info file naming the reference dataset and Cellranger version
    qc_info_file = os.path.join(mock_project_dir, "qc", "qc.info")
    with open(qc_info_file, 'wt') as fp:
        fp.write(
"""Cellranger reference datasets\t/data/refdata-gex-GRCh38-2020-A
Cellranger version\t5.0.1
""")
    # No cell count should be stored to begin with
    print("Checking number of cells")
    info_before = AnalysisProject("PJB1", mock_project_dir).info
    self.assertEqual(info_before.number_of_cells, None)
    # The update itself must be rejected
    with self.assertRaises(NotImplementedError):
        set_cell_count_for_project(mock_project_dir)
    # Reload the project: the cell count must still be unset
    info_after = AnalysisProject("PJB1", mock_project_dir).info
    self.assertEqual(info_after.number_of_cells, None)
def test_set_cell_count_for_multiome_gex_project(self):
    """
    set_cell_count_for_project: test for single cell multiome GEX

    The mock project is created with "10xGenomics Single Cell Multiome"
    and "GEX". A 'summary.csv' file is placed under the Cellranger
    'count' outputs (version 1.0.0) and the stored number of cells is
    expected to change from None to 744.
    """
    # Mock project
    mock_project_dir = self._make_mock_analysis_project(
        "10xGenomics Single Cell Multiome",
        "GEX")
    # Cellranger 'count' outputs holding summary.csv
    outs_dir = os.path.join(mock_project_dir,
                            "qc",
                            "cellranger_count",
                            "1.0.0",
                            "refdata-cellranger-arc-GRCh38-2020-A",
                            "PJB1",
                            "outs")
    mkdirs(outs_dir)
    with open(os.path.join(outs_dir, "summary.csv"), 'w') as fp:
        fp.write(MULTIOME_SUMMARY)
    # QC info file naming the reference dataset and Cellranger version
    qc_info_file = os.path.join(mock_project_dir, "qc", "qc.info")
    with open(qc_info_file, 'wt') as fp:
        fp.write(
"""Cellranger reference datasets\t/data/refdata-cellranger-arc-GRCh38-2020-A
Cellranger version\t1.0.0
""")
    # No cell count should be stored to begin with
    print("Checking number of cells")
    info_before = AnalysisProject("PJB1", mock_project_dir).info
    self.assertEqual(info_before.number_of_cells, None)
    # Run the update
    print("Updating number of cells")
    set_cell_count_for_project(mock_project_dir)
    # Reload the project and check the stored count
    info_after = AnalysisProject("PJB1", mock_project_dir).info
    self.assertEqual(info_after.number_of_cells, 744)
def test_set_cell_count_project_missing_library_type(self):
    """
    set_cell_count_for_project: test for scRNA-seq when library not set

    The mock project is created with "10xGenomics Chromium 3'v3" and no
    library type. With 'metrics_summary.csv' under the versioned
    Cellranger 'count' outputs, the stored number of cells is expected
    to change from None to 2272.
    """
    # Mock project with the library type left unset
    mock_project_dir = self._make_mock_analysis_project(
        "10xGenomics Chromium 3'v3",
        None)
    # Cellranger 'count' outputs holding metrics_summary.csv
    outs_dir = os.path.join(mock_project_dir,
                            "qc",
                            "cellranger_count",
                            "5.0.1",
                            "refdata-gex-GRCh38-2020-A",
                            "PJB1",
                            "outs")
    mkdirs(outs_dir)
    with open(os.path.join(outs_dir, "metrics_summary.csv"), 'w') as fp:
        fp.write(METRICS_SUMMARY)
    # QC info file naming the reference dataset and Cellranger version
    qc_info_file = os.path.join(mock_project_dir, "qc", "qc.info")
    with open(qc_info_file, 'wt') as fp:
        fp.write(
"""Cellranger reference datasets\t/data/refdata-gex-GRCh38-2020-A
Cellranger version\t5.0.1
""")
    # No cell count should be stored to begin with
    print("Checking number of cells")
    info_before = AnalysisProject("PJB1", mock_project_dir).info
    self.assertEqual(info_before.number_of_cells, None)
    # Run the update
    print("Updating number of cells")
    set_cell_count_for_project(mock_project_dir)
    # Reload the project and check the stored count
    info_after = AnalysisProject("PJB1", mock_project_dir).info
    self.assertEqual(info_after.number_of_cells, 2272)
def test_report_single_end_multiple_projects_with_zip_file_duplicated_names_no_data_dir(self):
    """
    report: single-end data: fails with two projects in one report (duplicated names/ZIP file/no data directory)

    Two single-end mock projects ('PJB' and 'PJB2') are passed to
    'report' with 'make_zip=True'; the call is expected to raise, the
    HTML report is expected to exist and the ZIP archive is expected
    to be absent.
    """
    # Two single-end mock projects
    first_dir = self._make_analysis_project(name="PJB", paired_end=False)
    second_dir = self._make_analysis_project(name="PJB2", paired_end=False)
    first_project = AnalysisProject('PJB', first_dir)
    second_project = AnalysisProject('PJB2', second_dir)
    # Reporting with a ZIP file requested must fail
    with self.assertRaises(Exception):
        report((first_project, second_project,),
               title="QC report: PJB & PJB2",
               filename=os.path.join(self.top_dir,
                                     'PJB',
                                     'report.multiple_projects.html'),
               make_zip=True)
    # NOTE(review): the HTML report is asserted to exist even though
    # 'report' raised; presumably the failure occurs when the ZIP is
    # built, after the HTML has been written -- confirm
    html_report = os.path.join(self.top_dir,
                               'PJB',
                               'report.multiple_projects.html')
    self.assertTrue(os.path.exists(html_report))
    zip_archive = os.path.join(self.top_dir,
                               'PJB',
                               'report.multiple_projects.PJB.zip')
    self.assertFalse(os.path.exists(zip_archive))
def test_set_cell_count_for_atac_project_2_0_0(self):
    """
    set_cell_count_for_project: test for scATAC-seq (Cellranger ATAC 2.0.0)

    The mock project is created with "10xGenomics Single Cell ATAC" and
    "scATAC-seq". A 'summary.csv' file is placed under the Cellranger
    'count' outputs (version 2.0.0) and the stored number of cells is
    expected to change from None to 3582.
    """
    # Mock project
    mock_project_dir = self._make_mock_analysis_project(
        "10xGenomics Single Cell ATAC",
        "scATAC-seq")
    # Cellranger 'count' outputs holding summary.csv
    outs_dir = os.path.join(mock_project_dir,
                            "qc",
                            "cellranger_count",
                            "2.0.0",
                            "refdata-cellranger-atac-GRCh38-2020-A-2.0.0",
                            "PJB1",
                            "outs")
    mkdirs(outs_dir)
    with open(os.path.join(outs_dir, "summary.csv"), 'w') as fp:
        fp.write(ATAC_SUMMARY_2_0_0)
    # QC info file naming the reference dataset and Cellranger version
    qc_info_file = os.path.join(mock_project_dir, "qc", "qc.info")
    with open(qc_info_file, 'wt') as fp:
        fp.write(
"""Cellranger reference datasets\t/data/refdata-cellranger-atac-GRCh38-2020-A-2.0.0
Cellranger version\t2.0.0
""")
    # No cell count should be stored to begin with
    print("Checking number of cells")
    info_before = AnalysisProject("PJB1", mock_project_dir).info
    self.assertEqual(info_before.number_of_cells, None)
    # Run the update
    print("Updating number of cells")
    set_cell_count_for_project(mock_project_dir)
    # Reload the project and check the stored count
    info_after = AnalysisProject("PJB1", mock_project_dir).info
    self.assertEqual(info_after.number_of_cells, 3582)
def test_check_illumina_qc_outputs_singlecell_some_missing(self):
    """
    check_illumina_qc_outputs: some illumina_qc.sh outputs missing (singlecell)

    QC outputs are generated for an R1/R2 Fastq pair, two of the R2
    outputs are deleted, and the check (with the "singlecell"
    protocol) is expected to return just the R2 Fastq.
    """
    # Mock project with a single R1/R2 pair
    fastq_names = ("PJB1_S1_R1_001.fastq.gz",
                   "PJB1_S1_R2_001.fastq.gz",)
    mock_project = MockAnalysisProject("PJB",
                                       fastq_names,
                                       metadata={'Organism': 'Human'})
    mock_project.create(top_dir=self.wd)
    # Populate the QC directory (no strandedness or MultiQC outputs)
    project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
    updater = UpdateAnalysisProject(project)
    updater.add_qc_outputs(include_fastq_strand=False,
                           include_multiqc=False)
    # Delete a subset of the R2 outputs
    removed_outputs = ("PJB1_S1_R2_001_fastqc.html",
                       "PJB1_S1_R2_001_model_organisms_screen.txt",)
    for output_name in removed_outputs:
        os.remove(os.path.join(project.qc_dir, output_name))
    # Only the R2 Fastq should be reported
    reported = check_illumina_qc_outputs(project,
                                         qc_dir="qc",
                                         qc_protocol="singlecell")
    self.assertEqual(
        reported,
        [os.path.join(project.fastq_dir, "PJB1_S1_R2_001.fastq.gz")])
def test_qcpipeline_with_strandedness(self):
    """QCPipeline: standard QC run with strandedness determination

    Mock 'illumina_qc.sh', 'multiqc' and 'fastq_strand.py' executables
    are created in 'self.bin', which is put at the front of PATH. The
    pipeline is run on a mock paired-end project with
    'fastq_strand_indexes' supplied, and is expected to return status
    0 and to produce the QC directory, the QC report, its ZIP archive
    and the MultiQC report.
    """
    # Make mock illumina_qc.sh, multiqc and fastq_strand.py
    MockIlluminaQcSh.create(os.path.join(self.bin, "illumina_qc.sh"))
    MockMultiQC.create(os.path.join(self.bin, "multiqc"))
    MockFastqStrandPy.create(os.path.join(self.bin, "fastq_strand.py"))
    # Prepend the mock executables to PATH using the platform's
    # separator rather than a hard-coded ':'
    os.environ['PATH'] = os.pathsep.join((self.bin, os.environ['PATH']))
    # Make mock analysis project
    p = MockAnalysisProject("PJB",
                            ("PJB1_S1_R1_001.fastq.gz",
                             "PJB1_S1_R2_001.fastq.gz",
                             "PJB2_S2_R1_001.fastq.gz",
                             "PJB2_S2_R2_001.fastq.gz"),
                            metadata={'Organism': 'Human'})
    p.create(top_dir=self.wd)
    # Set up and run the QC
    runqc = QCPipeline()
    runqc.add_project(AnalysisProject("PJB", os.path.join(self.wd, "PJB")),
                      multiqc=True)
    status = runqc.run(
        fastq_strand_indexes={'human': '/data/hg38/star_index'},
        poll_interval=0.5,
        max_jobs=1,
        runners={
            'default': SimpleJobRunner(),
        })
    # Check output and reports
    self.assertEqual(status, 0)
    for f in ("qc",
              "qc_report.html",
              "qc_report.PJB.%s.zip" % os.path.basename(self.wd),
              "multiqc_report.html"):
        self.assertTrue(os.path.exists(os.path.join(self.wd, "PJB", f)),
                        "Missing %s" % f)
def test_qcpipeline(self):
    """QCPipeline: standard QC run

    Mock 'illumina_qc.sh' and 'multiqc' executables are created in
    'self.bin', which is put at the front of PATH. The pipeline is run
    on a mock paired-end project and is expected to return status 0
    and to produce the QC directory, the QC report, its ZIP archive
    and the MultiQC report.
    """
    # Make mock illumina_qc.sh and multiqc
    MockIlluminaQcSh.create(os.path.join(self.bin, "illumina_qc.sh"))
    MockMultiQC.create(os.path.join(self.bin, "multiqc"))
    # Prepend the mock executables to PATH using the platform's
    # separator rather than a hard-coded ':'
    os.environ['PATH'] = os.pathsep.join((self.bin, os.environ['PATH']))
    # Make mock analysis project
    p = MockAnalysisProject("PJB",
                            ("PJB1_S1_R1_001.fastq.gz",
                             "PJB1_S1_R2_001.fastq.gz",
                             "PJB2_S2_R1_001.fastq.gz",
                             "PJB2_S2_R2_001.fastq.gz"))
    p.create(top_dir=self.wd)
    # Set up and run the QC
    runqc = QCPipeline()
    runqc.add_project(AnalysisProject("PJB", os.path.join(self.wd, "PJB")),
                      multiqc=True)
    status = runqc.run(poll_interval=0.5,
                       max_jobs=1,
                       runners={
                           'default': SimpleJobRunner(),
                       })
    # Check output and reports
    self.assertEqual(status, 0)
    for f in ("qc",
              "qc_report.html",
              "qc_report.PJB.%s.zip" % os.path.basename(self.wd),
              "multiqc_report.html"):
        self.assertTrue(os.path.exists(os.path.join(self.wd, "PJB", f)),
                        "Missing %s" % f)
def test_qcpipeline_with_batching_fails_for_missing_outputs(self):
    """QCPipeline: standard QC run with batching fails for missing outputs

    The mock 'illumina_qc.sh' is created with 'fastqc=False' and
    'exit_code=1'. Running the pipeline with 'batch_size=3' is
    expected to return status 1, to leave a 'qc' directory behind and
    not to produce the QC report, its ZIP archive or the MultiQC
    report.
    """
    # Make mock illumina_qc.sh (set up to fail) and multiqc
    MockIlluminaQcSh.create(os.path.join(self.bin, "illumina_qc.sh"),
                            fastqc=False,
                            exit_code=1)
    MockMultiQC.create(os.path.join(self.bin, "multiqc"))
    # Prepend the mock executables to PATH using the platform's
    # separator rather than a hard-coded ':'
    os.environ['PATH'] = os.pathsep.join((self.bin, os.environ['PATH']))
    # Make mock analysis project
    p = MockAnalysisProject("PJB",
                            ("PJB1_S1_R1_001.fastq.gz",
                             "PJB1_S1_R2_001.fastq.gz",
                             "PJB2_S2_R1_001.fastq.gz",
                             "PJB2_S2_R2_001.fastq.gz"))
    p.create(top_dir=self.wd)
    # Set up and run the QC
    runqc = QCPipeline()
    runqc.add_project(AnalysisProject("PJB", os.path.join(self.wd, "PJB")),
                      multiqc=True)
    status = runqc.run(poll_interval=0.5,
                       max_jobs=1,
                       batch_size=3,
                       runners={
                           'default': SimpleJobRunner(),
                       })
    # Check output and reports
    self.assertEqual(status, 1)
    self.assertTrue(os.path.exists(os.path.join(self.wd, "PJB", "qc")),
                    "Missing 'qc'")
    for f in ("qc_report.html",
              "qc_report.PJB.%s.zip" % os.path.basename(self.wd),
              "multiqc_report.html"):
        self.assertFalse(os.path.exists(os.path.join(self.wd, "PJB", f)),
                         "Found %s, shouldn't be present" % f)
def test_check_fastq_strand_outputs_singlecell_missing(self):
    """
    check_fastq_strand_outputs: fastq_strand.py output missing (singlecell)

    No QC outputs are generated for the mock project, so the check
    (with the "singlecell" protocol and an empty conf file) is
    expected to return a single one-element tuple holding the R2
    Fastq.
    """
    # Mock project with a single R1/R2 pair
    fastq_names = ("PJB1_S1_R1_001.fastq.gz",
                   "PJB1_S1_R2_001.fastq.gz",)
    mock_project = MockAnalysisProject("PJB",
                                       fastq_names,
                                       metadata={'Organism': 'Human'})
    mock_project.create(top_dir=self.wd)
    project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
    # Empty fastq_strand.conf in the project directory
    conf_file = os.path.join(project.dirn, "fastq_strand.conf")
    with open(conf_file, 'w') as fp:
        fp.write("")
    # Only the R2 Fastq should be reported
    reported = check_fastq_strand_outputs(project,
                                          "qc",
                                          conf_file,
                                          qc_protocol="singlecell")
    r2_fastq = os.path.join(project.fastq_dir, "PJB1_S1_R2_001.fastq.gz")
    self.assertEqual(reported, [(r2_fastq,), ])
def test_expected_outputs_standardSE(self):
    """
    expected_outputs: standard single-end, no strandedness

    Checks that every path returned by 'expected_outputs' for the
    "standardSE" protocol lies in the project's 'qc' directory with a
    basename from the reference set, and that every reference output
    is present in the returned collection.
    """
    # Make mock analysis project
    p = MockAnalysisProject("PJB",
                            ("PJB1_S1_R1_001.fastq.gz",),
                            metadata={'Organism': 'Human'})
    p.create(top_dir=self.wd)
    # Reference outputs
    reference_outputs = (
        "PJB1_S1_R1_001_fastqc",
        "PJB1_S1_R1_001_fastqc.html",
        "PJB1_S1_R1_001_fastqc.zip",
        "PJB1_S1_R1_001_model_organisms_screen.png",
        "PJB1_S1_R1_001_model_organisms_screen.txt",
        "PJB1_S1_R1_001_other_organisms_screen.png",
        "PJB1_S1_R1_001_other_organisms_screen.txt",
        "PJB1_S1_R1_001_rRNA_screen.png",
        "PJB1_S1_R1_001_rRNA_screen.txt",
    )
    expected = expected_outputs(
        AnalysisProject(p.name, os.path.join(self.wd, p.name)),
        "qc",
        qc_protocol="standardSE")
    # assertIn reports the offending value and the container when it
    # fails, unlike assertTrue on an 'in' expression
    for e in expected:
        self.assertEqual(os.path.dirname(e),
                         os.path.join(self.wd, p.name, "qc"))
        self.assertIn(os.path.basename(e), reference_outputs)
    for r in reference_outputs:
        self.assertIn(os.path.join(self.wd, p.name, "qc", r), expected)
def test_report_single_end(self):
    """
    report: single-end data

    Generates a report for one single-end mock project and checks
    that the HTML file is created.
    """
    project_dir = self._make_analysis_project(paired_end=False)
    se_project = AnalysisProject('PJB', project_dir)
    # Write the report into the top-level test directory
    report((se_project,),
           filename=os.path.join(self.top_dir, 'report.SE.html'))
    html_report = os.path.join(self.top_dir, 'report.SE.html')
    self.assertTrue(os.path.exists(html_report))
def test_determine_qc_protocol_standardPE(self):
    """determine_qc_protocol: standard paired-end run

    A mock project with two R1/R2 Fastq pairs and no metadata is
    expected to be assigned the "standardPE" protocol.
    """
    # Mock project with two R1/R2 pairs
    fastq_names = ("PJB1_S1_R1_001.fastq.gz",
                   "PJB1_S1_R2_001.fastq.gz",
                   "PJB2_S2_R1_001.fastq.gz",
                   "PJB2_S2_R2_001.fastq.gz")
    mock_project = MockAnalysisProject("PJB", fastq_names)
    mock_project.create(top_dir=self.wd)
    project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
    protocol = determine_qc_protocol(project)
    self.assertEqual(protocol, "standardPE")
def test_report_single_end_multiple_projects(self):
    """
    report: single-end data: two projects in one report

    Generates a single report covering two single-end mock projects
    ('PJB' and 'PJB2') and checks that the HTML file is created.
    """
    # Two single-end mock projects
    first_dir = self._make_analysis_project(name="PJB", paired_end=False)
    second_dir = self._make_analysis_project(name="PJB2", paired_end=False)
    first_project = AnalysisProject('PJB', first_dir)
    second_project = AnalysisProject('PJB2', second_dir)
    # One combined report
    report((first_project, second_project,),
           title="QC report: PJB & PJB2",
           filename=os.path.join(self.top_dir,
                                 'report.multiple_projects.html'))
    html_report = os.path.join(self.top_dir,
                               'report.multiple_projects.html')
    self.assertTrue(os.path.exists(html_report))
def test_report_single_end_no_seq_lens(self):
    """
    report: single-end data: no sequence lengths

    Generates a report for a 'standardSE' mock project created with
    'include_seqlens=False' and checks that the HTML file is created.
    """
    project_dir = self._make_analysis_project(protocol='standardSE',
                                              include_seqlens=False)
    # NOTE(review): unlike the sibling tests, AnalysisProject is given
    # only the directory here (no project name) -- confirm this matches
    # the constructor signature in use
    se_project = AnalysisProject(project_dir)
    report((se_project,),
           filename=os.path.join(self.top_dir, 'report.SE.html'))
    html_report = os.path.join(self.top_dir, 'report.SE.html')
    self.assertTrue(os.path.exists(html_report))
def test_determine_qc_protocol_10xchromium3v3(self):
    """determine_qc_protocol: single-cell run (10xGenomics Chromium 3'v3)

    A mock project whose 'Single cell platform' metadata is
    "10xGenomics Chromium 3'v3" is expected to be assigned the
    "singlecell" protocol.
    """
    # Mock project with two R1/R2 pairs and the platform set
    fastq_names = ("PJB1_S1_R1_001.fastq.gz",
                   "PJB1_S1_R2_001.fastq.gz",
                   "PJB2_S2_R1_001.fastq.gz",
                   "PJB2_S2_R2_001.fastq.gz")
    project_metadata = {
        'Single cell platform': "10xGenomics Chromium 3'v3",
    }
    mock_project = MockAnalysisProject("PJB",
                                       fastq_names,
                                       metadata=project_metadata)
    mock_project.create(top_dir=self.wd)
    project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
    protocol = determine_qc_protocol(project)
    self.assertEqual(protocol, "singlecell")
def test_report_paired_end_with_no_fastq_dir(self):
    """
    report: paired-end data with no fastq dir

    Generates a report for a paired-end mock project created with
    'fastq_dir="."' and checks that the HTML file is created.
    """
    project_dir = self._make_analysis_project(paired_end=True,
                                              fastq_dir=".")
    pe_project = AnalysisProject('PJB', project_dir)
    report((pe_project,),
           filename=os.path.join(self.top_dir, 'report.PE.html'))
    html_report = os.path.join(self.top_dir, 'report.PE.html')
    self.assertTrue(os.path.exists(html_report))
def test_verify_qc_no_outputs(self):
    """verify_qc: project with no QC outputs

    No QC outputs are added to the mock project, so verification is
    expected to return a false value.
    """
    # Mock project with two R1/R2 pairs
    fastq_names = ("PJB1_S1_R1_001.fastq.gz",
                   "PJB1_S1_R2_001.fastq.gz",
                   "PJB2_S2_R1_001.fastq.gz",
                   "PJB2_S2_R2_001.fastq.gz")
    mock_project = MockAnalysisProject("PJB", fastq_names)
    mock_project.create(top_dir=self.wd)
    project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
    # Verification should fail
    verified = verify_qc(project)
    self.assertFalse(verified)
def test_report_paired_end_with_legacy_screens(self):
    """
    report: paired-end data with legacy screen names

    Generates a report for a paired-end mock project created with
    'legacy_screens=True' and checks that the HTML file is created.
    """
    project_dir = self._make_analysis_project(paired_end=True,
                                              legacy_screens=True)
    pe_project = AnalysisProject('PJB', project_dir)
    report((pe_project,),
           filename=os.path.join(self.top_dir, 'report.PE.html'))
    html_report = os.path.join(self.top_dir, 'report.PE.html')
    self.assertTrue(os.path.exists(html_report))
def test_set_cell_count_for_cellplex_project(self):
    """
    set_cell_count_for_project: test for multiplexed data (CellPlex)

    The mock project is created with "10xGenomics Chromium 3'v3" and
    "CellPlex". Mock Cellranger 'multi' per-sample outputs are built
    for samples 'PBA' and 'PBB', and the stored number of cells is
    expected to change from None to 10350.
    """
    # Mock project
    mock_project_dir = self._make_mock_analysis_project(
        "10xGenomics Chromium 3'v3",
        "CellPlex")
    # Mock Cellranger 'multi' outputs
    multi_outs_dir = os.path.join(mock_project_dir,
                                  "qc",
                                  "cellranger_multi",
                                  "6.0.0",
                                  "refdata-cellranger-gex-GRCh38-2020-A",
                                  "outs")
    mkdirs(multi_outs_dir)
    for sample_name in ("PBA", "PBB",):
        per_sample_dir = os.path.join(multi_outs_dir,
                                      "per_sample_outs",
                                      sample_name)
        mkdirs(per_sample_dir)
        # Per-sample metrics
        metrics_file = os.path.join(per_sample_dir, "metrics_summary.csv")
        with open(metrics_file, 'wt') as fp:
            fp.write(CELLPLEX_METRICS_SUMMARY)
        # Per-sample web summary (placeholder content only)
        web_summary_file = os.path.join(per_sample_dir, "web_summary.html")
        with open(web_summary_file, 'wt') as fp:
            fp.write("Placeholder for web_summary.html\n")
    # QC info file naming the reference dataset and Cellranger version
    qc_info_file = os.path.join(mock_project_dir, "qc", "qc.info")
    with open(qc_info_file, 'wt') as fp:
        fp.write(
"""Cellranger reference datasets\t/data/refdata-cellranger-gex-GRCh38-2020-A
Cellranger version\t6.0.0
""")
    # No cell count should be stored to begin with
    print("Checking number of cells")
    info_before = AnalysisProject("PJB1", mock_project_dir).info
    self.assertEqual(info_before.number_of_cells, None)
    # Run the update
    print("Updating number of cells")
    set_cell_count_for_project(mock_project_dir)
    # Reload the project and check the stored count
    info_after = AnalysisProject("PJB1", mock_project_dir).info
    self.assertEqual(info_after.number_of_cells, 10350)
def test_qcreporter_paired_end(self):
    """QCReporter: paired-end data

    Checks the reporter's name and paired-end flag, that verification
    passes, and that an HTML report is written to the working
    directory.
    """
    project_dir = self._make_analysis_project(paired_end=True)
    qc_reporter = QCReporter(AnalysisProject('PJB', project_dir))
    # Reporter properties
    self.assertEqual(qc_reporter.name, 'PJB')
    self.assertTrue(qc_reporter.paired_end)
    self.assertTrue(qc_reporter.verify())
    # Report generation
    qc_reporter.report(filename=os.path.join(self.wd, 'report.PE.html'))
    html_report = os.path.join(self.wd, 'report.PE.html')
    self.assertTrue(os.path.exists(html_report))
def test_report_paired_end_with_non_default_qc_dir(self):
    """
    report: paired-end data with non-default QC dir

    The mock project is created with its QC directory named
    "qc.non_default"; the same name is passed to 'report' and the
    HTML file is expected to be created.
    """
    project_dir = self._make_analysis_project(paired_end=True,
                                              qc_dir="qc.non_default")
    pe_project = AnalysisProject('PJB', project_dir)
    report((pe_project,),
           filename=os.path.join(self.top_dir, 'report.PE.html'),
           qc_dir="qc.non_default")
    html_report = os.path.join(self.top_dir, 'report.PE.html')
    self.assertTrue(os.path.exists(html_report))
def test_verify_qc_all_outputs(self):
    """verify_qc: project with all QC outputs present

    QC outputs are added to the mock project with the default
    'add_qc_outputs' settings, so verification is expected to return
    a true value.
    """
    # Mock project with two R1/R2 pairs
    fastq_names = ("PJB1_S1_R1_001.fastq.gz",
                   "PJB1_S1_R2_001.fastq.gz",
                   "PJB2_S2_R1_001.fastq.gz",
                   "PJB2_S2_R2_001.fastq.gz")
    mock_project = MockAnalysisProject("PJB", fastq_names)
    mock_project.create(top_dir=self.wd)
    # Populate the QC outputs
    project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
    updater = UpdateAnalysisProject(project)
    updater.add_qc_outputs()
    # Verification should pass
    verified = verify_qc(project)
    self.assertTrue(verified)
def setUp(self):
    """
    Create a temporary working directory holding a mock project

    Sets 'self.dirn' to a new temporary directory and 'self.project'
    to an AnalysisProject for a mock 'PJB' project created inside it.
    """
    # Temporary working directory
    self.dirn = tempfile.mkdtemp(suffix='TestCellrangerMulti')
    # Mock project with two R1/R2 Fastq pairs and human/10x metadata
    fastq_names = ("PJB1_GEX_S1_R1_001.fastq.gz",
                   "PJB1_GEX_S1_R2_001.fastq.gz",
                   "PJB2_MC_S2_R1_001.fastq.gz",
                   "PJB2_MC_S2_R2_001.fastq.gz",)
    project_metadata = {
        'Organism': 'Human',
        'Single cell platform': "10xGenomics Chromium 3'v3",
    }
    mock_project = MockAnalysisProject("PJB",
                                       fastq_names,
                                       metadata=project_metadata)
    mock_project.create(top_dir=self.dirn)
    self.project = AnalysisProject("PJB", os.path.join(self.dirn, "PJB"))
def test_qcreporter_paired_end_with_non_default_fastq_dir(self):
    """QCReporter: paired-end data with non-default fastq dir

    The mock project is created with 'fastq_dir="fastqs.non_default"'.
    Checks the reporter's name and paired-end flag, that verification
    passes, and that an HTML report is written to the working
    directory.
    """
    project_dir = self._make_analysis_project(
        paired_end=True,
        fastq_dir="fastqs.non_default")
    qc_reporter = QCReporter(AnalysisProject('PJB', project_dir))
    # Reporter properties
    self.assertEqual(qc_reporter.name, 'PJB')
    self.assertTrue(qc_reporter.paired_end)
    self.assertTrue(qc_reporter.verify())
    # Report generation
    qc_reporter.report(filename=os.path.join(self.wd, 'report.PE.html'))
    html_report = os.path.join(self.wd, 'report.PE.html')
    self.assertTrue(os.path.exists(html_report))
def test_import_project_with_qc(self):
    """import_project: check project with QC outputs is imported

    QC outputs (without MultiQC) are added to the project at
    'self.new_project_dir', which is then imported into a mock
    auto-process directory. The project is expected to be absent
    before the import and present afterwards (also when seen through
    a fresh AutoProcess instance), with the QC report, the MultiQC
    report and the QC ZIP archive in the imported project directory.
    """
    # Make mock multiqc
    MockMultiQC.create(os.path.join(self.bin, "multiqc"))
    # Prepend the mock executable to PATH using the platform's
    # separator rather than a hard-coded ':'
    os.environ['PATH'] = os.pathsep.join((self.bin, os.environ['PATH']))
    # Make an auto-process directory
    mockdir = MockAnalysisDirFactory.bcl2fastq2(
        '160621_M00879_0087_000000000-AGEW9',
        'miseq',
        top_dir=self.dirn)
    mockdir.create()
    # Add QC outputs to the project to be imported
    UpdateAnalysisProject(
        AnalysisProject('NewProj', self.new_project_dir)
    ).add_qc_outputs(include_multiqc=False)
    print(os.listdir(os.path.join(self.dirn, 'NewProj')))
    # Check that the project is not currently present
    # (assertIn/assertNotIn report the name and the list on failure)
    ap = AutoProcess(mockdir.dirn)
    self.assertNotIn('NewProj',
                     [p.name for p in ap.get_analysis_projects()])
    self.assertNotIn('NewProj',
                     [p.name for p in ap.get_analysis_projects_from_dirs()])
    self.assertFalse(
        os.path.exists(os.path.join(ap.analysis_dir, 'NewProj')))
    # Import the project
    import_project(ap, self.new_project_dir)
    self.assertIn('NewProj',
                  [p.name for p in ap.get_analysis_projects()])
    self.assertIn('NewProj',
                  [p.name for p in ap.get_analysis_projects_from_dirs()])
    self.assertTrue(
        os.path.exists(os.path.join(ap.analysis_dir, 'NewProj')))
    # Verify via fresh AutoProcess object
    ap2 = AutoProcess(mockdir.dirn)
    self.assertIn('NewProj',
                  [p.name for p in ap2.get_analysis_projects()])
    self.assertIn('NewProj',
                  [p.name for p in ap2.get_analysis_projects_from_dirs()])
    self.assertTrue(
        os.path.exists(os.path.join(ap2.analysis_dir, 'NewProj')))
    # Check for QC report and ZIP file
    print(os.listdir(os.path.join(ap2.analysis_dir, 'NewProj')))
    for f in ("qc_report.html",
              "multiqc_report.html",
              "qc_report.NewProj.160621_M00879_0087_000000000-AGEW9.zip",):
        f = os.path.join(ap2.analysis_dir, 'NewProj', f)
        self.assertTrue(os.path.exists(f), "Missing %s" % f)
def test_determine_qc_protocol_10xchromium3v2_atac_seq(self):
    """determine_qc_protocol: single-cell ATAC-seq (10xGenomics Single Cell ATAC)

    A mock project whose metadata sets 'Single cell platform' to
    "10xGenomics Single Cell ATAC" and 'Library type' to "scATAC-seq"
    is expected to be assigned the "10x_scATAC" protocol.
    """
    # Mock project with two R1/R2 pairs and scATAC-seq metadata
    fastq_names = ("PJB1_S1_R1_001.fastq.gz",
                   "PJB1_S1_R2_001.fastq.gz",
                   "PJB2_S2_R1_001.fastq.gz",
                   "PJB2_S2_R2_001.fastq.gz")
    project_metadata = {
        'Single cell platform': "10xGenomics Single Cell ATAC",
        'Library type': "scATAC-seq",
    }
    mock_project = MockAnalysisProject("PJB",
                                       fastq_names,
                                       metadata=project_metadata)
    mock_project.create(top_dir=self.wd)
    project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
    protocol = determine_qc_protocol(project)
    self.assertEqual(protocol, "10x_scATAC")
def test_expected_outputs_standardPE_with_strand(self):
    """
    expected_outputs: standard paired-end with strandedness

    An empty 'fastq_strand.conf' is supplied, and the test checks
    that every path returned by 'expected_outputs' for the
    "standardPE" protocol lies in the project's 'qc' directory with a
    basename from the reference set (which includes the
    'fastq_strand.txt' output), and that every reference output is
    present in the returned collection.
    """
    # Make mock analysis project
    p = MockAnalysisProject("PJB",
                            ("PJB1_S1_R1_001.fastq.gz",
                             "PJB1_S1_R2_001.fastq.gz",),
                            metadata={'Organism': 'Human'})
    p.create(top_dir=self.wd)
    # Make mock fastq_strand
    mock_fastq_strand_conf = os.path.join(self.wd,
                                          p.name,
                                          "fastq_strand.conf")
    with open(mock_fastq_strand_conf, 'w') as fp:
        fp.write("")
    # Reference outputs
    reference_outputs = (
        "PJB1_S1_R1_001_fastqc",
        "PJB1_S1_R1_001_fastqc.html",
        "PJB1_S1_R1_001_fastqc.zip",
        "PJB1_S1_R1_001_model_organisms_screen.png",
        "PJB1_S1_R1_001_model_organisms_screen.txt",
        "PJB1_S1_R1_001_other_organisms_screen.png",
        "PJB1_S1_R1_001_other_organisms_screen.txt",
        "PJB1_S1_R1_001_rRNA_screen.png",
        "PJB1_S1_R1_001_rRNA_screen.txt",
        "PJB1_S1_R2_001_fastqc",
        "PJB1_S1_R2_001_fastqc.html",
        "PJB1_S1_R2_001_fastqc.zip",
        "PJB1_S1_R2_001_model_organisms_screen.png",
        "PJB1_S1_R2_001_model_organisms_screen.txt",
        "PJB1_S1_R2_001_other_organisms_screen.png",
        "PJB1_S1_R2_001_other_organisms_screen.txt",
        "PJB1_S1_R2_001_rRNA_screen.png",
        "PJB1_S1_R2_001_rRNA_screen.txt",
        "PJB1_S1_R1_001_fastq_strand.txt",
    )
    expected = expected_outputs(
        AnalysisProject(p.name, os.path.join(self.wd, p.name)),
        "qc",
        fastq_strand_conf=mock_fastq_strand_conf,
        qc_protocol="standardPE")
    # assertIn reports the offending value and the container when it
    # fails, unlike assertTrue on an 'in' expression
    for e in expected:
        self.assertEqual(os.path.dirname(e),
                         os.path.join(self.wd, p.name, "qc"))
        self.assertIn(os.path.basename(e), reference_outputs)
    for r in reference_outputs:
        self.assertIn(os.path.join(self.wd, p.name, "qc", r), expected)
def test_set_cell_count_project_missing_library_type_no_subdirs(self):
    """
    set_cell_count_for_project: test for scRNA-seq when library not set (old-style output)

    'metrics_summary.csv' is placed directly under
    'qc/cellranger_count/PJB1/outs' (no version or reference
    subdirectories) and no QC info file is written. The stored number
    of cells is expected to change from None to 2272.
    """
    # Mock project with the library type left unset
    mock_project_dir = self._make_mock_analysis_project(
        "10xGenomics Chromium 3'v3",
        None)
    # Old-style Cellranger 'count' outputs holding metrics_summary.csv
    outs_dir = os.path.join(mock_project_dir,
                            "qc",
                            "cellranger_count",
                            "PJB1",
                            "outs")
    mkdirs(outs_dir)
    with open(os.path.join(outs_dir, "metrics_summary.csv"), 'w') as fp:
        fp.write(METRICS_SUMMARY)
    # No cell count should be stored to begin with
    print("Checking number of cells")
    info_before = AnalysisProject("PJB1", mock_project_dir).info
    self.assertEqual(info_before.number_of_cells, None)
    # Run the update
    print("Updating number of cells")
    set_cell_count_for_project(mock_project_dir)
    # Reload the project and check the stored count
    info_after = AnalysisProject("PJB1", mock_project_dir).info
    self.assertEqual(info_after.number_of_cells, 2272)
def test_verify_qc_incomplete_outputs(self):
    """verify_qc: project with some QC outputs missing

    QC outputs are added to the mock project and one FastQC HTML file
    is then deleted, so verification is expected to return a false
    value.
    """
    # Mock project with two R1/R2 pairs
    fastq_names = ("PJB1_S1_R1_001.fastq.gz",
                   "PJB1_S1_R2_001.fastq.gz",
                   "PJB2_S2_R1_001.fastq.gz",
                   "PJB2_S2_R2_001.fastq.gz")
    mock_project = MockAnalysisProject("PJB", fastq_names)
    mock_project.create(top_dir=self.wd)
    # Populate the QC outputs
    project = AnalysisProject("PJB", os.path.join(self.wd, "PJB"))
    updater = UpdateAnalysisProject(project)
    updater.add_qc_outputs()
    # Delete a single output
    removed_output = os.path.join(self.wd,
                                  "PJB",
                                  "qc",
                                  "PJB1_S1_R1_001_fastqc.html")
    os.remove(removed_output)
    # Verification should fail
    verified = verify_qc(project)
    self.assertFalse(verified)