def test_publish_qc_barcode_analysis(self):
    """publish_qc: barcode analysis outputs

    Creates a mock analysis directory without project directories,
    adds the processing report and barcode analysis to it, publishes
    to a new 'QC' directory and asserts that the index, processing
    report and barcode files exist under the published run directory.
    """
    # Mock auto-process directory (created without project dirs)
    run_metadata = {
        "run_number": 87,
        "source": "local",
        "instrument_datestamp": "160621",
    }
    mock_run = MockAnalysisDirFactory.bcl2fastq2(
        '160621_K00879_0087_000000000-AGEW9',
        'hiseq',
        metadata=run_metadata,
        top_dir=self.dirn)
    mock_run.create(no_project_dirs=True)
    autoprocess = AutoProcess(mock_run.dirn, settings=self.settings)
    # Populate with the processing and barcode analysis reports
    UpdateAnalysisDir(autoprocess).add_processing_report()
    UpdateAnalysisDir(autoprocess).add_barcode_analysis()
    # Mock publication area
    publication_dir = os.path.join(self.dirn, 'QC')
    os.mkdir(publication_dir)
    # Publish QC
    publish_qc(autoprocess, location=publication_dir)
    # Every expected file must exist in the published run directory
    published_run = os.path.join(
        publication_dir,
        "160621_K00879_0087_000000000-AGEW9_analysis")
    barcode_files = [
        os.path.join("barcodes", name)
        for name in ("barcodes.report", "barcodes.xls", "barcodes.html")
    ]
    expected = ["index.html", "processing_qc.html"] + barcode_files
    for relpath in expected:
        target = os.path.join(published_run, relpath)
        self.assertTrue(os.path.exists(target), "Missing %s" % target)
def test_publish_qc_with_cellranger_qc_multiple_lanes_subsets(self):
    """publish_qc: publish cellranger QC output (multiple subsets of lanes)

    Adds cellranger QC output twice (for lanes "45" and "78") to a
    mock analysis directory without project directories, publishes
    it and asserts that a summary HTML file exists for each subset
    alongside the index and processing report.
    """
    lane_subsets = ("45", "78")
    # Mock auto-process directory (created without project dirs)
    run_metadata = {
        "run_number": 87,
        "source": "local",
        "instrument_datestamp": "160621",
    }
    mock_run = MockAnalysisDirFactory.bcl2fastq2(
        '160621_K00879_0087_000000000-AGEW9',
        'hiseq',
        metadata=run_metadata,
        top_dir=self.dirn)
    mock_run.create(no_project_dirs=True)
    autoprocess = AutoProcess(mock_run.dirn, settings=self.settings)
    # Processing report plus one cellranger QC output per lane subset
    UpdateAnalysisDir(autoprocess).add_processing_report()
    for lanes in lane_subsets:
        UpdateAnalysisDir(autoprocess).add_cellranger_qc_output(lanes=lanes)
    # Mock publication area
    publication_dir = os.path.join(self.dirn, 'QC')
    os.mkdir(publication_dir)
    # Publish
    publish_qc(autoprocess, location=publication_dir)
    # Expected outputs: one cellranger summary per lane subset
    expected = ["index.html", "processing_qc.html"]
    expected.extend(
        "cellranger_qc_summary_%s.html" % lanes for lanes in lane_subsets)
    # Do checks
    published_run = os.path.join(
        publication_dir,
        "160621_K00879_0087_000000000-AGEW9_analysis")
    for relpath in expected:
        target = os.path.join(published_run, relpath)
        self.assertTrue(os.path.exists(target), "Missing %s" % target)
def test_publish_qc_missing_destination(self):
    """publish_qc: raise exception if destination doesn't exist

    Prepares a mock analysis directory with a processing report and
    QC outputs for every project, then calls publish_qc with a
    location that has not been created. The call must raise, and the
    location must still not exist afterwards.
    """
    # Mock auto-process directory
    run_metadata = {
        "run_number": 87,
        "source": "local",
        "instrument_datestamp": "160621",
    }
    mock_run = MockAnalysisDirFactory.bcl2fastq2(
        '160621_K00879_0087_000000000-AGEW9',
        'hiseq',
        metadata=run_metadata,
        top_dir=self.dirn)
    mock_run.create()
    autoprocess = AutoProcess(mock_run.dirn, settings=self.settings)
    # Processing report and per-project QC outputs
    UpdateAnalysisDir(autoprocess).add_processing_report()
    for project in autoprocess.get_analysis_projects():
        UpdateAnalysisProject(project).add_qc_outputs()
    # Publication area is referenced but deliberately never created
    missing_location = os.path.join(self.dirn, 'QC')
    self.assertFalse(os.path.exists(missing_location))
    # Publishing must fail ...
    with self.assertRaises(Exception):
        publish_qc(autoprocess, location=missing_location)
    # ... and must not have created the destination as a side effect
    self.assertFalse(os.path.exists(missing_location))
def test_publish_qc_with_project_missing_qc(self):
    """publish_qc: raises exception if project has missing QC

    QC outputs are added to every project except the first one
    returned by get_analysis_projects(); publish_qc is then expected
    to raise when asked to publish to an existing 'QC' directory.
    """
    # Mock auto-process directory
    run_metadata = {
        "run_number": 87,
        "source": "local",
        "instrument_datestamp": "160621",
    }
    mock_run = MockAnalysisDirFactory.bcl2fastq2(
        '160621_K00879_0087_000000000-AGEW9',
        'hiseq',
        metadata=run_metadata,
        top_dir=self.dirn)
    mock_run.create()
    autoprocess = AutoProcess(mock_run.dirn, settings=self.settings)
    # Processing report
    UpdateAnalysisDir(autoprocess).add_processing_report()
    # QC outputs for all but the first project
    for project in autoprocess.get_analysis_projects()[1:]:
        UpdateAnalysisProject(project).add_qc_outputs()
    # Mock publication area
    publication_dir = os.path.join(self.dirn, 'QC')
    os.mkdir(publication_dir)
    # Publishing must fail because one project lacks QC
    with self.assertRaises(Exception):
        publish_qc(autoprocess, location=publication_dir)
def test_publish_qc_with_projects_with_multiple_fastq_sets(self):
    """publish_qc: projects with multiple Fastq sets

    Every project gets the standard QC outputs; the first project
    additionally gets a second Fastq set ("fastqs.extra") with its
    own QC directory ("qc.extra"). After publishing, the standard
    reports for all projects and the extra reports for the first
    project must exist in the published run directory.
    """
    # Mock auto-process directory
    run_metadata = {
        "run_number": 87,
        "source": "local",
        "instrument_datestamp": "160621",
    }
    mock_run = MockAnalysisDirFactory.bcl2fastq2(
        '160621_K00879_0087_000000000-AGEW9',
        'hiseq',
        metadata=run_metadata,
        top_dir=self.dirn)
    mock_run.create()
    # NOTE(review): sibling publish_qc tests pass settings=self.settings
    # to AutoProcess; this one does not - confirm that is intentional
    autoprocess = AutoProcess(mock_run.dirn)
    # Processing report and standard QC outputs for every project
    UpdateAnalysisDir(autoprocess).add_processing_report()
    for project in autoprocess.get_analysis_projects():
        UpdateAnalysisProject(project).add_qc_outputs()
    # Second Fastq set (with its own QC) for the first project
    extra_project = autoprocess.get_analysis_projects()[0]
    UpdateAnalysisProject(extra_project).add_fastq_set(
        "fastqs.extra",
        ("Alt1.r1.fastq.gz","Alt2.r1.fastq.gz"))
    UpdateAnalysisProject(extra_project).add_qc_outputs(
        fastq_set="fastqs.extra",
        qc_dir="qc.extra")
    # Mock publication area
    publication_dir = os.path.join(self.dirn,'QC')
    os.mkdir(publication_dir)
    # Publish
    publish_qc(autoprocess,location=publication_dir)
    # Assemble the expected outputs
    expected = ["index.html", "processing_qc.html"]
    for project in autoprocess.get_analysis_projects():
        # Standard QC outputs followed by the MultiQC report
        report_dir = "qc_report.%s.%s" % (
            project.name,
            os.path.basename(autoprocess.analysis_dir))
        expected.extend((
            report_dir,
            "%s.zip" % report_dir,
            os.path.join(report_dir,"qc_report.html"),
            os.path.join(report_dir,"qc"),
            "multiqc_report.%s.html" % project.name,
        ))
    # Additional QC (and MultiQC) for the second Fastq set
    extra_report_dir = "qc.extra_report.%s.%s" % (
        extra_project.name,
        os.path.basename(autoprocess.analysis_dir))
    expected.extend((
        extra_report_dir,
        "%s.zip" % extra_report_dir,
        os.path.join(extra_report_dir,"qc.extra_report.html"),
        os.path.join(extra_report_dir,"qc.extra"),
        "multiqc.extra_report.%s.html" % extra_project.name,
    ))
    # Do checks
    published_run = os.path.join(
        publication_dir,
        "160621_K00879_0087_000000000-AGEW9_analysis")
    for relpath in expected:
        target = os.path.join(published_run, relpath)
        self.assertTrue(os.path.exists(target),"Missing %s" % target)
def test_clone_analysis_dir_copy_fastqs(self):
    """
    clone: copies an analysis directory

    Clones a mock MiSeq analysis directory with copy_fastqs=True and
    checks the clone for: the 'logs' and 'ScriptCode' subdirectories,
    the sample sheet / info / statistics / report files, a
    'bcl2fastq' directory, a symlinked primary data entry, the
    project directories, and parameter values that point into the
    clone.
    """
    # Source analysis directory
    analysis_dir = MockAnalysisDirFactory.bcl2fastq2(
        "190116_M01234_0002_AXYZ123",
        platform="miseq",
        paired_end=True,
        no_lane_splitting=False,
        include_stats_files=True,
        top_dir=self.dirn)
    analysis_dir.create()
    source = AutoProcess(analysis_dir.dirn)
    UpdateAnalysisDir(source).add_processing_report()
    source.add_directory("primary_data/190116_M01234_0002_AXYZ123")
    # Clone it into a directory that doesn't exist yet
    target_dir = os.path.join(self.dirn,
                              "190116_M01234_0002_AXYZ123_copy")
    self.assertFalse(os.path.exists(target_dir))
    clone(source, target_dir, copy_fastqs=True)
    self.assertTrue(os.path.isdir(target_dir))
    # Expected subdirectories
    for name in ('logs', 'ScriptCode'):
        self.assertTrue(os.path.isdir(os.path.join(target_dir, name)),
                        "Missing '%s'" % name)
    # Expected files
    expected_files = (
        'SampleSheet.orig.csv',
        'custom_SampleSheet.csv',
        'auto_process.info',
        'metadata.info',
        'statistics.info',
        'statistics_full.info',
        'per_lane_statistics.info',
        'per_lane_sample_stats.info',
        'processing_qc.html',
    )
    for name in expected_files:
        self.assertTrue(os.path.isfile(os.path.join(target_dir, name)),
                        "Missing '%s'" % name)
    # Unaligned (bcl2fastq output) directory
    self.assertTrue(os.path.isdir(os.path.join(target_dir, 'bcl2fastq')))
    # Primary data entry must be a symlink
    self.assertTrue(os.path.islink(
        os.path.join(target_dir,
                     'primary_data',
                     '190116_M01234_0002_AXYZ123')))
    # Project directories
    for name in ('AB', 'CDE', 'undetermined'):
        self.assertTrue(os.path.isdir(os.path.join(target_dir, name)),
                        "Missing '%s'" % name)
    # Parameters stored in the clone must reference the clone itself
    cloned_params = AnalysisDirParameters(
        filen=os.path.join(target_dir, 'auto_process.info'))
    self.assertEqual(cloned_params.sample_sheet,
                     os.path.join(target_dir, "custom_SampleSheet.csv"))
    self.assertEqual(cloned_params.primary_data_dir,
                     os.path.join(target_dir, "primary_data"))
def test_publish_qc_subset_of_projects(self):
    """publish_qc: only publish subset of projects

    QC outputs are generated for every project, but publish_qc is
    called with projects="AB*". The first project returned by
    get_analysis_projects() is expected to be published; the QC
    report directories of the remaining projects must be absent from
    the published run directory.
    """
    # Mock auto-process directory
    run_metadata = {
        "run_number": 87,
        "source": "local",
        "instrument_datestamp": "160621",
    }
    mock_run = MockAnalysisDirFactory.bcl2fastq2(
        '160621_K00879_0087_000000000-AGEW9',
        'hiseq',
        metadata=run_metadata,
        top_dir=self.dirn)
    mock_run.create()
    # NOTE(review): sibling publish_qc tests pass settings=self.settings
    # to AutoProcess; this one does not - confirm that is intentional
    autoprocess = AutoProcess(mock_run.dirn)
    # Processing report
    UpdateAnalysisDir(autoprocess).add_processing_report()
    # Split projects into those expected to be published and the rest
    all_projects = autoprocess.get_analysis_projects()
    published = all_projects[0:1]
    unpublished = all_projects[1:]
    # QC outputs are added to ALL projects; only the selection passed
    # to publish_qc restricts what should end up being published
    for project in autoprocess.get_analysis_projects():
        UpdateAnalysisProject(project).add_qc_outputs()
    # Mock publication area
    publication_dir = os.path.join(self.dirn,'QC')
    os.mkdir(publication_dir)
    # Publish only the projects matching the pattern
    publish_qc(autoprocess,location=publication_dir,
               projects="AB*")
    # Expected outputs for the published subset
    expected = ["index.html", "processing_qc.html"]
    for project in published:
        # Standard QC outputs followed by the MultiQC report
        report_dir = "qc_report.%s.%s" % (
            project.name,
            os.path.basename(autoprocess.analysis_dir))
        expected.extend((
            report_dir,
            "%s.zip" % report_dir,
            os.path.join(report_dir,"qc_report.html"),
            os.path.join(report_dir,"qc"),
            "multiqc_report.%s.html" % project.name,
        ))
    published_run = os.path.join(
        publication_dir,
        "160621_K00879_0087_000000000-AGEW9_analysis")
    for relpath in expected:
        target = os.path.join(published_run, relpath)
        self.assertTrue(os.path.exists(target),"Missing %s" % target)
    # Projects outside the selection must not have been copied
    for project in unpublished:
        report_dir = "qc_report.%s.%s" % (
            project.name,
            os.path.basename(autoprocess.analysis_dir))
        self.assertFalse(
            os.path.exists(os.path.join(published_run, report_dir)),
            "%s exists in final dir, but shouldn't" % project.name)
def test_publish_qc_with_icell8_outputs(self):
    """publish_qc: project with ICell8 QC outputs

    All projects get standard QC outputs and the first project also
    gets ICell8 outputs. After publishing, the standard QC and
    MultiQC reports for every project, plus the ICell8 processing
    directory, ZIP, data, report and stats for the first project,
    must exist in the published run directory.
    """
    # Mock auto-process directory
    run_metadata = {
        "run_number": 87,
        "source": "local",
        "instrument_datestamp": "160621",
    }
    mock_run = MockAnalysisDirFactory.bcl2fastq2(
        '160621_K00879_0087_000000000-AGEW9',
        'hiseq',
        metadata=run_metadata,
        top_dir=self.dirn)
    mock_run.create()
    # NOTE(review): sibling publish_qc tests pass settings=self.settings
    # to AutoProcess; this one does not - confirm that is intentional
    autoprocess = AutoProcess(mock_run.dirn)
    # Processing report and standard QC outputs for every project
    UpdateAnalysisDir(autoprocess).add_processing_report()
    all_projects = autoprocess.get_analysis_projects()
    for project in all_projects:
        UpdateAnalysisProject(project).add_qc_outputs()
    # ICell8 report for the first project only
    icell8_project = all_projects[0]
    UpdateAnalysisProject(icell8_project).add_icell8_outputs()
    # Mock publication area
    publication_dir = os.path.join(self.dirn,'QC')
    os.mkdir(publication_dir)
    # Publish
    publish_qc(autoprocess,location=publication_dir)
    # Assemble the expected outputs
    expected = ["index.html", "processing_qc.html"]
    for project in autoprocess.get_analysis_projects():
        # Standard QC outputs followed by the MultiQC report
        report_dir = "qc_report.%s.%s" % (
            project.name,
            os.path.basename(autoprocess.analysis_dir))
        expected.extend((
            report_dir,
            "%s.zip" % report_dir,
            os.path.join(report_dir,"qc_report.html"),
            os.path.join(report_dir,"qc"),
            "multiqc_report.%s.html" % project.name,
        ))
    # ICell8 outputs
    icell8_dir = "icell8_processing.%s.%s" % (
        icell8_project.name,
        os.path.basename(autoprocess.analysis_dir))
    expected.extend((
        icell8_dir,
        "%s.zip" % icell8_dir,
        os.path.join(icell8_dir,"icell8_processing_data"),
        os.path.join(icell8_dir,"icell8_processing.html"),
        os.path.join(icell8_dir,"stats"),
    ))
    # Do checks
    published_run = os.path.join(
        publication_dir,
        "160621_K00879_0087_000000000-AGEW9_analysis")
    for relpath in expected:
        target = os.path.join(published_run, relpath)
        self.assertTrue(os.path.exists(target),"Missing %s" % target)
def test_publish_qc_with_projects_no_reports(self):
    """publish_qc: projects with all QC outputs but no reports

    QC outputs are added for every project and then the report
    files (QC report ZIP, 'qc_report.html', 'multiqc_report.html')
    are deleted from each project directory. A mock 'multiqc'
    executable is placed at the front of PATH before publishing.
    The published run directory must still contain the QC report
    directory, ZIP, HTML report and 'qc' entry for every project.
    """
    # Make an auto-process directory
    mockdir = MockAnalysisDirFactory.bcl2fastq2(
        '160621_K00879_0087_000000000-AGEW9',
        'hiseq',
        metadata={
            "run_number": 87,
            "source": "local",
            "instrument_datestamp": "160621"
        },
        top_dir=self.dirn)
    mockdir.create()
    ap = AutoProcess(mockdir.dirn, settings=self.settings)
    # Add processing report and QC outputs
    UpdateAnalysisDir(ap).add_processing_report()
    for project in ap.get_analysis_projects():
        UpdateAnalysisProject(project).add_qc_outputs()
    # Remove the QC reports
    for project in ap.get_analysis_projects():
        qc_reports = (
            "qc_report.%s.%s.zip" % (project.name,
                                     os.path.basename(ap.analysis_dir)),
            "qc_report.html",
            "multiqc_report.html",
        )
        for f in qc_reports:
            os.remove(os.path.join(project.dirn, f))
    # Make a mock multiqc
    MockMultiQC.create(os.path.join(self.bin, "multiqc"))
    # Put the mock first on PATH. The original value is restored via
    # a cleanup so the modified PATH cannot leak into other tests,
    # and os.pathsep is used rather than a hard-coded ':' separator
    original_path = os.environ['PATH']
    self.addCleanup(os.environ.__setitem__, 'PATH', original_path)
    os.environ['PATH'] = os.pathsep.join((self.bin, original_path))
    # Make a mock publication area
    publication_dir = os.path.join(self.dirn, 'QC')
    os.mkdir(publication_dir)
    # Publish
    publish_qc(ap, location=publication_dir)
    # Check outputs
    outputs = ["index.html", "processing_qc.html"]
    for project in ap.get_analysis_projects():
        # Standard QC outputs
        project_qc = "qc_report.%s.%s" % (
            project.name,
            os.path.basename(ap.analysis_dir))
        outputs.append(project_qc)
        outputs.append("%s.zip" % project_qc)
        outputs.append(os.path.join(project_qc, "qc_report.html"))
        outputs.append(os.path.join(project_qc, "qc"))
    for item in outputs:
        f = os.path.join(publication_dir,
                         "160621_K00879_0087_000000000-AGEW9_analysis",
                         item)
        self.assertTrue(os.path.exists(f), "Missing %s" % f)
def test_publish_qc_use_hierarchy(self):
    """publish_qc: publish using YEAR/PLATFORM hierarchy

    Publishes with use_hierarchy=True and asserts that a
    '2016/hiseq' directory exists under the publication area, with
    the run's index, processing report and per-project QC and
    MultiQC outputs located beneath it.
    """
    # Mock auto-process directory
    run_metadata = {
        "run_number": 87,
        "source": "local",
        "instrument_datestamp": "160621",
    }
    mock_run = MockAnalysisDirFactory.bcl2fastq2(
        '160621_K00879_0087_000000000-AGEW9',
        'hiseq',
        metadata=run_metadata,
        top_dir=self.dirn)
    mock_run.create()
    # NOTE(review): sibling publish_qc tests pass settings=self.settings
    # to AutoProcess; this one does not - confirm that is intentional
    autoprocess = AutoProcess(mock_run.dirn)
    # Processing report and standard QC outputs for every project
    UpdateAnalysisDir(autoprocess).add_processing_report()
    for project in autoprocess.get_analysis_projects():
        UpdateAnalysisProject(project).add_qc_outputs()
    # Mock publication area
    publication_dir = os.path.join(self.dirn,'QC')
    os.mkdir(publication_dir)
    # Publish into the hierarchy
    publish_qc(autoprocess,location=publication_dir,
               use_hierarchy=True)
    # The year/platform level must have been created
    hierarchy_dir = os.path.join(publication_dir,
                                 "2016",
                                 "hiseq")
    self.assertTrue(os.path.exists(hierarchy_dir))
    # Assemble the expected outputs
    expected = ["index.html", "processing_qc.html"]
    for project in autoprocess.get_analysis_projects():
        # Standard QC outputs followed by the MultiQC report
        report_dir = "qc_report.%s.%s" % (
            project.name,
            os.path.basename(autoprocess.analysis_dir))
        expected.extend((
            report_dir,
            "%s.zip" % report_dir,
            os.path.join(report_dir,"qc_report.html"),
            os.path.join(report_dir,"qc"),
            "multiqc_report.%s.html" % project.name,
        ))
    # Do checks
    published_run = os.path.join(
        hierarchy_dir,
        "160621_K00879_0087_000000000-AGEW9_analysis")
    for relpath in expected:
        target = os.path.join(published_run, relpath)
        self.assertTrue(os.path.exists(target),"Missing %s" % target)
def test_clone_fails_if_target_dir_exists(self):
    """
    clone: raises an exception if target dir already exists

    The target directory is created up front, so the subsequent
    clone call is expected to raise.
    """
    # Source analysis directory
    analysis_dir = MockAnalysisDirFactory.bcl2fastq2(
        "190116_M01234_0002_AXYZ123",
        platform="miseq",
        paired_end=True,
        no_lane_splitting=False,
        include_stats_files=True,
        top_dir=self.dirn)
    analysis_dir.create()
    source = AutoProcess(analysis_dir.dirn)
    UpdateAnalysisDir(source).add_processing_report()
    source.add_directory("primary_data/190116_M01234_0002_AXYZ123")
    # Pre-create the target so that it already exists
    existing_target = os.path.join(self.dirn,
                                   "190116_M01234_0002_AXYZ123_copy")
    os.mkdir(existing_target)
    # Cloning onto an existing directory must fail
    with self.assertRaises(Exception):
        clone(source, existing_target)
def test_publish_qc_exclude_zip_files(self):
    """publish_qc: exclude ZIP files from publication

    All projects get standard QC outputs; the first project also
    gets ICell8 outputs and the last project gets cellranger count
    outputs. publish_qc is called with exclude_zip_files=True. The
    report directories and their contents must exist in the
    published run directory, while the corresponding ZIP archives
    must not.
    """
    # Make an auto-process directory
    mockdir = MockAnalysisDirFactory.bcl2fastq2(
        '160621_K00879_0087_000000000-AGEW9',
        'hiseq',
        metadata={
            "run_number": 87,
            "source": "local",
            "instrument_datestamp": "160621"
        },
        top_dir=self.dirn)
    mockdir.create()
    ap = AutoProcess(mockdir.dirn, settings=self.settings)
    # Add processing report and QC outputs
    UpdateAnalysisDir(ap).add_processing_report()
    projects = ap.get_analysis_projects()
    for project in projects:
        UpdateAnalysisProject(project).add_qc_outputs()
    # Add ICell8 report for one project
    icell8_project = projects[0]
    UpdateAnalysisProject(icell8_project).add_icell8_outputs()
    # Add cellranger count output for one project
    tenxgenomics_project = projects[-1]
    UpdateAnalysisProject(
        tenxgenomics_project).add_cellranger_count_outputs()
    # Make a mock publication area
    publication_dir = os.path.join(self.dirn, 'QC')
    os.mkdir(publication_dir)
    # Publish
    publish_qc(ap, location=publication_dir, exclude_zip_files=True)
    # Collect expected outputs, and the ZIP files that must be absent
    outputs = ["index.html", "processing_qc.html"]
    zip_files = []
    for project in ap.get_analysis_projects():
        # Standard QC outputs
        project_qc = "qc_report.%s.%s" % (
            project.name,
            os.path.basename(ap.analysis_dir))
        outputs.append(project_qc)
        outputs.append(os.path.join(project_qc, "qc_report.html"))
        outputs.append(os.path.join(project_qc, "qc"))
        zip_files.append("%s.zip" % project_qc)
    # ICell8 outputs
    icell8_dir = "icell8_processing.%s.%s" % (
        icell8_project.name,
        os.path.basename(ap.analysis_dir))
    outputs.append(icell8_dir)
    outputs.append(os.path.join(icell8_dir, "icell8_processing_data"))
    outputs.append(os.path.join(icell8_dir, "icell8_processing.html"))
    outputs.append(os.path.join(icell8_dir, "stats"))
    zip_files.append("%s.zip" % icell8_dir)
    # Cellranger count outputs
    cellranger_count_dir = "cellranger_count_report.%s.%s" % (
        tenxgenomics_project.name,
        os.path.basename(ap.analysis_dir))
    outputs.append(cellranger_count_dir)
    outputs.append(
        os.path.join(cellranger_count_dir,
                     "cellranger_count_report.html"))
    outputs.append(os.path.join(cellranger_count_dir, "cellranger_count"))
    for sample in tenxgenomics_project.samples:
        outputs.append(
            os.path.join(cellranger_count_dir,
                         "cellranger_count",
                         sample.name,
                         "outs",
                         "web_summary.html"))
    zip_files.append("%s.zip" % cellranger_count_dir)
    # Do checks (performed once; a verbatim duplicate of this loop
    # added nothing and has been removed)
    for item in outputs:
        f = os.path.join(publication_dir,
                         "160621_K00879_0087_000000000-AGEW9_analysis",
                         item)
        self.assertTrue(os.path.exists(f), "Missing %s" % f)
    # Check the ZIP files were excluded
    for zip_file in zip_files:
        self.assertFalse(
            os.path.exists(
                os.path.join(
                    publication_dir,
                    "160621_K00879_0087_000000000-AGEW9_analysis",
                    zip_file)),
            "ZIP file '%s' exists, but shouldn't" % zip_file)
def test_publish_qc_with_cellranger_counts(self):
    """publish_qc: project with cellranger count output

    Adds the processing report and cellranger QC output to the run,
    QC outputs (protocol="singlecell") to every project, and
    cellranger count outputs to the first project. After publishing,
    the run-level reports, the per-project QC reports, and the
    cellranger count report (directory, ZIP, HTML and per-sample
    'web_summary.html') must exist in the published run directory.
    """
    # Make an auto-process directory
    mockdir = MockAnalysisDirFactory.bcl2fastq2(
        '160621_K00879_0087_000000000-AGEW9',
        'hiseq',
        metadata={
            "run_number": 87,
            "source": "local",
            "instrument_datestamp": "160621"
        },
        top_dir=self.dirn)
    mockdir.create()
    ap = AutoProcess(mockdir.dirn, settings=self.settings)
    # Add processing and cellranger QC reports
    UpdateAnalysisDir(ap).add_processing_report()
    UpdateAnalysisDir(ap).add_cellranger_qc_output()
    # Add QC outputs
    projects = ap.get_analysis_projects()
    for project in projects:
        UpdateAnalysisProject(project).add_qc_outputs(
            protocol="singlecell")
    # Add cellranger count output for one project
    tenxgenomics_project = projects[0]
    UpdateAnalysisProject(
        tenxgenomics_project).add_cellranger_count_outputs()
    # Make a mock publication area
    publication_dir = os.path.join(self.dirn, 'QC')
    os.mkdir(publication_dir)
    # Publish
    publish_qc(ap, location=publication_dir)
    # Collect expected outputs
    outputs = [
        "index.html",
        "processing_qc.html",
        "cellranger_qc_summary.html"
    ]
    for project in ap.get_analysis_projects():
        # Standard QC outputs
        project_qc = "qc_report.%s.%s" % (
            project.name,
            os.path.basename(ap.analysis_dir))
        outputs.append(project_qc)
        outputs.append("%s.zip" % project_qc)
        outputs.append(os.path.join(project_qc, "qc_report.html"))
        outputs.append(os.path.join(project_qc, "qc"))
    # Cellranger count outputs
    cellranger_count_dir = "cellranger_count_report.%s.%s" % (
        tenxgenomics_project.name,
        os.path.basename(ap.analysis_dir))
    outputs.append(cellranger_count_dir)
    outputs.append("%s.zip" % cellranger_count_dir)
    outputs.append(
        os.path.join(cellranger_count_dir,
                     "cellranger_count_report.html"))
    outputs.append(os.path.join(cellranger_count_dir, "cellranger_count"))
    for sample in tenxgenomics_project.samples:
        outputs.append(
            os.path.join(cellranger_count_dir,
                         "cellranger_count",
                         sample.name,
                         "outs",
                         "web_summary.html"))
    # Do checks (performed once; a verbatim duplicate of this loop
    # added nothing and has been removed)
    for item in outputs:
        f = os.path.join(publication_dir,
                         "160621_K00879_0087_000000000-AGEW9_analysis",
                         item)
        self.assertTrue(os.path.exists(f), "Missing %s" % f)
def test_clone_analysis_dir_empty_params(self):
    """
    clone: copies an analysis directory when parameter file is empty

    The source directory's parameter file is rewritten so that every
    line keeps only its first tab-delimited field followed by a tab
    and '.', and the AutoProcess object is reloaded from it. The
    directory is then cloned with exclude_projects=False and the
    clone is checked for subdirectories, files, a symlinked
    'bcl2fastq', no primary data entry, and the project directories.
    """
    # Source analysis directory
    analysis_dir = MockAnalysisDirFactory.bcl2fastq2(
        "190116_M01234_0002_AXYZ123",
        platform="miseq",
        paired_end=True,
        no_lane_splitting=False,
        include_stats_files=True,
        top_dir=self.dirn)
    analysis_dir.create()
    ap = AutoProcess(analysis_dir.dirn)
    UpdateAnalysisDir(ap).add_processing_report()
    ap.add_directory("primary_data/190116_M01234_0002_AXYZ123")
    # Blank out the values in the parameter file: drop the loaded
    # object, then write a replacement file holding only each line's
    # first field plus a '.' placeholder (no newline is appended)
    parameter_file = ap.parameter_file
    tmp_parameter_file = os.path.join(self.dirn, 'new_params.tmp')
    del ap
    with open(parameter_file, 'r') as src, \
         open(tmp_parameter_file, 'w') as dst:
        for entry in src:
            dst.write("%s\t." % entry.split('\t')[0])
    os.remove(parameter_file)
    os.rename(tmp_parameter_file, parameter_file)
    # Reload from the emptied parameter file
    ap = AutoProcess(analysis_dir.dirn)
    # Clone into a directory that doesn't exist yet
    clone_dir = os.path.join(self.dirn,
                             "190116_M01234_0002_AXYZ123_copy")
    self.assertFalse(os.path.exists(clone_dir))
    clone(ap, clone_dir, exclude_projects=False)
    self.assertTrue(os.path.isdir(clone_dir))
    # Expected subdirectories
    for name in ('logs', 'ScriptCode'):
        self.assertTrue(os.path.isdir(os.path.join(clone_dir, name)),
                        "Missing '%s'" % name)
    # Expected files
    expected_files = (
        'SampleSheet.orig.csv',
        'custom_SampleSheet.csv',
        'auto_process.info',
        'metadata.info',
        'statistics.info',
        'statistics_full.info',
        'per_lane_statistics.info',
        'per_lane_sample_stats.info',
        'processing_qc.html',
    )
    for name in expected_files:
        self.assertTrue(os.path.isfile(os.path.join(clone_dir, name)),
                        "Missing '%s'" % name)
    # Unaligned (bcl2fastq output) must be a symlink
    self.assertTrue(os.path.islink(os.path.join(clone_dir, 'bcl2fastq')))
    # Primary data entry must not be present in the clone
    self.assertFalse(os.path.exists(
        os.path.join(clone_dir,
                     'primary_data',
                     '190116_M01234_0002_AXYZ123')))
    # Project directories
    for name in ('AB', 'CDE', 'undetermined'):
        self.assertTrue(os.path.exists(os.path.join(clone_dir, name)),
                        "Missing '%s'" % name)