def test_make_fastqs_unknown_platform(self):
    """make_fastqs: exception raised for unknown platform

    The mock run is created as a 'miseq' run but is named with the
    instrument ID 'UNKNOWN'; 'make_fastqs' with the standard protocol
    is expected to raise an exception.
    """
    run_name = "171020_UNKNOWN_00002_AHGXXXX"
    # Mock sequencer output to act as the primary data source
    MockIlluminaRun(run_name, "miseq", top_dir=self.wd).create()
    # Mock bcl2fastq executable, placed at the front of the PATH
    bcl2fastq_exe = os.path.join(self.bin, "bcl2fastq")
    MockBcl2fastq2Exe.create(bcl2fastq_exe)
    os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
    # Set up the analysis directory and verify the starting state
    ap = AutoProcess(settings=self.settings)
    ap.setup(os.path.join(self.wd, run_name))
    self.assertTrue(ap.params.sample_sheet is not None)
    self.assertEqual(ap.params.bases_mask, "auto")
    self.assertTrue(ap.params.primary_data_dir is None)
    self.assertFalse(ap.params.acquired_primary_data)
    # Fastq generation is expected to fail
    self.assertRaises(Exception,
                      make_fastqs,
                      ap,
                      protocol="standard")
def test_publish_qc_with_projects_with_multiple_fastq_sets(self):
    """publish_qc: project with more than one Fastq set

    Adds a second Fastq set ('fastqs.extra') with its own QC
    directory ('qc.extra') to the first project, publishes, and
    checks that the QC for both sets appears in the publication area.
    """
    # Build a mock analysis directory
    mockdir = MockAnalysisDirFactory.bcl2fastq2(
        '160621_K00879_0087_000000000-AGEW9',
        'hiseq',
        metadata={"run_number": 87,
                  "source": "local",
                  "instrument_datestamp": "160621"},
        top_dir=self.dirn)
    mockdir.create()
    ap = AutoProcess(mockdir.dirn)
    # Processing report plus QC outputs for every project
    UpdateAnalysisDir(ap).add_processing_report()
    for project in ap.get_analysis_projects():
        UpdateAnalysisProject(project).add_qc_outputs()
    # Second Fastq set (with QC) for the first project
    multi_fastqs_project = ap.get_analysis_projects()[0]
    UpdateAnalysisProject(multi_fastqs_project).add_fastq_set(
        "fastqs.extra",
        ("Alt1.r1.fastq.gz", "Alt2.r1.fastq.gz"))
    UpdateAnalysisProject(multi_fastqs_project).add_qc_outputs(
        fastq_set="fastqs.extra",
        qc_dir="qc.extra")
    # Mock publication area
    publication_dir = os.path.join(self.dirn, 'QC')
    os.mkdir(publication_dir)
    # Publish
    publish_qc(ap, location=publication_dir)
    # Collect the outputs expected in the published directory
    outputs = ["index.html", "processing_qc.html"]
    for project in ap.get_analysis_projects():
        # Standard QC outputs followed by the MultiQC report
        project_qc = "qc_report.%s.%s" % (
            project.name, os.path.basename(ap.analysis_dir))
        outputs.extend([
            project_qc,
            "%s.zip" % project_qc,
            os.path.join(project_qc, "qc_report.html"),
            os.path.join(project_qc, "qc"),
            "multiqc_report.%s.html" % project.name,
        ])
    # QC and MultiQC outputs for the additional Fastq set
    extra_qc = "qc.extra_report.%s.%s" % (
        multi_fastqs_project.name, os.path.basename(ap.analysis_dir))
    outputs.extend([
        extra_qc,
        "%s.zip" % extra_qc,
        os.path.join(extra_qc, "qc.extra_report.html"),
        os.path.join(extra_qc, "qc.extra"),
        "multiqc.extra_report.%s.html" % multi_fastqs_project.name,
    ])
    # Everything should exist under the published analysis directory
    for item in outputs:
        f = os.path.join(publication_dir,
                         "160621_K00879_0087_000000000-AGEW9_analysis",
                         item)
        self.assertTrue(os.path.exists(f), "Missing %s" % f)
def test_make_fastqs_handle_bcl2fastq2_failure(self):
    """make_fastqs: handle bcl2fastq2 failure

    Runs 'make_fastqs' against a mock bcl2fastq that returns a
    non-zero exit code. An exception is expected; afterwards the
    primary data, log and bcl2fastq subdirectories should exist but
    none of the statistics, project or report files should.
    """
    run_name = "171020_M00879_00002_AHGXXXX"
    # Create mock source data
    illumina_run = MockIlluminaRun(run_name, "miseq", top_dir=self.wd)
    illumina_run.create()
    # Create mock bcl2fastq which will fail (i.e. return a
    # non-zero exit code)
    MockBcl2fastq2Exe.create(os.path.join(self.bin, "bcl2fastq"),
                             exit_code=1)
    os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
    # Do the test
    ap = AutoProcess()
    ap.setup(os.path.join(self.wd, run_name))
    self.assertTrue(ap.params.sample_sheet is not None)
    self.assertRaises(Exception, ap.make_fastqs, protocol="standard")
    # Check outputs: working subdirectories should have been made
    analysis_dir = os.path.join(self.wd, "%s_analysis" % run_name)
    for subdir in (os.path.join("primary_data", run_name),
                   os.path.join("logs", "002_make_fastqs"),
                   "bcl2fastq"):
        self.assertTrue(os.path.isdir(os.path.join(analysis_dir, subdir)),
                        "Missing subdir: %s" % subdir)
    # These files must NOT exist after a failed run, so the failure
    # message reports an unexpected file rather than a missing one
    for filen in ("statistics.info",
                  "statistics_full.info",
                  "per_lane_statistics.info",
                  "per_lane_sample_stats.info",
                  "projects.info",
                  "processing_qc.html"):
        self.assertFalse(os.path.exists(os.path.join(analysis_dir, filen)),
                         "Unexpected file: %s" % filen)
def test_import_project(self):
    """AutoProcess.import_project: project directory is imported

    Checks that 'NewProj' is absent before the import and present
    afterwards, both for the importing instance and for a fresh
    AutoProcess instance loaded from the same directory.
    """
    # Mock auto-process directory
    mockdir = MockAnalysisDirFactory.bcl2fastq2(
        '160621_M00879_0087_000000000-AGEW9',
        'miseq',
        top_dir=self.dirn)
    mockdir.create()
    ap = AutoProcess(mockdir.dirn)
    # Before the import: project must not be known or on disk
    listed = [p.name for p in ap.get_analysis_projects()]
    self.assertFalse('NewProj' in listed)
    from_dirs = [p.name for p in ap.get_analysis_projects_from_dirs()]
    self.assertFalse('NewProj' in from_dirs)
    project_dir = os.path.join(ap.analysis_dir, 'NewProj')
    self.assertFalse(os.path.exists(project_dir))
    # Do the import
    ap.import_project(self.new_project_dir)
    # After the import: project is listed and present on disk
    listed = [p.name for p in ap.get_analysis_projects()]
    self.assertTrue('NewProj' in listed)
    from_dirs = [p.name for p in ap.get_analysis_projects_from_dirs()]
    self.assertTrue('NewProj' in from_dirs)
    project_dir = os.path.join(ap.analysis_dir, 'NewProj')
    self.assertTrue(os.path.exists(project_dir))
    # Repeat the checks using a newly loaded AutoProcess instance
    ap2 = AutoProcess(mockdir.dirn)
    listed = [p.name for p in ap2.get_analysis_projects()]
    self.assertTrue('NewProj' in listed)
    from_dirs = [p.name for p in ap2.get_analysis_projects_from_dirs()]
    self.assertTrue('NewProj' in from_dirs)
    project_dir = os.path.join(ap2.analysis_dir, 'NewProj')
    self.assertTrue(os.path.exists(project_dir))
def test_import_project(self):
    """import_project: project is imported into the analysis dir

    'NewProj' must be absent beforehand; after 'import_project' it
    must be reported by both project listings and exist on disk, for
    the original AutoProcess instance and for a freshly loaded one.
    """
    # Mock auto-process directory
    mockdir = MockAnalysisDirFactory.bcl2fastq2(
        '160621_M00879_0087_000000000-AGEW9',
        'miseq',
        top_dir=self.dirn)
    mockdir.create()
    # Project should not be present to begin with
    ap = AutoProcess(mockdir.dirn)
    for get_projects in (ap.get_analysis_projects,
                         ap.get_analysis_projects_from_dirs):
        self.assertFalse(
            'NewProj' in [p.name for p in get_projects()])
    self.assertFalse(
        os.path.exists(os.path.join(ap.analysis_dir, 'NewProj')))
    # Import the project and re-check with the same instance
    import_project(ap, self.new_project_dir)
    for get_projects in (ap.get_analysis_projects,
                         ap.get_analysis_projects_from_dirs):
        self.assertTrue(
            'NewProj' in [p.name for p in get_projects()])
    self.assertTrue(
        os.path.exists(os.path.join(ap.analysis_dir, 'NewProj')))
    # Same checks via a fresh AutoProcess object
    ap2 = AutoProcess(mockdir.dirn)
    for get_projects in (ap2.get_analysis_projects,
                         ap2.get_analysis_projects_from_dirs):
        self.assertTrue(
            'NewProj' in [p.name for p in get_projects()])
    self.assertTrue(
        os.path.exists(os.path.join(ap2.analysis_dir, 'NewProj')))
def test_publish_qc_with_project_missing_qc(self):
    """publish_qc: exception raised when a project lacks QC

    QC outputs are added to every project except the first one,
    then 'publish_qc' is expected to raise an exception.
    """
    # Mock auto-process directory
    mockdir = MockAnalysisDirFactory.bcl2fastq2(
        '160621_K00879_0087_000000000-AGEW9',
        'hiseq',
        metadata={"run_number": 87,
                  "source": "local",
                  "instrument_datestamp": "160621"},
        top_dir=self.dirn)
    mockdir.create()
    ap = AutoProcess(mockdir.dirn, settings=self.settings)
    # Processing report
    UpdateAnalysisDir(ap).add_processing_report()
    # QC outputs for all projects but the first
    projects_with_qc = ap.get_analysis_projects()[1:]
    for project in projects_with_qc:
        UpdateAnalysisProject(project).add_qc_outputs()
    # Mock publication area
    publication_dir = os.path.join(self.dirn, 'QC')
    os.mkdir(publication_dir)
    # Publishing should fail
    self.assertRaises(Exception,
                      publish_qc,
                      ap,
                      location=publication_dir)
def test_analysis_dir_path(self):
    """AutoProcess: stored analysis dir path is absolute and normalised

    The same analysis directory is supplied as an absolute path, a
    relative path, and unnormalised ('..'-containing) versions of
    both; in every case 'analysis_dir' should equal the plain
    absolute path.
    """
    run_name = '160621_M00879_0087_000000000-AGEW9'
    # Mock Illumina run directory
    mock_illumina_run = MockIlluminaRun(run_name,
                                        'miseq',
                                        top_dir=self.dirn)
    mock_illumina_run.create()
    # An AutoProcess instance without a directory has no path
    ap = AutoProcess()
    self.assertEqual(ap.analysis_dir, None)
    # Mock analysis directory
    mockdir = MockAnalysisDirFactory.bcl2fastq2(run_name,
                                                'miseq',
                                                top_dir=self.dirn)
    mockdir.create()
    # Different ways of referring to the same directory
    rel_path = "160621_M00879_0087_000000000-AGEW9_analysis"
    abs_path = os.path.join(self.dirn, rel_path)
    rel_unnormalised = os.path.join("..",
                                    os.path.basename(self.dirn),
                                    rel_path)
    abs_unnormalised = os.path.join(self.dirn, rel_unnormalised)
    # Each form should be stored as the absolute normalised path
    for path in (abs_path,
                 rel_path,
                 abs_unnormalised,
                 rel_unnormalised):
        ap = AutoProcess(analysis_dir=path)
        self.assertEqual(ap.analysis_dir, abs_path)
def test_publish_qc_missing_destination(self):
    """publish_qc: exception raised when destination is missing

    The publication directory is deliberately not created; the test
    also checks that it still doesn't exist after the failed publish.
    """
    # Mock auto-process directory
    mockdir = MockAnalysisDirFactory.bcl2fastq2(
        '160621_K00879_0087_000000000-AGEW9',
        'hiseq',
        metadata={"run_number": 87,
                  "source": "local",
                  "instrument_datestamp": "160621"},
        top_dir=self.dirn)
    mockdir.create()
    ap = AutoProcess(mockdir.dirn, settings=self.settings)
    # Processing report and QC outputs for every project
    UpdateAnalysisDir(ap).add_processing_report()
    for project in ap.get_analysis_projects():
        UpdateAnalysisProject(project).add_qc_outputs()
    # Publication area which hasn't been created
    publication_dir = os.path.join(self.dirn, 'QC')
    self.assertFalse(os.path.exists(publication_dir))
    # Publishing should fail and leave the destination absent
    self.assertRaises(Exception,
                      publish_qc,
                      ap,
                      location=publication_dir)
    self.assertFalse(os.path.exists(publication_dir))
def test_casava_new_output_dir(self):
    """
    AutoProcess.merge_fastq_dirs: casava/bcl2fastq v1.8.* output, new output dir

    Merges the per-lane unaligned directories into a new 'bcl2fastq'
    directory, then checks that the originals were moved aside with
    a 'save.' prefix, that all Fastqs are in the merged directory,
    and that 'projects.info' lists both projects.
    """
    analysis_dir = self._setup_casava()
    # Merge the unaligned dirs
    self.ap = AutoProcess(analysis_dir)
    self.ap.merge_fastq_dirs("bcl2fastq.lanes1-2", output_dir="bcl2fastq")
    # Check outputs
    self._assert_dir_exists(
        os.path.join(analysis_dir, 'save.bcl2fastq.lanes1-2'))
    self._assert_dir_exists(
        os.path.join(analysis_dir, 'save.bcl2fastq.lanes3-4'))
    self._assert_dir_exists(os.path.join(analysis_dir, 'bcl2fastq'))
    self._assert_dir_doesnt_exist(
        os.path.join(analysis_dir, 'bcl2fastq.lanes1-2'))
    self._assert_dir_doesnt_exist(
        os.path.join(analysis_dir, 'bcl2fastq.lanes3-4'))
    for f in (
            'Project_AB/Sample_AB1/AB1_GCCAAT_L001_R1_001.fastq.gz',
            'Project_AB/Sample_AB1/AB1_GCCAAT_L001_R2_001.fastq.gz',
            'Project_AB/Sample_AB2/AB2_AGTCAA_L001_R1_001.fastq.gz',
            'Project_AB/Sample_AB2/AB2_AGTCAA_L001_R2_001.fastq.gz',
            'Project_AB/Sample_AB1/AB1_GCCAAT_L002_R1_001.fastq.gz',
            'Project_AB/Sample_AB1/AB1_GCCAAT_L002_R2_001.fastq.gz',
            'Project_AB/Sample_AB2/AB2_AGTCAA_L002_R1_001.fastq.gz',
            'Project_AB/Sample_AB2/AB2_AGTCAA_L002_R2_001.fastq.gz',
            'Project_CDE/Sample_CDE3/CDE3_GCCAAT_L003_R1_001.fastq.gz',
            'Project_CDE/Sample_CDE3/CDE3_GCCAAT_L003_R2_001.fastq.gz',
            'Project_CDE/Sample_CDE4/CDE4_AGTCAA_L003_R1_001.fastq.gz',
            'Project_CDE/Sample_CDE4/CDE4_AGTCAA_L003_R2_001.fastq.gz',
            'Project_CDE/Sample_CDE3/CDE3_GCCAAT_L004_R1_001.fastq.gz',
            'Project_CDE/Sample_CDE3/CDE3_GCCAAT_L004_R2_001.fastq.gz',
            'Project_CDE/Sample_CDE4/CDE4_AGTCAA_L004_R1_001.fastq.gz',
            'Project_CDE/Sample_CDE4/CDE4_AGTCAA_L004_R2_001.fastq.gz',
            'Undetermined_indices/Sample_lane1/lane1_Undetermined_L001_R1_001.fastq.gz',
            'Undetermined_indices/Sample_lane1/lane1_Undetermined_L001_R2_001.fastq.gz',
            'Undetermined_indices/Sample_lane2/lane2_Undetermined_L002_R1_001.fastq.gz',
            'Undetermined_indices/Sample_lane2/lane2_Undetermined_L002_R2_001.fastq.gz',
            'Undetermined_indices/Sample_lane3/lane3_Undetermined_L003_R1_001.fastq.gz',
            'Undetermined_indices/Sample_lane3/lane3_Undetermined_L003_R2_001.fastq.gz',
            'Undetermined_indices/Sample_lane4/lane4_Undetermined_L004_R1_001.fastq.gz',
            'Undetermined_indices/Sample_lane4/lane4_Undetermined_L004_R2_001.fastq.gz'):
        self._assert_file_exists(os.path.join(analysis_dir, 'bcl2fastq', f))
    # Check projects.info files
    self._assert_file_exists(
        os.path.join(analysis_dir, 'save.projects.info'))
    self._assert_file_exists(os.path.join(analysis_dir, 'projects.info'))
    # Read via a context manager so the file handle is always closed
    with open(os.path.join(analysis_dir, 'projects.info'), 'r') as fp:
        projects_info = fp.read()
    # Delimiters are written as explicit escapes so that they can't be
    # silently altered by editors or reformatting
    # NOTE(review): assumes projects.info is tab-delimited with one
    # project per line -- confirm against the code that writes it
    expected = ("#Project\tSamples\tUser\tLibrary\tOrganism\tPI\tComments\n"
                "AB\tAB1,AB2\t.\t.\t.\t.\t.\n"
                "CDE\tCDE3,CDE4\t.\t.\t.\t.\t.\n")
    self.assertEqual(projects_info, expected)
def test_publish_qc_subset_of_projects(self):
    """publish_qc: publish only the projects matching a pattern

    QC outputs are generated for every project, but publishing is
    restricted with projects="AB*". The first project's QC should be
    published and the QC for the remaining projects should not.
    """
    published_name = "160621_K00879_0087_000000000-AGEW9_analysis"
    # Mock auto-process directory
    mockdir = MockAnalysisDirFactory.bcl2fastq2(
        '160621_K00879_0087_000000000-AGEW9',
        'hiseq',
        metadata={"run_number": 87,
                  "source": "local",
                  "instrument_datestamp": "160621"},
        top_dir=self.dirn)
    mockdir.create()
    ap = AutoProcess(mockdir.dirn)
    # Processing report
    UpdateAnalysisDir(ap).add_processing_report()
    # Split the projects into those expected to be published (the
    # first one) and those expected to be left out (the rest)
    all_projects = ap.get_analysis_projects()
    missing_projects = all_projects[1:]
    projects = all_projects[0:1]
    # QC outputs are added to all of the projects
    for project in ap.get_analysis_projects():
        UpdateAnalysisProject(project).add_qc_outputs()
    # Mock publication area
    publication_dir = os.path.join(self.dirn, 'QC')
    os.mkdir(publication_dir)
    # Publish only the projects matching the pattern
    publish_qc(ap, location=publication_dir, projects="AB*")
    # Outputs expected for the published subset
    outputs = ["index.html", "processing_qc.html"]
    for project in projects:
        # Standard QC outputs followed by the MultiQC report
        project_qc = "qc_report.%s.%s" % (
            project.name, os.path.basename(ap.analysis_dir))
        outputs.extend([
            project_qc,
            "%s.zip" % project_qc,
            os.path.join(project_qc, "qc_report.html"),
            os.path.join(project_qc, "qc"),
            "multiqc_report.%s.html" % project.name,
        ])
    for item in outputs:
        f = os.path.join(publication_dir, published_name, item)
        self.assertTrue(os.path.exists(f), "Missing %s" % f)
    # QC for the other projects shouldn't have been copied
    for project in missing_projects:
        unwanted = os.path.join(
            publication_dir,
            published_name,
            "qc_report.%s.%s" % (project.name,
                                 os.path.basename(ap.analysis_dir)))
        self.assertFalse(os.path.exists(unwanted),
                         "%s exists in final dir, but shouldn't" %
                         project.name)
def test_make_fastqs_specify_platform_via_metadata(self):
    """make_fastqs: platform supplied implicitly through the metadata

    The run is named with the instrument ID 'UNKNOWN', so the
    platform metadata item starts out unset; it is set to 'miseq'
    by hand before 'make_fastqs' is run with the standard protocol.
    """
    run_name = "171020_UNKNOWN_00002_AHGXXXX"
    # Mock source data
    MockIlluminaRun(run_name, "miseq", top_dir=self.wd).create()
    # Mock bcl2fastq at the front of the PATH
    MockBcl2fastq2Exe.create(os.path.join(self.bin, "bcl2fastq"),
                             platform="miseq")
    os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
    # Set up the analysis directory
    ap = AutoProcess(settings=self.settings)
    ap.setup(os.path.join(self.wd, run_name))
    self.assertTrue(ap.params.sample_sheet is not None)
    # Platform isn't set initially; set it explicitly
    self.assertTrue(ap.metadata.platform is None)
    ap.metadata["platform"] = "miseq"
    # Starting state of the parameters
    self.assertTrue(ap.params.sample_sheet is not None)
    self.assertEqual(ap.params.bases_mask, "auto")
    self.assertTrue(ap.params.primary_data_dir is None)
    self.assertFalse(ap.params.acquired_primary_data)
    # Generate the Fastqs
    make_fastqs(ap, protocol="standard")
    # Parameters after Fastq generation
    analysis_dir = os.path.join(self.wd,
                                "171020_UNKNOWN_00002_AHGXXXX_analysis")
    self.assertEqual(ap.params.bases_mask, "auto")
    self.assertEqual(ap.params.primary_data_dir,
                     os.path.join(analysis_dir, "primary_data"))
    self.assertTrue(ap.params.acquired_primary_data)
    # Expected subdirectories
    expected_subdirs = (
        os.path.join("primary_data", run_name),
        os.path.join("logs", "002_make_fastqs"),
        "bcl2fastq",
        "barcode_analysis",
    )
    for subdir in expected_subdirs:
        self.assertTrue(os.path.isdir(os.path.join(analysis_dir, subdir)),
                        "Missing subdir: %s" % subdir)
    # Expected files
    expected_files = (
        "statistics.info",
        "statistics_full.info",
        "per_lane_statistics.info",
        "per_lane_sample_stats.info",
        "projects.info",
        "processing_qc.html",
    )
    for filen in expected_files:
        self.assertTrue(os.path.isfile(os.path.join(analysis_dir, filen)),
                        "Missing file: %s" % filen)
def test_clone_analysis_dir_copy_fastqs(self):
    """
    clone: copies an analysis directory (with 'copy_fastqs' on)

    Checks the clone for the expected subdirectories, metadata and
    statistics files, the unaligned directory, the primary data
    link, the project directories, and that the stored parameters
    point into the clone.
    """
    run_name = "190116_M01234_0002_AXYZ123"
    # Source analysis directory
    analysis_dir = MockAnalysisDirFactory.bcl2fastq2(
        run_name,
        platform="miseq",
        paired_end=True,
        no_lane_splitting=False,
        include_stats_files=True,
        top_dir=self.dirn)
    analysis_dir.create()
    ap = AutoProcess(analysis_dir.dirn)
    UpdateAnalysisDir(ap).add_processing_report()
    ap.add_directory("primary_data/190116_M01234_0002_AXYZ123")
    # Clone it to a location that doesn't exist yet
    clone_dir = os.path.join(self.dirn, "190116_M01234_0002_AXYZ123_copy")
    self.assertFalse(os.path.exists(clone_dir))
    clone(ap, clone_dir, copy_fastqs=True)
    self.assertTrue(os.path.isdir(clone_dir))
    # Subdirectories
    for subdir in ('logs', 'ScriptCode'):
        self.assertTrue(os.path.isdir(os.path.join(clone_dir, subdir)),
                        "Missing '%s'" % subdir)
    # Top-level files
    expected_files = (
        'SampleSheet.orig.csv',
        'custom_SampleSheet.csv',
        'auto_process.info',
        'metadata.info',
        'statistics.info',
        'statistics_full.info',
        'per_lane_statistics.info',
        'per_lane_sample_stats.info',
        'processing_qc.html',
    )
    for filen in expected_files:
        self.assertTrue(os.path.isfile(os.path.join(clone_dir, filen)),
                        "Missing '%s'" % filen)
    # Unaligned directory
    self.assertTrue(os.path.isdir(os.path.join(clone_dir, 'bcl2fastq')))
    # Primary data should be a link
    self.assertTrue(os.path.islink(
        os.path.join(clone_dir, 'primary_data', run_name)))
    # Project directories
    for proj in ('AB', 'CDE', 'undetermined'):
        self.assertTrue(os.path.isdir(os.path.join(clone_dir, proj)),
                        "Missing '%s'" % proj)
    # Stored parameters should reference the clone
    params = AnalysisDirParameters(
        filen=os.path.join(clone_dir, 'auto_process.info'))
    self.assertEqual(params.sample_sheet,
                     os.path.join(clone_dir, "custom_SampleSheet.csv"))
    self.assertEqual(params.primary_data_dir,
                     os.path.join(clone_dir, "primary_data"))
def test_publish_qc_with_icell8_outputs(self):
    """publish_qc: project that also has ICell8 outputs

    ICell8 outputs are added to the first project; after publishing,
    both the standard QC for every project and the ICell8 processing
    outputs for that project should be in the publication area.
    """
    # Mock auto-process directory
    mockdir = MockAnalysisDirFactory.bcl2fastq2(
        '160621_K00879_0087_000000000-AGEW9',
        'hiseq',
        metadata={"run_number": 87,
                  "source": "local",
                  "instrument_datestamp": "160621"},
        top_dir=self.dirn)
    mockdir.create()
    ap = AutoProcess(mockdir.dirn)
    # Processing report and QC outputs for every project
    UpdateAnalysisDir(ap).add_processing_report()
    projects = ap.get_analysis_projects()
    for project in projects:
        UpdateAnalysisProject(project).add_qc_outputs()
    # ICell8 report for the first project only
    icell8_project = projects[0]
    UpdateAnalysisProject(icell8_project).add_icell8_outputs()
    # Mock publication area
    publication_dir = os.path.join(self.dirn, 'QC')
    os.mkdir(publication_dir)
    # Publish
    publish_qc(ap, location=publication_dir)
    # Collect the expected outputs
    outputs = ["index.html", "processing_qc.html"]
    for project in ap.get_analysis_projects():
        # Standard QC outputs followed by the MultiQC report
        project_qc = "qc_report.%s.%s" % (
            project.name, os.path.basename(ap.analysis_dir))
        outputs.extend([
            project_qc,
            "%s.zip" % project_qc,
            os.path.join(project_qc, "qc_report.html"),
            os.path.join(project_qc, "qc"),
            "multiqc_report.%s.html" % project.name,
        ])
    # ICell8 outputs
    icell8_dir = "icell8_processing.%s.%s" % (
        icell8_project.name, os.path.basename(ap.analysis_dir))
    outputs.extend([
        icell8_dir,
        "%s.zip" % icell8_dir,
        os.path.join(icell8_dir, "icell8_processing_data"),
        os.path.join(icell8_dir, "icell8_processing.html"),
        os.path.join(icell8_dir, "stats"),
    ])
    # Everything should exist under the published analysis directory
    for item in outputs:
        f = os.path.join(publication_dir,
                         "160621_K00879_0087_000000000-AGEW9_analysis",
                         item)
        self.assertTrue(os.path.exists(f), "Missing %s" % f)
def test_bcl2fastq2_new_output_dir(self):
    """
    AutoProcess.merge_fastq_dirs: bcl2fastq v2 output, new output dir

    Merges the per-lane unaligned directories into a new 'bcl2fastq'
    directory, then checks that the originals were moved aside with
    a 'save.' prefix, that all Fastqs are in the merged directory,
    and that 'projects.info' lists both projects.
    """
    analysis_dir = self._setup_bcl2fastq2()
    # Merge the unaligned dirs
    self.ap = AutoProcess(analysis_dir)
    self.ap.merge_fastq_dirs("bcl2fastq.lanes1-2", output_dir="bcl2fastq")
    # Check outputs
    self._assert_dir_exists(
        os.path.join(analysis_dir, 'save.bcl2fastq.lanes1-2'))
    self._assert_dir_exists(
        os.path.join(analysis_dir, 'save.bcl2fastq.lanes3-4'))
    self._assert_dir_exists(os.path.join(analysis_dir, 'bcl2fastq'))
    self._assert_dir_doesnt_exist(
        os.path.join(analysis_dir, 'bcl2fastq.lanes1-2'))
    self._assert_dir_doesnt_exist(
        os.path.join(analysis_dir, 'bcl2fastq.lanes3-4'))
    for f in ('AB/AB1_S1_L001_R1_001.fastq.gz',
              'AB/AB1_S1_L001_R2_001.fastq.gz',
              'AB/AB2_S2_L001_R1_001.fastq.gz',
              'AB/AB2_S2_L001_R2_001.fastq.gz',
              'AB/AB1_S1_L002_R1_001.fastq.gz',
              'AB/AB1_S1_L002_R2_001.fastq.gz',
              'AB/AB2_S2_L002_R1_001.fastq.gz',
              'AB/AB2_S2_L002_R2_001.fastq.gz',
              'CDE/CDE3_S3_L003_R1_001.fastq.gz',
              'CDE/CDE3_S3_L003_R2_001.fastq.gz',
              'CDE/CDE4_S4_L003_R1_001.fastq.gz',
              'CDE/CDE4_S4_L003_R2_001.fastq.gz',
              'CDE/CDE3_S3_L004_R1_001.fastq.gz',
              'CDE/CDE3_S3_L004_R2_001.fastq.gz',
              'CDE/CDE4_S4_L004_R1_001.fastq.gz',
              'CDE/CDE4_S4_L004_R2_001.fastq.gz',
              'Undetermined_S0_L001_R1_001.fastq.gz',
              'Undetermined_S0_L001_R2_001.fastq.gz',
              'Undetermined_S0_L002_R1_001.fastq.gz',
              'Undetermined_S0_L002_R2_001.fastq.gz',
              'Undetermined_S0_L003_R1_001.fastq.gz',
              'Undetermined_S0_L003_R2_001.fastq.gz',
              'Undetermined_S0_L004_R1_001.fastq.gz',
              'Undetermined_S0_L004_R2_001.fastq.gz'):
        self._assert_file_exists(os.path.join(analysis_dir, 'bcl2fastq', f))
    # Check projects.info files
    self._assert_file_exists(
        os.path.join(analysis_dir, 'save.projects.info'))
    self._assert_file_exists(os.path.join(analysis_dir, 'projects.info'))
    # Read via a context manager so the file handle is always closed
    with open(os.path.join(analysis_dir, 'projects.info'), 'r') as fp:
        projects_info = fp.read()
    # Delimiters are written as explicit escapes so that they can't be
    # silently altered by editors or reformatting
    # NOTE(review): assumes projects.info is tab-delimited with one
    # project per line -- confirm against the code that writes it
    expected = ("#Project\tSamples\tUser\tLibrary\tOrganism\tPI\tComments\n"
                "AB\tAB1,AB2\t.\t.\t.\t.\t.\n"
                "CDE\tCDE3,CDE4\t.\t.\t.\t.\t.\n")
    self.assertEqual(projects_info, expected)
def test_make_fastqs_icell8_protocol(self):
    """make_fastqs: Fastq generation using the icell8 protocol

    The mock bcl2fastq is created with an assertion on the bases
    mask it is given ("y25n76,I8,I8,y101"), while the stored
    'bases_mask' parameter is expected to remain "auto".
    """
    run_name = "171020_SN7001250_00002_AHGXXXX"
    # Mock source data
    MockIlluminaRun(run_name, "hiseq", top_dir=self.wd).create()
    # Mock bcl2fastq which checks the bases mask passed to it
    MockBcl2fastq2Exe.create(os.path.join(self.bin, "bcl2fastq"),
                             assert_bases_mask="y25n76,I8,I8,y101")
    os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
    # Set up the analysis directory and check the starting state
    ap = AutoProcess(settings=self.settings)
    ap.setup(os.path.join(self.wd, run_name))
    self.assertTrue(ap.params.sample_sheet is not None)
    self.assertEqual(ap.params.bases_mask, "auto")
    self.assertTrue(ap.params.primary_data_dir is None)
    self.assertFalse(ap.params.acquired_primary_data)
    # Generate the Fastqs
    make_fastqs(ap, protocol="icell8")
    # Parameters after Fastq generation
    analysis_dir = os.path.join(self.wd,
                                "171020_SN7001250_00002_AHGXXXX_analysis")
    self.assertEqual(ap.params.bases_mask, "auto")
    self.assertEqual(ap.params.primary_data_dir,
                     os.path.join(analysis_dir, "primary_data"))
    self.assertTrue(ap.params.acquired_primary_data)
    # Expected subdirectories
    expected_subdirs = (
        os.path.join("primary_data", run_name),
        os.path.join("logs", "002_make_fastqs_icell8"),
        "bcl2fastq",
        "barcode_analysis",
    )
    for subdir in expected_subdirs:
        self.assertTrue(os.path.isdir(os.path.join(analysis_dir, subdir)),
                        "Missing subdir: %s" % subdir)
    # Expected files
    expected_files = (
        "statistics.info",
        "statistics_full.info",
        "per_lane_statistics.info",
        "per_lane_sample_stats.info",
        "projects.info",
        "processing_qc.html",
    )
    for filen in expected_files:
        self.assertTrue(os.path.isfile(os.path.join(analysis_dir, filen)),
                        "Missing file: %s" % filen)
def test_bcl2fastq2_no_lane_splitting_new_output_dir(self):
    """
    AutoProcess.merge_fastq_dirs: bcl2fastq v2 output with --no-lane-splitting, new output dir

    Merges the per-project unaligned directories into a new
    'bcl2fastq' directory, then checks that the originals were moved
    aside with a 'save.' prefix, that the Fastqs are in the merged
    directory, the content of the merged undetermined Fastqs, and
    that 'projects.info' lists both projects.
    """
    analysis_dir = self._setup_bcl2fastq2_no_lane_splitting()
    # Merge the unaligned dirs
    self.ap = AutoProcess(analysis_dir)
    self.ap.merge_fastq_dirs("bcl2fastq.AB", output_dir="bcl2fastq")
    # Check outputs
    self._assert_dir_exists(os.path.join(analysis_dir, 'save.bcl2fastq.AB'))
    self._assert_dir_exists(
        os.path.join(analysis_dir, 'save.bcl2fastq.CDE'))
    self._assert_dir_exists(os.path.join(analysis_dir, 'bcl2fastq'))
    self._assert_dir_doesnt_exist(
        os.path.join(analysis_dir, 'bcl2fastq.AB'))
    self._assert_dir_doesnt_exist(
        os.path.join(analysis_dir, 'bcl2fastq.CDE'))
    for f in ('AB/AB1_S1_R1_001.fastq.gz',
              'AB/AB1_S1_R2_001.fastq.gz',
              'AB/AB2_S2_R1_001.fastq.gz',
              'AB/AB2_S2_R2_001.fastq.gz',
              'CDE/CDE3_S3_R1_001.fastq.gz',
              'CDE/CDE3_S3_R2_001.fastq.gz',
              'CDE/CDE4_S4_R1_001.fastq.gz',
              'CDE/CDE4_S4_R2_001.fastq.gz',
              'Undetermined_S0_R1_001.fastq.gz',
              'Undetermined_S0_R2_001.fastq.gz',):
        self._assert_file_exists(os.path.join(analysis_dir, 'bcl2fastq', f))
    # Check merge of undetermined fastqs
    # (context managers ensure the gzip handles are closed even if
    # the read fails)
    with gzip.GzipFile(
            os.path.join(analysis_dir, 'bcl2fastq',
                         'Undetermined_S0_R1_001.fastq.gz'),
            'rb') as fp:
        undetermined_r1 = fp.read()
    expected_r1 = '\n'.join(fastq_reads_r1[:8]) + '\n'
    self.assertEqual(undetermined_r1, expected_r1)
    with gzip.GzipFile(
            os.path.join(analysis_dir, 'bcl2fastq',
                         'Undetermined_S0_R2_001.fastq.gz'),
            'rb') as fp:
        undetermined_r2 = fp.read()
    expected_r2 = '\n'.join(fastq_reads_r2[:8]) + '\n'
    self.assertEqual(undetermined_r2, expected_r2)
    # Check projects.info files
    self._assert_file_exists(
        os.path.join(analysis_dir, 'save.projects.info'))
    self._assert_file_exists(os.path.join(analysis_dir, 'projects.info'))
    with open(os.path.join(analysis_dir, 'projects.info'), 'r') as fp:
        projects_info = fp.read()
    # Delimiters are written as explicit escapes so that they can't be
    # silently altered by editors or reformatting
    # NOTE(review): assumes projects.info is tab-delimited with one
    # project per line -- confirm against the code that writes it
    expected = ("#Project\tSamples\tUser\tLibrary\tOrganism\tPI\tComments\n"
                "AB\tAB1,AB2\t.\t.\t.\t.\t.\n"
                "CDE\tCDE3,CDE4\t.\t.\t.\t.\t.\n")
    self.assertEqual(projects_info, expected)
def test_publish_qc_with_projects_no_reports(self):
    """publish_qc: projects have QC outputs but the reports are missing

    The QC report, its ZIP archive and the MultiQC report are removed
    from each project before publishing, and a mock 'multiqc' is put
    on the PATH. The QC reports should still be present in the
    publication area afterwards.
    """
    # Mock auto-process directory
    mockdir = MockAnalysisDirFactory.bcl2fastq2(
        '160621_K00879_0087_000000000-AGEW9',
        'hiseq',
        metadata={"run_number": 87,
                  "source": "local",
                  "instrument_datestamp": "160621"},
        top_dir=self.dirn)
    mockdir.create()
    ap = AutoProcess(mockdir.dirn, settings=self.settings)
    # Processing report and QC outputs for every project
    UpdateAnalysisDir(ap).add_processing_report()
    for project in ap.get_analysis_projects():
        UpdateAnalysisProject(project).add_qc_outputs()
    # Delete the report files from each project
    for project in ap.get_analysis_projects():
        qc_reports = [
            "qc_report.%s.%s.zip" % (project.name,
                                     os.path.basename(ap.analysis_dir)),
            "qc_report.html",
            "multiqc_report.html",
        ]
        for f in qc_reports:
            os.remove(os.path.join(project.dirn, f))
    # Mock multiqc at the front of the PATH
    MockMultiQC.create(os.path.join(self.bin, "multiqc"))
    os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
    # Mock publication area
    publication_dir = os.path.join(self.dirn, 'QC')
    os.mkdir(publication_dir)
    # Publish
    publish_qc(ap, location=publication_dir)
    # Collect the expected outputs
    outputs = ["index.html", "processing_qc.html"]
    for project in ap.get_analysis_projects():
        # Standard QC outputs
        project_qc = "qc_report.%s.%s" % (
            project.name, os.path.basename(ap.analysis_dir))
        outputs.extend([
            project_qc,
            "%s.zip" % project_qc,
            os.path.join(project_qc, "qc_report.html"),
            os.path.join(project_qc, "qc"),
        ])
    # Everything should exist under the published analysis directory
    for item in outputs:
        f = os.path.join(publication_dir,
                         "160621_K00879_0087_000000000-AGEW9_analysis",
                         item)
        self.assertTrue(os.path.exists(f), "Missing %s" % f)
def test_make_fastqs_10x_chromium_sc_protocol(self):
    """make_fastqs: Fastq generation using the 10x_chromium_sc protocol

    Mock 'bcl2fastq' and 'cellranger' executables are put on the
    PATH; in addition to the usual outputs the test expects an
    'HGXXXX' subdirectory and a 'cellranger_qc_summary.html' file.
    """
    run_name = "171020_SN7001250_00002_AHGXXXX"
    # Mock source data
    MockIlluminaRun(run_name, "hiseq", top_dir=self.wd).create()
    # Mock bcl2fastq and cellranger executables
    for exe_factory, exe_name in ((MockBcl2fastq2Exe, "bcl2fastq"),
                                  (MockCellrangerExe, "cellranger")):
        exe_factory.create(os.path.join(self.bin, exe_name))
    os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
    # Set up the analysis directory and check the starting state
    ap = AutoProcess(settings=self.settings)
    ap.setup(os.path.join(self.wd, run_name))
    self.assertTrue(ap.params.sample_sheet is not None)
    self.assertEqual(ap.params.bases_mask, "auto")
    self.assertTrue(ap.params.primary_data_dir is None)
    self.assertFalse(ap.params.acquired_primary_data)
    # Generate the Fastqs
    make_fastqs(ap, protocol="10x_chromium_sc")
    # Parameters after Fastq generation
    analysis_dir = os.path.join(self.wd,
                                "171020_SN7001250_00002_AHGXXXX_analysis")
    self.assertEqual(ap.params.primary_data_dir,
                     os.path.join(analysis_dir, "primary_data"))
    self.assertTrue(ap.params.acquired_primary_data)
    # Expected subdirectories
    expected_subdirs = (
        os.path.join("primary_data", run_name),
        os.path.join("logs", "002_make_fastqs_10x_chromium_sc"),
        "bcl2fastq",
        "HGXXXX",
    )
    for subdir in expected_subdirs:
        self.assertTrue(os.path.isdir(os.path.join(analysis_dir, subdir)),
                        "Missing subdir: %s" % subdir)
    # Expected files
    expected_files = (
        "statistics.info",
        "statistics_full.info",
        "per_lane_statistics.info",
        "per_lane_sample_stats.info",
        "projects.info",
        "processing_qc.html",
        "cellranger_qc_summary.html",
    )
    for filen in expected_files:
        self.assertTrue(os.path.isfile(os.path.join(analysis_dir, filen)),
                        "Missing file: %s" % filen)
def test_import_project_with_qc(self):
    """import_project: check project with QC outputs is imported

    QC outputs (without a MultiQC report) are added to the project
    before it is imported, and a mock 'multiqc' is put on the PATH.
    After the import the project should be listed and present on
    disk, and should contain the QC report, the MultiQC report and
    the QC ZIP archive.
    """
    # Make mock multiqc
    MockMultiQC.create(os.path.join(self.bin, "multiqc"))
    os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
    # Make an auto-process directory
    mockdir = MockAnalysisDirFactory.bcl2fastq2(
        '160621_M00879_0087_000000000-AGEW9',
        'miseq',
        top_dir=self.dirn)
    mockdir.create()
    # Add QC outputs to the project to be imported
    UpdateAnalysisProject(
        AnalysisProject('NewProj', self.new_project_dir)).add_qc_outputs(
            include_multiqc=False)
    # Check that the project is not currently present
    ap = AutoProcess(mockdir.dirn)
    self.assertNotIn('NewProj',
                     [p.name for p in ap.get_analysis_projects()])
    self.assertNotIn('NewProj',
                     [p.name for p in ap.get_analysis_projects_from_dirs()])
    self.assertFalse(
        os.path.exists(os.path.join(ap.analysis_dir, 'NewProj')))
    # Import the project
    import_project(ap, self.new_project_dir)
    self.assertIn('NewProj',
                  [p.name for p in ap.get_analysis_projects()])
    self.assertIn('NewProj',
                  [p.name for p in ap.get_analysis_projects_from_dirs()])
    self.assertTrue(
        os.path.exists(os.path.join(ap.analysis_dir, 'NewProj')))
    # Verify via fresh AutoProcess object
    ap2 = AutoProcess(mockdir.dirn)
    self.assertIn('NewProj',
                  [p.name for p in ap2.get_analysis_projects()])
    self.assertIn('NewProj',
                  [p.name for p in ap2.get_analysis_projects_from_dirs()])
    self.assertTrue(
        os.path.exists(os.path.join(ap2.analysis_dir, 'NewProj')))
    # Check for QC report and ZIP file
    for f in ("qc_report.html",
              "multiqc_report.html",
              "qc_report.NewProj.160621_M00879_0087_000000000-AGEW9.zip",):
        f = os.path.join(ap2.analysis_dir, 'NewProj', f)
        self.assertTrue(os.path.exists(f), "Missing %s" % f)
def test_make_fastqs_standard_protocol_stores_bases_mask(self):
    """make_fastqs: explicitly supplied bases mask is stored (standard protocol)

    The 'bases_mask' parameter starts as "auto" and should be
    updated to the value passed to 'make_fastqs'.
    """
    run_name = "171020_M00879_00002_AHGXXXX"
    # Mock source data
    MockIlluminaRun(run_name, "miseq", top_dir=self.wd).create()
    # Mock bcl2fastq at the front of the PATH
    MockBcl2fastq2Exe.create(os.path.join(self.bin, "bcl2fastq"))
    os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
    # Set up the analysis directory and check the starting state
    ap = AutoProcess(settings=self.settings)
    ap.setup(os.path.join(self.wd, run_name))
    self.assertTrue(ap.params.sample_sheet is not None)
    self.assertEqual(ap.params.bases_mask, "auto")
    self.assertTrue(ap.params.primary_data_dir is None)
    # Generate the Fastqs with an explicit bases mask
    make_fastqs(ap,
                protocol="standard",
                bases_mask="y101,I8,I8,y101")
    # Parameters after Fastq generation
    analysis_dir = os.path.join(self.wd,
                                "171020_M00879_00002_AHGXXXX_analysis")
    self.assertEqual(ap.params.bases_mask, "y101,I8,I8,y101")
    self.assertEqual(ap.params.primary_data_dir,
                     os.path.join(analysis_dir, "primary_data"))
    # Expected subdirectories
    expected_subdirs = (
        os.path.join("primary_data", run_name),
        os.path.join("logs", "002_make_fastqs"),
        "bcl2fastq",
        "barcode_analysis",
    )
    for subdir in expected_subdirs:
        self.assertTrue(os.path.isdir(os.path.join(analysis_dir, subdir)),
                        "Missing subdir: %s" % subdir)
    # Expected files
    expected_files = (
        "statistics.info",
        "statistics_full.info",
        "per_lane_statistics.info",
        "per_lane_sample_stats.info",
        "projects.info",
        "processing_qc.html",
    )
    for filen in expected_files:
        self.assertTrue(os.path.isfile(os.path.join(analysis_dir, filen)),
                        "Missing file: %s" % filen)
def test_make_fastqs_samplesheet_with_invalid_characters(self):
    """make_fastqs: stop for invalid characters in sample sheet

    The mock run is given a sample sheet whose first sample has a
    backspace character in its description field; 'make_fastqs' is
    expected to raise an exception.
    """
    # Sample sheet content, one record per list item so that the line
    # structure is explicit and can't be lost through reformatting
    # NOTE(review): any blank lines between the sections in the
    # original sample sheet are not reproduced here -- confirm that
    # the sample sheet reader doesn't require them
    sample_sheet_lines = [
        "[Header],,,,,,,,,",
        "IEMFileVersion,4",
        "Date,11/23/2015",
        "Workflow,GenerateFASTQ",
        "Application,FASTQ Only",
        "Assay,TruSeq HT",
        "Description,",
        "Chemistry,Amplicon",
        "[Reads]",
        "101",
        "101",
        "[Settings]",
        "ReverseComplement,0",
        "Adapter,AGATCGGAAGAGCACACGTCTGAACTCCAGTCA",
        "AdapterRead2,AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT",
        "[Data]",
        "Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,"
        "index,I5_Index_ID,index2,Sample_Project,Description",
        # '\b' is a literal backspace: the invalid character under test
        "Sample1,Sample1,,,D701,CGTGTAGG,D501,GACCTGTC,,\b",
        "Sample2,Sample2,,,D702,CGTGTAGG,D501,ATGTAACT,,",
    ]
    # Create mock source data with samplesheet with backspace
    illumina_run = MockIlluminaRun(
        "171020_M00879_00002_AHGXXXX",
        "miseq",
        sample_sheet_content="\n".join(sample_sheet_lines) + "\n",
        top_dir=self.wd)
    illumina_run.create()
    # Create mock bcl2fastq
    MockBcl2fastq2Exe.create(os.path.join(self.bin, "bcl2fastq"),
                             platform="miseq")
    os.environ['PATH'] = "%s:%s" % (self.bin, os.environ['PATH'])
    # Do the test
    ap = AutoProcess(settings=self.settings)
    ap.setup(os.path.join(self.wd, "171020_M00879_00002_AHGXXXX"))
    self.assertTrue(ap.params.sample_sheet is not None)
    self.assertEqual(ap.params.bases_mask, "auto")
    self.assertTrue(ap.params.primary_data_dir is None)
    self.assertFalse(ap.params.acquired_primary_data)
    self.assertRaises(Exception, make_fastqs, ap)
def test_autoprocess_setup_existing_target_dir(self):
    """AutoProcess.setup works when target dir exists

    Creates a mock MiSeq run and a mock analysis directory for the
    same run name, then runs 'setup' on the run directory.
    """
    # NOTE(review): another test with this exact name is defined
    # later in this file; if both live in the same class the later
    # definition replaces this one.
    run_name = '160621_M00879_0087_000000000-AGEW9'
    # Mock Illumina run directory
    mock_illumina_run = MockIlluminaRun(run_name,
                                        'miseq',
                                        top_dir=self.dirn)
    mock_illumina_run.create()
    # Mock auto-process directory created ahead of setup
    mockdir = MockAnalysisDirFactory.bcl2fastq2(run_name,
                                                'miseq',
                                                top_dir=self.dirn)
    mockdir.create()
    # Run setup against the run directory
    ap = AutoProcess()
    ap.setup(mock_illumina_run.dirn)
    # NOTE(review): this path is relative to the current directory
    # and names the run directory, not the analysis directory;
    # confirm this is the intended check.
    self.assertTrue(os.path.isdir(run_name))
def test_with_project_dirs_no_projects_dot_info_no_unaligned(self):
    """AutoProcess.get_analysis_projects: project dirs exist (no projects.info, no unaligned)

    Creates a mock analysis directory with project directories, then
    removes both 'projects.info' and the 'bcl2fastq' output directory
    before listing the analysis projects.
    """
    # Local import keeps this block self-contained
    import shutil
    # Make an auto-process directory
    mockdir = MockAnalysisDirFactory.bcl2fastq2(
        '160621_K00879_0087_000000000-AGEW9',
        'hiseq',
        metadata={"run_number": 87,
                  "source": "local"},
        top_dir=self.dirn)
    mockdir.create()
    # Remove the projects.info file
    os.remove(os.path.join(mockdir.dirn, "projects.info"))
    # Remove the unaligned (bcl2fastq output) dir so the scenario
    # matches the test name; without this the test only covers the
    # "no projects.info" case.
    # NOTE(review): assumes 'bcl2fastq' is the unaligned dir for this
    # mock, as in the other tests in this file - confirm.
    shutil.rmtree(os.path.join(mockdir.dirn, "bcl2fastq"))
    # List the projects
    projects = AutoProcess(mockdir.dirn).get_analysis_projects()
    expected = ('AB', 'CDE', 'undetermined')
    self.assertEqual(len(projects), len(expected))
    for p in projects:
        self.assertIsInstance(p, AnalysisProject)
        self.assertIn(p.name, expected)
    # Each expected project must appear exactly once
    for p in expected:
        matched_projects = [x for x in projects if x.name == p]
        self.assertEqual(len(matched_projects), 1)
def test_bcl2fastq2_can_be_loaded_after_rsync(self):
    """merge_fastq_dirs: rsynced bcl2fastq v2 output can be loaded

    Merges the unaligned directories, loads the merged output, then
    copies it with rsync (empty directories pruned) and loads the
    copy as well.
    """
    analysis_dir = self._setup_bcl2fastq2()
    # Merge the unaligned dirs into a single 'bcl2fastq' directory
    self.ap = AutoProcess(analysis_dir, settings=self.settings)
    merge_fastq_dirs(self.ap,
                     "bcl2fastq.lanes1-2",
                     output_dir="bcl2fastq")
    # Merged directory must exist and be loadable
    self._assert_dir_exists(os.path.join(analysis_dir, 'bcl2fastq'))
    try:
        IlluminaData(analysis_dir, unaligned_dir='bcl2fastq')
    except Exception as ex:
        self.fail("exception loading merged directory: %s" % ex)
    # Copy the merged directory with rsync, pruning empty dirs
    target_dir = os.path.join(self.dirn, "rsynced")
    os.mkdir(target_dir)
    rsync_cmd = applications.general.rsync(
        "%s/bcl2fastq" % self.ap.analysis_dir,
        target_dir,
        prune_empty_dirs=True)
    rsync_cmd.run_subprocess(log=os.path.join(self.dirn, "rsync.log"))
    # Rsynced copy must exist and be loadable
    self._assert_dir_exists(os.path.join(target_dir, 'bcl2fastq'))
    try:
        IlluminaData(target_dir, unaligned_dir='bcl2fastq')
    except Exception as ex:
        self.fail("exception loading rsynced directory: %s" % ex)
def test_ignore_commented_projects(self):
    """AutoProcess.get_analysis_projects: ignore commented projects

    Rewrites 'projects.info' so that project 'AB' is commented out
    with a leading '#', then checks that only 'CDE' and
    'undetermined' are returned.
    """
    # Make an auto-process directory
    mockdir = MockAnalysisDirFactory.bcl2fastq2(
        '160621_K00879_0087_000000000-AGEW9',
        'hiseq',
        metadata={"run_number": 87,
                  "source": "local"},
        top_dir=self.dirn)
    mockdir.create()
    # Update the projects.info file. Records are joined with explicit
    # newlines so each project stays on its own line however this
    # source file is laid out.
    projects_info_lines = (
        "#Project\tSamples\tUser\tLibrary\tSC_Platform\tOrganism\tPI\tComments",
        "#AB\tAB1,AB2\tAlan Brown\tRNA-seq\t.\tHuman\tAudrey Benson\t1% PhiX",
        "CDE\tCDE3,CDE4\tClive David Edwards\tChIP-seq\t.\tMouse\tClaudia Divine Eccleston\t1% PhiX",
    )
    projects_info = os.path.join(mockdir.dirn, "projects.info")
    with open(projects_info, "w") as fp:
        fp.write("\n".join(projects_info_lines) + "\n")
    # List the projects
    projects = AutoProcess(mockdir.dirn).get_analysis_projects()
    expected = ('CDE', 'undetermined')
    self.assertEqual(len(projects), len(expected))
    for p in projects:
        self.assertIsInstance(p, AnalysisProject)
        self.assertIn(p.name, expected)
    # Each expected project must appear exactly once
    for p in expected:
        matched_projects = [x for x in projects if x.name == p]
        self.assertEqual(len(matched_projects), 1)
def test_update_project_metadata_file_uncomment_existing_project(self):
    """AutoProcess.update_project_metadata_file: existing project is uncommented

    Starts from a 'projects.info' that lists only project 'CDE',
    commented out, and checks that after the update both 'AB' and
    'CDE' are listed uncommented, with the existing 'CDE' comment
    text kept.
    """
    # Make an auto-process directory
    mockdir = MockAnalysisDirFactory.bcl2fastq2(
        '160621_K00879_0087_000000000-AGEW9',
        'hiseq',
        metadata={"run_number": 87,
                  "source": "local"},
        top_dir=self.dirn)
    mockdir.create(no_project_dirs=True)
    projects_info = os.path.join(mockdir.dirn, "projects.info")
    # Create projects.info file with one project listed but commented
    with open(projects_info, 'wt') as fp:
        fp.write(
            "#Project\tSamples\tUser\tLibrary\tSC_Platform\tOrganism\tPI\tComments\n#CDE\tCDE3,CDE4\t.\t.\t.\t.\t.\tKeep me"
        )
    # Update the projects.info file
    AutoProcess(mockdir.dirn).update_project_metadata_file()
    # Check output - the commented project is uncommented and keeps
    # its comment text. Expected content uses explicit newlines so
    # the comparison does not depend on source layout.
    expected_lines = (
        "#Project\tSamples\tUser\tLibrary\tSC_Platform\tOrganism\tPI\tComments",
        "AB\tAB1,AB2\t.\t.\t.\t.\t.\t.",
        "CDE\tCDE3,CDE4\t.\t.\t.\t.\t.\tKeep me",
    )
    with open(projects_info, 'rt') as fp:
        self.assertEqual(fp.read(),
                         "\n".join(expected_lines) + "\n")
def test_update_project_metadata_file_missing_from_bcl2fastq_output(self):
    """AutoProcess.update_project_metadata_file: make missing file and populate from bcl2fastq output

    Removes 'projects.info' from a mock analysis directory and checks
    that the update recreates it with one line per project and '.'
    placeholders for the metadata fields.
    """
    # Make an auto-process directory
    mockdir = MockAnalysisDirFactory.bcl2fastq2(
        '160621_K00879_0087_000000000-AGEW9',
        'hiseq',
        metadata={"run_number": 87,
                  "source": "local"},
        top_dir=self.dirn)
    mockdir.create(no_project_dirs=True)
    projects_info = os.path.join(mockdir.dirn, "projects.info")
    # Remove projects.info file
    os.remove(projects_info)
    # Update the projects.info file
    AutoProcess(mockdir.dirn).update_project_metadata_file()
    # Check output. Expected content uses explicit newlines so the
    # comparison does not depend on source layout.
    expected_lines = (
        "#Project\tSamples\tUser\tLibrary\tSC_Platform\tOrganism\tPI\tComments",
        "AB\tAB1,AB2\t.\t.\t.\t.\t.\t.",
        "CDE\tCDE3,CDE4\t.\t.\t.\t.\t.\t.",
    )
    with open(projects_info, 'rt') as fp:
        self.assertEqual(fp.read(),
                         "\n".join(expected_lines) + "\n")
def test_make_project_metadata_file_no_bcl2fastq_output(self):
    """AutoProcess.make_project_metadata_file: new 'projects.info' (no bcl2fastq output)

    Removes both 'projects.info' and the 'bcl2fastq' directory, then
    checks that the new 'projects.info' holds only the header line.
    """
    # Make an auto-process directory
    mockdir = MockAnalysisDirFactory.bcl2fastq2(
        '160621_K00879_0087_000000000-AGEW9',
        'hiseq',
        metadata={"run_number": 87,
                  "source": "local"},
        top_dir=self.dirn)
    mockdir.create(no_project_dirs=True)
    projects_info = os.path.join(mockdir.dirn, "projects.info")
    # Remove the projects.info file and the bcl2fastq output dir
    os.remove(projects_info)
    shutil.rmtree(os.path.join(mockdir.dirn, "bcl2fastq"))
    # Create a new projects.info file
    AutoProcess(mockdir.dirn).make_project_metadata_file()
    # Check outputs: file exists and holds only the header line,
    # ended by an explicit newline
    self.assertTrue(os.path.exists(projects_info))
    with open(projects_info, 'rt') as fp:
        self.assertEqual(
            fp.read(),
            "#Project\tSamples\tUser\tLibrary\tSC_Platform\tOrganism\tPI\tComments\n")
def test_update_fastq_stats(self):
    """update_fastq_stats: generates statistics files

    Checks that none of the statistics files exist in a fresh mock
    analysis directory, and that all of them exist after calling
    'update_fastq_stats'.
    """
    # Mock analysis directory without project dirs
    mockdir = MockAnalysisDirFactory.bcl2fastq2(
        '190104_M00879_0087_000000000-AGEW9',
        'miseq',
        metadata={"instrument_datestamp": "190104"},
        top_dir=self.wd)
    mockdir.create(no_project_dirs=True)
    # Files that the stats update is expected to produce
    stats_files = ("statistics.info",
                   "statistics_full.info",
                   "per_lane_statistics.info",
                   "per_lane_sample_stats.info",)
    # None of them should be present beforehand
    for filen in stats_files:
        stats_path = os.path.join(mockdir.dirn, filen)
        self.assertFalse(os.path.exists(stats_path),
                         "%s: file exists, but shouldn't" % filen)
    # Generate the stats
    ap = AutoProcess(mockdir.dirn)
    update_fastq_stats(ap)
    # All of them should be present afterwards
    for filen in stats_files:
        stats_path = os.path.join(mockdir.dirn, filen)
        self.assertTrue(os.path.exists(stats_path),
                        "%s: missing" % filen)
def test_publish_qc_processing_qc(self):
    """publish_qc: processing QC report only

    Adds a processing report to a mock analysis directory with no
    project dirs, publishes, and checks that the index and the
    processing QC report are in the publication area.
    """
    # Mock analysis directory without project dirs
    run_metadata = {"run_number": 87,
                    "source": "local",
                    "instrument_datestamp": "160621"}
    mockdir = MockAnalysisDirFactory.bcl2fastq2(
        '160621_K00879_0087_000000000-AGEW9',
        'hiseq',
        metadata=run_metadata,
        top_dir=self.dirn)
    mockdir.create(no_project_dirs=True)
    ap = AutoProcess(mockdir.dirn, settings=self.settings)
    # Processing report is the only QC artefact added
    UpdateAnalysisDir(ap).add_processing_report()
    # Mock publication area
    publication_dir = os.path.join(self.dirn, 'QC')
    os.mkdir(publication_dir)
    # Publish QC
    publish_qc(ap, location=publication_dir)
    # Published files are looked up under the run's subdirectory
    published_dir = os.path.join(
        publication_dir,
        "160621_K00879_0087_000000000-AGEW9_analysis")
    for item in ("index.html", "processing_qc.html"):
        f = os.path.join(published_dir, item)
        self.assertTrue(os.path.exists(f), "Missing %s" % f)
def test_publish_qc_with_cellranger_qc_multiple_lanes_subsets(self):
    """publish_qc: publish cellranger QC output (multiple subsets of lanes)

    Adds a processing report plus cellranger QC output for lane
    subsets "45" and "78", publishes, and checks that a summary for
    each subset is in the publication area.
    """
    # Mock analysis directory without project dirs
    run_metadata = {"run_number": 87,
                    "source": "local",
                    "instrument_datestamp": "160621"}
    mockdir = MockAnalysisDirFactory.bcl2fastq2(
        '160621_K00879_0087_000000000-AGEW9',
        'hiseq',
        metadata=run_metadata,
        top_dir=self.dirn)
    mockdir.create(no_project_dirs=True)
    ap = AutoProcess(mockdir.dirn, settings=self.settings)
    # Processing report, then cellranger QC for each lane subset
    UpdateAnalysisDir(ap).add_processing_report()
    UpdateAnalysisDir(ap).add_cellranger_qc_output(lanes="45")
    UpdateAnalysisDir(ap).add_cellranger_qc_output(lanes="78")
    # Mock publication area
    publication_dir = os.path.join(self.dirn, 'QC')
    os.mkdir(publication_dir)
    # Publish
    publish_qc(ap, location=publication_dir)
    # Published files are looked up under the run's subdirectory
    published_dir = os.path.join(
        publication_dir,
        "160621_K00879_0087_000000000-AGEW9_analysis")
    outputs = ["index.html",
               "processing_qc.html",
               "cellranger_qc_summary_45.html",
               "cellranger_qc_summary_78.html"]
    for item in outputs:
        f = os.path.join(published_dir, item)
        self.assertTrue(os.path.exists(f), "Missing %s" % f)
def test_autoprocess_setup_existing_target_dir(self):
    """AutoProcess.setup works when target dir exists

    Creates a mock MiSeq run and a mock analysis directory for the
    same run name, then runs 'setup' on the run directory.
    """
    # NOTE(review): another test with this exact name is defined
    # earlier in this file; if both live in the same class this
    # definition replaces the earlier one.
    run_name = '160621_M00879_0087_000000000-AGEW9'
    # Mock Illumina run directory
    mock_illumina_run = MockIlluminaRun(run_name,
                                        'miseq',
                                        top_dir=self.dirn)
    mock_illumina_run.create()
    # Mock auto-process directory created ahead of setup
    mockdir = MockAnalysisDirFactory.bcl2fastq2(run_name,
                                                'miseq',
                                                top_dir=self.dirn)
    mockdir.create()
    # Run setup against the run directory
    ap = AutoProcess()
    ap.setup(mock_illumina_run.dirn)
    # NOTE(review): this path is relative to the current directory
    # and names the run directory, not the analysis directory;
    # confirm this is the intended check.
    self.assertTrue(os.path.isdir(run_name))
def test_autoprocess_setup(self):
    """AutoProcess.setup works for mock MISeq run

    Runs 'setup' on a mock MiSeq run, checks the resulting
    parameters, then deletes the AutoProcess object and checks the
    analysis directory, its files and its subdirectories.
    """
    # Mock Illumina run directory
    mock_illumina_run = MockIlluminaRun(
        '151125_M00879_0001_000000000-ABCDE1',
        'miseq',
        top_dir=self.dirn)
    mock_illumina_run.create()
    # Set up autoprocessor on the mock run
    ap = AutoProcess()
    ap.setup(mock_illumina_run.dirn)
    analysis_dirn = "%s_analysis" % mock_illumina_run.name
    analysis_path = os.path.join(self.dirn, analysis_dirn)
    # Check parameters
    self.assertEqual(ap.analysis_dir, analysis_path)
    self.assertEqual(ap.params.data_dir, mock_illumina_run.dirn)
    self.assertEqual(ap.params.sample_sheet,
                     os.path.join(analysis_path,
                                  'custom_SampleSheet.csv'))
    self.assertEqual(ap.params.bases_mask, 'y101,I8,I8,y101')
    # Delete to force write of data to disk
    del ap
    # Analysis directory is checked relative to the current directory
    self.assertTrue(os.path.isdir(analysis_dirn))
    # Expected files
    expected_files = ('SampleSheet.orig.csv',
                      'custom_SampleSheet.csv',
                      'auto_process.info',
                      'metadata.info',)
    for filen in expected_files:
        file_path = os.path.join(analysis_dirn, filen)
        self.assertTrue(os.path.exists(file_path),
                        "Missing file: %s" % filen)
    # Expected subdirectories
    expected_subdirs = ('ScriptCode',
                        'logs',)
    for subdirn in expected_subdirs:
        subdir_path = os.path.join(analysis_dirn, subdirn)
        self.assertTrue(os.path.isdir(subdir_path),
                        "Missing subdir: %s" % subdirn)
key = key_value[:i] value = key_value[i+1:].strip("'").strip('"') print "Setting '%s' to '%s'" % (key,value) __settings.set(key,value) except ValueError: logging.error("Can't process '%s'" % options.key_value) # Save the updated settings to file __settings.save() else: # Report the current configuration settings __settings.report_settings() elif cmd == 'setup': if len(args) != 1: sys.stderr.write("Need to supply a data source location\n") sys.exit(1) d = AutoProcess() if options.fastq_dir is None: d.setup(args[0], analysis_dir=options.analysis_dir, sample_sheet=options.sample_sheet) else: d.setup_from_fastq_dir(args[0],options.fastq_dir) elif cmd == 'clone': if len(args) != 2: sys.stderr.write("Need to supply an existing analysis dir and " "directory for the copy\n") sys.exit(1) d = AutoProcess(args[0],allow_save_params=False) d.clone(args[1],copy_fastqs=options.copy_fastqs) elif cmd == 'import_project': if len(args) == 0 or len(args) > 2: