def test_analyse_barcodes_with_bcl2fastq_dir_and_bad_samplesheet(self):
    """
    AnalyseBarcodes: raise exception for bcl2fastq directory as input using 'bad' samplesheet

    The sample sheet written here mixes samples that have index
    sequences (AB1, AB2) with samples that have none (CDE3, CDE4)
    and no 'Lane' column; running the pipeline with it is expected
    to raise an exception.
    """
    # Make a mock bcl2fastq output directory
    run_dir = os.path.join(self.wd, "200428_M00879_0087_000000000-AGEW9")
    datadir = MockIlluminaData(run_dir,
                               "bcl2fastq2",
                               unaligned_dir="bcl2fastq",
                               paired_end=True)
    datadir.add_fastq_batch("AB", "AB1", "AB1_S1")
    datadir.add_fastq_batch("AB", "AB2", "AB2_S2")
    datadir.add_fastq_batch("CDE", "CDE3", "CDE3_S3")
    datadir.add_fastq_batch("CDE", "CDE4", "CDE4_S4")
    datadir.add_fastq_batch("", "Undetermined", "Undetermined_S0")
    datadir.create()
    # Add data to Fastq files
    self._insert_fastq_reads(run_dir)
    # Create "bad" sample sheet with mixture of empty and
    # non-empty indices in a lane
    sample_sheet = os.path.join(self.wd, "custom_SampleSheet.csv")
    with open(sample_sheet, 'w') as fp:
        fp.write("""[Data]
Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,I5_Index_ID,index2,Sample_Project,Description
AB1,AB1,,,D701,CGTGTAGG,D501,GACCTGAA,AB,
AB2,AB2,,,D702,CGTGTAGG,D501,ATGTAACT,AB,
CDE3,CDE3,,,,,,,CDE,
CDE4,CDE4,,,,,,,CDE,
""")
    # Set up and run pipeline
    p = AnalyseBarcodes(bcl2fastq_dir=os.path.join(run_dir, "bcl2fastq"))
    # The bound method of the pipeline instance must be used here:
    # passing the unbound 'AnalyseBarcodes.run' would bind the output
    # path as 'self' and fail for reasons unrelated to the sample
    # sheet, making the assertion pass vacuously
    self.assertRaises(Exception,
                      p.run,
                      os.path.join(self.wd, "barcode_analysis"),
                      sample_sheet=sample_sheet,
                      working_dir=self.wd,
                      poll_interval=POLL_INTERVAL)
def create(self):
    """
    Build and populate the directory structure

    Creates the directory structure on disk which has been
    defined within the MockAnalysisDir object.

    In addition to the content created by 'MockIlluminaData.create',
    the following are written under the top-level directory
    ('self.dirn'): 'metadata.info', 'auto_process.info', 'README'
    (only if a readme was set), an empty 'SampleSheet.orig.csv',
    'custom_SampleSheet.csv', 'projects.info', the 'ScriptCode' and
    'logs' directories, and one analysis project directory per
    project.

    Invoke the 'remove' method to delete the directory structure.

    The contents of the MockAnalysisDir object can be modified
    after the directory structure has been created, but changes
    will not be reflected on disk. Instead it is necessary to
    first remove the directory structure, and then re-invoke the
    create method.

    'create' raises an OSError exception if any part of the
    directory structure already exists.

    Returns:
      String: the path of the top-level directory ('self.dirn').
    """
    MockIlluminaData.create(self)
    # Add metadata file (left empty if no metadata was supplied)
    with open(os.path.join(self.dirn,'metadata.info'),'w') as fp:
        if self.metadata is not None:
            for item in self.metadata:
                fp.write("%s\t%s\n" % (item,self.metadata[item]))
        else:
            fp.write('')
    # Add auto_process.info file
    with open(os.path.join(self.dirn,'auto_process.info'),'w') as fp:
        fp.write("analysis_dir\t%s\n" % os.path.basename(self.dirn))
        fp.write("bases_mask\ty76,I8,I8,y76\n")
        fp.write("data_dir\t/mnt/data/%s\n" % self.run_name)
        fp.write("per_lane_stats_file\tper_lane_statistics.info\n")
        fp.write("primary_data_dir\t%s/primary_data/%s\n" %
                 (self.dirn,self.run_name))
        fp.write("project_metadata\tprojects.info\n")
        fp.write("sample_sheet\t%s/custom_SampleSheet.csv\n" % self.dirn)
        fp.write("stats_file\tstatistics.info\n")
        fp.write("unaligned_dir\tbcl2fastq\n")
    # Add top-level README file
    if self.readme is not None:
        with open(os.path.join(self.dirn,'README'),'w') as fp:
            fp.write(self.readme)
    # Add empty original sample sheet
    with open(os.path.join(self.dirn,'SampleSheet.orig.csv'),'w') as fp:
        fp.write('')
    # Initialise a custom_SampleSheet.csv
    custom_sample_sheet = os.path.join(self.dirn,'custom_SampleSheet.csv')
    with open(custom_sample_sheet,'w') as fp:
        fp.write('[Data]\n')
        fp.write('Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,Sample_Project,Description\n')
    # Add top-level ScriptCode directory
    os.mkdir(os.path.join(self.dirn,'ScriptCode'))
    # Add top-level logs directory
    os.mkdir(os.path.join(self.dirn,'logs'))
    # Add project dirs
    # NB projects.info is handled via a context manager so that it
    # is guaranteed to be flushed and closed before returning
    with open(os.path.join(self.dirn,'projects.info'),'w') as projects_info:
        projects_info.write('#%s\n' % '\t'.join(('Project',
                                                  'Samples',
                                                  'User',
                                                  'Library',
                                                  'Organism',
                                                  'PI',
                                                  'Comments')))
        for project in self.projects:
            if project.startswith("Undetermined"):
                project_name = 'undetermined'
            else:
                project_name = project
            project_dir = MockAnalysisProject(project_name)
            sample_names = []
            for sample in self.samples_in_project(project):
                sample_names.append(sample)
                for fq in self.fastqs_in_sample(project,sample):
                    project_dir.add_fastq(fq)
            # Add line to projects.info (undetermined reads are
            # not listed as a project)
            if project_name != 'undetermined':
                projects_info.write('%s\n' % '\t'.join((project,
                                                        ','.join(sample_names),
                                                        '.',
                                                        '.',
                                                        '.',
                                                        '.',
                                                        '.')))
            # Add lines to custom_SampleSheet
            with open(custom_sample_sheet,'a') as fp:
                for sample in self.samples_in_project(project):
                    fp.write('%s,,,,,,%s,\n' % (sample,project_name))
            # Write the project directory to disk
            project_dir.create(top_dir=self.dirn)
    # Finished
    return self.dirn
def main(self, args):
    """
    Internal: provides mock bcl2fastq2 functionality

    Parses a bcl2fastq-style command line, prints a summary of
    the settings it detected, and then builds a mock output
    directory containing the Fastqs predicted from the sample
    sheet (if one is found) plus the undetermined Fastqs.

    Arguments:
      args (list): command line arguments (excluding the
        program name)

    Returns:
      Integer: the preset exit code ('self._exit_code') for a
        '--version' request or on completion; 1 if no run folder
        was specified or if it doesn't contain 'RunInfo.xml'.

    Raises:
      AssertionError: if a bases mask check was requested (via
        'self._assert_bases_mask') and the supplied bases mask
        doesn't match.
    """
    # Build generic header
    header = """BCL to FASTQ file converter
bcl2fastq v2.17.1.14
Copyright (c) 2007-2015 Illumina, Inc.

2015-12-17 14:08:00 [7fa113f3f780] Command-line invocation: bcl2fastq %s""" \
        % ' '.join(args)
    # Handle version request
    # NB single-argument parenthesised 'print' is used throughout as
    # it produces the same output under both Python 2 and Python 3
    if "--version" in args:
        print(header)
        return self._exit_code
    # Deal with arguments
    p = argparse.ArgumentParser()
    p.add_argument("--runfolder-dir", action="store")
    p.add_argument("--output-dir", action="store")
    p.add_argument("--sample-sheet", action="store")
    p.add_argument("--use-bases-mask", action="store")
    p.add_argument("--barcode-mismatches", action="store")
    p.add_argument("--minimum-trimmed-read-length", action="store")
    p.add_argument("--mask-short-adapter-reads", action="store")
    p.add_argument("--ignore-missing-bcls", action="store_true")
    p.add_argument("--no-lane-splitting", action="store_true")
    p.add_argument("-r", action="store")
    p.add_argument("-d", action="store")
    p.add_argument("-p", action="store")
    p.add_argument("-w", action="store")
    args = p.parse_args(args)
    # Check bases mask
    if self._assert_bases_mask:
        print("Checking bases mask: %s" % args.use_bases_mask)
        assert (args.use_bases_mask == self._assert_bases_mask)
    # Platform
    print("Platform (default): %s" % self._platform)
    # Run folder (input data)
    runfolder = args.runfolder_dir
    print("Runfolder dir: %s" % runfolder)
    if runfolder is None:
        return 1
    run_info_xml = os.path.join(runfolder, "RunInfo.xml")
    if not os.path.exists(run_info_xml):
        return 1
    # Determine if run is paired end (i.e. exactly two reads
    # which are not index reads)
    nreads = 0
    for r in IlluminaRunInfo(run_info_xml).reads:
        if r['is_indexed_read'] == 'N':
            nreads += 1
    paired_end = (nreads == 2)
    print("Paired-end: %s" % paired_end)
    # Lanes
    lanes = IlluminaRun(runfolder, platform=self._platform).lanes
    print("Lanes: %s" % lanes)
    # Output folder
    output_dir = args.output_dir
    if output_dir is None:
        output_dir = "bcl2fastq"
    print("Output dir: %s" % output_dir)
    # Sample sheet
    sample_sheet = args.sample_sheet
    if sample_sheet is None:
        # Not specified: look in the default locations, leaving
        # 'sample_sheet' as None if neither has one
        for d in (runfolder,
                  os.path.join(runfolder,
                               "Data",
                               "Intensities",
                               "BaseCalls")):
            sample_sheet = os.path.join(d, "SampleSheet.csv")
            if os.path.exists(sample_sheet):
                break
            sample_sheet = None
    print("Sample sheet: %s" % sample_sheet)
    # Modifiers
    no_lane_splitting = bool(args.no_lane_splitting)
    print("No lane splitting: %s" % no_lane_splitting)
    # Generate mock output based on inputs
    tmpname = "tmp.%s" % uuid.uuid4()
    output = MockIlluminaData(name=tmpname,
                              package="bcl2fastq2",
                              unaligned_dir="bcl2fastq")
    missing_fastqs = self._missing_fastqs
    # Add outputs from sample sheet (if supplied)
    if sample_sheet is not None:
        s = SampleSheetPredictor(sample_sheet_file=sample_sheet)
        s.set(paired_end=paired_end,
              no_lane_splitting=no_lane_splitting,
              lanes=lanes)
        for project in s.projects:
            print("Adding project: %s" % project.name)
            for sample in project.samples:
                # Fall back to the sample ID if there's no name
                if sample.sample_name is None:
                    sample_name = sample.sample_id
                else:
                    sample_name = sample.sample_name
                for fq in sample.fastqs():
                    # Skip Fastqs flagged as 'missing'
                    if missing_fastqs and (fq in missing_fastqs):
                        continue
                    output.add_fastq(project.name, sample_name, fq)
    # Add undetermined fastqs
    # NB Would like to use the 'add_undetermined'
    # method but this doesn't play well with using
    # the predictor-based approach above
    if paired_end:
        reads = (1, 2)
    else:
        reads = (1, )
    if no_lane_splitting:
        lanes = None
    for r in reads:
        if lanes is None:
            output.add_fastq("Undetermined_indices",
                             "undetermined",
                             "Undetermined_S0_R%d_001.fastq.gz" % r)
        else:
            for lane in lanes:
                output.add_fastq(
                    "Undetermined_indices",
                    "undetermined",
                    "Undetermined_S0_L%03d_R%d_001.fastq.gz" % (lane, r))
    # Build the output directory
    output.create()
    # Move to final location and discard the temporary parent
    os.rename(os.path.join(tmpname, "bcl2fastq"), output_dir)
    shutil.rmtree(tmpname)
    return self._exit_code
def create(self, no_project_dirs=False):
    """
    Build and populate the directory structure

    Creates the directory structure on disk which has been
    defined within the MockAnalysisDir object.

    In addition to the content created by 'MockIlluminaData.create',
    the following are written under the top-level directory
    ('self.dirn'): 'metadata.info', 'auto_process.info', 'README'
    (only if a readme was set), an empty 'SampleSheet.orig.csv',
    'custom_SampleSheet.csv', 'projects.info', the 'ScriptCode' and
    'logs' directories, and (unless suppressed) one analysis
    project directory per project.

    Invoke the 'remove' method to delete the directory structure.

    The contents of the MockAnalysisDir object can be modified
    after the directory structure has been created, but changes
    will not be reflected on disk. Instead it is necessary to
    first remove the directory structure, and then re-invoke the
    create method.

    'create' raises an OSError exception if any part of the
    directory structure already exists.

    Arguments:
      no_project_dirs (bool): if True then don't create
        analysis project subdirectories (these are created
        by default); 'projects.info' and the custom sample
        sheet are still populated

    Returns:
      String: the path of the top-level directory ('self.dirn').
    """
    MockIlluminaData.create(self)
    # Add metadata file (left empty if no metadata was supplied)
    with open(os.path.join(self.dirn, 'metadata.info'), 'w') as fp:
        if self.metadata is not None:
            for item in self.metadata:
                fp.write("%s\t%s\n" % (item, self.metadata[item]))
        else:
            fp.write('')
    # Add auto_process.info file
    with open(os.path.join(self.dirn, 'auto_process.info'), 'w') as fp:
        fp.write("analysis_dir\t%s\n" % os.path.basename(self.dirn))
        fp.write("bases_mask\ty76,I8,I8,y76\n")
        fp.write("data_dir\t/mnt/data/%s\n" % self.run_name)
        fp.write("per_lane_stats_file\tper_lane_statistics.info\n")
        fp.write("primary_data_dir\t%s/primary_data/%s\n" %
                 (self.dirn, self.run_name))
        fp.write("project_metadata\tprojects.info\n")
        fp.write("sample_sheet\t%s/custom_SampleSheet.csv\n" % self.dirn)
        fp.write("stats_file\tstatistics.info\n")
        fp.write("unaligned_dir\tbcl2fastq\n")
    # Add top-level README file
    if self.readme is not None:
        with open(os.path.join(self.dirn, 'README'), 'w') as fp:
            fp.write(self.readme)
    # Add empty original sample sheet
    with open(os.path.join(self.dirn, 'SampleSheet.orig.csv'), 'w') as fp:
        fp.write('')
    # Initialise a custom_SampleSheet.csv
    custom_sample_sheet = os.path.join(self.dirn, 'custom_SampleSheet.csv')
    with open(custom_sample_sheet, 'w') as fp:
        fp.write('[Data]\n')
        fp.write(
            'Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,Sample_Project,Description\n'
        )
    # Add top-level ScriptCode directory
    os.mkdir(os.path.join(self.dirn, 'ScriptCode'))
    # Add top-level logs directory
    os.mkdir(os.path.join(self.dirn, 'logs'))
    # Add project dirs
    # NB projects.info is handled via a context manager so that it
    # is guaranteed to be flushed and closed before returning
    with open(os.path.join(self.dirn, 'projects.info'),
              'w') as projects_info:
        projects_info.write('#%s\n' % '\t'.join(
            ('Project', 'Samples', 'User', 'Library', 'Organism', 'PI',
             'Comments')))
        for project in self.projects:
            if project.startswith("Undetermined"):
                project_name = 'undetermined'
            else:
                project_name = project
            # Use project-specific metadata if any was supplied
            try:
                project_metadata = self.project_metadata[project_name]
            except KeyError:
                project_metadata = dict()
            project_dir = MockAnalysisProject(project_name,
                                              metadata=project_metadata)
            sample_names = []
            for sample in self.samples_in_project(project):
                sample_names.append(sample)
                for fq in self.fastqs_in_sample(project, sample):
                    project_dir.add_fastq(fq)
            # Add line to projects.info (undetermined reads are
            # not listed as a project)
            if project_name != 'undetermined':
                projects_info.write('%s\n' % '\t'.join(
                    (project, ','.join(sample_names), '.', '.', '.', '.',
                     '.')))
            # Add lines to custom_SampleSheet
            with open(custom_sample_sheet, 'a') as fp:
                for sample in self.samples_in_project(project):
                    fp.write('%s,,,,,,%s,\n' % (sample, project_name))
            # Write the project directory to disk
            if not no_project_dirs:
                project_dir.create(top_dir=self.dirn)
    # Finished
    return self.dirn
def test_analyse_barcodes_with_bcl2fastq_dir_no_samplesheet(self):
    """
    AnalyseBarcodes: bcl2fastq directory as input (no samplesheet)

    Builds a mock bcl2fastq2 output directory holding two projects
    plus undetermined reads, runs the pipeline on it without a
    sample sheet, then checks the exit code, the counts files, the
    report files and the content of the text report.
    """
    run_dir = os.path.join(self.wd, "200428_M00879_0087_000000000-AGEW9")
    analysis_dir = os.path.join(self.wd, "barcode_analysis")
    # Mock bcl2fastq outputs: (project, sample, Fastq prefix)
    mock_data = MockIlluminaData(run_dir,
                                 "bcl2fastq2",
                                 unaligned_dir="bcl2fastq",
                                 paired_end=True)
    for project, sample, prefix in (
            ("AB", "AB1", "AB1_S1"),
            ("AB", "AB2", "AB2_S2"),
            ("CDE", "CDE3", "CDE3_S3"),
            ("CDE", "CDE4", "CDE4_S4"),
            ("", "Undetermined", "Undetermined_S0"),
    ):
        mock_data.add_fastq_batch(project, sample, prefix)
    mock_data.create()
    # Put some reads into the Fastqs
    self._insert_fastq_reads(run_dir)
    # Configure the pipeline with the bcl2fastq directory and run it
    pipeline = AnalyseBarcodes(
        bcl2fastq_dir=os.path.join(run_dir, "bcl2fastq"))
    status = pipeline.run(analysis_dir,
                          working_dir=self.wd,
                          poll_interval=POLL_INTERVAL)
    # Pipeline should have finished cleanly
    self.assertEqual(status, 0)
    # Output directories
    self.assertTrue(os.path.isdir(analysis_dir),
                    "Missing dir: barcode_analysis")
    counts_dir = os.path.join(analysis_dir, "counts")
    self.assertTrue(os.path.isdir(counts_dir),
                    "Missing dir: barcode_analysis/counts")
    # Counts files
    expected_counts = (
        "AB.AB1_S1_L001_R1_001.fastq.gz.counts",
        "AB.AB2_S2_L001_R1_001.fastq.gz.counts",
        "CDE.CDE3_S3_L001_R1_001.fastq.gz.counts",
        "CDE.CDE4_S4_L001_R1_001.fastq.gz.counts",
        "__undetermined__.Undetermined_S0_L001_R1_001.fastq.gz.counts",
    )
    for name in expected_counts:
        self.assertTrue(os.path.isfile(os.path.join(counts_dir, name)),
                        "Missing file: %s" % name)
    # Report files
    for name in ("barcodes.report", "barcodes.xls", "barcodes.html"):
        self.assertTrue(os.path.isfile(os.path.join(analysis_dir, name)),
                        "Missing file: %s" % name)
    # Text report should have non-trivial content
    with open(os.path.join(analysis_dir, "barcodes.report"), 'rt') as fp:
        contents = fp.read()
    for fragment in (
            "Barcode analysis for lane #1",
            "#Rank\tIndex\tSample\tN_seqs\tN_reads\t%reads\t(%Total_reads)",
    ):
        self.assertTrue(fragment in contents)
    # Report should consist of exactly 12 lines
    self.assertEqual(contents.count('\n'), 12)
def test_analyse_barcodes_with_samplesheet_and_10x_indices(self):
    """
    AnalyseBarcodes: sample sheet with 10xGenomics indices

    The pipeline is set up from a sample sheet whose 'index' values
    are 'SI-GA-*' names; the mock bcl2fastq directory is only created
    afterwards and is supplied when the pipeline is run.
    """
    run_dir = os.path.join(self.wd, "200428_M00879_0087_000000000-AGEW9")
    analysis_dir = os.path.join(self.wd, "barcode_analysis")
    # Write the sample sheet
    sample_sheet = os.path.join(self.wd, "custom_SampleSheet.csv")
    sample_sheet_lines = (
        "[Data]",
        "Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,Sample_Project,Description",
        "AB1,AB1,,,D501,SI-GA-A2,AB,",
        "AB2,AB2,,,D501,SI-GA-B2,AB,",
        "CDE3,CDE3,,,D501,SI-GA-C2,CDE,",
        "CDE4,CDE4,,,D501,SI-GA-D2,CDE,",
    )
    with open(sample_sheet, 'w') as fp:
        fp.write("\n".join(sample_sheet_lines) + "\n")
    # Pipeline is instantiated before the bcl2fastq directory exists
    pipeline = AnalyseBarcodes(sample_sheet=sample_sheet)
    # Now make the mock bcl2fastq outputs: (project, sample, prefix)
    mock_data = MockIlluminaData(run_dir,
                                 "bcl2fastq2",
                                 unaligned_dir="bcl2fastq",
                                 paired_end=True)
    for project, sample, prefix in (
            ("AB", "AB1", "AB1_S1"),
            ("AB", "AB2", "AB2_S2"),
            ("CDE", "CDE3", "CDE3_S3"),
            ("CDE", "CDE4", "CDE4_S4"),
            ("", "Undetermined", "Undetermined_S0"),
    ):
        mock_data.add_fastq_batch(project, sample, prefix)
    mock_data.create()
    # Put some reads into the Fastqs
    self._insert_fastq_reads(run_dir)
    # Run the pipeline, pointing it at the bcl2fastq directory
    status = pipeline.run(analysis_dir,
                          bcl2fastq_dir=os.path.join(run_dir, "bcl2fastq"),
                          working_dir=self.wd,
                          poll_interval=POLL_INTERVAL)
    # Pipeline should have finished cleanly
    self.assertEqual(status, 0)
    # Output directories
    self.assertTrue(os.path.isdir(analysis_dir),
                    "Missing dir: barcode_analysis")
    counts_dir = os.path.join(analysis_dir, "counts")
    self.assertTrue(os.path.isdir(counts_dir),
                    "Missing dir: barcode_analysis/counts")
    # Counts files
    expected_counts = (
        "AB.AB1_S1_L001_R1_001.fastq.gz.counts",
        "AB.AB2_S2_L001_R1_001.fastq.gz.counts",
        "CDE.CDE3_S3_L001_R1_001.fastq.gz.counts",
        "CDE.CDE4_S4_L001_R1_001.fastq.gz.counts",
        "__undetermined__.Undetermined_S0_L001_R1_001.fastq.gz.counts",
    )
    for name in expected_counts:
        self.assertTrue(os.path.isfile(os.path.join(counts_dir, name)),
                        "Missing file: %s" % name)
    # Report files
    for name in ("barcodes.report", "barcodes.xls", "barcodes.html"):
        self.assertTrue(os.path.isfile(os.path.join(analysis_dir, name)),
                        "Missing file: %s" % name)
    # Text report should have non-trivial content
    with open(os.path.join(analysis_dir, "barcodes.report"), 'rt') as fp:
        contents = fp.read()
    for fragment in (
            "Barcode analysis for lane #1",
            "#Rank\tIndex\tSample\tN_seqs\tN_reads\t%reads\t(%Total_reads)",
    ):
        self.assertTrue(fragment in contents)
    # Report should consist of exactly 12 lines
    self.assertEqual(contents.count('\n'), 12)
def test_analyse_barcodes_with_multi_lane_samplesheet(self):
    """
    AnalyseBarcodes: multi-lane sample sheet as input

    The sample sheet assigns project 'AB' to lane 1 and project
    'CDE' to lane 2; the mock bcl2fastq directory mirrors this, with
    undetermined reads in both lanes. The text report is expected
    to cover both lanes and to flag all four samples as
    underrepresented.
    """
    run_dir = os.path.join(self.wd, "200428_M00879_0087_000000000-AGEW9")
    analysis_dir = os.path.join(self.wd, "barcode_analysis")
    # Write the sample sheet
    sample_sheet = os.path.join(self.wd, "custom_SampleSheet.csv")
    sample_sheet_lines = (
        "[Data]",
        "Lane,Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,I5_Index_ID,index2,Sample_Project,Description",
        "1,AB1,AB1,,,D701,CGTGTAGG,D501,GACCTGAA,AB,",
        "1,AB2,AB2,,,D702,CGTGTAGG,D501,ATGTAACT,AB,",
        "2,CDE3,CDE3,,,D701,GACCTGAA,D501,CGTGTAGG,CDE,",
        "2,CDE4,CDE4,,,D702,ATGTAACT,D501,CGTGTAGG,CDE,",
    )
    with open(sample_sheet, 'w') as fp:
        fp.write("\n".join(sample_sheet_lines) + "\n")
    # Pipeline is instantiated before the bcl2fastq directory exists
    pipeline = AnalyseBarcodes(sample_sheet=sample_sheet)
    # Now make the mock bcl2fastq outputs:
    # (project, sample, prefix, lanes)
    mock_data = MockIlluminaData(run_dir,
                                 "bcl2fastq2",
                                 unaligned_dir="bcl2fastq",
                                 paired_end=True)
    for project, sample, prefix, in_lanes in (
            ("AB", "AB1", "AB1_S1", (1, )),
            ("AB", "AB2", "AB2_S2", (1, )),
            ("CDE", "CDE3", "CDE3_S3", (2, )),
            ("CDE", "CDE4", "CDE4_S4", (2, )),
            ("", "Undetermined", "Undetermined_S0", (1, 2)),
    ):
        mock_data.add_fastq_batch(project, sample, prefix, lanes=in_lanes)
    mock_data.create()
    # Put some reads into the Fastqs
    self._insert_fastq_reads(run_dir)
    # Run the pipeline, pointing it at the bcl2fastq directory
    status = pipeline.run(analysis_dir,
                          bcl2fastq_dir=os.path.join(run_dir, "bcl2fastq"),
                          working_dir=self.wd,
                          poll_interval=POLL_INTERVAL)
    # Pipeline should have finished cleanly
    self.assertEqual(status, 0)
    # Output directories
    self.assertTrue(os.path.isdir(analysis_dir),
                    "Missing dir: barcode_analysis")
    counts_dir = os.path.join(analysis_dir, "counts")
    self.assertTrue(os.path.isdir(counts_dir),
                    "Missing dir: barcode_analysis/counts")
    # Counts files
    expected_counts = (
        "AB.AB1_S1_L001_R1_001.fastq.gz.counts",
        "AB.AB2_S2_L001_R1_001.fastq.gz.counts",
        "CDE.CDE3_S3_L002_R1_001.fastq.gz.counts",
        "CDE.CDE4_S4_L002_R1_001.fastq.gz.counts",
        "__undetermined__.Undetermined_S0_L001_R1_001.fastq.gz.counts",
    )
    for name in expected_counts:
        self.assertTrue(os.path.isfile(os.path.join(counts_dir, name)),
                        "Missing file: %s" % name)
    # Report files
    for name in ("barcodes.report", "barcodes.xls", "barcodes.html"):
        self.assertTrue(os.path.isfile(os.path.join(analysis_dir, name)),
                        "Missing file: %s" % name)
    # Text report should have non-trivial content
    with open(os.path.join(analysis_dir, "barcodes.report"), 'rt') as fp:
        contents = fp.read()
    for fragment in (
            # Per-lane sections and table header
            "Barcode analysis for lane #1",
            "Barcode analysis for lane #2",
            "#Rank\tIndex\tSample\tN_seqs\tN_reads\t%reads\t(%Total_reads)",
            # Reported problems
            "Problems detected:\n * Underrepresented samples",
            " 1\tTCCTGA\t\t1\t2\t100.0%\t(100.0%)",
            "The following samples are underrepresented:",
            # One line per underrepresented sample
            "AB1\tCGTGTAGG+GACCTGAA\t\t<0.1%",
            "AB2\tCGTGTAGG+ATGTAACT\t\t<0.1%",
            "CDE3\tGACCTGAA+CGTGTAGG\t\t<0.1%",
            "CDE4\tATGTAACT+CGTGTAGG\t\t<0.1%",
    ):
        self.assertTrue(fragment in contents)
    # Report should consist of 12 lines or more
    self.assertTrue(contents.count('\n') >= 12)
def test_analyse_barcodes_with_bcl2fastq_dir_and_samplesheet_empty_index(
        self):
    """
    AnalyseBarcodes: bcl2fastq directory as input (with samplesheet, empty index)

    The mock bcl2fastq directory and the sample sheet both hold a
    single sample ('AB1'), and the sample sheet gives it no index
    sequences. The pipeline is expected to complete and to report
    the sample as underrepresented.
    """
    run_dir = os.path.join(self.wd, "200428_M00879_0087_000000000-AGEW9")
    analysis_dir = os.path.join(self.wd, "barcode_analysis")
    # Mock bcl2fastq outputs with a single sample
    mock_data = MockIlluminaData(run_dir,
                                 "bcl2fastq2",
                                 unaligned_dir="bcl2fastq",
                                 paired_end=True)
    mock_data.add_fastq_batch("AB", "AB1", "AB1_S1")
    mock_data.create()
    # Put some reads into the Fastqs
    self._insert_fastq_reads(run_dir)
    # Write a sample sheet where the only sample has empty indices
    sample_sheet = os.path.join(self.wd, "custom_SampleSheet.csv")
    sample_sheet_lines = (
        "[Data]",
        "Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,I5_Index_ID,index2,Sample_Project,Description",
        "AB1,AB1,,,,,,,AB,",
    )
    with open(sample_sheet, 'w') as fp:
        fp.write("\n".join(sample_sheet_lines) + "\n")
    # Configure the pipeline with the bcl2fastq directory and run it
    # with the sample sheet
    pipeline = AnalyseBarcodes(
        bcl2fastq_dir=os.path.join(run_dir, "bcl2fastq"))
    status = pipeline.run(analysis_dir,
                          sample_sheet=sample_sheet,
                          working_dir=self.wd,
                          poll_interval=POLL_INTERVAL)
    # Pipeline should have finished cleanly
    self.assertEqual(status, 0)
    # Output directories
    self.assertTrue(os.path.isdir(analysis_dir),
                    "Missing dir: barcode_analysis")
    counts_dir = os.path.join(analysis_dir, "counts")
    self.assertTrue(os.path.isdir(counts_dir),
                    "Missing dir: barcode_analysis/counts")
    # Counts file
    counts_file = "AB.AB1_S1_L001_R1_001.fastq.gz.counts"
    self.assertTrue(os.path.isfile(os.path.join(counts_dir, counts_file)),
                    "Missing file: %s" % counts_file)
    # Report files
    for name in ("barcodes.report", "barcodes.xls", "barcodes.html"):
        self.assertTrue(os.path.isfile(os.path.join(analysis_dir, name)),
                        "Missing file: %s" % name)
    # Text report should have non-trivial content
    with open(os.path.join(analysis_dir, "barcodes.report"), 'rt') as fp:
        contents = fp.read()
    for fragment in (
            "Barcode analysis for lane #1",
            "#Rank\tIndex\tSample\tN_seqs\tN_reads\t%reads\t(%Total_reads)",
            "Problems detected:\n * Underrepresented samples",
            " 1\tTCCTGA\t\t1\t1\t100.0%\t(100.0%)",
            "The following samples are underrepresented:",
            "AB1\t\t\t<0.1%",
    ):
        self.assertTrue(fragment in contents)
    # Report should consist of 12 lines or more
    self.assertTrue(contents.count('\n') >= 12)