def setUp(self):
    """Build a temporary flowcell directory tree and wrap it in an IlluminaRun.

    Layout created under a fresh temporary root:

    * one flowcell directory named by ``td.generate_run_id``
    * three read directories: ``Unaligned``, ``Unaligned_L6``, ``Unaligned_L8``
    * ``Undetermined_indices`` in every read directory except the last
    * ``Basecall_Stats_<fcid>`` in every read directory except the first

    The expected paths are stored on ``self.exp_*`` and the object under
    test on ``self.run``.
    """
    self.rootdir = tempfile.mkdtemp(prefix="test_illumina_run_")

    # Flowcell directory, named after a generated run id
    self.exp_fcid = td.generate_fc_barcode()
    run_id = td.generate_run_id(fc_barcode=self.exp_fcid)
    self.exp_fcdir = os.path.join(self.rootdir, run_id)
    os.mkdir(self.exp_fcdir)

    # Several sequence read directories side by side
    self.exp_seqdir = [
        os.path.join(self.exp_fcdir, dirname)
        for dirname in ("Unaligned", "Unaligned_L6", "Unaligned_L8")
    ]
    for seqdir in self.exp_seqdir:
        os.mkdir(seqdir)

    # Undetermined indices reads: all read directories but the last
    self.exp_unmatched_directory = [
        os.path.join(seqdir, "Undetermined_indices")
        for seqdir in self.exp_seqdir[:-1]
    ]
    for unmatched in self.exp_unmatched_directory:
        os.mkdir(unmatched)

    # Basecall stats: all read directories but the first
    self.exp_basecall_stats = [
        os.path.join(seqdir, "Basecall_Stats_{}".format(self.exp_fcid))
        for seqdir in self.exp_seqdir[1:]
    ]
    for stats_dir in self.exp_basecall_stats:
        os.mkdir(stats_dir)

    self.run = IlluminaRun(self.exp_fcdir)
def test_get_samplesheet(self):
    """Locate the samplesheet in a folder.

    First asserts that ``IlluminaRun.get_samplesheet`` returns ``None`` for
    a folder that only holds randomly named directories and temporary
    files (one of which has a ``.csv`` suffix). Then creates
    ``SampleSheet.csv`` followed by ``[FCID].csv`` in a flowcell folder and
    asserts, after each file is created, that the most recently created
    one is what gets returned.

    The scratch subdirectory is removed even when an assertion fails.
    """
    # Work in a separate subdirectory
    subdir = os.path.join(self.rootdir, "test_get_samplesheet")
    os.mkdir(subdir)
    try:
        # Create a few random files and folders and assert that they are
        # not returned
        suffixes = [".csv", "", ""]
        for suffix in suffixes:
            dirname = ''.join(
                random.choice(string.ascii_uppercase) for _ in range(5))
            os.mkdir(os.path.join(subdir, dirname))
            fh, _ = tempfile.mkstemp(dir=subdir, suffix=suffix)
            os.close(fh)
        self.assertIsNone(
            IlluminaRun.get_samplesheet(subdir),
            "Getting non-existing samplesheet did not return None")

        # Create a SampleSheet.csv and a [FCID].csv file and assert that
        # they are returned with a preference for the [FCID].csv file
        fcid = td.generate_fc_barcode()
        fcdir = os.path.join(subdir, td.generate_run_id(fc_barcode=fcid))
        os.mkdir(fcdir)
        ss = [os.path.join(fcdir, "SampleSheet.csv"),
              os.path.join(fcdir, "{}.csv".format(fcid))]
        for s in ss:
            utils.touch_file(s)
            # Second pass: both files exist, so [FCID].csv must win
            self.assertEqual(
                s, IlluminaRun.get_samplesheet(fcdir),
                "Did not get existing {}".format(os.path.basename(s)))
    finally:
        # Previously skipped whenever an assertion above failed
        shutil.rmtree(subdir)
def setUp(self):
    """Create the on-disk fixture for the IlluminaRun tests.

    A temporary root receives a single flowcell directory containing the
    read directories ``Unaligned``, ``Unaligned_L6`` and ``Unaligned_L8``.
    ``Undetermined_indices`` is added to the first two of them and
    ``Basecall_Stats_<fcid>`` to the last two. Every expected path is
    recorded on ``self`` and ``self.run`` is built from the flowcell
    directory.
    """
    root = tempfile.mkdtemp(prefix="test_illumina_run_")
    self.rootdir = root

    # Flowcell directory
    fcid = td.generate_fc_barcode()
    self.exp_fcid = fcid
    fcdir = os.path.join(root, td.generate_run_id(fc_barcode=fcid))
    self.exp_fcdir = fcdir
    os.mkdir(fcdir)

    # Sequence read directories
    seqdirs = []
    for name in ("Unaligned", "Unaligned_L6", "Unaligned_L8"):
        seqdirs.append(os.path.join(fcdir, name))
    self.exp_seqdir = seqdirs
    for path in seqdirs:
        os.mkdir(path)

    # Undetermined indices directories (skipping the last read directory)
    unmatched = []
    for parent in seqdirs[:-1]:
        unmatched.append(os.path.join(parent, "Undetermined_indices"))
    self.exp_unmatched_directory = unmatched
    for path in unmatched:
        os.mkdir(path)

    # Basecall stats directories (skipping the first read directory)
    stats = []
    for parent in seqdirs[1:]:
        stats.append(
            os.path.join(parent, "Basecall_Stats_{}".format(self.exp_fcid)))
    self.exp_basecall_stats = stats
    for path in stats:
        os.mkdir(path)

    self.run = IlluminaRun(fcdir)
def test_get_samplesheet(self):
    """Check which file ``sq.get_samplesheet`` picks from a folder.

    A folder holding only randomly named directories and temporary files
    (one of them ending in ``.csv``) must yield ``None``. In a flowcell
    folder, ``SampleSheet.csv`` is created first and then ``[FCID].csv``;
    after each creation the newly created file must be the one returned.
    """
    # Decoys: random directories and temp files that must not be returned
    for decoy_suffix in (".csv", "", ""):
        letters = [random.choice(string.ascii_uppercase) for _ in range(5)]
        os.mkdir(os.path.join(self.rootdir, ''.join(letters)))
        handle, _unused = tempfile.mkstemp(
            dir=self.rootdir, suffix=decoy_suffix)
        os.close(handle)
    found = sq.get_samplesheet(self.rootdir)
    self.assertIsNone(
        found, "Getting non-existing samplesheet did not return None")

    # A flowcell folder that gets SampleSheet.csv first, then [FCID].csv;
    # the [FCID].csv file is expected to take precedence once it exists
    flowcell_id = td.generate_fc_barcode()
    run_id = td.generate_run_id(fc_barcode=flowcell_id)
    flowcell_dir = os.path.join(self.rootdir, run_id)
    os.mkdir(flowcell_dir)
    candidates = [
        os.path.join(flowcell_dir, "SampleSheet.csv"),
        os.path.join(flowcell_dir, "{}.csv".format(flowcell_id)),
    ]
    for expected in candidates:
        utils.touch_file(expected)
        failure_msg = "Did not get existing {}".format(
            os.path.basename(expected))
        self.assertEqual(
            expected, sq.get_samplesheet(flowcell_dir), failure_msg)
def setUp(self):
    """Generate a paired fastq fixture and a matching samplesheet.

    Writes 1000 records, each tagged with one of the generated barcodes
    picked at random, to an ``_R1``/``_R2`` pair of fastq files in a
    fresh temporary directory. A samplesheet is then written that lists
    all but the last barcode, plus one extra row on a different lane that
    is meant not to match.

    Sets ``self.rootdir``, ``self.samplesheet``, ``self.indexes`` (barcode
    mapped to the list of ``record[0]`` values written for it),
    ``self.fastq_1`` and ``self.fastq_2``.
    """
    self.rootdir = tempfile.mkdtemp(prefix="test_fastq_utils_")

    # Generate some test fastq_files
    # NOTE(review): if td.generate_barcode() ever repeats a value the dict
    # ends up with fewer than four keys -- confirm that is acceptable
    indexes = {td.generate_barcode(): [],
               td.generate_barcode(): [],
               td.generate_barcode(): [],
               td.generate_barcode(): []}
    # Snapshot the keys once: dict.keys() is a non-indexable view on
    # Python 3, so it can be neither sliced nor passed to random.choice
    barcodes = list(indexes)
    args = {'instrument': td.generate_instrument(),
            'run_number': random.randint(101, 9999),
            'fcid': td.generate_fc_barcode(),
            'lane': 1,
            'pair': True}

    r1_suffix = "_R1.fastq.gz"
    fd, f1 = tempfile.mkstemp(suffix=r1_suffix, dir=self.rootdir)
    os.close(fd)
    # Derive sibling paths from the stem rather than str.replace(): the
    # random part of the temp directory or file name may itself contain
    # "_R1", which replace() would rewrite as well
    stem = f1[:-len(r1_suffix)]
    f2 = stem + "_R2.fastq.gz"

    f1h = fu.FastQWriter(f1)
    f2h = fu.FastQWriter(f2)
    try:
        for _ in range(1000):
            args['index'] = random.choice(barcodes)
            record = td.generate_fastq_record(**args)
            indexes[args['index']].append(record[0])
            f1h.write(record[0:4])
            f2h.write(record[4:])
    finally:
        # Close the writers even if record generation fails
        f1h.close()
        f2h.close()

    # Create a samplesheet to use for demultiplexing using all but the
    # last index
    samplesheet = stem + ".csv"
    sdata = []
    for n, index in enumerate(barcodes[:-1]):
        sdata.append([args['fcid'],
                      str(args['lane']),
                      "Sample_{}".format(str(n)),
                      "unknown",
                      index,
                      "DemuxTest",
                      "0",
                      "",
                      "",
                      "DemuxTestProject"])

    # Add an entry that will not match
    s = copy.copy(sdata[-1])
    s[1] = str(args['lane'] + 1)
    sdata.append(s)

    self.samplesheet = td._write_samplesheet(sdata, samplesheet)
    self.indexes = indexes
    self.fastq_1 = f1
    self.fastq_2 = f2
def setUp(self):
    """Create paired fastq files and a samplesheet for the demux tests.

    1000 generated records are written to an ``_R1``/``_R2`` fastq pair
    inside a new temporary directory, each record carrying a barcode
    chosen at random from the generated set. The samplesheet covers every
    barcode except the last one and gets an additional row, copied from
    the final entry but moved to another lane, that is meant not to match.

    Sets ``self.rootdir``, ``self.samplesheet``, ``self.indexes`` (barcode
    mapped to the list of ``record[0]`` values written for it),
    ``self.fastq_1`` and ``self.fastq_2``.
    """
    self.rootdir = tempfile.mkdtemp(prefix="test_fastq_utils_")

    # Generate some test fastq_files
    indexes = {
        td.generate_barcode(): [],
        td.generate_barcode(): [],
        td.generate_barcode(): [],
        td.generate_barcode(): []
    }
    # dict.keys() cannot be indexed or sliced on Python 3; take one list
    # snapshot and use it for both the random pick and the samplesheet
    index_keys = list(indexes)
    args = {
        'instrument': td.generate_instrument(),
        'run_number': random.randint(101, 9999),
        'fcid': td.generate_fc_barcode(),
        'lane': 1,
        'pair': True
    }

    read1_suffix = "_R1.fastq.gz"
    fd, f1 = tempfile.mkstemp(suffix=read1_suffix, dir=self.rootdir)
    os.close(fd)
    # Only the trailing suffix may change. str.replace("_R1", "_R2") would
    # also rewrite an "_R1" occurring in the random directory or file
    # name, yielding a path in a directory that does not exist
    basepath = f1[:-len(read1_suffix)]
    f2 = basepath + "_R2.fastq.gz"

    f1h = fu.FastQWriter(f1)
    f2h = fu.FastQWriter(f2)
    try:
        for _ in range(1000):
            args['index'] = random.choice(index_keys)
            record = td.generate_fastq_record(**args)
            indexes[args['index']].append(record[0])
            f1h.write(record[0:4])
            f2h.write(record[4:])
    finally:
        # Do not leave the writers open if generation or writing fails
        f1h.close()
        f2h.close()

    # Create a samplesheet to use for demultiplexing using all but the
    # last index
    samplesheet = basepath + ".csv"
    sdata = []
    for n, index in enumerate(index_keys[:-1]):
        sdata.append([
            args['fcid'],
            str(args['lane']),
            "Sample_{}".format(str(n)),
            "unknown",
            index,
            "DemuxTest",
            "0",
            "",
            "",
            "DemuxTestProject"
        ])

    # Add an entry that will not match
    s = copy.copy(sdata[-1])
    s[1] = str(args['lane'] + 1)
    sdata.append(s)

    self.samplesheet = td._write_samplesheet(sdata, samplesheet)
    self.indexes = indexes
    self.fastq_1 = f1
    self.fastq_2 = f2