def test_get_projects(self):
    """Check that ``sq.get_projects`` lists the projects in a samplesheet.

    Three expectations are asserted, in order:

    1. an empty samplesheet file yields an empty list,
    2. a populated samplesheet yields exactly the project names that were
       written to it (compared order-insensitively),
    3. passing a project name as the second argument yields a list that
       holds only that project.
    """
    # Assert that an empty file returns an empty list
    fh, ssheet = tempfile.mkstemp(dir=self.rootdir, suffix=".csv")
    # Only the path is needed; release the OS-level descriptor right away
    os.close(fh)
    self.assertListEqual(
        [],
        sq.get_projects(ssheet),
        "The list of projects for an empty file is not empty")

    # Generate artificial samplesheet data; the last field of every row is
    # used as the project name and de-duplicated through a dict
    data = td.generate_samplesheet_data()
    projects = {}
    for d in data:
        projects[d[-1]] = 1
    # Snapshot the keys as a real list: a dict view cannot be indexed on
    # Python 3, and a single snapshot guarantees that the expected value and
    # the filter argument below refer to the same project
    project_names = list(projects)

    # Write the data to a samplesheet
    td._write_samplesheet(data, ssheet)

    # Assert that the list of projects returned is the same that we generated
    self.assertListEqual(
        sorted(project_names),
        sorted(sq.get_projects(ssheet)),
        "The list of projects does not match the original list")

    # Assert that the list of projects returned is filtered as specified
    wanted = project_names[-1]
    self.assertListEqual(
        [wanted],
        sq.get_projects(ssheet, wanted),
        "The filtered list of projects does not match the expected")
def test_get_project_names(self):
    """Verify the project names ``HiSeqRun.get_project_names`` reads back.

    An empty samplesheet must produce an empty list; after artificial data
    has been written to the same file, the returned names must equal the
    distinct values found in the last field of the generated rows
    (order is ignored).
    """
    # Start from an empty file inside the test's scratch directory
    handle, sheet_path = tempfile.mkstemp(suffix=".csv", dir=self.rootdir)
    os.close(handle)
    names_from_empty = HiSeqRun.get_project_names(sheet_path)
    self.assertListEqual(
        [],
        names_from_empty,
        "The list of projects for an empty file is not empty")

    # Build the reference list: unique last-field values, sorted
    rows = td.generate_samplesheet_data()
    expected_names = sorted(set(row[-1] for row in rows))

    # Overwrite the (so far empty) samplesheet with the generated rows
    td._write_samplesheet(rows, sheet_path)

    # What is read back must be what was written
    reported_names = sorted(HiSeqRun.get_project_names(sheet_path))
    self.assertListEqual(
        expected_names,
        reported_names,
        "The list of projects does not match the original list")
def setUp(self):
    """Create the fixture: paired FASTQ files and a samplesheet.

    A temporary directory is created and stored in ``self.rootdir``. One
    thousand generated records are written to an ``_R1``/``_R2`` file pair,
    each record tagged with a barcode chosen at random from a small pool.
    A samplesheet is then written that lists every barcode except the last
    one, plus one extra row on a different lane that is meant not to match.

    Attributes set:
        rootdir: path of the temporary directory.
        samplesheet: whatever ``td._write_samplesheet`` returns.
        indexes: dict mapping each barcode to the list of ``record[0]``
            values generated for it (presumably the read headers).
        fastq_1, fastq_2: paths of the two FASTQ files.
    """
    self.rootdir = tempfile.mkdtemp(prefix="test_fastq_utils_")

    # Generate some test fastq_files
    indexes = {td.generate_barcode(): [],
               td.generate_barcode(): [],
               td.generate_barcode(): [],
               td.generate_barcode(): []}
    # Materialise the barcodes once. A dict view is neither indexable nor
    # sliceable on Python 3, so random.choice() and the [:-1] slice below
    # both need a real list; it also avoids rebuilding the key list on
    # every iteration. No keys are added later, so the snapshot stays valid.
    barcodes = list(indexes)

    args = {'instrument': td.generate_instrument(),
            'run_number': random.randint(101, 9999),
            'fcid': td.generate_fc_barcode(),
            'lane': 1,
            'pair': True}

    fd, f1 = tempfile.mkstemp(suffix="_R1.fastq.gz", dir=self.rootdir)
    os.close(fd)
    f2 = f1.replace("_R1", "_R2")
    f1h = fu.FastQWriter(f1)
    f2h = fu.FastQWriter(f2)
    try:
        for _ in range(1000):
            args['index'] = random.choice(barcodes)
            record = td.generate_fastq_record(**args)
            indexes[args['index']].append(record[0])
            # First four elements go to read 1, the remainder to read 2
            f1h.write(record[0:4])
            f2h.write(record[4:])
    finally:
        # Close the writers even if record generation or writing fails
        f1h.close()
        f2h.close()

    # Create a samplesheet to use for demultiplexing using all but the last index
    samplesheet = f1.replace("_R1.fastq.gz", ".csv")
    sdata = []
    for n, index in enumerate(barcodes[:-1]):
        sdata.append([args['fcid'],
                      str(args['lane']),
                      "Sample_{}".format(str(n)),
                      "unknown",
                      index,
                      "DemuxTest",
                      "0",
                      "",
                      "",
                      "DemuxTestProject"])

    # Add an entry that will not match: same row as the last one, but on
    # the next lane
    s = copy.copy(sdata[-1])
    s[1] = str(args['lane'] + 1)
    sdata.append(s)

    self.samplesheet = td._write_samplesheet(sdata, samplesheet)
    self.indexes = indexes
    self.fastq_1 = f1
    self.fastq_2 = f2
def setUp(self):
    """Prepare paired FASTQ files and a demultiplexing samplesheet.

    Steps performed:

    * create a temporary directory (``self.rootdir``);
    * write 1000 generated records to an ``_R1`` / ``_R2`` file pair, each
      record using a barcode drawn at random from a pool of generated
      barcodes, and remember ``record[0]`` per barcode in ``self.indexes``;
    * write a samplesheet next to the FASTQ files that covers all barcodes
      but the last, followed by a copy of the final row moved to the next
      lane so that it should not match any record.

    The FASTQ paths are stored in ``self.fastq_1`` and ``self.fastq_2`` and
    the return value of ``td._write_samplesheet`` in ``self.samplesheet``.
    """
    self.rootdir = tempfile.mkdtemp(prefix="test_fastq_utils_")

    # Generate some test fastq_files
    indexes = {
        td.generate_barcode(): [],
        td.generate_barcode(): [],
        td.generate_barcode(): [],
        td.generate_barcode(): []
    }
    # Take one list snapshot of the barcodes: on Python 3 ``dict.keys()``
    # returns a view that random.choice() cannot index and that cannot be
    # sliced. The key set never changes after this point.
    barcodes = list(indexes)

    args = {
        'instrument': td.generate_instrument(),
        'run_number': random.randint(101, 9999),
        'fcid': td.generate_fc_barcode(),
        'lane': 1,
        'pair': True
    }

    fd, f1 = tempfile.mkstemp(suffix="_R1.fastq.gz", dir=self.rootdir)
    os.close(fd)
    f2 = f1.replace("_R1", "_R2")

    f1h = fu.FastQWriter(f1)
    f2h = fu.FastQWriter(f2)
    try:
        for _ in range(1000):
            args['index'] = random.choice(barcodes)
            record = td.generate_fastq_record(**args)
            indexes[args['index']].append(record[0])
            # Split the generated record between the two files of the pair
            f1h.write(record[0:4])
            f2h.write(record[4:])
    finally:
        # Make sure both writers are closed even when generation fails
        f1h.close()
        f2h.close()

    # Create a samplesheet to use for demultiplexing using all but the last index
    samplesheet = f1.replace("_R1.fastq.gz", ".csv")
    sdata = [
        [
            args['fcid'],
            str(args['lane']),
            "Sample_{}".format(str(n)),
            "unknown",
            index,
            "DemuxTest",
            "0",
            "",
            "",
            "DemuxTestProject"
        ]
        for n, index in enumerate(barcodes[:-1])
    ]

    # Add an entry that will not match (lane differs from the generated reads)
    s = copy.copy(sdata[-1])
    s[1] = str(args['lane'] + 1)
    sdata.append(s)

    self.samplesheet = td._write_samplesheet(sdata, samplesheet)
    self.indexes = indexes
    self.fastq_1 = f1
    self.fastq_2 = f2
def test_get_project_samples(self):
    """Verify ``sq.get_project_samples`` against a generated samplesheet.

    A project that was never written must give an empty list, and for
    every project present in the generated rows the returned samples must
    equal the ones written for it (order is ignored).
    """
    # Write artificial rows to a fresh samplesheet in the scratch directory
    rows = td.generate_samplesheet_data()
    handle, sheet_path = tempfile.mkstemp(dir=self.rootdir, suffix=".csv")
    os.close(handle)
    td._write_samplesheet(rows, sheet_path)

    # A freshly generated project name is expected to be absent from the sheet
    unknown_project = td.generate_project()
    self.assertListEqual(
        [],
        sq.get_project_samples(sheet_path, unknown_project),
        "Getting samples for a non-existing project returned unexpected output")

    # Group the written rows: field 9 is used as the project, field 2 as the sample
    expected = {}
    for row in rows:
        expected.setdefault(row[9], []).append(row[2])

    # Each project must report exactly its own samples
    for project in expected:
        reported = sq.get_project_samples(sheet_path, project)
        self.assertListEqual(
            sorted(expected[project]),
            sorted(reported),
            "The returned list of samples did not match the original")