def testTwoReads(self):
    """Censor a different bad cycle in each of two reads.

    The first read is on tile 1101 (bad cycle 2) and the second on tile
    1102 (bad cycle 3). In the expected output the base at the bad cycle
    becomes ``N`` and its quality becomes ``#``.
    """
    self.original_bytes = (
        b"@M01841:45:000000000-A5FEG:1:1101:5296:13227 1:N:0:9\n"
        b"ACGT\n"
        b"+\n"
        b"AAAA\n"
        b"@M01841:45:000000000-A5FEG:1:1102:1234:12345 1:N:0:9\n"
        b"TGCA\n"
        b"+\n"
        b"BBBB\n"
    )
    self.original_file = BytesIO(self.original_bytes)
    self.bad_cycles = [
        dict(tile='1101', cycle='2'),
        dict(tile='1102', cycle='3'),
    ]
    expected_fastq = (
        "@M01841:45:000000000-A5FEG:1:1101:5296:13227 1:N:0:9\n"
        "ANGT\n"
        "+\n"
        "A#AA\n"
        "@M01841:45:000000000-A5FEG:1:1102:1234:12345 1:N:0:9\n"
        "TGNA\n"
        "+\n"
        "BB#B\n"
    )

    censor(self.original_file,
           self.bad_cycles,
           self.censored_file,
           use_gzip=False)

    self.assertEqual(expected_fastq, self.censored_file.getvalue())
def testNoBadCycles(self):
    """Expect the output to equal ``self.original_unicode``.

    The fixture's own ``self.bad_cycles`` is passed through unchanged
    (presumably empty, going by the test name; the fixture is defined
    outside this method).
    """
    unchanged_fastq = self.original_unicode

    censor(self.original_file,
           self.bad_cycles,
           self.censored_file,
           use_gzip=False)

    censored_fastq = self.censored_file.getvalue()
    self.assertEqual(unchanged_fastq, censored_fastq)
def testDifferentTile(self):
    """Expect unchanged output when the only bad cycle is on tile 1102.

    The expected text is ``self.original_unicode``, so the fixture's read is
    presumably on a different tile (the fixture is defined outside this
    method).
    """
    self.bad_cycles = [dict(tile='1102', cycle='3')]
    unchanged_fastq = self.original_unicode

    censor(self.original_file,
           self.bad_cycles,
           self.censored_file,
           use_gzip=False)

    censored_fastq = self.censored_file.getvalue()
    self.assertEqual(unchanged_fastq, censored_fastq)
def testSummary(self):
    """Check the summary written for the fixture's read.

    With a bad cycle 3 on tile 1101, the summary is expected to hold an
    ``avg_quality,base_count`` header followed by ``32.0,4``.
    """
    self.bad_cycles = [dict(tile='1101', cycle='3')]
    summary_rows = (
        "avg_quality,base_count\n"
        "32.0,4\n"
    )

    censor(self.original_file,
           self.bad_cycles,
           self.censored_file,
           use_gzip=False,
           summary_writer=self.summary_writer)

    self.assertEqual(summary_rows, self.summary_file.getvalue())
def testSummaryEmpty(self):
    """Check the summary written for an empty input file.

    The expected row has a blank average quality and a base count of 0.
    """
    self.original_bytes = b""
    self.original_file = BytesIO(self.original_bytes)
    summary_rows = (
        "avg_quality,base_count\n"
        ",0\n"
    )

    censor(self.original_file,
           self.bad_cycles,
           self.censored_file,
           use_gzip=False,
           summary_writer=self.summary_writer)

    self.assertEqual(summary_rows, self.summary_file.getvalue())
def testBadCycle(self):
    """Censor cycle 3 of the fixture's read on tile 1101.

    In the expected output the third base is ``N`` and the third quality
    character is ``#``.
    """
    self.bad_cycles = [dict(tile='1101', cycle='3')]
    expected_fastq = (
        "@M01841:45:000000000-A5FEG:1:1101:5296:13227 1:N:0:9\n"
        "ACNT\n"
        "+\n"
        "AA#A\n"
    )

    censor(self.original_file,
           self.bad_cycles,
           self.censored_file,
           use_gzip=False)

    self.assertEqual(expected_fastq, self.censored_file.getvalue())
def testDifferentDirection(self):
    """Expect a read-2 record to pass through unchanged for a positive cycle.

    The header marks the record as read 2 (``2:N:0:9``) while the bad cycle
    is ``'3'`` and ``cycle_sign`` is left at its default, so the expected
    output is the decoded input.
    """
    self.original_bytes = (
        b"@M01841:45:000000000-A5FEG:1:1101:5296:13227 2:N:0:9\n"
        b"ACGT\n"
        b"+\n"
        b"AAAA\n"
    )
    self.original_file = BytesIO(self.original_bytes)
    self.bad_cycles = [dict(tile='1101', cycle='3')]
    unchanged_fastq = self.original_bytes.decode()

    censor(self.original_file,
           self.bad_cycles,
           self.censored_file,
           use_gzip=False)

    self.assertEqual(unchanged_fastq, self.censored_file.getvalue())
def _test(self, read_indexes, debug_file_prefix=None):
    """Compare mapping results for censored versus trimmed reads.

    Writes a simplified FASTQ from the selected reads, produces a censored
    copy and a trimmed copy of the read pair, runs the preliminary mapping
    on each, and hands both mapped counts to ``self.get_result``.

    Side effect: changes the process working directory to the directory
    containing ``self.filename1``.

    Args:
        read_indexes: indexes of the reads to keep; they are passed to
            ``self.write_simple_fastq`` in reversed order.
        debug_file_prefix: accepted for interface compatibility; not used
            by this method.

    Returns:
        Whatever ``self.get_result`` returns for the censored and trimmed
        mapped counts.
    """
    read_indexes = reversed(read_indexes)
    simple_filename1 = self.filename1 + '_simple.fastq'
    self.write_simple_fastq(simple_filename1, read_indexes)
    workdir = os.path.dirname(self.filename1)
    os.chdir(workdir)
    simple_filename2 = get_reverse_filename(simple_filename1)
    censored_filename1 = os.path.join(workdir, 'rerun.censored1.fastq')
    censored_filename2 = os.path.join(workdir, 'rerun.censored2.fastq')
    trimmed_filename1 = os.path.join(workdir, 'rerun.trimmed1.fastq')
    trimmed_filename2 = os.path.join(workdir, 'rerun.trimmed2.fastq')
    prelim_censored_filename = os.path.join(workdir,
                                            'rerun_censored.prelim.csv')
    prelim_trimmed_filename = os.path.join(workdir,
                                           'rerun_trimmed.prelim.csv')

    # Plain 'r' replaces the old 'rU': the 'U' flag was a no-op in Python 3
    # and open() rejects it with ValueError since Python 3.11.
    with open(self.bad_cycles_filename, 'r') as bad_cycles_file:
        # Materialise the rows so the same list can be reused for both reads.
        bad_cycles = list(csv.DictReader(bad_cycles_file))

    with open(simple_filename1, 'r') as simple1, \
            open(censored_filename1, 'w') as censored1:
        censor(simple1, bad_cycles, censored1, use_gzip=False)
    with open(simple_filename2, 'r') as simple2, \
            open(censored_filename2, 'w') as censored2:
        censor(simple2, bad_cycles, censored2, use_gzip=False)

    # 'w+' so each mapping result can be rewound and read back for counting.
    with open(prelim_censored_filename, 'w+') as prelim_censored_csv, \
            open(prelim_trimmed_filename, 'w+') as prelim_trimmed_csv:
        prelim_map(censored_filename1,
                   censored_filename2,
                   prelim_censored_csv,
                   nthreads=BOWTIE_THREADS)
        trim((simple_filename1, simple_filename2),
             self.bad_cycles_filename,
             (trimmed_filename1, trimmed_filename2),
             use_gzip=False)
        prelim_map(trimmed_filename1,
                   trimmed_filename2,
                   prelim_trimmed_csv,
                   nthreads=BOWTIE_THREADS)

        prelim_censored_csv.seek(0)
        censored_map_count = self.count_mapped(prelim_censored_csv)
        prelim_trimmed_csv.seek(0)
        trimmed_map_count = self.count_mapped(prelim_trimmed_csv)

    return self.get_result(censored_map_count, trimmed_map_count)
def testSummaryAverage(self):
    """Check the summary average for a read with mixed quality characters.

    The read's quality string is ``AACC`` and the expected summary row is
    ``33.0,4``.
    """
    self.original_bytes = (
        b"@M01841:45:000000000-A5FEG:1:1101:5296:13227 1:N:0:9\n"
        b"ACGT\n"
        b"+\n"
        b"AACC\n"
    )
    self.original_file = BytesIO(self.original_bytes)
    self.bad_cycles = [dict(tile='1101', cycle='3')]
    summary_rows = (
        "avg_quality,base_count\n"
        "33.0,4\n"
    )

    censor(self.original_file,
           self.bad_cycles,
           self.censored_file,
           use_gzip=False,
           summary_writer=self.summary_writer)

    self.assertEqual(summary_rows, self.summary_file.getvalue())
def testReverseDirection(self):
    """Censor a read-2 record when the bad cycle is negative.

    The bad cycle is ``'-3'`` and ``censor`` is called with
    ``cycle_sign=-1``. In the expected output the third base is ``N`` and
    the third quality character is ``#``.
    """
    self.original_bytes = (
        b"@M01841:45:000000000-A5FEG:1:1101:5296:13227 2:N:0:9\n"
        b"ACGT\n"
        b"+\n"
        b"AAAA\n"
    )
    self.original_file = BytesIO(self.original_bytes)
    self.bad_cycles = [dict(tile='1101', cycle='-3')]
    expected_fastq = (
        "@M01841:45:000000000-A5FEG:1:1101:5296:13227 2:N:0:9\n"
        "ACNT\n"
        "+\n"
        "AA#A\n"
    )

    censor(self.original_file,
           self.bad_cycles,
           self.censored_file,
           use_gzip=False,
           cycle_sign=-1)

    self.assertEqual(expected_fastq, self.censored_file.getvalue())