Example #1
0
    def testTwoReads(self):
        """Censor a different cycle in each of two reads on different tiles.

        Cycle 2 is flagged for tile 1101 and cycle 3 for tile 1102, so the
        expected output replaces the second base of the first read and the
        third base of the second read with N, and their qualities with '#'.
        """
        self.original_bytes = (
            b"@M01841:45:000000000-A5FEG:1:1101:5296:13227 1:N:0:9\n"
            b"ACGT\n"
            b"+\n"
            b"AAAA\n"
            b"@M01841:45:000000000-A5FEG:1:1102:1234:12345 1:N:0:9\n"
            b"TGCA\n"
            b"+\n"
            b"BBBB\n")
        self.original_file = BytesIO(self.original_bytes)
        self.bad_cycles = [
            {'tile': '1101', 'cycle': '2'},
            {'tile': '1102', 'cycle': '3'},
        ]

        censor(self.original_file,
               self.bad_cycles,
               self.censored_file,
               use_gzip=False)
        censored_text = self.censored_file.getvalue()

        self.assertEqual(
            "@M01841:45:000000000-A5FEG:1:1101:5296:13227 1:N:0:9\n"
            "ANGT\n"
            "+\n"
            "A#AA\n"
            "@M01841:45:000000000-A5FEG:1:1102:1234:12345 1:N:0:9\n"
            "TGNA\n"
            "+\n"
            "BB#B\n",
            censored_text)
Example #2
0
    def testTwoReads(self):
        """Censor a different cycle in each of two reads on different tiles.

        Cycle 2 is flagged for tile 1101 and cycle 3 for tile 1102, so the
        expected output replaces the second base of the first read and the
        third base of the second read with N, and their qualities with '#'.
        """
        self.original_bytes = (
            b"@M01841:45:000000000-A5FEG:1:1101:5296:13227 1:N:0:9\n"
            b"ACGT\n"
            b"+\n"
            b"AAAA\n"
            b"@M01841:45:000000000-A5FEG:1:1102:1234:12345 1:N:0:9\n"
            b"TGCA\n"
            b"+\n"
            b"BBBB\n")
        self.original_file = BytesIO(self.original_bytes)
        self.bad_cycles = [
            {'tile': '1101', 'cycle': '2'},
            {'tile': '1102', 'cycle': '3'},
        ]

        censor(self.original_file,
               self.bad_cycles,
               self.censored_file,
               use_gzip=False)
        censored_text = self.censored_file.getvalue()

        self.assertEqual(
            "@M01841:45:000000000-A5FEG:1:1101:5296:13227 1:N:0:9\n"
            "ANGT\n"
            "+\n"
            "A#AA\n"
            "@M01841:45:000000000-A5FEG:1:1102:1234:12345 1:N:0:9\n"
            "TGNA\n"
            "+\n"
            "BB#B\n",
            censored_text)
Example #3
0
    def testNoBadCycles(self):
        """Expect the censored output to equal self.original_unicode.

        NOTE(review): self.bad_cycles, self.original_file and
        self.original_unicode come from the fixture, which is not visible
        here - presumably an empty bad-cycle list and the decoded input.
        """
        censor(self.original_file,
               self.bad_cycles,
               self.censored_file,
               use_gzip=False)
        censored_text = self.censored_file.getvalue()

        self.assertEqual(self.original_unicode, censored_text)
Example #4
0
    def testNoBadCycles(self):
        """Expect the censored output to equal self.original_unicode.

        NOTE(review): self.bad_cycles, self.original_file and
        self.original_unicode come from the fixture, which is not visible
        here - presumably an empty bad-cycle list and the decoded input.
        """
        censor(self.original_file,
               self.bad_cycles,
               self.censored_file,
               use_gzip=False)
        censored_text = self.censored_file.getvalue()

        self.assertEqual(self.original_unicode, censored_text)
Example #5
0
    def testDifferentTile(self):
        """A bad cycle listed only for tile 1102 should leave the output
        equal to self.original_unicode.

        NOTE(review): the fixture read is presumably on another tile (1101);
        confirm against the fixture set-up.
        """
        self.bad_cycles = [{'tile': '1102', 'cycle': '3'}]

        censor(self.original_file,
               self.bad_cycles,
               self.censored_file,
               use_gzip=False)
        censored_text = self.censored_file.getvalue()

        self.assertEqual(self.original_unicode, censored_text)
Example #6
0
    def testDifferentTile(self):
        """A bad cycle listed only for tile 1102 should leave the output
        equal to self.original_unicode.

        NOTE(review): the fixture read is presumably on another tile (1101);
        confirm against the fixture set-up.
        """
        self.bad_cycles = [{'tile': '1102', 'cycle': '3'}]

        censor(self.original_file,
               self.bad_cycles,
               self.censored_file,
               use_gzip=False)
        censored_text = self.censored_file.getvalue()

        self.assertEqual(self.original_unicode, censored_text)
Example #7
0
    def testSummary(self):
        """Check the summary CSV produced when a summary writer is supplied.

        With cycle 3 flagged on tile 1101, the expected summary is a header
        row followed by one row: average quality 32.0 over 4 bases.
        """
        self.bad_cycles = [{'tile': '1101', 'cycle': '3'}]

        censor(self.original_file,
               self.bad_cycles,
               self.censored_file,
               use_gzip=False,
               summary_writer=self.summary_writer)
        summary_text = self.summary_file.getvalue()

        self.assertEqual("avg_quality,base_count\n"
                         "32.0,4\n",
                         summary_text)
Example #8
0
    def testSummary(self):
        """Check the summary CSV produced when a summary writer is supplied.

        With cycle 3 flagged on tile 1101, the expected summary is a header
        row followed by one row: average quality 32.0 over 4 bases.
        """
        self.bad_cycles = [{'tile': '1101', 'cycle': '3'}]

        censor(self.original_file,
               self.bad_cycles,
               self.censored_file,
               use_gzip=False,
               summary_writer=self.summary_writer)
        summary_text = self.summary_file.getvalue()

        self.assertEqual("avg_quality,base_count\n"
                         "32.0,4\n",
                         summary_text)
Example #9
0
    def testSummaryEmpty(self):
        """Check the summary CSV for an empty input file.

        The expected summary row has a blank average quality and a base
        count of 0.
        """
        self.original_bytes = b""
        self.original_file = BytesIO(self.original_bytes)

        censor(self.original_file,
               self.bad_cycles,
               self.censored_file,
               use_gzip=False,
               summary_writer=self.summary_writer)
        summary_text = self.summary_file.getvalue()

        self.assertEqual("avg_quality,base_count\n"
                         ",0\n",
                         summary_text)
Example #10
0
    def testBadCycle(self):
        """Flag cycle 3 on tile 1101 and expect the third base to be masked.

        The expected record has N as its third base and '#' as the matching
        quality character; everything else is unchanged.
        """
        self.bad_cycles = [{'tile': '1101', 'cycle': '3'}]

        censor(self.original_file,
               self.bad_cycles,
               self.censored_file,
               use_gzip=False)
        censored_text = self.censored_file.getvalue()

        self.assertEqual(
            "@M01841:45:000000000-A5FEG:1:1101:5296:13227 1:N:0:9\n"
            "ACNT\n"
            "+\n"
            "AA#A\n",
            censored_text)
Example #11
0
    def testBadCycle(self):
        """Flag cycle 3 on tile 1101 and expect the third base to be masked.

        The expected record has N as its third base and '#' as the matching
        quality character; everything else is unchanged.
        """
        self.bad_cycles = [{'tile': '1101', 'cycle': '3'}]

        censor(self.original_file,
               self.bad_cycles,
               self.censored_file,
               use_gzip=False)
        censored_text = self.censored_file.getvalue()

        self.assertEqual(
            "@M01841:45:000000000-A5FEG:1:1101:5296:13227 1:N:0:9\n"
            "ACNT\n"
            "+\n"
            "AA#A\n",
            censored_text)
Example #12
0
    def testSummaryEmpty(self):
        """Check the summary CSV for an empty input file.

        The expected summary row has a blank average quality and a base
        count of 0.
        """
        self.original_bytes = b""
        self.original_file = BytesIO(self.original_bytes)

        censor(self.original_file,
               self.bad_cycles,
               self.censored_file,
               use_gzip=False,
               summary_writer=self.summary_writer)
        summary_text = self.summary_file.getvalue()

        self.assertEqual("avg_quality,base_count\n"
                         ",0\n",
                         summary_text)
Example #13
0
    def testDifferentDirection(self):
        """A positive bad cycle should not touch a read whose header says 2:N.

        The input record is marked as read 2, cycle 3 is flagged on its tile,
        and censor() is called without a cycle_sign argument; the output is
        expected to equal the decoded input.
        """
        self.original_bytes = (
            b"@M01841:45:000000000-A5FEG:1:1101:5296:13227 2:N:0:9\n"
            b"ACGT\n"
            b"+\n"
            b"AAAA\n")
        self.original_file = BytesIO(self.original_bytes)
        self.bad_cycles = [{'tile': '1101', 'cycle': '3'}]
        unchanged_text = self.original_bytes.decode()

        censor(self.original_file,
               self.bad_cycles,
               self.censored_file,
               use_gzip=False)
        censored_text = self.censored_file.getvalue()

        self.assertEqual(unchanged_text, censored_text)
Example #14
0
    def testDifferentDirection(self):
        """A positive bad cycle should not touch a read whose header says 2:N.

        The input record is marked as read 2, cycle 3 is flagged on its tile,
        and censor() is called without a cycle_sign argument; the output is
        expected to equal the decoded input.
        """
        self.original_bytes = (
            b"@M01841:45:000000000-A5FEG:1:1101:5296:13227 2:N:0:9\n"
            b"ACGT\n"
            b"+\n"
            b"AAAA\n")
        self.original_file = BytesIO(self.original_bytes)
        self.bad_cycles = [{'tile': '1101', 'cycle': '3'}]
        unchanged_text = self.original_bytes.decode()

        censor(self.original_file,
               self.bad_cycles,
               self.censored_file,
               use_gzip=False)
        censored_text = self.censored_file.getvalue()

        self.assertEqual(unchanged_text, censored_text)
Example #15
0
    def _test(self, read_indexes, debug_file_prefix=None):
        """Compare mapping results after censoring versus after trimming.

        Writes a simplified FASTQ from the selected reads, produces a
        censored pair with censor() and a trimmed pair with trim(), runs
        prelim_map() on each pair, and counts the mapped reads in both
        preliminary CSV files.

        Side effect: changes the process working directory to the folder
        that holds self.filename1 and does not restore it.

        :param read_indexes: indexes of the reads to include; they are
            handed to write_simple_fastq() in reverse order.
        :param debug_file_prefix: not used by this method.
        :return: the value of self.get_result() for the censored and trimmed
            mapped counts.
        """
        read_indexes = reversed(read_indexes)
        simple_filename1 = self.filename1 + '_simple.fastq'
        self.write_simple_fastq(simple_filename1, read_indexes)
        workdir = os.path.dirname(self.filename1)
        os.chdir(workdir)
        # NOTE(review): the reverse file is presumably written alongside the
        # forward one by write_simple_fastq() - confirm.
        simple_filename2 = get_reverse_filename(simple_filename1)
        censored_filename1 = os.path.join(workdir, 'rerun.censored1.fastq')
        censored_filename2 = os.path.join(workdir, 'rerun.censored2.fastq')
        trimmed_filename1 = os.path.join(workdir, 'rerun.trimmed1.fastq')
        trimmed_filename2 = os.path.join(workdir, 'rerun.trimmed2.fastq')
        prelim_censored_filename = os.path.join(workdir, 'rerun_censored.prelim.csv')
        prelim_trimmed_filename = os.path.join(workdir, 'rerun_trimmed.prelim.csv')
        # Plain 'r' is used throughout: Python 3 text mode already applies
        # universal newlines, and the old 'U' flag raises ValueError on 3.11+.
        with open(self.bad_cycles_filename, 'r') as bad_cycles_file:
            bad_cycles = list(csv.DictReader(bad_cycles_file))
        with open(simple_filename1, 'r') as simple1, \
                open(censored_filename1, 'w') as censored1:
            censor(simple1, bad_cycles, censored1, use_gzip=False)
        with open(simple_filename2, 'r') as simple2, \
                open(censored_filename2, 'w') as censored2:
            censor(simple2, bad_cycles, censored2, use_gzip=False)
        # 'w+' lets each CSV be written by prelim_map() and then rewound and
        # read back by count_mapped() through the same handle.
        with open(prelim_censored_filename, 'w+') as prelim_censored_csv, \
                open(prelim_trimmed_filename, 'w+') as prelim_trimmed_csv:
            prelim_map(censored_filename1,
                       censored_filename2,
                       prelim_censored_csv,
                       nthreads=BOWTIE_THREADS)
            trim((simple_filename1, simple_filename2),
                 self.bad_cycles_filename,
                 (trimmed_filename1, trimmed_filename2),
                 use_gzip=False)
            prelim_map(trimmed_filename1,
                       trimmed_filename2,
                       prelim_trimmed_csv,
                       nthreads=BOWTIE_THREADS)
            prelim_censored_csv.seek(0)
            censored_map_count = self.count_mapped(prelim_censored_csv)
            prelim_trimmed_csv.seek(0)
            trimmed_map_count = self.count_mapped(prelim_trimmed_csv)

        return self.get_result(censored_map_count, trimmed_map_count)
Example #16
0
    def _test(self, read_indexes, debug_file_prefix=None):
        """Compare mapping results after censoring versus after trimming.

        Writes a simplified FASTQ from the selected reads, produces a
        censored pair with censor() and a trimmed pair with trim(), runs
        prelim_map() on each pair, and counts the mapped reads in both
        preliminary CSV files.

        Side effect: changes the process working directory to the folder
        that holds self.filename1 and does not restore it.

        :param read_indexes: indexes of the reads to include; they are
            handed to write_simple_fastq() in reverse order.
        :param debug_file_prefix: not used by this method.
        :return: the value of self.get_result() for the censored and trimmed
            mapped counts.
        """
        read_indexes = reversed(read_indexes)
        simple_filename1 = self.filename1 + '_simple.fastq'
        self.write_simple_fastq(simple_filename1, read_indexes)
        workdir = os.path.dirname(self.filename1)
        os.chdir(workdir)
        # NOTE(review): the reverse file is presumably written alongside the
        # forward one by write_simple_fastq() - confirm.
        simple_filename2 = get_reverse_filename(simple_filename1)
        censored_filename1 = os.path.join(workdir, 'rerun.censored1.fastq')
        censored_filename2 = os.path.join(workdir, 'rerun.censored2.fastq')
        trimmed_filename1 = os.path.join(workdir, 'rerun.trimmed1.fastq')
        trimmed_filename2 = os.path.join(workdir, 'rerun.trimmed2.fastq')
        prelim_censored_filename = os.path.join(workdir, 'rerun_censored.prelim.csv')
        prelim_trimmed_filename = os.path.join(workdir, 'rerun_trimmed.prelim.csv')
        # Plain 'r' is used throughout: Python 3 text mode already applies
        # universal newlines, and the old 'U' flag raises ValueError on 3.11+.
        with open(self.bad_cycles_filename, 'r') as bad_cycles_file:
            bad_cycles = list(csv.DictReader(bad_cycles_file))
        with open(simple_filename1, 'r') as simple1, \
                open(censored_filename1, 'w') as censored1:
            censor(simple1, bad_cycles, censored1, use_gzip=False)
        with open(simple_filename2, 'r') as simple2, \
                open(censored_filename2, 'w') as censored2:
            censor(simple2, bad_cycles, censored2, use_gzip=False)
        # 'w+' lets each CSV be written by prelim_map() and then rewound and
        # read back by count_mapped() through the same handle.
        with open(prelim_censored_filename, 'w+') as prelim_censored_csv, \
                open(prelim_trimmed_filename, 'w+') as prelim_trimmed_csv:
            prelim_map(censored_filename1,
                       censored_filename2,
                       prelim_censored_csv,
                       nthreads=BOWTIE_THREADS)
            trim((simple_filename1, simple_filename2),
                 self.bad_cycles_filename,
                 (trimmed_filename1, trimmed_filename2),
                 use_gzip=False)
            prelim_map(trimmed_filename1,
                       trimmed_filename2,
                       prelim_trimmed_csv,
                       nthreads=BOWTIE_THREADS)
            prelim_censored_csv.seek(0)
            censored_map_count = self.count_mapped(prelim_censored_csv)
            prelim_trimmed_csv.seek(0)
            trimmed_map_count = self.count_mapped(prelim_trimmed_csv)

        return self.get_result(censored_map_count, trimmed_map_count)
Example #17
0
    def testSummaryAverage(self):
        """Check the average quality reported for mixed quality characters.

        The single read has quality string AACC and cycle 3 is flagged on its
        tile; the expected summary row is an average of 33.0 over 4 bases.
        """
        self.original_bytes = (
            b"@M01841:45:000000000-A5FEG:1:1101:5296:13227 1:N:0:9\n"
            b"ACGT\n"
            b"+\n"
            b"AACC\n")
        self.original_file = BytesIO(self.original_bytes)
        self.bad_cycles = [{'tile': '1101', 'cycle': '3'}]

        censor(self.original_file,
               self.bad_cycles,
               self.censored_file,
               use_gzip=False,
               summary_writer=self.summary_writer)
        summary_text = self.summary_file.getvalue()

        self.assertEqual("avg_quality,base_count\n"
                         "33.0,4\n",
                         summary_text)
Example #18
0
    def testSummaryAverage(self):
        """Check the average quality reported for mixed quality characters.

        The single read has quality string AACC and cycle 3 is flagged on its
        tile; the expected summary row is an average of 33.0 over 4 bases.
        """
        self.original_bytes = (
            b"@M01841:45:000000000-A5FEG:1:1101:5296:13227 1:N:0:9\n"
            b"ACGT\n"
            b"+\n"
            b"AACC\n")
        self.original_file = BytesIO(self.original_bytes)
        self.bad_cycles = [{'tile': '1101', 'cycle': '3'}]

        censor(self.original_file,
               self.bad_cycles,
               self.censored_file,
               use_gzip=False,
               summary_writer=self.summary_writer)
        summary_text = self.summary_file.getvalue()

        self.assertEqual("avg_quality,base_count\n"
                         "33.0,4\n",
                         summary_text)
Example #19
0
    def testReverseDirection(self):
        """Censor a read-2 record using a negative cycle and cycle_sign=-1.

        The bad cycle is listed as '-3' for tile 1101 and censor() is called
        with cycle_sign=-1; the expected output masks the third base with N
        and its quality with '#'.
        """
        self.original_bytes = (
            b"@M01841:45:000000000-A5FEG:1:1101:5296:13227 2:N:0:9\n"
            b"ACGT\n"
            b"+\n"
            b"AAAA\n")
        self.original_file = BytesIO(self.original_bytes)
        self.bad_cycles = [{'tile': '1101', 'cycle': '-3'}]

        censor(self.original_file,
               self.bad_cycles,
               self.censored_file,
               use_gzip=False,
               cycle_sign=-1)
        censored_text = self.censored_file.getvalue()

        self.assertEqual(
            "@M01841:45:000000000-A5FEG:1:1101:5296:13227 2:N:0:9\n"
            "ACNT\n"
            "+\n"
            "AA#A\n",
            censored_text)