예제 #1
0
    def testTwoReads(self):
        self.original_text = """\
@M01841:45:000000000-A5FEG:1:1101:5296:13227 1:N:0:9
ACGT
+
AAAA
@M01841:45:000000000-A5FEG:1:1102:1234:12345 1:N:0:9
TGCA
+
BBBB
"""
        self.original_file = StringIO.StringIO(self.original_text)
        self.bad_cycles = [{'tile': '1101', 'cycle': '2'},
                           {'tile': '1102', 'cycle': '3'}]
        expected_text = """\
@M01841:45:000000000-A5FEG:1:1101:5296:13227 1:N:0:9
ANGT
+
A#AA
@M01841:45:000000000-A5FEG:1:1102:1234:12345 1:N:0:9
TGNA
+
BB#B
"""

        censor(self.original_file,
               self.bad_cycles,
               self.censored_file,
               use_gzip=False)

        self.assertEqual(expected_text, self.censored_file.getvalue())
예제 #2
0
    def testNoBadCycles(self):
        """The censored output should equal the original text unchanged."""
        # NOTE(review): relies on the fixture's self.bad_cycles, presumably
        # empty after setUp - confirm against the test class.
        censor(
            self.original_file,
            self.bad_cycles,
            self.censored_file,
            use_gzip=False,
        )

        censored_text = self.censored_file.getvalue()
        self.assertEqual(self.original_text, censored_text)
예제 #3
0
    def testNoBadCycles(self):
        """Check that censoring leaves the fixture text exactly as it was."""
        # NOTE(review): self.bad_cycles comes from the fixture; presumably it
        # is empty here - verify in setUp.
        unchanged_text = self.original_text

        censor(
            self.original_file,
            self.bad_cycles,
            self.censored_file,
            use_gzip=False,
        )

        result_text = self.censored_file.getvalue()
        self.assertEqual(unchanged_text, result_text)
예제 #4
0
def censor_sample(filename, bad_cycles_path, censored_name, read_summary_name):
    """Run censor() on one file, loading bad cycles from CSV when available.

    filename: path of the source file, opened in binary mode.
    bad_cycles_path: path of a CSV file of bad cycles; if it does not exist,
        an empty list of bad cycles is used.
    censored_name: path that the censored output is written to.
    read_summary_name: path that the read summary is written to.
    """
    # Start with no bad cycles, and only replace them if the CSV file exists.
    bad_cycles = []
    if os.path.exists(bad_cycles_path):
        with open(bad_cycles_path, 'rU') as bad_cycles_file:
            bad_cycles = list(csv.DictReader(bad_cycles_file))

    with open(filename, 'rb') as fastq_src:
        with open(censored_name, 'w') as fastq_dest:
            with open(read_summary_name, 'w') as read_summary:
                censor(fastq_src,
                       bad_cycles,
                       fastq_dest,
                       summary_file=read_summary)
예제 #5
0
    def testDifferentTile(self):
        """A bad cycle listed for tile 1102 should leave the text unchanged."""
        # NOTE(review): the fixture's reads are presumably on another tile -
        # confirm against setUp.
        self.bad_cycles = [
            {'tile': '1102', 'cycle': '3'},
        ]

        censor(
            self.original_file,
            self.bad_cycles,
            self.censored_file,
            use_gzip=False,
        )

        censored_text = self.censored_file.getvalue()
        self.assertEqual(self.original_text, censored_text)
예제 #6
0
def censor_sample(filename, bad_cycles_path, censored_name, read_summary_name):
    """Censor a single sample file and write its read summary.

    The bad cycles are loaded as a list of dictionaries from the CSV file at
    bad_cycles_path. When that file is missing, no cycles are treated as bad.
    The source file is read in binary mode, and the censored output and read
    summary are written to censored_name and read_summary_name.
    """
    if os.path.exists(bad_cycles_path):
        with open(bad_cycles_path, 'rU') as cycles_csv:
            reader = csv.DictReader(cycles_csv)
            cycle_rows = list(reader)
    else:
        cycle_rows = []

    with open(filename, 'rb') as reads_in,\
            open(censored_name, 'w') as reads_out,\
            open(read_summary_name, 'w') as summary_out:
        censor(reads_in, cycle_rows, reads_out, summary_file=summary_out)
예제 #7
0
    def testDifferentTile(self):
        """Expect unchanged output when the only bad cycle is on tile 1102."""
        # NOTE(review): assumes the fixture text has no reads on tile 1102 -
        # verify in setUp.
        other_tile_cycle = {'tile': '1102', 'cycle': '3'}
        self.bad_cycles = [other_tile_cycle]
        unchanged_text = self.original_text

        censor(
            self.original_file,
            self.bad_cycles,
            self.censored_file,
            use_gzip=False,
        )

        result_text = self.censored_file.getvalue()
        self.assertEqual(unchanged_text, result_text)
예제 #8
0
    def testSummary(self):
        self.bad_cycles = [{'tile': '1101', 'cycle': '3'}]
        expected_summary = """\
avg_quality,base_count
32.0,4
"""

        censor(self.original_file,
               self.bad_cycles,
               self.censored_file,
               use_gzip=False,
               summary_file=self.summary_file)

        self.assertEqual(expected_summary, self.summary_file.getvalue())
예제 #9
0
    def testSummary(self):
        self.bad_cycles = [{'tile': '1101', 'cycle': '3'}]
        expected_summary = """\
avg_quality,base_count
32.0,4
"""

        censor(self.original_file,
               self.bad_cycles,
               self.censored_file,
               use_gzip=False,
               summary_file=self.summary_file)

        self.assertEqual(expected_summary, self.summary_file.getvalue())
예제 #10
0
    def testBadCycle(self):
        self.bad_cycles = [{'tile': '1101', 'cycle': '3'}]
        expected_text = """\
@M01841:45:000000000-A5FEG:1:1101:5296:13227 1:N:0:9
ACNT
+
AA#A
"""

        censor(self.original_file,
               self.bad_cycles,
               self.censored_file,
               use_gzip=False)

        self.assertEqual(expected_text, self.censored_file.getvalue())
예제 #11
0
    def testSummaryEmpty(self):
        self.original_text = ""
        self.original_file = StringIO.StringIO(self.original_text)
        expected_summary = """\
avg_quality,base_count
,0
"""

        censor(self.original_file,
               self.bad_cycles,
               self.censored_file,
               use_gzip=False,
               summary_file=self.summary_file)

        self.assertEqual(expected_summary, self.summary_file.getvalue())
예제 #12
0
    def testBadCycle(self):
        self.bad_cycles = [{'tile': '1101', 'cycle': '3'}]
        expected_text = """\
@M01841:45:000000000-A5FEG:1:1101:5296:13227 1:N:0:9
ACNT
+
AA#A
"""

        censor(self.original_file,
               self.bad_cycles,
               self.censored_file,
               use_gzip=False)

        self.assertEqual(expected_text, self.censored_file.getvalue())
예제 #13
0
    def testSummaryEmpty(self):
        self.original_text = ""
        self.original_file = StringIO.StringIO(self.original_text)
        expected_summary = """\
avg_quality,base_count
,0
"""

        censor(self.original_file,
               self.bad_cycles,
               self.censored_file,
               use_gzip=False,
               summary_file=self.summary_file)

        self.assertEqual(expected_summary, self.summary_file.getvalue())
예제 #14
0
    def testDifferentDirection(self):
        self.original_text = """\
@M01841:45:000000000-A5FEG:1:1101:5296:13227 2:N:0:9
ACGT
+
AAAA
"""
        self.original_file = StringIO.StringIO(self.original_text)
        self.bad_cycles = [{'tile': '1101', 'cycle': '3'}]
        expected_text = self.original_text

        censor(self.original_file,
               self.bad_cycles,
               self.censored_file,
               use_gzip=False)

        self.assertEqual(expected_text, self.censored_file.getvalue())
예제 #15
0
    def testDifferentDirection(self):
        self.original_text = """\
@M01841:45:000000000-A5FEG:1:1101:5296:13227 2:N:0:9
ACGT
+
AAAA
"""
        self.original_file = StringIO.StringIO(self.original_text)
        self.bad_cycles = [{'tile': '1101', 'cycle': '3'}]
        expected_text = self.original_text

        censor(self.original_file,
               self.bad_cycles,
               self.censored_file,
               use_gzip=False)

        self.assertEqual(expected_text, self.censored_file.getvalue())
예제 #16
0
    def testSummaryAverage(self):
        self.original_text = """\
@M01841:45:000000000-A5FEG:1:1101:5296:13227 1:N:0:9
ACGT
+
AACC
"""
        self.original_file = StringIO.StringIO(self.original_text)
        self.bad_cycles = [{'tile': '1101', 'cycle': '3'}]
        expected_summary = """\
avg_quality,base_count
33.0,4
"""

        censor(self.original_file,
               self.bad_cycles,
               self.censored_file,
               use_gzip=False,
               summary_file=self.summary_file)

        self.assertEqual(expected_summary, self.summary_file.getvalue())
예제 #17
0
    def testSummaryAverage(self):
        self.original_text = """\
@M01841:45:000000000-A5FEG:1:1101:5296:13227 1:N:0:9
ACGT
+
AACC
"""
        self.original_file = StringIO.StringIO(self.original_text)
        self.bad_cycles = [{'tile': '1101', 'cycle': '3'}]
        expected_summary = """\
avg_quality,base_count
33.0,4
"""

        censor(self.original_file,
               self.bad_cycles,
               self.censored_file,
               use_gzip=False,
               summary_file=self.summary_file)

        self.assertEqual(expected_summary, self.summary_file.getvalue())
예제 #18
0
    def testTwoReads(self):
        self.original_text = """\
@M01841:45:000000000-A5FEG:1:1101:5296:13227 1:N:0:9
ACGT
+
AAAA
@M01841:45:000000000-A5FEG:1:1102:1234:12345 1:N:0:9
TGCA
+
BBBB
"""
        self.original_file = StringIO.StringIO(self.original_text)
        self.bad_cycles = [{
            'tile': '1101',
            'cycle': '2'
        }, {
            'tile': '1102',
            'cycle': '3'
        }]
        expected_text = """\
@M01841:45:000000000-A5FEG:1:1101:5296:13227 1:N:0:9
ANGT
+
A#AA
@M01841:45:000000000-A5FEG:1:1102:1234:12345 1:N:0:9
TGNA
+
BB#B
"""

        censor(self.original_file,
               self.bad_cycles,
               self.censored_file,
               use_gzip=False)

        self.assertEqual(expected_text, self.censored_file.getvalue())