Esempio n. 1
0
 def test_extract_barcodes_paired_end(self):
     """ Extracts barcodes from paired-end reads

     Runs extract_barcodes on a single read pair with input_type
     "barcode_paired_end" and compares the contents of barcodes.fastq,
     reads1.fastq and reads2.fastq in self.output_dir against the
     expected records.
     """

     def read_output(filename):
         # The context manager closes the handle right after reading, so
         # no file descriptors are left open by the test.  The "U" mode
         # flag is omitted: Python 3.11+ rejects it, and default text
         # mode already reads the "\n"-terminated lines checked below.
         with open(join(self.output_dir, filename)) as output_f:
             return output_f.readlines()

     fastq1_lines =\
         "@HWI-ST830\nAAAATTTTCCCCGGGG\n+\n1234567890ABCDEF".split('\n')
     fastq2_lines =\
         "@HWI-ST830\nGGGGTTTTAAAACCCC\n+\n1234567890ABCDEF".split('\n')

     extract_barcodes(fastq1=fastq1_lines, fastq2=fastq2_lines,
                      input_type="barcode_paired_end",
                      output_dir=self.output_dir)

     # The expected barcode is the first six bases of read 1 followed by
     # the first six bases of read 2; qualities are joined the same way.
     expected_bcs =\
         ['@HWI-ST830\n', 'AAAATTGGGGTT\n', '+\n', '123456123456\n']
     self.assertEqual(read_output("barcodes.fastq"), expected_bcs)

     # reads 1 output: read 1 with its leading six positions removed
     expected_reads1 =\
         ['@HWI-ST830\n', 'TTCCCCGGGG\n', '+\n', '7890ABCDEF\n']
     self.assertEqual(read_output("reads1.fastq"), expected_reads1)

     # reads 2 output: read 2 with its leading six positions removed
     expected_reads2 =\
         ['@HWI-ST830\n', 'TTAAAACCCC\n', '+\n', '7890ABCDEF\n']
     self.assertEqual(read_output("reads2.fastq"), expected_reads2)
Esempio n. 2
0
def main():
    """Parse the command line, validate the options and run extract_barcodes.

    Reports an option error when --attempt_read_reorientation is given
    without a mapping file, or when input_type is "barcode_paired_end"
    and no second fastq file was supplied.  Header matching is always
    disabled when only one fastq file is provided.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    # Read reorientation needs the primer columns from a mapping file.
    if opts.attempt_read_reorientation and not opts.mapping_fp:
        option_parser.error("To use --attempt_read_reorientation, one must "
                            "supply a mapping file that contains both LinkerPrimerSequence "
                            "and ReversePrimer columns.")

    # Paired-end barcode extraction needs both read files.
    if opts.input_type == "barcode_paired_end" and not opts.fastq2:
        option_parser.error("To use input_type of barcode_paired_end, "
                            "a second fastq file must be specified with --fastq2")

    # With a single fastq file there is no second header to compare with.
    disable_header_match = (
        opts.disable_header_match if opts.fastq2 else True)

    # Inputs are opened (and the output directory created) in the same
    # order as before: fastq1, fastq2, output dir, mapping file.
    fastq1 = qiime_open(opts.fastq1)
    fastq2 = qiime_open(opts.fastq2) if opts.fastq2 else None
    create_dir(opts.output_dir)
    map_fp = qiime_open(opts.mapping_fp) if opts.mapping_fp else None

    extract_barcodes(
        fastq1, fastq2, opts.output_dir, opts.input_type,
        opts.bc1_len, opts.bc2_len,
        opts.rev_comp_bc1, opts.rev_comp_bc2,
        opts.char_delineator, opts.switch_bc_order, map_fp,
        opts.attempt_read_reorientation, disable_header_match)
Esempio n. 3
0
def main():
    """Entry point: check option combinations, open inputs, extract barcodes.

    An option error is raised through the parser if read reorientation
    is requested without a mapping file, or if the "barcode_paired_end"
    input type is selected without --fastq2.  When no second fastq file
    is given, header matching is forced off.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    reorient_without_map = (
        opts.attempt_read_reorientation and not opts.mapping_fp)
    if reorient_without_map:
        option_parser.error("To use --attempt_read_reorientation, one must "
                            "supply a mapping file that contains both LinkerPrimerSequence "
                            "and ReversePrimer columns.")

    paired_without_second = (
        opts.input_type == "barcode_paired_end" and not opts.fastq2)
    if paired_without_second:
        option_parser.error("To use input_type of barcode_paired_end, "
                            "a second fastq file must be specified with --fastq2")

    # Header matching only applies when two fastq files are supplied.
    if opts.fastq2:
        disable_header_match = opts.disable_header_match
    else:
        disable_header_match = True

    # Optional inputs stay None when the corresponding option is unset.
    fastq2 = None
    map_fp = None

    fastq1 = qiime_open(opts.fastq1)
    if opts.fastq2:
        fastq2 = qiime_open(opts.fastq2)
    create_dir(opts.output_dir)
    if opts.mapping_fp:
        map_fp = qiime_open(opts.mapping_fp)

    extract_barcodes(fastq1, fastq2, opts.output_dir, opts.input_type,
                     opts.bc1_len, opts.bc2_len,
                     opts.rev_comp_bc1, opts.rev_comp_bc2,
                     opts.char_delineator, opts.switch_bc_order,
                     map_fp, opts.attempt_read_reorientation,
                     disable_header_match)
Esempio n. 4
0
    def test_extract_barcodes_from_labels(self):
        """ Extracts barcodes from fastq labels

        Runs extract_barcodes with input_type "barcode_in_label" on one
        record whose label ends in ":GTATCT" and checks the record that
        is written to barcodes.fastq in self.output_dir.
        """

        fastq_lines =\
            "@HWI-ST830:GTATCT\nAAAATTTTCCCCGGGG\n+\n1234567890ABCDEF".split('\n')

        extract_barcodes(fastq_lines, input_type="barcode_in_label",
                         output_dir=self.output_dir, disable_header_match=True)

        # Read inside a context manager so the handle is closed before the
        # assertion runs.  The "U" mode flag is omitted: Python 3.11+
        # rejects it, and default text mode reads these lines unchanged.
        with open(join(self.output_dir, "barcodes.fastq")) as output_bcs_fp:
            actual_bcs = output_bcs_fp.readlines()

        # The barcode sequence comes from the label; the quality line is
        # six 'F' characters.
        expected_bcs =\
            ['@HWI-ST830:GTATCT\n', 'GTATCT\n', '+\n', 'FFFFFF\n']

        self.assertEqual(actual_bcs, expected_bcs)
Esempio n. 5
0
    def test_extract_barcodes_from_labels(self):
        """ Extracts barcodes from fastq labels

        Feeds extract_barcodes a single record whose label carries the
        barcode after a colon (input_type "barcode_in_label") and verifies
        the barcodes.fastq file produced in self.output_dir.
        """

        fastq_lines =\
            "@HWI-ST830:GTATCT\nAAAATTTTCCCCGGGG\n+\n1234567890ABCDEF".split('\n')

        extract_barcodes(fastq_lines, input_type="barcode_in_label",
                         output_dir=self.output_dir, disable_header_match=True)

        barcodes_path = join(self.output_dir, "barcodes.fastq")
        # "with" guarantees the handle is closed even if reading fails.
        # The "U" mode flag is not used because Python 3.11+ rejects it;
        # default text mode reads the "\n"-terminated lines unchanged.
        with open(barcodes_path) as output_bcs_fp:
            actual_bcs = list(output_bcs_fp)

        # The barcode sequence comes from the label; the quality line is
        # six apostrophe characters.
        expected_bcs =\
            ['@HWI-ST830:GTATCT\n', 'GTATCT\n', '+\n', "''''''\n"]

        self.assertEqual(actual_bcs, expected_bcs)
Esempio n. 6
0
 def test_extract_barcodes_single_end(self):
     """ Extracts barcodes from single-end read

     Runs extract_barcodes with its default input type on one record and
     checks both barcodes.fastq and reads.fastq in self.output_dir.
     """

     def read_output(filename):
         # Close every handle deterministically.  The "U" mode flag is
         # omitted because Python 3.11+ rejects it; default text mode
         # reads the "\n"-terminated lines unchanged.
         with open(join(self.output_dir, filename)) as output_f:
             return output_f.readlines()

     fastq_lines =\
         "@HWI-ST830\nAAAATTTTCCCCGGGG\n+\n1234567890ABCDEF".split('\n')

     extract_barcodes(fastq_lines, output_dir=self.output_dir,
                      disable_header_match=True)

     # Barcode record: the first six bases and quality characters.
     expected_bcs = ['@HWI-ST830\n', 'AAAATT\n', '+\n', '123456\n']
     self.assertEqual(read_output("barcodes.fastq"), expected_bcs)

     # Read record: what remains after the first six positions.
     expected_reads =\
         ['@HWI-ST830\n', 'TTCCCCGGGG\n', '+\n', '7890ABCDEF\n']
     self.assertEqual(read_output("reads.fastq"), expected_reads)
Esempio n. 7
0
    def test_extract_barcodes_stitched_reads(self):
        """ Extracts barcodes from ends of a single read

        Runs extract_barcodes with input_type "barcode_paired_stitched"
        on one record and checks barcodes.fastq and reads.fastq in
        self.output_dir.
        """

        def read_output(filename):
            # Context manager closes the handle after reading.  The "U"
            # mode flag is omitted because Python 3.11+ rejects it;
            # default text mode reads these lines unchanged.
            with open(join(self.output_dir, filename)) as output_f:
                return output_f.readlines()

        # NOTE: the trailing "\n" makes split() yield a final empty string,
        # which is passed to extract_barcodes along with the record lines.
        fastq_lines =\
            "@HWI-ST830\nAAAATTTTCCCCGGGG\n+\n1234567890ABCDEF\n".split('\n')

        extract_barcodes(fastq_lines, input_type="barcode_paired_stitched",
                         output_dir=self.output_dir, disable_header_match=True)

        # Barcode record: first six plus last six positions of the read.
        expected_bcs =\
            ['@HWI-ST830\n', 'AAAATTCCGGGG\n', '+\n', '123456ABCDEF\n']
        self.assertEqual(read_output("barcodes.fastq"), expected_bcs)

        # Read record: the four positions left between the two barcodes.
        expected_reads = ['@HWI-ST830\n', 'TTCC\n', '+\n', '7890\n']
        self.assertEqual(read_output("reads.fastq"), expected_reads)
Esempio n. 8
0
    def test_extract_barcodes_paired_end(self):
        """ Extracts barcodes from paired-end reads

        Runs extract_barcodes with input_type "barcode_paired_end" on one
        read pair and verifies the three files written to self.output_dir:
        barcodes.fastq, reads1.fastq and reads2.fastq.
        """

        def read_output(filename):
            # Each output file is closed as soon as it has been read.  The
            # "U" mode flag is omitted because Python 3.11+ rejects it;
            # default text mode reads the "\n"-terminated lines unchanged.
            with open(join(self.output_dir, filename)) as output_f:
                return output_f.readlines()

        fastq1_lines =\
            "@HWI-ST830\nAAAATTTTCCCCGGGG\n+\n1234567890ABCDEF".split('\n')
        fastq2_lines =\
            "@HWI-ST830\nGGGGTTTTAAAACCCC\n+\n1234567890ABCDEF".split('\n')

        extract_barcodes(fastq1=fastq1_lines, fastq2=fastq2_lines,
                         input_type="barcode_paired_end", output_dir=self.output_dir)

        # Barcode record: first six positions of read 1 followed by the
        # first six positions of read 2.
        expected_bcs =\
            ['@HWI-ST830\n', 'AAAATTGGGGTT\n', '+\n', '123456123456\n']
        self.assertEqual(read_output("barcodes.fastq"), expected_bcs)

        # reads 1 output
        expected_reads1 = [
            '@HWI-ST830\n',
            'TTCCCCGGGG\n',
            '+\n',
            '7890ABCDEF\n']
        self.assertEqual(read_output("reads1.fastq"), expected_reads1)

        # reads 2 output
        expected_reads2 = [
            '@HWI-ST830\n',
            'TTAAAACCCC\n',
            '+\n',
            '7890ABCDEF\n']
        self.assertEqual(read_output("reads2.fastq"), expected_reads2)