Example #1
0
    def test_illumina_data_to_fastq(self):
        """illumina_data_to_fastq formats a record in full and truncated to 12 bases"""
        qseq_record = (
            "M10", "68", "1", "1", "28680", "29475", "0", "1",
            "AACGAAAGGCAGTTTTGGAAGTAGGCGAATTAGGGTAACGCATATAGGATGCTAATACAACGTGAATGAAGTACTGCATCTATGTCACCAGCTTATTACAGCAGCTTGTCATACATGGCCGTACAGGAAACACACATCATAGCATCACACG.",
            "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB",
            "0",
        )

        # Full-length conversion: the trailing '.' of the input read is
        # expected to come back as 'N' in the FASTQ sequence line.
        full_fastq = (
            "@M10_68:1:1:28680:29475#0/1\n"
            "AACGAAAGGCAGTTTTGGAAGTAGGCGAATTAGGGTAACGCATATAGGATGCTAATACAACGTGAATGAAGTACTGCATCTATGTCACCAGCTTATTACAGCAGCTTGTCATACATGGCCGTACAGGAAACACACATCATAGCATCACACGN\n"
            "+\n"
            "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB"
        )
        observed_full = illumina_data_to_fastq(qseq_record)
        self.assertEqual(observed_full, full_fastq)

        # Truncated conversion: only the first 12 bases and quality scores.
        truncated_fastq = (
            "@M10_68:1:1:28680:29475#0/1\n"
            "AACGAAAGGCAG\n"
            "+\n"
            "BBBBBBBBBBBB"
        )
        observed_truncated = illumina_data_to_fastq(qseq_record,
                                                    number_of_bases=12)
        self.assertEqual(observed_truncated, truncated_fastq)
Example #2
0
def main():
    """Convert each input iseq file into a sequence FASTQ and a barcode FASTQ.

    For every path in ``opts.input_fps`` two files are written into
    ``opts.output_dir``: ``<basename>.fastq`` holding the reads and
    ``<basename>_barcodes.fastq`` holding the matching barcodes. Paths that
    end in ``.gz`` are read through ``gzip_open``; all others through the
    builtin ``open``.

    A record is written only when the pass-filter value returned for its
    sequence is non-zero. The read and its barcode are always written
    together, so both output files contain the same records in the same
    order.
    """
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)

    input_fps = opts.input_fps
    output_dir = opts.output_dir
    create_dir(output_dir)
    barcode_length = opts.barcode_length
    barcode_in_header = opts.barcode_in_header
    barcode_qual_c = opts.barcode_qual_c

    for input_fp in input_fps:
        if input_fp.endswith('.gz'):
            open_f = gzip_open
            # drop two extensions for compressed input (e.g. x.txt.gz -> x)
            input_basename = split(splitext(splitext(input_fp)[0])[0])[1]
        else:
            open_f = open
            input_basename = split(splitext(input_fp)[0])[1]
        sequence_output_fp = '%s/%s.fastq' % (output_dir,input_basename)
        barcode_output_fp = '%s/%s_barcodes.fastq' % (output_dir,input_basename)

        # Context managers guarantee all three handles are closed even if a
        # record fails to parse part-way through the file. The outputs are
        # opened before the input, as before.
        with open(sequence_output_fp,'w') as sequence_output_f, \
                open(barcode_output_fp,'w') as barcode_output_f, \
                open_f(input_fp) as input_f:
            for line in input_f:
                common_fields, sequence, sequence_qual, barcode, barcode_qual =\
                 iseq_to_qseq_fields(line, barcode_in_header, barcode_length, barcode_qual_c)

                # The first eight fields are shared by the read record and
                # the barcode record; indexing (rather than slicing) keeps
                # the IndexError on a short field list.
                header_fields = tuple(common_fields[i] for i in range(8))

                sequence_s, pass_filter_s = illumina_data_to_fastq(
                    header_fields + (sequence, sequence_qual))

                # The barcode's own pass-filter value is not used: the
                # decision to keep a record is based on the read only.
                barcode_s, _ = illumina_data_to_fastq(
                    header_fields + (barcode, barcode_qual), barcode_length)

                if pass_filter_s != 0:
                    sequence_output_f.write('%s\n' % sequence_s)
                    barcode_output_f.write('%s\n' % barcode_s)
Example #3
0
def main():
    """Split iseq input files into per-file sequence and barcode FASTQ files.

    Each path in ``opts.input_fps`` produces ``<basename>.fastq`` (reads) and
    ``<basename>_barcodes.fastq`` (barcodes) inside ``opts.output_dir``.
    Input whose name ends in ``.gz`` is opened with ``gzip_open``, anything
    else with the builtin ``open``.

    Only records whose sequence pass-filter value is non-zero are written,
    and a read is never written without its barcode, so the two outputs stay
    aligned record for record.
    """
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)

    input_fps = opts.input_fps
    output_dir = opts.output_dir
    create_dir(output_dir)
    barcode_length = opts.barcode_length
    barcode_in_header = opts.barcode_in_header
    barcode_qual_c = opts.barcode_qual_c

    for input_fp in input_fps:
        if input_fp.endswith('.gz'):
            open_f = gzip_open
            # compressed input carries two extensions to strip
            input_basename = split(splitext(splitext(input_fp)[0])[0])[1]
        else:
            open_f = open
            input_basename = split(splitext(input_fp)[0])[1]
        sequence_output_fp = '%s/%s.fastq' % (output_dir,input_basename)
        barcode_output_fp = '%s/%s_barcodes.fastq' % (output_dir,input_basename)

        # All three files are managed by `with` so nothing is left open when
        # a malformed line raises inside the loop. Output files are still
        # opened first, then the input.
        with open(sequence_output_fp,'w') as sequence_output_f, \
                open(barcode_output_fp,'w') as barcode_output_f, \
                open_f(input_fp) as input_f:
            for line in input_f:
                common_fields, sequence, sequence_qual, barcode, barcode_qual =\
                 iseq_to_qseq_fields(line, barcode_in_header, barcode_length, barcode_qual_c)

                # Eight leading fields common to both records; explicit
                # indexing raises IndexError if fewer are present.
                shared = tuple(common_fields[i] for i in range(8))

                sequence_s, pass_filter_s = illumina_data_to_fastq(
                    shared + (sequence, sequence_qual))

                # Second return value (barcode pass filter) is deliberately
                # ignored; filtering is driven by the read.
                barcode_s, _ = illumina_data_to_fastq(
                    shared + (barcode, barcode_qual), barcode_length)

                if pass_filter_s != 0:
                    sequence_output_f.write('%s\n' % sequence_s)
                    barcode_output_f.write('%s\n' % barcode_s)
Example #4
0
def main():
    """Convert the qseq files of each requested lane into one FASTQ file.

    For every lane in the comma-separated ``opts.lanes`` value, all files
    matching ``<input_dir>/s_<lane>_<read>_*qseq.txt`` are converted and
    written, in sorted filename order, to
    ``<output_dir>/s_<lane>_<read>_sequences.fastq``.

    The output file is opened once per lane. Opening it inside the per-file
    loop in write mode would truncate it for every qseq file and leave only
    the records of the last one. As a consequence the output file is also
    created (empty) for a lane with no matching qseq files.
    """
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)

    input_dir = opts.input_dir
    output_dir = opts.output_dir
    create_dir(output_dir)
    lanes = opts.lanes.split(',')
    bases = opts.bases
    read = opts.read

    for lane in lanes:
        # sort so results will be consistent across different runs (important
        # so amplicon and barcodes read headers will match)
        read1_fps = sorted(glob('%s/s_%s_%d_*qseq.txt' % (input_dir,
                                                          lane.replace(',',''),
                                                          read)))
        output_fp = '%s/s_%s_%s_sequences.fastq' % (output_dir,lane,read)
        with open(output_fp,'w') as output_f:
            for read1_fp in read1_fps:
                # close each input as soon as it has been consumed
                with open(read1_fp,'U') as read1_f:
                    for record in iter_split_lines(read1_f):
                        fastq_s = illumina_data_to_fastq(record,
                                                         number_of_bases=bases)
                        output_f.write('%s\n' % fastq_s)
Example #5
0
def main():
    """Convert the qseq files of each requested lane into one FASTQ file.

    For every lane in the comma-separated ``opts.lanes`` value, all files
    matching ``<input_dir>/s_<lane>_<read>_*qseq.txt`` are converted and
    appended, in sorted filename order, to
    ``<output_dir>/s_<lane>_<read>_sequences.fastq``.

    A record is written when its pass-filter value is non-zero, or
    unconditionally when ``opts.ignore_pass_filter`` is set.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    input_dir = opts.input_dir
    output_dir = opts.output_dir
    create_dir(output_dir)
    lanes = opts.lanes.split(',')
    bases = opts.bases
    read = opts.read
    ignore_pass_filter = opts.ignore_pass_filter

    for lane in lanes:
        # sort so results will be consistent across different runs (important
        # so amplicon and barcodes read headers will match)
        read1_fps = sorted(
            glob('%s/s_%s_%d_*qseq.txt' %
                 (input_dir, lane.replace(',', ''), read)))
        output_fp = '%s/s_%s_%s_sequences.fastq' % (output_dir, lane, read)
        # `with` closes the output even if a record fails to convert
        with open(output_fp, 'w') as output_f:
            for read1_fp in read1_fps:
                # close each input as soon as it has been consumed
                with open(read1_fp, 'U') as read1_f:
                    for record in iter_split_lines(read1_f):
                        fastq_s, pass_filter = illumina_data_to_fastq(
                            record, number_of_bases=bases)
                        if ignore_pass_filter or pass_filter != 0:
                            output_f.write('%s\n' % fastq_s)
Example #6
0
    def test_illumina_data_to_fastq_no_pass_filter_field(self):
        """illumina_data_to_fastq handles a record that lacks the pass filter field"""
        # Ten fields only: the record stops after the quality string.
        record_without_filter = (
            "M10", "68", "1", "1", "28680", "29475", "0", "1",
            "AACGAAAGGCAGTTTTGGAAGTAGGCGAATTAGGGTAACGCATATAGGATGCTAATACAACGTGAATGAAGTACTGCATCTATGTCACCAGCTTATTACAGCAGCTTGTCATACATGGCCGTACAGGAAACACACATCATAGCATCACACG.",
            "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB",
        )

        fastq_text = (
            "@M10_68:1:1:28680:29475#0/1\n"
            "AACGAAAGGCAGTTTTGGAAGTAGGCGAATTAGGGTAACGCATATAGGATGCTAATACAACGTGAATGAAGTACTGCATCTATGTCACCAGCTTATTACAGCAGCTTGTCATACATGGCCGTACAGGAAACACACATCATAGCATCACACGN\n"
            "+\n"
            "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB"
        )
        # The second element is the pass-filter value expected when the
        # field is missing from the input.
        expected_result = (fastq_text, 2)

        observed = illumina_data_to_fastq(record_without_filter)
        self.assertEqual(observed, expected_result)
Example #7
0
    def test_illumina_data_to_fastq(self):
        """illumina_data_to_fastq returns the FASTQ text and the pass filter value"""
        failing_record = (
            "M10", "68", "1", "1", "28680", "29475", "0", "1",
            "AACGAAAGGCAGTTTTGGAAGTAGGCGAATTAGGGTAACGCATATAGGATGCTAATACAACGTGAATGAAGTACTGCATCTATGTCACCAGCTTATTACAGCAGCTTGTCATACATGGCCGTACAGGAAACACACATCATAGCATCACACG.",
            "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB",
            "0",
        )
        # Same FASTQ text is expected for both records below; only the
        # pass-filter value paired with it changes.
        full_text = (
            "@M10_68:1:1:28680:29475#0/1\n"
            "AACGAAAGGCAGTTTTGGAAGTAGGCGAATTAGGGTAACGCATATAGGATGCTAATACAACGTGAATGAAGTACTGCATCTATGTCACCAGCTTATTACAGCAGCTTGTCATACATGGCCGTACAGGAAACACACATCATAGCATCACACGN\n"
            "+\n"
            "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB"
        )

        # pass filter field "0", full length
        observed_full = illumina_data_to_fastq(failing_record)
        self.assertEqual(observed_full, (full_text, 0))

        # pass filter field "0", truncated to the first 12 bases
        short_text = (
            "@M10_68:1:1:28680:29475#0/1\n"
            "AACGAAAGGCAG\n"
            "+\n"
            "BBBBBBBBBBBB"
        )
        observed_short = illumina_data_to_fastq(failing_record,
                                                number_of_bases=12)
        self.assertEqual(observed_short, (short_text, 0))

        # different value in the pass filter field: identical record apart
        # from the last element
        passing_record = failing_record[:-1] + ("1",)
        observed_passing = illumina_data_to_fastq(passing_record)
        self.assertEqual(observed_passing, (full_text, 1))
Example #8
0
    def test_illumina_data_to_fastq_no_pass_filter_field(self):
        """illumina_data_to_fastq copes with input that has no pass filter field"""
        read = "AACGAAAGGCAGTTTTGGAAGTAGGCGAATTAGGGTAACGCATATAGGATGCTAATACAACGTGAATGAAGTACTGCATCTATGTCACCAGCTTATTACAGCAGCTTGTCATACATGGCCGTACAGGAAACACACATCATAGCATCACACG."
        quality = "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB"
        # The tuple ends at the quality string: there is no eleventh
        # (pass filter) element.
        fields = ("M10", "68", "1", "1", "28680", "29475", "0", "1",
                  read, quality)

        wanted_text = "@M10_68:1:1:28680:29475#0/1\nAACGAAAGGCAGTTTTGGAAGTAGGCGAATTAGGGTAACGCATATAGGATGCTAATACAACGTGAATGAAGTACTGCATCTATGTCACCAGCTTATTACAGCAGCTTGTCATACATGGCCGTACAGGAAACACACATCATAGCATCACACGN\n+\nBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB"
        # 2 is the pass-filter value expected for a record without the field.
        wanted = (wanted_text, 2)

        result = illumina_data_to_fastq(fields)
        self.assertEqual(result, wanted)
Example #9
0
def main():
    """Convert the qseq files of each requested lane into one FASTQ file.

    For every lane in the comma-separated ``opts.lanes`` value, all files
    matching ``<input_dir>/s_<lane>_<read>_*qseq.txt`` are converted and
    appended, in sorted filename order, to
    ``<output_dir>/s_<lane>_<read>_sequences.fastq``.

    A record is written when its pass-filter value is non-zero, or
    unconditionally when ``opts.ignore_pass_filter`` is set.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    input_dir = opts.input_dir
    output_dir = opts.output_dir
    create_dir(output_dir)
    lanes = opts.lanes.split(",")
    bases = opts.bases
    read = opts.read
    ignore_pass_filter = opts.ignore_pass_filter

    for lane in lanes:
        # sort so results will be consistent across different runs (important
        # so amplicon and barcodes read headers will match)
        read1_fps = sorted(glob("%s/s_%s_%d_*qseq.txt" % (input_dir, lane.replace(",", ""), read)))
        output_fp = "%s/s_%s_%s_sequences.fastq" % (output_dir, lane, read)
        # `with` closes the output even if a record fails to convert
        with open(output_fp, "w") as output_f:
            for read1_fp in read1_fps:
                # close each input as soon as it has been consumed
                with open(read1_fp, "U") as read1_f:
                    for record in iter_split_lines(read1_f):
                        fastq_s, pass_filter = illumina_data_to_fastq(record, number_of_bases=bases)
                        if ignore_pass_filter or pass_filter != 0:
                            output_f.write("%s\n" % fastq_s)