def test_illumina_data_to_fastq(self):
    """illumina_data_to_fastq converts a qseq record to a FASTQ string

    Covers the full-length read (where the trailing '.' base call is
    expected to come back as 'N') and a read truncated to 12 bases.
    """
    qseq_record = (
        "M10",
        "68",
        "1",
        "1",
        "28680",
        "29475",
        "0",
        "1",
        "AACGAAAGGCAGTTTTGGAAGTAGGCGAATTAGGGTAACGCATATAGGATGCTAATACAACGTGAATGAAGTACTGCATCTATGTCACCAGCTTATTACAGCAGCTTGTCATACATGGCCGTACAGGAAACACACATCATAGCATCACACG.",
        "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB",
        "0",
    )

    # Full-length conversion.
    full_length_fastq = """@M10_68:1:1:28680:29475#0/1\nAACGAAAGGCAGTTTTGGAAGTAGGCGAATTAGGGTAACGCATATAGGATGCTAATACAACGTGAATGAAGTACTGCATCTATGTCACCAGCTTATTACAGCAGCTTGTCATACATGGCCGTACAGGAAACACACATCATAGCATCACACGN\n+\nBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB"""
    observed_full = illumina_data_to_fastq(qseq_record)
    self.assertEqual(observed_full, full_length_fastq)

    # Same record, keeping only the first 12 bases and quality scores.
    twelve_base_fastq = """@M10_68:1:1:28680:29475#0/1\nAACGAAAGGCAG\n+\nBBBBBBBBBBBB"""
    observed_twelve = illumina_data_to_fastq(qseq_record, number_of_bases=12)
    self.assertEqual(observed_twelve, twelve_base_fastq)
def main():
    """Convert each iseq input file into a sequence FASTQ and a barcode FASTQ.

    Options are read via ``parse_command_line_parameters(**script_info)``.
    For every path in ``opts.input_fps`` two files are written into
    ``opts.output_dir``: ``<basename>.fastq`` and
    ``<basename>_barcodes.fastq``. Inputs ending in ``.gz`` are read with
    ``gzip_open`` and have two extensions stripped to form the basename;
    all other inputs are read with ``open`` and have one stripped.

    A record is written only when the pass-filter value returned for the
    sequence read is non-zero.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)
    input_fps = opts.input_fps
    output_dir = opts.output_dir
    create_dir(output_dir)
    barcode_length = opts.barcode_length
    barcode_in_header = opts.barcode_in_header
    barcode_qual_c = opts.barcode_qual_c

    for input_fp in input_fps:
        if input_fp.endswith('.gz'):
            open_f = gzip_open
            # drop '.gz' and then the extension underneath it
            input_basename = split(splitext(splitext(input_fp)[0])[0])[1]
        else:
            input_basename = split(splitext(input_fp)[0])[1]
            open_f = open
        sequence_output_fp = '%s/%s.fastq' % (output_dir, input_basename)
        barcode_output_fp = '%s/%s_barcodes.fastq' % (output_dir,
                                                      input_basename)

        # Context managers close all three handles even if a record fails
        # to parse; previously the input handle was never closed and the
        # outputs leaked on error. Outputs are still opened before the
        # input, as before.
        with open(sequence_output_fp, 'w') as sequence_output_f, \
                open(barcode_output_fp, 'w') as barcode_output_f, \
                open_f(input_fp) as input_f:
            for line in input_f:
                common_fields, sequence, sequence_qual, barcode, barcode_qual =\
                    iseq_to_qseq_fields(line,
                                        barcode_in_header,
                                        barcode_length,
                                        barcode_qual_c)
                # The first eight common fields are shared by both records.
                # Index explicitly so a short record still raises IndexError.
                header_fields = tuple(common_fields[i] for i in range(8))

                sequence_s, pass_filter_s = illumina_data_to_fastq(
                    header_fields + (sequence, sequence_qual))
                # pass_filter_b is unpacked but unused: only the sequence
                # read's pass-filter value gates the output.
                barcode_s, pass_filter_b = illumina_data_to_fastq(
                    header_fields + (barcode, barcode_qual),
                    barcode_length)

                if pass_filter_s != 0:
                    # write both records or neither so the two output files
                    # stay aligned record-for-record
                    sequence_output_f.write('%s\n' % sequence_s)
                    barcode_output_f.write('%s\n' % barcode_s)
def main():
    """Write one FASTQ file per lane from that lane's qseq files.

    Options are read via ``parse_command_line_parameters(**script_info)``.
    For each comma-separated entry in ``opts.lanes``, the files matching
    ``<input_dir>/s_<lane>_<read>_*qseq.txt`` are processed in sorted order
    and their records appended to
    ``<output_dir>/s_<lane>_<read>_sequences.fastq``, optionally truncated
    to ``opts.bases`` bases.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)
    input_dir = opts.input_dir
    output_dir = opts.output_dir
    create_dir(output_dir)
    lanes = opts.lanes.split(',')
    bases = opts.bases
    read = opts.read

    for lane in lanes:
        read1_fps = glob('%s/s_%s_%d_*qseq.txt' % (input_dir,
                                                   lane.replace(',', ''),
                                                   read))
        # No matching inputs: create no output file for this lane
        # (same as before).
        if not read1_fps:
            continue
        # sort so results will be consistent across different runs (important
        # so amplicon and barcodes read headers will match)
        read1_fps.sort()

        # The output path depends only on lane and read, so it must be opened
        # once per lane. Re-opening it in 'w' mode for every qseq file
        # truncated the output each time, leaving only the last file's
        # records.
        output_fp = '%s/s_%s_%s_sequences.fastq' % (output_dir, lane, read)
        with open(output_fp, 'w') as output_f:
            for read1_fp in read1_fps:
                # NOTE: the 'U' mode flag was removed in Python 3.11; revisit
                # if this script is ported.
                with open(read1_fp, 'U') as read1_f:
                    for record in iter_split_lines(read1_f):
                        fastq_s = illumina_data_to_fastq(
                            record, number_of_bases=bases)
                        output_f.write('%s\n' % fastq_s)
def main():
    """Write one FASTQ file per lane from that lane's qseq files.

    Options are read via ``parse_command_line_parameters(**script_info)``.
    For each comma-separated entry in ``opts.lanes``, the files matching
    ``<input_dir>/s_<lane>_<read>_*qseq.txt`` are processed in sorted order
    and written to ``<output_dir>/s_<lane>_<read>_sequences.fastq``.

    A record is written when ``opts.ignore_pass_filter`` is set or when the
    pass-filter value returned by ``illumina_data_to_fastq`` is non-zero.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)
    input_dir = opts.input_dir
    output_dir = opts.output_dir
    create_dir(output_dir)
    lanes = opts.lanes.split(',')
    bases = opts.bases
    read = opts.read
    ignore_pass_filter = opts.ignore_pass_filter

    for lane in lanes:
        # sort so results will be consistent across different runs (important
        # so amplicon and barcodes read headers will match)
        read1_fps = sorted(
            glob('%s/s_%s_%d_*qseq.txt' % (input_dir,
                                           lane.replace(',', ''),
                                           read)))
        output_fp = '%s/s_%s_%s_sequences.fastq' % (output_dir, lane, read)

        # Context managers guarantee the output is flushed and closed on
        # error, and close each input file once read (previously the input
        # handles were never closed).
        with open(output_fp, 'w') as output_f:
            for read1_fp in read1_fps:
                # NOTE: the 'U' mode flag was removed in Python 3.11; revisit
                # if this script is ported.
                with open(read1_fp, 'U') as read1_f:
                    for record in iter_split_lines(read1_f):
                        fastq_s, pass_filter = illumina_data_to_fastq(
                            record, number_of_bases=bases)
                        if ignore_pass_filter or pass_filter != 0:
                            output_f.write('%s\n' % fastq_s)
def test_illumina_data_to_fastq_no_pass_filter_field(self):
    """illumina_data_to_fastq handles a record lacking the pass filter field

    The input carries only ten fields; the second element of the result
    is expected to be 2 in that case.
    """
    ten_field_record = (
        "M10",
        "68",
        "1",
        "1",
        "28680",
        "29475",
        "0",
        "1",
        "AACGAAAGGCAGTTTTGGAAGTAGGCGAATTAGGGTAACGCATATAGGATGCTAATACAACGTGAATGAAGTACTGCATCTATGTCACCAGCTTATTACAGCAGCTTGTCATACATGGCCGTACAGGAAACACACATCATAGCATCACACG.",
        "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB",
    )
    fastq_text = """@M10_68:1:1:28680:29475#0/1\nAACGAAAGGCAGTTTTGGAAGTAGGCGAATTAGGGTAACGCATATAGGATGCTAATACAACGTGAATGAAGTACTGCATCTATGTCACCAGCTTATTACAGCAGCTTGTCATACATGGCCGTACAGGAAACACACATCATAGCATCACACGN\n+\nBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB"""
    expected_result = (fastq_text, 2)

    observed_result = illumina_data_to_fastq(ten_field_record)
    self.assertEqual(observed_result, expected_result)
def test_illumina_data_to_fastq(self):
    """illumina_data_to_fastq returns the FASTQ text and the pass filter value

    Three cases: a full-length read with pass filter "0", the same read
    truncated to 12 bases, and a full-length read with pass filter "1".
    """
    record_filter_0 = (
        "M10",
        "68",
        "1",
        "1",
        "28680",
        "29475",
        "0",
        "1",
        "AACGAAAGGCAGTTTTGGAAGTAGGCGAATTAGGGTAACGCATATAGGATGCTAATACAACGTGAATGAAGTACTGCATCTATGTCACCAGCTTATTACAGCAGCTTGTCATACATGGCCGTACAGGAAACACACATCATAGCATCACACG.",
        "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB",
        "0",
    )

    # Full-length read, pass filter value 0.
    expected_full_0 = (
        """@M10_68:1:1:28680:29475#0/1\nAACGAAAGGCAGTTTTGGAAGTAGGCGAATTAGGGTAACGCATATAGGATGCTAATACAACGTGAATGAAGTACTGCATCTATGTCACCAGCTTATTACAGCAGCTTGTCATACATGGCCGTACAGGAAACACACATCATAGCATCACACGN\n+\nBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB""",
        0,
    )
    observed_full_0 = illumina_data_to_fastq(record_filter_0)
    self.assertEqual(observed_full_0, expected_full_0)

    # Same record, truncated to the first 12 bases.
    expected_twelve_0 = (
        """@M10_68:1:1:28680:29475#0/1\nAACGAAAGGCAG\n+\nBBBBBBBBBBBB""",
        0,
    )
    observed_twelve_0 = illumina_data_to_fastq(
        record_filter_0, number_of_bases=12)
    self.assertEqual(observed_twelve_0, expected_twelve_0)

    # different value in the pass filter field
    record_filter_1 = (
        "M10",
        "68",
        "1",
        "1",
        "28680",
        "29475",
        "0",
        "1",
        "AACGAAAGGCAGTTTTGGAAGTAGGCGAATTAGGGTAACGCATATAGGATGCTAATACAACGTGAATGAAGTACTGCATCTATGTCACCAGCTTATTACAGCAGCTTGTCATACATGGCCGTACAGGAAACACACATCATAGCATCACACG.",
        "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB",
        "1",
    )
    expected_full_1 = (
        """@M10_68:1:1:28680:29475#0/1\nAACGAAAGGCAGTTTTGGAAGTAGGCGAATTAGGGTAACGCATATAGGATGCTAATACAACGTGAATGAAGTACTGCATCTATGTCACCAGCTTATTACAGCAGCTTGTCATACATGGCCGTACAGGAAACACACATCATAGCATCACACGN\n+\nBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB""",
        1,
    )
    observed_full_1 = illumina_data_to_fastq(record_filter_1)
    self.assertEqual(observed_full_1, expected_full_1)
def test_illumina_data_to_fastq_no_pass_filter_field(self):
    """illumina_data_to_fastq copes with a record that has no pass filter field

    With only ten input fields, the result's second element should be 2.
    """
    fields_without_filter = (
        "M10", "68", "1", "1", "28680", "29475", "0", "1",
        "AACGAAAGGCAGTTTTGGAAGTAGGCGAATTAGGGTAACGCATATAGGATGCTAATACAACGTGAATGAAGTACTGCATCTATGTCACCAGCTTATTACAGCAGCTTGTCATACATGGCCGTACAGGAAACACACATCATAGCATCACACG.",
        "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB",
    )
    want = (
        """@M10_68:1:1:28680:29475#0/1\nAACGAAAGGCAGTTTTGGAAGTAGGCGAATTAGGGTAACGCATATAGGATGCTAATACAACGTGAATGAAGTACTGCATCTATGTCACCAGCTTATTACAGCAGCTTGTCATACATGGCCGTACAGGAAACACACATCATAGCATCACACGN\n+\nBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB""",
        2,
    )
    got = illumina_data_to_fastq(fields_without_filter)
    self.assertEqual(got, want)
def main():
    """Convert the qseq files of each requested lane into a single FASTQ file.

    Options are read via ``parse_command_line_parameters(**script_info)``.
    For each comma-separated entry in ``opts.lanes``, the files matching
    ``<input_dir>/s_<lane>_<read>_*qseq.txt`` are read in sorted order and
    written to ``<output_dir>/s_<lane>_<read>_sequences.fastq``.

    A record is kept when ``opts.ignore_pass_filter`` is set or when the
    pass-filter value returned by ``illumina_data_to_fastq`` is non-zero.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    input_dir = opts.input_dir
    output_dir = opts.output_dir
    create_dir(output_dir)
    lanes = opts.lanes.split(",")
    bases = opts.bases
    read = opts.read
    ignore_pass_filter = opts.ignore_pass_filter

    for lane in lanes:
        # sort so results will be consistent across different runs (important
        # so amplicon and barcodes read headers will match)
        read1_fps = sorted(
            glob("%s/s_%s_%d_*qseq.txt" % (input_dir, lane.replace(",", ""), read))
        )
        output_fp = "%s/s_%s_%s_sequences.fastq" % (output_dir, lane, read)

        # ``with`` closes the output even when conversion raises, and closes
        # every input file after it has been consumed (the input handles
        # used to be left open).
        with open(output_fp, "w") as output_f:
            for read1_fp in read1_fps:
                # NOTE: the "U" mode flag was removed in Python 3.11; revisit
                # if this script is ported.
                with open(read1_fp, "U") as read1_f:
                    for record in iter_split_lines(read1_f):
                        fastq_s, pass_filter = illumina_data_to_fastq(
                            record, number_of_bases=bases
                        )
                        if ignore_pass_filter or pass_filter != 0:
                            output_f.write("%s\n" % fastq_s)