# Filter FASTQ records by quality, match each surviving read's index and
# trailing constant region, and prepare per-time-point tallies of the
# constant-region match distances in constant_dists.

# Open the FASTQ output only when a destination path was supplied:
# open(None, "w") raises TypeError, so a missing path is represented by a
# None handle instead of crashing before any input is read.
output_handle = (
    open(args.write_fastq, "w") if args.write_fastq is not None else None
)

for fastq_file in fastq_files:
    # NOTE(review): file_handle is not closed in the visible part of this
    # loop; confirm it is closed further down.
    file_handle = open(fastq_file, "rU")
    # Parse the open handle as FASTQ with SeqIO, one record at a time.
    parser = SeqIO.parse(file_handle, "fastq")
    for record in parser:
        quality = record.letter_annotations["phred_quality"]
        # Only reads with fewer than three scores below 20 are processed.
        badscores = [i for i in quality if i < 20]
        if len(badscores) < 3:
            # The index candidate is the last six characters of the stripped
            # record description.
            record_index = record.description.strip()[-6:]
            # smart_match appears to return (match, distance), with match set
            # to None when no candidate is accepted -- confirm in seqmatch.
            (accepted_index, index_distance) = seqmatch.smart_match(
                record_index, indices_list, max_dist=args.index_cutoff)
            if accepted_index is not None:
                seq = record.seq
                # The constant candidate is the last seven characters of the
                # sequence, compared against READ_CONSTANT alone.
                constant = str(seq[-7:])
                (accepted_constant, constant_distance) = seqmatch.smart_match(
                    constant, [READ_CONSTANT], max_dist=args.const_cutoff)
                if accepted_constant is not None:
                    # indices_dict maps the accepted index to its time point.
                    time_point = indices_dict[accepted_index]
                    # Create the nested counter entries on first sight.
                    if time_point not in constant_dists:
                        constant_dists[time_point] = {}
                    if constant_distance not in constant_dists[time_point]:
                        constant_dists[time_point][constant_distance] = 0
# Quality-filter every FASTQ record, match its index and trailing constant
# region, and tally constant-region match distances per time point.

# The output file is opened only when a destination path was given.
if args.write_fastq is not None:
    output_handle = open(args.write_fastq, "w")

for fastq_file in fastq_files:
    file_handle = open(fastq_file, "rU")
    # Hand the open file to SeqIO, which yields one FASTQ record at a time.
    parser = SeqIO.parse(file_handle, "fastq")
    for record in parser:
        quality = record.letter_annotations["phred_quality"]
        # Reads with three or more scores below 20 are skipped.
        badscores = [i for i in quality if i < 20]
        if len(badscores) < 3:
            # Index candidate: last six characters of the stripped description.
            record_index = record.description.strip()[-6:]
            accepted_index, index_distance = seqmatch.smart_match(
                record_index,
                indices_list,
                max_dist=args.index_cutoff,
            )
            if accepted_index is not None:
                seq = record.seq
                # Constant candidate: last seven characters of the sequence.
                constant = str(seq[-7:])
                accepted_constant, constant_distance = seqmatch.smart_match(
                    constant,
                    [READ_CONSTANT],
                    max_dist=args.const_cutoff,
                )
                if accepted_constant is not None:
                    time_point = indices_dict[accepted_index]
                    # Make sure the nested counter exists (starting at zero),
                    # then count this constant-match distance.
                    constant_dists.setdefault(time_point, {}).setdefault(
                        constant_distance, 0)
                    constant_dists[time_point][constant_distance] += 1
                    if time_point not in index_dists:
                        index_dists[time_point] = {}
# Tally, for each time point, how many reads matched a known barcode exactly,
# matched within the allowed distance, or did not match at all.
print("Pickles loaded.")

for timepoint in matched_index.keys():
    # Fresh tallies for this time point: one zeroed counter per known barcode
    # plus the summary counters.
    tp_count_dict = dict.fromkeys(barcode_keys, 0)
    total = direct = fuzzy = unmatched = 0
    print(timepoint)
    for barcode, quality in matched_index[timepoint]:
        total += 1
        # Pass max_dist only when a cutoff was supplied, so smart_match falls
        # back to its own default otherwise.
        if args.fuzzy_cutoff is None:
            match_options = {}
        else:
            match_options = {"max_dist": args.fuzzy_cutoff}
        accepted_index, index_distance = seqmatch.smart_match(
            barcode, barcode_keys, **match_options)
        if accepted_index is not None:
            # Exact and inexact hits credit the same barcode; only the
            # summary counter differs.
            tp_count_dict[accepted_index] += 1
            if index_distance == 0:
                direct += 1
            else:
                fuzzy += 1
        else:
            unmatched += 1