Exemple #1
0
    output_handle = open(args.write_fastq, "w")

# Scan every FASTQ file and, for reads that pass the quality, index and
# constant-region filters, prepare the per-time-point tally of constant-region
# match distances.
for fastq_file in fastq_files:
    # "rU" is Python 2's universal-newline read mode.
    # NOTE(review): no close() for this handle is visible in this excerpt -
    # confirm it is released once the file has been parsed.
    file_handle = open(fastq_file, "rU")

    # Lazily iterate over the FASTQ records in the handle via SeqIO.
    parser = SeqIO.parse(file_handle, "fastq")

    for record in parser:
        quality = record.letter_annotations["phred_quality"]
        # Quality gate: keep the read only if fewer than three of its scores
        # fall below 20.
        badscores = [i for i in quality if i < 20]
        if len(badscores) < 3:
            # The last six characters of the stripped description line are
            # used as the read's index (presumably the multiplexing index
            # sequence - confirm against the FASTQ header format).
            record_index = record.description.strip()[-6:]
            # NOTE(review): smart_match appears to return (match, distance),
            # with match set to None when nothing is accepted under max_dist -
            # confirm against seqmatch.
            (accepted_index,
             index_distance) = seqmatch.smart_match(record_index,
                                                    indices_list,
                                                    max_dist=args.index_cutoff)
            if accepted_index is not None:
                seq = record.seq
                # The final seven bases of the read are compared against the
                # single expected constant sequence.
                constant = str(seq[-7:])
                (accepted_constant, constant_distance) = seqmatch.smart_match(
                    constant, [
                        READ_CONSTANT,
                    ], max_dist=args.const_cutoff)
                if accepted_constant is not None:
                    # Map the accepted index to its time point.
                    time_point = indices_dict[accepted_index]

                    # constant_dists is a nested mapping:
                    # time point -> constant match distance -> count.
                    # Create missing levels on first sight; the counter is only
                    # seeded with 0 here (the increment is not part of this
                    # excerpt).
                    if time_point not in constant_dists:
                        constant_dists[time_point] = {}
                    if constant_distance not in constant_dists[time_point]:
                        constant_dists[time_point][constant_distance] = 0
# Open the optional FASTQ output file only when a path was supplied through
# args.write_fastq; when it is None this statement does not bind output_handle.
# NOTE(review): no matching close() is visible in this excerpt - confirm the
# handle is closed once writing is finished.
if args.write_fastq is not None:
    output_handle = open(args.write_fastq, "w")

# Scan every FASTQ file and, for reads that pass the quality, index and
# constant-region filters, tally constant-region match distances per time
# point and prepare the matching per-time-point index-distance table.
for fastq_file in fastq_files:
    # "rU" is Python 2's universal-newline read mode.
    # NOTE(review): no close() for this handle is visible in this excerpt -
    # confirm it is released once the file has been parsed.
    file_handle = open(fastq_file, "rU")

    # Lazily iterate over the FASTQ records in the handle via SeqIO.
    parser = SeqIO.parse(file_handle,"fastq")

    for record in parser:
        quality = record.letter_annotations["phred_quality"]
        # Quality gate: keep the read only if fewer than three of its scores
        # fall below 20.
        badscores = [i for i in quality if i<20]
        if len(badscores) < 3:
            # The last six characters of the stripped description line are
            # used as the read's index (presumably the multiplexing index
            # sequence - confirm against the FASTQ header format).
            record_index = record.description.strip()[-6:]
            # NOTE(review): smart_match appears to return (match, distance),
            # with match set to None when nothing is accepted under max_dist -
            # confirm against seqmatch.
            (accepted_index, index_distance) = seqmatch.smart_match(record_index,indices_list,max_dist=args.index_cutoff)
            if accepted_index is not None:
                seq = record.seq
                # The final seven bases of the read are compared against the
                # single expected constant sequence.
                constant = str(seq[-7:])
                (accepted_constant, constant_distance) = seqmatch.smart_match(constant, [READ_CONSTANT,], max_dist=args.const_cutoff)
                if accepted_constant is not None:
                    # Map the accepted index to its time point.
                    time_point = indices_dict[accepted_index]

                    # constant_dists is a nested mapping:
                    # time point -> constant match distance -> count.
                    # Create missing levels on first sight, then count this read.
                    if time_point not in constant_dists:
                        constant_dists[time_point] = {}
                    if constant_distance not in constant_dists[time_point]:
                        constant_dists[time_point][constant_distance]=0
                    constant_dists[time_point][constant_distance]+=1

                    # Make sure index_dists has an entry for this time point
                    # (how it is filled is not part of this excerpt).
                    if time_point not in index_dists:
                        index_dists[time_point] = {}
print("Pickles loaded.")

# For each time point, classify every observed barcode as an exact match, a
# fuzzy match, or unmatched against the known barcode set, and count how many
# reads land on each known barcode.
for timepoint in matched_index.keys():
    # Per-barcode read counts for this time point, all starting at zero.
    tp_count_dict = dict.fromkeys(barcode_keys, 0)
    total = direct = fuzzy = unmatched = 0
    print(timepoint)
    for barcode, quality in matched_index[timepoint]:
        total += 1

        # Pass max_dist only when a cutoff was supplied, so that
        # smart_match's own default applies otherwise.
        if args.fuzzy_cutoff is not None:
            accepted_index, index_distance = seqmatch.smart_match(
                barcode, barcode_keys, max_dist=args.fuzzy_cutoff)
        else:
            accepted_index, index_distance = seqmatch.smart_match(
                barcode, barcode_keys)

        if accepted_index is not None:
            # Any accepted match counts toward its barcode; a distance of
            # zero marks it as exact rather than fuzzy.
            tp_count_dict[accepted_index] += 1
            if index_distance == 0:
                direct += 1
            else:
                fuzzy += 1
        else:
            unmatched += 1