def encl_sam_genotype(sam_path, arg_dict):
    """Tabulate the copy-number distribution of the reads in a SAM file.

    Every alignment record in ``sam_path`` has its SEQ field (10th
    tab-separated column) realigned with ``expansion_aware_realign`` against
    the ``pre``/``post`` values returned by ``extract_pre_post_flank``
    (presumably the flanking sequences of the locus -- confirm against that
    helper). The copy number reported for each read is counted, and the
    counts are normalised by the number of reads processed.

    Args:
        sam_path: Path to a SAM file. Header lines (starting with '@') and
            blank lines are skipped.
        arg_dict: Mapping providing the keys 'exp_dir', 'read_len' and
            'motif'.

    Returns:
        A tuple ``(nCopy_list, freq_list)`` of two parallel lists:
        the distinct copy numbers observed, and the fraction of reads that
        produced each of them. Both lists are empty when the file holds no
        alignment records.

    Raises:
        IndexError: If an alignment record has fewer than 10 tab-separated
            fields.
    """
    exp_dir = arg_dict['exp_dir']
    read_len = arg_dict['read_len']
    motif = arg_dict['motif']
    score_dict = {'match': 3,
                  'mismatch': -1,
                  'gap': -3}
    verbose = False

    pre, post = extract_pre_post_flank(exp_dir, read_len)

    nCopy_dict = {}
    total_count = 0
    with open(sam_path, 'r') as encl_handle:
        for line in encl_handle:
            line = line.rstrip('\r\n')
            # Skip blank lines and '@' header lines.
            if not line or line[0] == '@':
                continue
            # Split on tabs directly instead of using csv's 'excel-tab'
            # dialect: that dialect treats '"' as a quote character, and '"'
            # is a legal base-quality symbol, so a QUAL string beginning with
            # it would swallow the following fields and records.
            record = line.split('\t')
            sample = record[9]
            nCopy, pos, score = expansion_aware_realign(
                sample, pre, post, motif, score_dict, verbose)
            nCopy_dict[nCopy] = nCopy_dict.get(nCopy, 0) + 1
            total_count += 1

    # Materialise the keys so that the result is a real list on Python 3 too
    # and stays index-aligned with freq_list.
    nCopy_list = list(nCopy_dict)
    freq_list = [float(nCopy_dict[nCopy]) / float(total_count)
                 for nCopy in nCopy_list]
    return nCopy_list, freq_list
print TLEN nc_col = 'nc:i:' + str(0) ps_col = 'ps:i:' + str(0) sc_col = 'sc:i:' + str(0) rc_col = 'rc:Z:' + 'unknown' is_col = 'is:i:' + str(int(np.abs(TLEN))) out_sam_handle.write( '\t'.join(row + [nc_col, ps_col, sc_col, rc_col, is_col]) + '\n') elif (RNAME == chrom and POS <= locus_start - read_len and RNEXT == '=' and PNEXT <= locus_start) or \ (RNAME == chrom and POS >= locus_end and RNEXT == '=' and PNEXT >= locus_end): # ignoring read pairs that are both before or after STR region. pass else: # We realign first read to check if it's pre or post flanking nCopy, pos, score = expansion_aware_realign( SEQ, pre, post, motif, score_dict, verbose) nCopy_rev, pos_rev, score_rev = expansion_aware_realign( reverse_strand(SEQ), pre, post, motif, score_dict, verbose) if score_rev > score: nCopy = nCopy_rev pos = pos_rev score = score_rev SEQ = reverse_strand(SEQ) read_class = classify_realigned_read(SEQ, motif, pos, nCopy, score, score_dict, read_len, margin, verbose) # We're allowing flanking from both sides, but one side should be mapped to the correct location for this to work. # i.e. we don't rescue the case that both reads are flanking, and both are mapped to a different locus. nc_col = 'nc:i:' + str(nCopy) ps_col = 'ps:i:' + str(pos) sc_col = 'sc:i:' + str(score)
verbose = False margin = 2 chrom, locus_start, locus_end = extract_locus_info(locus_bed) with open(in_sam, 'r') as in_sam_handle: for record in in_sam_handle: if record[0] == '@': # out_sam_handle.write(record) pass else: row = record.split() if (row[2] == chrom and int(row[3]) >= locus_start - read_len and int(row[3]) <= locus_end): # Reads mapped within region sample = row[9] nCopy, pos, score = expansion_aware_realign( sample, pre, post, motif, score_dict, verbose) read_class = classify_realigned_read(sample, motif, pos, nCopy, score, score_dict, read_len, margin, verbose) print '>> ', read_class, ':' print '>> nCopy =', nCopy print '>> score =', score print '>> pos =', pos print sample print # if (row[2] == chrom and row[6] == '=') or (row[6] == chrom): # checking correct chrom for mate # if int(row[7]) <= locus_start: # checking if mate is before STR region # if row[2] != chrom: # out_sam_handle.write(record) # print row[9]