def encl_sam_genotype(sam_path, arg_dict):
    """Tally the copy numbers reported for the reads of a SAM file.

    Each alignment record of ``sam_path`` is passed to
    ``expansion_aware_realign`` together with the flanks returned by
    ``extract_pre_post_flank``; the first value of its result (``nCopy``)
    is counted, and the counts are converted to relative frequencies.

    Args:
        sam_path: Path to a tab-delimited SAM file. Records whose first
            field starts with '@' (header lines) and blank lines are
            skipped.
        arg_dict: Profile dictionary; the keys 'exp_dir', 'read_len' and
            'motif' are read.

    Returns:
        A tuple ``(nCopy_list, freq_list)`` of two lists of equal length,
        where ``freq_list[i]`` is the fraction of processed records whose
        copy number is ``nCopy_list[i]``. Both lists are empty when the
        file contains no alignment records.

    Raises:
        IndexError: If an alignment record has fewer than 10 fields.
    """
    exp_dir = arg_dict['exp_dir']
    read_len = arg_dict['read_len']
    motif = arg_dict['motif']
    score_dict = {'match': 3,
                  'mismatch': -1,
                  'gap': -3}
    verbose = False

    pre, post = extract_pre_post_flank(exp_dir, read_len)

    nCopy_dict = {}
    total_count = 0
    with open(sam_path, 'r') as encl_handle:
        # QUOTE_NONE: SAM is plain tab-separated text and a base-quality
        # string may legally begin with '"'. With the default quoting such
        # a field would be parsed as a quoted CSV field and could swallow
        # the following tabs/newlines, merging records.
        reader = csv.reader(encl_handle, dialect='excel-tab',
                            quoting=csv.QUOTE_NONE)
        for record in reader:
            # Blank lines come back as []; skip them together with headers.
            if not record or record[0].startswith('@'):
                continue
            # Column 10 of the record (SEQ in the SAM format).
            sample = record[9]
            nCopy, pos, score = expansion_aware_realign(
                sample, pre, post, motif, score_dict, verbose)
            nCopy_dict[nCopy] = nCopy_dict.get(nCopy, 0) + 1
            total_count += 1

    # list(...) yields a real list on both Python 2 and Python 3.
    nCopy_list = list(nCopy_dict)
    # total_count is non-zero whenever nCopy_list is non-empty.
    freq_list = [float(nCopy_dict[key]) / float(total_count)
                 for key in nCopy_list]
    return nCopy_list, freq_list
parser.add_argument('--exp-dir', type=str, required=True) args = parser.parse_args() out_pref = args.out_pref in_pref = args.in_pref exp_dir = args.exp_dir arg_dict = load_profile(exp_dir) read_len = arg_dict['read_len'] locus = arg_dict['locus'] motif = arg_dict['motif'] chrom, locus_start, locus_end = extract_locus_info(locus) pre, post = extract_pre_post_flank(exp_dir, read_len) score_dict = { 'match': 3, \ 'mismatch': -1, \ 'gap': -3} verbose = False margin = 2 in_sam = in_pref + '.sam' out_sam = out_pref + '.sam' out_sam_handle = open(out_sam, 'w') print 'Filtering ' + in_pref + '.sam' with open(in_sam, 'r') as in_sam_handle: for record in in_sam_handle: if record[0] == '@': out_sam_handle.write(record)