Example #1
0
def encl_sam_genotype(sam_path, arg_dict):
    """Tally copy-number calls over the alignment records of a SAM file.

    Each non-header record has its tenth tab-separated field (the read
    sequence column of a SAM record) handed to ``expansion_aware_realign``
    along with the ``pre``/``post`` values from ``extract_pre_post_flank``,
    the motif and a fixed scoring scheme. The first element of the
    returned triple (the copy number) is counted.

    Args:
        sam_path: Path of the tab-delimited SAM file to read.
        arg_dict: Mapping that provides 'exp_dir', 'read_len' and 'motif'.

    Returns:
        A ``(nCopy_list, freq_list)`` pair of parallel lists: the distinct
        copy numbers seen, and for each the fraction of processed records
        that produced it. Both lists are empty when the file contains no
        alignment records.

    Raises:
        IndexError: If an alignment record has fewer than ten fields.
    """
    exp_dir = arg_dict['exp_dir']
    read_len = arg_dict['read_len']
    motif = arg_dict['motif']
    score_dict = {'match': 3, 'mismatch': -1, 'gap': -3}
    verbose = False
    pre, post = extract_pre_post_flank(exp_dir, read_len)

    nCopy_dict = {}
    total_count = 0
    with open(sam_path, 'r') as encl_handle:
        # SAM fields are never quoted, but read names and quality strings
        # may legitimately contain '"'. Disable csv quote handling so such
        # a character cannot swallow the following fields or lines.
        reader = csv.reader(encl_handle, dialect='excel-tab',
                            quoting=csv.QUOTE_NONE)
        for record in reader:
            # Skip blank lines and '@'-prefixed header lines.
            if not record or record[0].startswith('@'):
                continue
            nCopy, _pos, _score = expansion_aware_realign(
                record[9], pre, post, motif, score_dict, verbose)
            nCopy_dict[nCopy] = nCopy_dict.get(nCopy, 0) + 1
            total_count += 1

    # Computed once after the scan. With no records the key list is empty,
    # so the division by total_count is never evaluated.
    nCopy_list = list(nCopy_dict)
    freq_list = [float(nCopy_dict[nCopy]) / float(total_count)
                 for nCopy in nCopy_list]

    return nCopy_list, freq_list
Example #2
0
# Required command-line option naming the experiment directory.
# NOTE(review): `parser` and the options behind args.out_pref / args.in_pref
# are defined above the visible part of this script.
parser.add_argument('--exp-dir', type=str, required=True)

args = parser.parse_args()

# Path prefixes; '.sam' is appended to each of them further down.
out_pref = args.out_pref
in_pref = args.in_pref
exp_dir = args.exp_dir

# load_profile(exp_dir) returns a mapping that is indexed below by
# 'read_len', 'locus' and 'motif'.
arg_dict = load_profile(exp_dir)

read_len = arg_dict['read_len']
locus = arg_dict['locus']
motif = arg_dict['motif']

# Split the locus into chromosome/start/end and fetch the pre/post values
# for this experiment and read length.
chrom, locus_start, locus_end = extract_locus_info(locus)
pre, post = extract_pre_post_flank(exp_dir, read_len)

# Scoring values, verbosity flag and margin. None of these are used in the
# visible lines -- presumably consumed by the filtering logic further down
# (TODO confirm).
score_dict = { 'match':  3, \
    'mismatch': -1, \
    'gap':   -3}
verbose = False
margin = 2

in_sam = in_pref + '.sam'
out_sam = out_pref + '.sam'
# NOTE(review): the output handle is opened without a `with` block and is not
# closed anywhere in the visible lines -- confirm it is closed later.
out_sam_handle = open(out_sam, 'w')
# Python 2 print statement.
print 'Filtering ' + in_pref + '.sam'
with open(in_sam, 'r') as in_sam_handle:
    for record in in_sam_handle:
        # Lines starting with '@' (SAM header lines) are copied to the
        # output unchanged.
        if record[0] == '@':
            out_sam_handle.write(record)