import gzip
import json
import os

import numpy as np
import pandas as pd

# Project helpers and constants (get_ending, get_results_file, pack, unpack,
# Intersection, UnpackBadSegments, callers_names, segmentation_states, ...)
# come from the pipeline's own modules and are assumed to be in scope.


# Intersect an annotated SNP table with its BAD map and write per-SNP BAD
# values plus posterior quantile columns for every segmentation state.
def main(key, remade=True):
    table_annotated = key + get_ending("annotation")
    output = get_results_file(key, 'BAD')
    with open(get_merged_badmaps_dict_path(remade=remade), "r") as read_file:
        d = json.load(read_file)
        rev_d = make_reverse_dict(d)
    badmap_file_name = rev_d[key]
    print('Now doing {}\nwith BAD map file {}'.format(table_annotated, badmap_file_name))
    badmap_file_path = create_badmaps_path_function(badmap_file_name, valid=remade)
    with open(badmap_file_path, 'r') as badmap_file, \
            open(output, 'w') as out, \
            open(table_annotated, 'r') as table_file:
        out.write(pack(['#chr', 'pos', 'ID', 'ref', 'alt', 'ref_read_counts', 'alt_read_counts',
                        'repeat_type'] + callers_names + ['BAD'] +
                       ["Q{:.2f}".format(x) for x in segmentation_states] +
                       ['SNP_count', 'sum_cover']))
        u = UnpackBadSegments(None)
        for chr, pos, ID, ref, alt, ref_c, alt_c, repeat_type, in_callers, \
                in_intersection, segment_BAD, segment_snps, segment_snp_ids, \
                segment_sumcov, Qual in \
                Intersection(table_file, badmap_file, write_segment_args=True, write_intersect=True,
                             unpack_snp_function=lambda x: unpack(x, use_in='Pcounter'),
                             unpack_segments_function=lambda x: u.unpack_bad_segments(x, segmentation_states)):
            # keep only SNPs that fall inside a BAD segment and carry an rs ID
            if in_intersection and ID.startswith('rs'):
                out.write(pack([chr, pos, ID, ref, alt, ref_c, alt_c, repeat_type] +
                               [in_callers[name] for name in callers_names] +
                               [segment_BAD] + [Qual[x] for x in Qual] +
                               [segment_snp_ids, segment_sumcov]))
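# Hypothetical driver (not part of the original script): run the intersection
# for one dataset key given on the command line; the CLI convention here is an
# illustration only.
if __name__ == '__main__':
    import sys
    main(sys.argv[1], remade=True)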
def main(key, remake=False):
    with open(get_new_badmaps_dict_path() if remake else badmaps_dict_path, 'r') as read_file:
        d = json.loads(read_file.readline())
    mode = 'independent'
    paths_list = []
    for path in d[key]:
        if os.path.isfile(path + get_ending("vcf")):
            paths_list.append(path + get_ending("vcf"))
    out_file = create_merged_vcf_path_function(key)
    if mode == 'independent':
        merge_vcfs_as_independent_snps(out_file, paths_list)
    elif mode == 'add':
        merge_vcfs_add_counts(out_file, paths_list)
    else:
        raise ValueError(mode)
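# A minimal sketch of the 'independent' merge strategy, assuming gzip-compressed
# input VCFs (as read elsewhere in the pipeline) and plain-text output; the real
# merge_vcfs_as_independent_snps is a pipeline helper and may differ.
def merge_vcfs_as_independent_snps_sketch(out_file, paths_list):
    with open(out_file, 'w') as out:
        for path in paths_list:
            with gzip.open(path, 'rt') as vcf:
                for line in vcf:
                    if line.startswith('#'):
                        continue  # drop per-file VCF headers
                    out.write(line)  # duplicates kept: each SNP is counted independently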
# Annotate the SNP calls of one dataset: mark repeat overlaps and, for every
# peak caller, whether each SNP falls inside that caller's peaks.
def main(base_path):
    exp = dict()
    with gzip.open(base_path + get_ending('vcf'), 'rt') as f:
        make_dict_from_vcf(f, exp)
    sorted_lines = [[chromosome, pos, ID, REF, ALT, R, A]
                    for ((chromosome, pos, ID, REF, ALT), (R, A)) in exp.items()]
    # sort by position first, then by chromosome; the stable sort keeps
    # positions ordered within each chromosome
    sorted_lines = sorted(sorted_lines, key=lambda x: x[1])
    sorted_lines = sorted(sorted_lines, key=lambda x: x[0])
    if os.path.exists(repeats_path):
        with open(repeats_path, "r") as repeats_buffer:
            new_arr = []
            for chromosome, pos, ID, REF, ALT, R, A, in_repeats, repeat_type \
                    in Intersection(sorted_lines, repeats_buffer,
                                    write_intersect=True, write_segment_args=True):
                # drop SNPs without an rs ID that lie inside repeats
                if in_repeats and ID == ".":
                    continue
                new_arr.append([chromosome, pos, ID, REF, ALT, R, A, repeat_type])
        sorted_lines = new_arr
    else:
        sorted_lines = [x + [''] for x in sorted_lines]
    for peak_type in callers_names:
        new_arr = []
        caller_path = make_sorted_caller_path(base_path, peak_type)
        if os.path.isfile(caller_path):
            peak_file = open(caller_path, "r")
        else:
            peak_file = []
        for chromosome, pos, ID, REF, ALT, R, A, repeat_type, *in_peaks in Intersection(
                sorted_lines, peak_file, write_intersect=True):
            new_arr.append([chromosome, pos, ID, REF, ALT, R, A, repeat_type] + in_peaks)
        if peak_file:  # close the caller file if one was opened
            peak_file.close()
        sorted_lines = new_arr
    table_annotated_path = base_path + get_ending('annotation')
    with open(table_annotated_path, "w") as out:
        out.write(pack(['#chr', 'pos', 'ID', 'ref', 'alt', 'ref_read_counts',
                        'alt_read_counts', 'repeat_type'] + callers_names))
        for split_line in sorted_lines:
            out.write(pack(split_line))
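# Hypothetical stand-in for the project's pack() helper, inferred from the
# tab-separated tables the scripts above write; the real helper is imported
# from the pipeline's utilities.
def pack_sketch(fields):
    return '\t'.join(map(str, fields)) + '\n'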
def main(for_what, remade=True):
    master_df = pd.read_table(master_list_path, dtype=dtype_dict)
    master_df = master_df[~master_df['EXP_TYPE'].isin(['chip_control', 'chipexo_control'])]
    master_df['path'] = master_df.apply(create_path_from_master_list_df, axis=1)
    master_df = master_df[master_df['path'].apply(
        lambda x: os.path.isfile(x + get_ending('vcf')))]
    if for_what == 'badmaps':
        with open(get_merged_badmaps_dict_path(remade=remade), "r") as read_file:
            d = json.load(read_file)
            rev_d = make_reverse_dict(d)
        master_df = master_df[master_df.apply(
            lambda row: os.path.isfile(row['path'] + get_ending('annotation'))
                        and is_valid(row['path'], rev_d, remade=remade),
            axis=1)]
        master_df['path'].to_csv(out_path, sep='\t', index=False, header=False)
    elif for_what == 'annotation':
        master_df[['path', 'PEAKS']].to_csv(out_path, sep='\t', index=False, header=False)
# Manual (debug) entry point: compute p-values for a single experiment and
# alignment pair with hard-coded input and output locations.
def manual(exp, aligns):
    table_BAD = '/home/abramov/AlignmentsChip/{}/{}'.format(exp, aligns) + get_ending("BAD")
    output = '/home/abramov/test_K562_weighted_p/{}_{}'.format(exp, aligns) + get_ending("p-value")
    print('Now counting P-value for {}'.format(table_BAD))
    df_with_BAD = pd.read_table(table_BAD)
    # debug filter: restrict to a single chromosome
    # df_with_BAD = df_with_BAD[df_with_BAD['#chr'] == 'chr2']
    print(len(df_with_BAD.index))
    (p_ref, p_alt,
     p_ref_bayes, p_alt_bayes,
     p_ref_likelihood, p_alt_likelihood) = count_p_adjusted(
        np.array(df_with_BAD["ref_read_counts"], dtype=np.int_),
        np.array(df_with_BAD["alt_read_counts"], dtype=np.int_),
        np.array(df_with_BAD["BAD"], dtype=np.float_))
    df_with_BAD['p_value_ref'] = p_ref
    df_with_BAD['p_value_alt'] = p_alt
    df_with_BAD['p_value_ref_bayes'] = p_ref_bayes
    df_with_BAD['p_value_alt_bayes'] = p_alt_bayes
    df_with_BAD['p_value_ref_likelihood'] = p_ref_likelihood
    df_with_BAD['p_value_alt_likelihood'] = p_alt_likelihood
    print('Writing p-value table to {}'.format(output))
    df_with_BAD.to_csv(output, sep="\t", index=False)
    print('Done')
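# Hypothetical invocation with made-up experiment and alignment IDs; real IDs
# follow the directory layout under /home/abramov/AlignmentsChip/.
if __name__ == '__main__':
    manual('ENCSR000AAA', 'ALIGNS000001')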
def main(remake=False):
    with open(get_new_badmaps_dict_path() if remake else badmaps_dict_path, 'r') as read_file:
        d = json.loads(read_file.readline())
    keys = sorted(d.keys())
    with open(out_path, 'w') as file:
        for key in keys:
            # skip keys whose alignments have no VCF on disk
            if not any(os.path.isfile(value + get_ending('vcf')) for value in d[key]):
                continue
            file.write(key + '\n')
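# Illustrative shape of the badmaps dict read by the functions above (key and
# paths are made up): each key maps to the base paths of its alignments, and
# get_ending("vcf") is appended to each base path to locate the per-alignment VCF.
example_badmaps_dict = {
    'K562!ENCSR000AAA': ['/EXPS/exp1/align1', '/EXPS/exp1/align2'],
}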