def main(key, remade=True):
    table_annotated = key + get_ending("annotation")
    output = get_results_file(key, 'BAD')

    # The merged BAD maps dict maps each BAD map name to the dataset keys
    # built from it; invert it to find this dataset's BAD map file.
    with open(get_merged_badmaps_dict_path(remade=remade), "r") as read_file:
        d = json.load(read_file)
        rev_d = make_reverse_dict(d)

    badmap_file_name = rev_d[key]

    print('Now doing {}\nwith BAD map file {}'.format(table_annotated, badmap_file_name))
    badmap_file_path = create_badmaps_path_function(badmap_file_name, valid=remade)
    with open(badmap_file_path, 'r') as badmap_file, open(output, 'w') as out, open(table_annotated, 'r') as table_file:
        out.write(pack(['#chr', 'pos', 'ID', 'ref', 'alt', 'ref_read_counts', 'alt_read_counts',
                        'repeat_type'] + callers_names + ['BAD'] + ["Q{:.2f}".format(x) for x in segmentation_states] +
                       ['SNP_count', 'sum_cover']))

        # Iterate over SNPs intersected with BAD map segments; each row carries
        # the SNP fields plus the enclosing segment's statistics.
        u = UnpackBadSegments(None)
        for chr, pos, ID, ref, alt, ref_c, alt_c, repeat_type, in_callers, \
                in_intersection, segment_BAD, segment_snps, segment_snp_ids, \
                segment_sumcov, Qual in \
                Intersection(table_file, badmap_file, write_segment_args=True, write_intersect=True,
                             unpack_snp_function=lambda x: unpack(x, use_in='Pcounter'),
                             unpack_segments_function=lambda x: u.unpack_bad_segments(x, segmentation_states)):
            # Keep only dbSNP variants (rs IDs) that fall inside a BAD segment.
            if in_intersection and ID.startswith('rs'):
                out.write(pack([chr, pos, ID, ref, alt, ref_c, alt_c, repeat_type] +
                               [in_callers[name] for name in callers_names] +
                               [segment_BAD] + [Qual[x] for x in Qual] + [segment_snp_ids, segment_sumcov]))
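
These examples lean on small helpers that the listing does not show. A minimal sketch of what `pack` and `make_reverse_dict` could look like, assuming `pack` emits one tab-separated line and the badmaps dict maps a BAD map name to a list of dataset keys (both are assumptions, not the repository's actual code):

def pack(fields):
    # Assumed behavior: join fields into a single tab-separated line.
    return '\t'.join(map(str, fields)) + '\n'


def make_reverse_dict(d):
    # Assumed structure: {badmap_name: [dataset_key, ...]} inverted to
    # {dataset_key: badmap_name}, so each dataset can find its BAD map.
    return {key: badmap_name for badmap_name, keys in d.items() for key in keys}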
Example 2
def main(key, remake=False):
    with open(get_new_badmaps_dict_path() if remake else badmaps_dict_path, 'r') as read_file:
        d = json.loads(read_file.readline())
    # Merge strategy: 'independent' keeps every occurrence as its own SNP,
    # 'add' sums read counts per variant.
    mode = 'independent'

    # Keep only datasets whose VCF tables actually exist on disk.
    paths_list = [path + get_ending("vcf") for path in d[key]
                  if os.path.isfile(path + get_ending("vcf"))]
    out_file = create_merged_vcf_path_function(key)

    if mode == 'independent':
        merge_vcfs_as_independent_snps(out_file, paths_list)
    elif mode == 'add':
        merge_vcfs_add_counts(out_file, paths_list)
    else:
        raise ValueError(mode)
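
The two merge modes differ in how a variant seen in several files is treated. A sketch of the distinction on in-memory rows, assuming records of (chrom, pos, ID, ref, alt, ref_count, alt_count); the helper names are hypothetical, and the real `merge_vcfs_*` functions operate on files:

from collections import defaultdict


def merge_as_independent_snps(rows_per_file):
    # 'independent': every occurrence stays a separate record, so a SNP
    # seen in N files yields N output rows (assumed semantics).
    return [row for rows in rows_per_file for row in rows]


def merge_add_counts(rows_per_file):
    # 'add': ref/alt read counts for the same variant are summed across
    # files (assumed semantics).
    totals = defaultdict(lambda: [0, 0])
    for rows in rows_per_file:
        for chrom, pos, ID, ref, alt, ref_c, alt_c in rows:
            acc = totals[(chrom, pos, ID, ref, alt)]
            acc[0] += ref_c
            acc[1] += alt_c
    return [key + tuple(acc) for key, acc in totals.items()]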
Example 3
def main(base_path):
    exp = dict()
    with gzip.open(base_path + get_ending('vcf'), 'rt') as f:
        make_dict_from_vcf(f, exp)
    sorted_lines = [[chromosome, pos, ID, REF, ALT, R, A]
                    for ((chromosome, pos, ID, REF, ALT), (R, A)) in exp.items()]
    # Two stable sorts: by position first, then by chromosome, leaving rows
    # ordered by (chromosome, position).
    sorted_lines = sorted(sorted_lines, key=lambda x: x[1])
    sorted_lines = sorted(sorted_lines, key=lambda x: x[0])
    # Annotate with repeat regions when a repeats track is available.
    if os.path.exists(repeats_path):
        with open(repeats_path, "r") as repeats_buffer:
            new_arr = []
            for chromosome, pos, ID, REF, ALT, R, A, in_repeats, repeat_type \
                    in Intersection(sorted_lines, repeats_buffer, write_intersect=True, write_segment_args=True):
                # Drop novel variants (ID '.') that fall inside repeats.
                if in_repeats and ID == ".":
                    continue
                new_arr.append(
                    [chromosome, pos, ID, REF, ALT, R, A, repeat_type])
        sorted_lines = new_arr
    else:
        sorted_lines = [x + [''] for x in sorted_lines]
    for peak_type in callers_names:
        new_arr = []
        caller_path = make_sorted_caller_path(base_path, peak_type)
        # Fall back to an empty iterable when this caller has no sorted peak file.
        peak_file = open(caller_path, "r") if os.path.isfile(caller_path) else []
        try:
            for chromosome, pos, ID, REF, ALT, R, A, repeat_type, *in_peaks in Intersection(
                    sorted_lines, peak_file, write_intersect=True):
                new_arr.append([chromosome, pos, ID, REF, ALT, R, A, repeat_type] +
                               in_peaks)
        finally:
            if hasattr(peak_file, 'close'):
                peak_file.close()  # the original left this handle open
        sorted_lines = new_arr
    table_annotated_path = base_path + get_ending('annotation')
    with open(table_annotated_path, "w") as out:
        out.write(
            pack([
                '#chr', 'pos', 'ID', 'ref', 'alt', 'ref_read_counts',
                'alt_read_counts', 'repeat_type'
            ] + callers_names))
        for split_line in sorted_lines:
            out.write(pack(split_line))
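
`make_dict_from_vcf` is not shown; from the unpacking above, `exp` maps a (chromosome, pos, ID, REF, ALT) key to accumulated (ref, alt) read counts. A sketch under that assumption, with the column layout for the counts purely hypothetical:

def make_dict_from_vcf(vcf_file, exp):
    # Accumulate ref/alt read counts per variant (assumed behavior).
    for line in vcf_file:
        if line.startswith('#'):
            continue
        chrom, pos, ID, ref, alt, *rest = line.rstrip('\n').split('\t')
        # Assumed layout: the read counts sit in the last two columns.
        ref_c, alt_c = int(rest[-2]), int(rest[-1])
        r, a = exp.get((chrom, int(pos), ID, ref, alt), (0, 0))
        exp[(chrom, int(pos), ID, ref, alt)] = (r + ref_c, a + alt_c)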
Example 4
def main(for_what, remade=True):
    master_df = pd.read_table(master_list_path, dtype=dtype_dict)
    # Drop control experiments; keep only datasets whose VCF table exists.
    master_df = master_df[~master_df['EXP_TYPE'].isin(['chip_control', 'chipexo_control'])]
    master_df['path'] = master_df.apply(create_path_from_master_list_df, axis=1)
    master_df = master_df[master_df['path'].apply(
        lambda x: os.path.isfile(x + get_ending('vcf')))]
    if for_what == 'badmaps':
        with open(get_merged_badmaps_dict_path(remade=remade), "r") as read_file:
            d = json.load(read_file)
            rev_d = make_reverse_dict(d)
        # Keep datasets that are annotated and whose BAD map validates.
        master_df = master_df[master_df.apply(
            lambda row: os.path.isfile(row['path'] + get_ending('annotation'))
            and is_valid(row['path'], rev_d, remade=remade),
            axis=1)]
        master_df['path'].to_csv(out_path, sep='\t', index=False, header=False)
    elif for_what == 'annotation':
        master_df[['path', 'PEAKS']].to_csv(out_path, sep='\t', index=False, header=False)
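
As a usage sketch (`out_path`, `master_list_path`, and the config names come from the surrounding module), the two modes might be driven like this:

# Hypothetical invocations.
main('badmaps', remade=True)   # paths with annotated, valid BAD maps
main('annotation')             # (path, PEAKS) pairs for the annotation step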
Example 5
def manual(exp, aligns):
    table_BAD = '/home/abramov/AlignmentsChip/{}/{}'.format(
        exp, aligns) + get_ending("BAD")
    output = '/home/abramov/test_K562_weighted_p/{}_{}'.format(
        exp, aligns) + get_ending("p-value")
    print('Now counting P-value for {}'.format(table_BAD))
    df_with_BAD = pd.read_table(table_BAD)
    # df_with_BAD = df_with_BAD[df_with_BAD['#chr'] == 'chr2']
    print(len(df_with_BAD.index))
    # Compute per-SNP p-values under each SNP's BAD.
    # np.int64/np.float64 replace np.int_/np.float_ (removed in NumPy 2.0).
    (p_ref, p_alt, p_ref_bayes, p_alt_bayes, p_ref_likelihood,
     p_alt_likelihood) = count_p_adjusted(
         np.array(df_with_BAD["ref_read_counts"], dtype=np.int64),
         np.array(df_with_BAD["alt_read_counts"], dtype=np.int64),
         np.array(df_with_BAD["BAD"], dtype=np.float64))
    df_with_BAD['p_value_ref'] = p_ref
    df_with_BAD['p_value_alt'] = p_alt
    df_with_BAD['p_value_ref_bayes'] = p_ref_bayes
    df_with_BAD['p_value_alt_bayes'] = p_alt_bayes
    df_with_BAD['p_value_ref_likelihood'] = p_ref_likelihood
    df_with_BAD['p_value_alt_likelihood'] = p_alt_likelihood

    print('Dumping...')
    df_with_BAD.to_csv(output, sep="\t", index=False)
    print('Done')
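
`count_p_adjusted` is not shown. As a rough illustration of the kind of model involved (a sketch only; the real function also returns Bayesian and likelihood-based variants, which this does not attempt), one-sided binomial p-values under an allelic dosage imbalance of BAD might look like this, assuming the tested allele is expected at fraction BAD/(BAD+1):

import numpy as np
from scipy.stats import binom


def binomial_p_values(ref_counts, alt_counts, bads):
    # Null model (an assumption for this sketch): read counts follow
    # Binomial(n, p) with the major allele at fraction BAD / (BAD + 1).
    ref_counts = np.asarray(ref_counts)
    alt_counts = np.asarray(alt_counts)
    bads = np.asarray(bads, dtype=float)
    n = ref_counts + alt_counts
    p_major = bads / (bads + 1.0)
    # One-sided P(X >= observed) for over-representation of each allele.
    p_ref = binom.sf(ref_counts - 1, n, p_major)
    p_alt = binom.sf(alt_counts - 1, n, p_major)
    return p_ref, p_alt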
Example 6
def main(remake=False):
    # Read the regenerated dict when remaking, otherwise the original one.
    dict_path = get_new_badmaps_dict_path() if remake else badmaps_dict_path
    with open(dict_path, 'r') as read_file:
        d = json.loads(read_file.readline())
    keys = sorted(d.keys())
    with open(out_path, 'w') as file:
        for key in keys:
            # Skip groups where none of the member VCF tables exist on disk.
            if not any(os.path.isfile(value + get_ending('vcf'))
                       for value in d[key]):
                continue
            file.write(key + '\n')
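
`get_ending` appears in every example above. A plausible shape, with the suffix strings purely hypothetical:

def get_ending(for_what):
    # Map a logical file kind to a filename suffix (suffixes are made up here).
    return {
        'vcf': '.table.gz',
        'annotation': '.table_annotated',
        'BAD': '.table_BAD',
        'p-value': '.table_p',
    }[for_what]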