Code Example #1
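A plausible import header for this example; the helpers named in the comment are project-specific and their import paths are not shown in this excerpt.

import json

# Assumed to come from the pipeline's own modules (paths not part of the excerpt):
# get_ending, get_results_file, get_merged_badmaps_dict_path, make_reverse_dict,
# create_badmaps_path_function, pack, unpack, Intersection, UnpackBadSegments,
# callers_names, segmentation_states
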
def main(key, remade=True):
    table_annotated = key + get_ending("annotation")
    output = get_results_file(key, 'BAD')

    with open(get_merged_badmaps_dict_path(remade=remade), "r") as read_file:
        d = json.load(read_file)
        rev_d = make_reverse_dict(d)

    badmap_file_name = rev_d[key]

    print('Now doing {} \n with BAD map file {}'.format(table_annotated, badmap_file_name))
    badmap_file_path = create_badmaps_path_function(badmap_file_name, valid=remade)
    with open(badmap_file_path, 'r') as badmap_file, open(output, 'w') as out, open(table_annotated, 'r') as table_file:
        out.write(pack(['#chr', 'pos', 'ID', 'ref', 'alt', 'ref_read_counts', 'alt_read_counts',
                        'repeat_type'] + callers_names + ['BAD'] + ["Q{:.2f}".format(x) for x in segmentation_states] +
                       ['SNP_count', 'sum_cover']))

        u = UnpackBadSegments(None)
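        # intersect annotated SNPs with BAD-map segments; only SNPs that fall inside
        # a segment and carry an rs identifier are written out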
        for chr, pos, ID, ref, alt, ref_c, alt_c, repeat_type, in_callers, \
            in_intersection, segment_BAD, segment_snps, segment_snp_ids,\
                    segment_sumcov, Qual in \
                Intersection(table_file, badmap_file, write_segment_args=True, write_intersect=True,
                             unpack_snp_function=lambda x: unpack(x, use_in='Pcounter'),
                             unpack_segments_function=lambda x: u.unpack_bad_segments(x, segmentation_states)):
            if in_intersection and ID.startswith('rs'):
                out.write(pack([chr, pos, ID, ref, alt, ref_c, alt_c, repeat_type] +
                               [in_callers[name] for name in callers_names] +
                               [segment_BAD] + [Qual[x] for x in Qual] + [segment_snp_ids, segment_sumcov]))
Code Example #2
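Presumed imports; the helper functions named in the comment belong to the project and are not part of this excerpt.

import gzip
import os

# Assumed project imports (paths not shown): get_ending, make_dict_from_vcf,
# make_sorted_caller_path, Intersection, pack, repeats_path, callers_names
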
def main(base_path):
    exp = dict()
    with gzip.open(base_path + get_ending('vcf'), 'rt') as f:
        make_dict_from_vcf(f, exp)
    sorted_lines = [[chromosome, pos, ID, REF, ALT, R, A]
                    for (chromosome, pos, ID, REF, ALT), (R, A) in exp.items()]
    sorted_lines = sorted(sorted_lines, key=lambda x: x[1])
    sorted_lines = sorted(sorted_lines, key=lambda x: x[0])
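    # if a repeats track is available, annotate each variant with its repeat type
    # and drop unnamed variants (ID == '.') that fall inside repeats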
    if os.path.exists(repeats_path):
        with open(repeats_path, "r") as repeats_buffer:
            new_arr = []
            for chromosome, pos, ID, REF, ALT, R, A, in_repeats, repeat_type \
                    in Intersection(sorted_lines, repeats_buffer, write_intersect=True, write_segment_args=True):
                if in_repeats and ID == ".":
                    continue
                new_arr.append(
                    [chromosome, pos, ID, REF, ALT, R, A, repeat_type])
        sorted_lines = new_arr
    else:
        sorted_lines = [x + [''] for x in sorted_lines]
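    # intersect the variants with every caller's peak file and append one
    # peak-membership flag per caller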
    for peak_type in callers_names:
        new_arr = []
        caller_path = make_sorted_caller_path(base_path, peak_type)
        if os.path.isfile(caller_path):
            peak_file = open(caller_path, "r")
        else:
            peak_file = []
        for chromosome, pos, ID, REF, ALT, R, A, repeat_type, *in_peaks in Intersection(
                sorted_lines, peak_file, write_intersect=True):
            new_arr.append([chromosome, pos, ID, REF, ALT, R, A, repeat_type] +
                           in_peaks)
        # close the caller's peak file if one was opened
        if not isinstance(peak_file, list):
            peak_file.close()
        sorted_lines = new_arr
    table_annotated_path = base_path + get_ending('annotation')
    with open(table_annotated_path, "w") as out:
        out.write(
            pack([
                '#chr', 'pos', 'ID', 'ref', 'alt', 'ref_read_counts',
                'alt_read_counts', 'repeat_type'
            ] + callers_names))
        for split_line in sorted_lines:
            out.write(pack(split_line))
Code Example #3
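This example uses only built-ins; the names below are assumed to be imported from the project's own modules.

# Assumed project imports (paths not shown): parse_grasp, parse_ebi, parse_clinvar,
# parse_phewas, parse_finemapping, phenotype_db_names, pack
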
def main(files, path_to_output):
    phenotypes_for_db_list = [
        parse_grasp(files['GRASP']),
        parse_ebi(files['EBI']),
        parse_clinvar(files['ClinVar']),
        parse_phewas(files['PheWas']),
        parse_finemapping(files['FineMapping'])
    ]

    phenotypes_ids_dict = {}
    ids_phenotypes_dict = {}
    phenotype_id = 1

    def remove_phen_name_punctuation(phenotype_name):
        return phenotype_name.lower().replace("'", '').replace('_', ' ')

    for db in phenotypes_for_db_list:
        for phenotype in db:
            name = remove_phen_name_punctuation(phenotype)
            if name not in phenotypes_ids_dict:
                phenotypes_ids_dict[name] = phenotype_id
                ids_phenotypes_dict[phenotype_id] = name
                phenotype_id += 1

    all_phenotypes = {}
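    # all_phenotypes maps each SNP id to a per-database set of phenotype ids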

    for i, db_phenotypes in enumerate(phenotypes_for_db_list):
        for phenotype in db_phenotypes:
            for s in db_phenotypes[phenotype]:
                if s not in all_phenotypes:
                    all_phenotypes[s] = {x: set() for x in phenotype_db_names}
                all_phenotypes[s][phenotype_db_names[i]].add(
                    phenotypes_ids_dict[remove_phen_name_punctuation(phenotype)])

    print('pheno sizes:', len(phenotypes_ids_dict), len(all_phenotypes))
    with open(path_to_output, 'w') as out:
        header = ['RSID', '#all', '#allbutgrasp', '#allsum', '#allsumbutgrasp'] + \
                ['#' + x for x in phenotype_db_names] + \
                phenotype_db_names

        out.write('\t'.join(header) + '\n')
        for s in all_phenotypes:
            abn, phenotypes_without_grasp = set(), set()
            for db in phenotype_db_names:
                if db != 'grasp':
                    phenotypes_without_grasp.update(all_phenotypes[s][db])
                abn.update(all_phenotypes[s][db])
            bb = [len(all_phenotypes[s][x]) for x in phenotype_db_names]
            bb = [
                sum(len(all_phenotypes[s][x]) for x in phenotype_db_names),
                sum(len(all_phenotypes[s][x]) for x in phenotype_db_names[1:])
            ] + bb
            bb = [len(abn), len(phenotypes_without_grasp)] + bb
            cc = [
                ';'.join(
                    sorted(
                        [ids_phenotypes_dict[y]
                         for y in all_phenotypes[s][x]]))
                for x in phenotype_db_names
            ]
            out.write(pack(['rs{}'.format(s), *bb, *cc]))
            for x in phenotype_db_names:
                all_phenotypes[s][x] = len(all_phenotypes[s][x])
    print('{} is successfully created'.format(path_to_output))
Code Example #4
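Presumed imports; numpy is used only for the NaN check on the Kendall tau value, and the names in the trailing comment are project-specific assumptions.

import numpy as np
from scipy.stats import kendalltau

# Assumed project imports (paths not shown): cosmic_path, unpack_cosmic_segments,
# Intersection, pack
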
def correlation_with_cosmic(SNP_objects,
                            mode,
                            method='normal',
                            heatmap_data_file=None,
                            cell_line_name='',
                            cosmic_names=None):
    if cosmic_names is None:
        cosmic_names = {}
    heatmap = None if heatmap_data_file is None else open(
        heatmap_data_file, 'w')
    cosmic_segments = []
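    # read COSMIC segments; unpack_cosmic_segments presumably returns a falsy value
    # for lines that do not match the requested cell line / mode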
    with open(cosmic_path, 'r') as cosmic_file:
        for line in cosmic_file:
            cosmic_cell_line_segments = unpack_cosmic_segments(
                line,
                mode=mode,
                cell_line_name=cell_line_name,
                cosmic_names=cosmic_names)
            if cosmic_cell_line_segments:
                cosmic_segments.append(cosmic_cell_line_segments)
    snp_BAD_list = []
    cosmic_BAD_list = []
    if method == 'normal':
        for chromosome, pos, snp_BAD, quals, in_intersect, cosmic_BAD \
                in Intersection(SNP_objects, cosmic_segments, write_intersect=True,
                                write_segment_args=True):
            if not in_intersect:
                continue
            snp_BAD_list.append(snp_BAD)
            cosmic_BAD_list.append(cosmic_BAD)

            if heatmap is not None:
                heatmap.write(pack([chromosome, pos, snp_BAD, cosmic_BAD]))
        if heatmap is not None:
            heatmap.close()

        if len(snp_BAD_list) != 0:
            kt = kendalltau(snp_BAD_list, cosmic_BAD_list)[0]
            if np.isnan(kt):
                return 'NaN'
            return kt
        return 'NaN'
    elif method == 'cover':
        for chromosome, pos, cov, snp_BAD, quals, in_intersect, cosmic_BAD \
                in Intersection(SNP_objects, cosmic_segments, write_intersect=True,
                                write_segment_args=True):
            if not in_intersect:
                continue
            snp_BAD_list.append(snp_BAD)
            cosmic_BAD_list.append(cosmic_BAD)

            if heatmap is not None:
                heatmap.write(
                    pack([chromosome, pos, cov, snp_BAD, cosmic_BAD] +
                         [quals[x] for x in quals]))
        if heatmap is not None:
            heatmap.close()

        if len(snp_BAD_list) != 0:
            kt = kendalltau(snp_BAD_list, cosmic_BAD_list)[0]
            if np.isnan(kt):
                return 'NaN'
            return kt
        return 'NaN'
Code Example #5
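A sketch of the imports this example appears to rely on; everything in the trailing comment is a project-specific assumption.

import json
import os
import sys
from collections import OrderedDict
from statistics import median_grouped

import numpy as np
import pandas as pd
import statsmodels.stats.multitest

# Assumed project imports (paths not shown): check_if_in_expected_args,
# get_result_table_path, get_merged_badmaps_dict_path, make_reverse_dict,
# get_results_file, cell_lines_dict, tf_dict, is_valid, split_ext_recursive,
# get_name, get_another_agr, unpack, callers_names, logit_combine_p_values,
# pack, expected_args, results_path
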
def main(what_for, key_name, remade=True):
    check_if_in_expected_args(what_for)

    table_path = get_result_table_path(what_for, key_name)

    with open(get_merged_badmaps_dict_path(remade=remade), "r") as read_file:
        old_rev_d = make_reverse_dict(json.load(read_file))
        rev_d = {
            get_results_file(k, 'p-value', False): v
            for k, v in old_rev_d.items()
        }

    tables = []
    if what_for == "CL":
        tables = cell_lines_dict[key_name]
    if what_for == "TF":
        tables = tf_dict[key_name]
    print('Reading datasets for {} {}'.format(what_for, key_name))
    common_snps = dict()
    for table in tables:
        if os.path.isfile(table) and is_valid(
                split_ext_recursive(table), rev_d, remade=remade):
            table_name = get_name(table)
            another_agr = get_another_agr(table, what_for)
            with open(table, 'r') as file:
                for line in file:
                    try:
                        (chromosome, pos, ID, ref, alt, ref_c, alt_c, repeat,
                         in_callers, BAD, Quals, seg_c, sum_cov, p_ref, p_alt,
                         es_ref, es_alt) = unpack(line, use_in="Aggregation")
                    except ValueError:
                        if line.startswith('#'):
                            continue
                        else:
                            raise
                    if np.isnan(p_ref) or ID == '.':
                        continue
                    cov = ref_c + alt_c

                    try:
                        common_snps[(chromosome, pos, ID, ref, alt,
                                     repeat)].append(
                                         (cov, ref_c, alt_c, in_callers, BAD,
                                          Quals, seg_c, sum_cov, p_ref, p_alt,
                                          es_ref, es_alt, table_name,
                                          another_agr))
                    except KeyError:
                        common_snps[(chromosome, pos, ID, ref, alt,
                                     repeat)] = [
                                         (cov, ref_c, alt_c, in_callers, BAD,
                                          Quals, seg_c, sum_cov, p_ref, p_alt,
                                          es_ref, es_alt, table_name,
                                          another_agr)
                                     ]
        else:
            print("There is no {}".format(table))
    print('Writing {}'.format(key_name))

    with open(table_path, 'w') as out:
        out.write(
            pack([
                '#chr', 'pos', 'ID', 'ref', 'alt', 'repeat_type',
                'n_peak_calls', 'n_peak_callers', 'mean_BAD',
                'mean_SNP_per_segment', 'n_aggregated', 'refc_mostsig_ref',
                'altc_mostsig_ref', 'BAD_mostsig_ref', 'es_mostsig_ref',
                'p_mostsig_ref', 'refc_mostsig_alt', 'altc_mostsig_alt',
                'BAD_mostsig_alt', 'es_mostsig_alt', 'p_mostsig_alt',
                'min_cover', 'max_cover', 'median_cover', 'total_cover',
                'es_mean_ref', 'es_mean_alt', 'logitp_ref', 'logitp_alt'
            ]))

        SNP_counter = 0
        print('{} snps'.format(len(common_snps)))

        if len(common_snps) == 0:
            os.remove(table_path)
            sys.exit(0)
        origin_of_snp_dict = OrderedDict()
        keys = list(common_snps.keys())
        keys = sorted(keys, key=lambda chr_pos: chr_pos[1])
        keys = sorted(keys, key=lambda chr_pos: chr_pos[0])
        for key in keys:
            chromosome, pos, ID, ref, alt, repeat = key
            value = common_snps[key]
            SNP_counter += 1
            if SNP_counter % 10000 == 0:
                print('done {}'.format(SNP_counter))
            unique_callers_counter = dict(
                zip(callers_names, [False] * len(callers_names)))
            total_callers_counter = 0
            BAD_array = []
            SNPs_per_segment_array = []
            p_ref_array = []
            p_alt_array = []
            cover_array = []
            ref_effect_size_array = []
            alt_effect_size_array = []
            table_names_array = []
            another_agr_name = []
            ref_counts_array = []
            alt_counts_array = []

            # aggregate counts, BADs, p-values and effect sizes across this SNP's datasets
            for v in value:
                cov, ref_c, alt_c, in_callers, BAD, Quals, seg_c, sum_cov, p_ref, p_alt, es_ref, es_alt, table_name, \
                another_agr = v

                table_names_array.append(table_name)
                another_agr_name.append(another_agr)
                for caller in callers_names:
                    unique_callers_counter[caller] = unique_callers_counter[
                        caller] or in_callers[caller]
                    total_callers_counter += in_callers[caller]
                BAD_array.append(BAD)
                SNPs_per_segment_array.append(seg_c)
                p_ref_array.append(p_ref)
                p_alt_array.append(p_alt)
                if not np.isnan(es_ref):
                    ref_effect_size_array.append(es_ref / np.log(2))
                if not np.isnan(es_alt):
                    alt_effect_size_array.append(es_alt / np.log(2))
                cover_array.append(cov)

                ref_counts_array.append(ref_c)
                alt_counts_array.append(alt_c)
                p = 1 / (BAD + 1)

            min_cover = min(cover_array)
            max_cover = max(cover_array)
            med_cover = median_grouped(cover_array)
            total_cover = sum(cover_array)
            unique_callers = sum(unique_callers_counter[caller]
                                 for caller in callers_names)
            mean_BAD = np.round(np.mean(BAD_array), 2)
            mean_SNPs_per_segment = np.round(np.mean(SNPs_per_segment_array),
                                             1)
            n_aggregated = len(value)

            logitp_ref = logit_combine_p_values(p_ref_array)
            logitp_alt = logit_combine_p_values(p_alt_array)

            if ref_effect_size_array:
                weights = [-1 * np.log10(x) for x in p_ref_array if x != 1]
                es_mean_ref = np.round(
                    np.average(ref_effect_size_array, weights=weights), 3)
                es_mostsig_ref = ref_effect_size_array[int(np.argmax(weights))]
                idx = int(np.argmax([-x for x in p_ref_array]))
                p_mostsig_ref = p_ref_array[idx]
                ref_c_mostsig_ref = ref_counts_array[idx]
                alt_c_mostsig_ref = alt_counts_array[idx]
                BAD_mostsig_ref = BAD_array[idx]
            else:
                es_mean_ref = 'NaN'
                es_mostsig_ref = 'NaN'
                ref_c_mostsig_ref = 'NaN'
                p_mostsig_ref = 'NaN'
                alt_c_mostsig_ref = 'NaN'
                BAD_mostsig_ref = 'NaN'

            if alt_effect_size_array:
                weights = [-1 * np.log10(x) for x in p_alt_array if x != 1]
                es_mean_alt = np.round(
                    np.average(alt_effect_size_array, weights=weights), 3)
                es_mostsig_alt = alt_effect_size_array[int(np.argmax(weights))]
                idx = int(np.argmax([-x for x in p_alt_array]))
                p_mostsig_alt = p_alt_array[idx]
                ref_c_mostsig_alt = ref_counts_array[idx]
                alt_c_mostsig_alt = alt_counts_array[idx]
                BAD_mostsig_alt = BAD_array[idx]
            else:
                es_mean_alt = 'NaN'
                es_mostsig_alt = 'NaN'
                ref_c_mostsig_alt = 'NaN'
                p_mostsig_alt = 'NaN'
                alt_c_mostsig_alt = 'NaN'
                BAD_mostsig_alt = 'NaN'

            out.write(
                pack([
                    chromosome, pos, ID, ref, alt, repeat,
                    total_callers_counter, unique_callers, mean_BAD,
                    mean_SNPs_per_segment, n_aggregated, ref_c_mostsig_ref,
                    alt_c_mostsig_ref, BAD_mostsig_ref, es_mostsig_ref,
                    p_mostsig_ref, ref_c_mostsig_alt, alt_c_mostsig_alt,
                    BAD_mostsig_alt, es_mostsig_alt, p_mostsig_alt, min_cover,
                    max_cover, med_cover, total_cover, es_mean_ref,
                    es_mean_alt, logitp_ref, logitp_alt
                ]))
            origin_of_snp_dict["\t".join(map(str, key))] = {
                'aligns': table_names_array,
                expected_args[what_for]: another_agr_name,
                'ref_counts': ref_counts_array,
                'alt_counts': alt_counts_array,
                'ref_ef': ref_effect_size_array,
                'alt_ef': alt_effect_size_array,
                'BAD': BAD_array,
                'ref_pvalues': p_ref_array,
                'alt_pvalues': p_alt_array,
            }

    print("Counting FDR")

    table = pd.read_table(table_path)
    if table.empty:
        os.remove(table_path)
        sys.exit(0)

    # apply Benjamini-Hochberg FDR correction only to SNPs with max_cover >= 20
    mc_filter_array = np.array(table['max_cover'] >= 20)
    if sum(mc_filter_array) != 0:
        bool_ar_ref, p_val_ref, _, _ = statsmodels.stats.multitest.multipletests(
            table[mc_filter_array]["logitp_ref"], alpha=0.05, method='fdr_bh')
        bool_ar_alt, p_val_alt, _, _ = statsmodels.stats.multitest.multipletests(
            table[mc_filter_array]["logitp_alt"], alpha=0.05, method='fdr_bh')
    else:
        p_val_ref = []
        p_val_alt = []
        bool_ar_ref = []
        bool_ar_alt = []

    fdr_by_ref = np.array(['NaN'] * len(table.index), dtype=np.float128)
    fdr_by_ref[mc_filter_array] = p_val_ref
    table["fdrp_bh_ref"] = fdr_by_ref

    fdr_by_alt = np.array(['NaN'] * len(table.index), dtype=np.float128)
    fdr_by_alt[mc_filter_array] = p_val_alt
    table["fdrp_bh_alt"] = fdr_by_alt

    table.to_csv(table_path, sep="\t", index=False)

    bool_ar = np.array([False] * len(table.index), dtype=bool)
    bool_ar[mc_filter_array] = bool_ar_alt + bool_ar_ref

    with open(
            os.path.join(results_path,
                         what_for + '_DICTS/{}.json'.format(key_name)),
            'w') as out:
        json.dump(origin_of_snp_dict, out)
Code Example #6
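Presumed imports for this example; the trailing comment lists the project helpers that the excerpt does not define.

import json
import os
import sys

import numpy as np
import pandas as pd
import statsmodels.stats.multitest
from scipy.stats import levene

# Assumed project imports (paths not shown): get_correlation_file_path,
# find_test_datasets, open_dfs, construct_total_dist, get_excluded_badmaps_list_path,
# update_dist, transform_dist_to_list, pack
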
def main(remake=False):
    correlation_file_path = get_correlation_file_path(remake=remake)
    cor_df_test = find_test_datasets(correlation_file_path)
    test_dfs = open_dfs(cor_df_test, remake=remake, concat=False)
    print('Test concatenated')

    min_tr, max_tr = 20, 75

    results = []
    for dataset, dataset_df in test_dfs:
        cov_dfs_test = {}
        for cov in dataset_df['cov'].unique():
            cov_dfs_test[cov] = dataset_df[dataset_df['cov'] == cov].copy()
        print('Split test {}'.format(dataset))
        args, vals, covs = construct_total_dist(cov_dfs_test,
                                                min_tr=min_tr,
                                                max_tr=max_tr)
        results.append({
            'args': args,
            'vals': vals,
            'covs': covs,
            'dataset': dataset,
            'snps': len(dataset_df.index)
        })

    with open(os.path.expanduser('~/cov_res_debug.json'), 'w') as f:
        json.dump(results, f)

    cors = pd.read_table(correlation_file_path)

    if not remake:
        # collect_stats
        cell_line_data = {}
        for d in results:
            if d['args']:
                line, cells = d['dataset'].split('@')
                cor = cors[(cors['#cell_line'] == line) & (
                    cors['cells'] == cells)]['cor_by_snp_CAIC'].tolist()
                assert len(cor) == 1
                cor = cor[0]
                if not pd.isna(cor):
                    cell_line_data.setdefault(line, {
                        'correlations': [],
                        'cells': [],
                        'snps': []
                    })
                    cell_line_data[line]['correlations'].append(cor)
                    cell_line_data[line]['cells'].append(cells)
                    cell_line_data[line]['snps'].append(sum(d['vals']))

        # construct big cell lines
        big_cell_lines = set()
        cell_line_reference = {}
        for line, data in cell_line_data.items():
            if len(data['correlations']) < 4:
                continue
            cor_threshold = np.quantile(data['correlations'], 0.75)
            datasets = [(cells, cor, snps) for cells, cor, snps in zip(
                data['cells'], data['correlations'], data['snps'])
                        if cor >= cor_threshold]
            snps = sum(x[2] for x in datasets)
            if snps >= 25000:
                cell_line_reference[line] = [x[0] for x in datasets]
                big_cell_lines.add(line)
    else:
        prev_excluded = pd.read_table(
            get_excluded_badmaps_list_path(remake=False))
        big_cell_lines = set()
        cell_line_reference = {}
        for index, row in prev_excluded.iterrows():
            if row['is_ref']:
                big_cell_lines.add(row['#cell_line'])
                cell_line_reference.setdefault(row['#cell_line'],
                                               []).append(row['sample'])

    big_cell_lines = list(big_cell_lines)

    ref_dists = {x: {} for x in big_cell_lines + ['Other']}
    ref_vars = {x: {} for x in big_cell_lines + ['Other']}

    all_vars = []
    all_metrics = []
    all_cells = []
    all_lines = []
    all_sizes = []
    all_is_ref = []

    # pool effect-size distributions of the reference datasets, per cell line and combined ('Other')
    for d in results:
        if d['args']:
            line, cells = d['dataset'].split('@')
            dist = dict(zip(d['args'], d['vals']))
            if line in big_cell_lines:
                if cells in cell_line_reference[line]:
                    ref_dists[line] = update_dist(ref_dists[line], dist)
                    ref_dists['Other'] = update_dist(ref_dists['Other'], dist)

    for key in ref_dists:
        ref_dists[key] = transform_dist_to_list(ref_dists[key])
        ref_vars[key] = np.nanstd(ref_dists[key])

    print(ref_vars)

    # compare each dataset's distribution with its reference distribution using Levene's test
    for d in results:
        if d['args']:
            dist = dict(zip(d['args'], d['vals']))
            line, cells = d['dataset'].split('@')
            snps = d['snps']
            flat_dist = transform_dist_to_list(dist)
            ref_dist = ref_dists[line if line in big_cell_lines else 'Other']
            if not flat_dist:
                continue
            if not ref_dist:
                print('Empty ref dist for {}'.format(line))
                sys.exit(1)
            stat, p = levene(flat_dist, ref_dist)
            assert not pd.isna(p)
            all_vars.append(
                (np.nanstd(flat_dist),
                 ref_vars[line if line in big_cell_lines else 'Other']))
            all_metrics.append(p)
            all_cells.append(cells)
            all_lines.append(line)
            all_sizes.append(snps)
            all_is_ref.append(line in big_cell_lines
                              and cells in cell_line_reference[line])

    _, all_fdr, _, _ = statsmodels.stats.multitest.multipletests(
        all_metrics, alpha=0.05, method='fdr_bh')

    with open(get_excluded_badmaps_list_path(remake=remake), 'w') as out:
        out.write(
            pack([
                '#cell_line', 'sample', 'size', 'dataset_es_var', 'ref_es_var',
                'fdr', 'is_ref'
            ]))
        for fdr, size, line, ce, var, ref in zip(all_fdr, all_sizes, all_lines,
                                                 all_cells, all_vars,
                                                 all_is_ref):
            out.write(pack([line, ce, size, var[0]**2, var[1]**2, fdr, ref]))
Code Example #7
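Presumed imports; the project-specific paths and helpers are listed in the comment as assumptions.

import errno
import json
import os
import re
import sys

# Assumed project imports (paths not shown): get_correlation_path,
# get_badmaps_path_by_validity, get_states, get_p_value, unpack_snps,
# UnpackBadSegments, Intersection, pack, badmaps_dict_path, badmaps_path
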
def main(file_name, remake=False):
    correlation_path = get_correlation_path()
    with open(badmaps_dict_path, 'r') as file:
        aligns_by_cell_type = json.loads(file.readline().strip())

    modes = []
    for dir_name in sorted(os.listdir(get_badmaps_path_by_validity())):
        if os.path.isdir(os.path.join(get_badmaps_path_by_validity(), dir_name)):
            modes.append(dir_name)

    if not os.path.isfile(os.path.join(badmaps_path, 'merged_vcfs', file_name)):
        print(os.path.join(badmaps_path, 'merged_vcfs', file_name), file_name)
        sys.exit(1)

    name = file_name.split('@')[0]
    lab = os.path.splitext(file_name.split('@')[1])[0]

    try:
        aligns = aligns_by_cell_type[file_name[:-4]]  # .tsv
        al_list = [os.path.basename(align) for align in aligns if os.path.isfile(align)]
        datasetsn = len(al_list)
    except KeyError:
        datasetsn = 'nan'
        al_list = []
        print(file_name)

    table_path = os.path.join(badmaps_path, 'merged_vcfs', file_name)
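    # produce one correlation table per BAD-map segmentation mode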
    for mode in modes:
        if re.match(r'^CAIC@.+@.+$', mode) is not None:
            states = get_states(mode.split('@')[1])
        else:
            states = get_states('')
        out_dir = os.path.join(correlation_path, mode + '_tables{}'.format('_filtered' if remake else ''))
        if not os.path.isdir(out_dir):
            try:
                os.mkdir(out_dir)
            except OSError as exc:
                if exc.errno != errno.EEXIST:
                    raise
                pass
        badmaps_file_path = os.path.join(get_badmaps_path_by_validity(valid=remake), mode, name + '@' + lab + '.badmap.tsv')
        out_path = os.path.join(out_dir, name + '@' + lab + '.tsv')
        print(out_path)

        u = UnpackBadSegments(0)

        with open(table_path, 'r') as table, open(badmaps_file_path, 'r') as BADmap_file, open(out_path, 'w') as out:
            out.write('#' + str(datasetsn) + '@' + lab + '@' + ','.join(al_list) + '\n')
            for chrom, pos, ref, alt, filename, in_intersection, segment_BAD, segment_snps, segment_snp_ids,\
                    segment_sumcov, segment_id, Qual \
                    in Intersection(table, BADmap_file,
                                    unpack_segments_function=lambda x: u.unpack_bad_segments(x, states),
                                    unpack_snp_function=unpack_snps,
                                    write_intersect=True, write_segment_args=True):
                if not in_intersection:
                    continue
                p_value = get_p_value(ref + alt, 1 / (segment_BAD + 1), min(ref, alt))
                out.write(pack([chrom, pos, ref, alt, segment_BAD] +
                               [Qual[x] for x in Qual] + [segment_snp_ids, segment_sumcov] +
                               [filename, segment_id, p_value]))