Esempio n. 1
0
def reduce_data(beta_path, df, is_nice):
    """
    Load a beta file and reduce it with either fast_method or slow_method.
    :param beta_path: beta file path, forwarded to load_beta_data
    :param df: table with 'startCpG' and 'endCpG' columns (read on the fast path)
    :param is_nice: truthy -> fast path over a restricted range,
                    falsy -> slow path over the whole file
    :return: the result of fast_method / slow_method
    """
    if not is_nice:
        # General case: load the entire file and pass df through unchanged.
        return slow_method(load_beta_data(beta_path), df)

    # Fast path: keep only the two border columns, as integers.
    borders = df[['startCpG', 'endCpG']].astype(int)
    # Range to load: start of the first row up to the end of the last row.
    first_site = borders['startCpG'].values[0]
    last_site = borders['endCpG'].values[-1]
    data = load_beta_data(beta_path, (first_site, last_site))
    return fast_method(data, borders)
Esempio n. 2
0
def merge_betas(betas, opath):
    """
    Merge all betas by summing their values element-wise, while keeping the dimensions
    :param betas: list of beta files (must contain at least one path)
    :param opath: merged beta file
    :return: the merged table, after trim_to_uint8
    """
    # Accumulate in a wide integer type so the running sum is not limited
    # by the dtype of a single loaded file.
    # The builtin `int` replaces `np.int`, an alias for the same type that
    # was removed in NumPy 1.24.
    data = load_beta_data(betas[0]).astype(int)
    for b in betas[1:]:
        data += load_beta_data(b)

    # Trim / normalize to range [0, 256)
    data = trim_to_uint8(data)
    # Dump
    data.tofile(opath)
    return data
Esempio n. 3
0
def apply_filter_wrapper(args, blocks_bins, finds, beta_path, df):
    """
    Reduce a single beta file to blocks and dump the result to a .bin file
    (and optionally to a .bedGraph file).
    :param args: namespace; reads args.blocks_file, args.out_dir and args.bedGraph
    :param blocks_bins: start indices of the segments summed by np.add.reduceat.
                        Not modified by this function.
    :param finds: index applied to the reduceat output to select the wanted rows
    :param beta_path: beta file path
    :param df: table that receives a 'beta' column and is written as the
               bedGraph (only when args.bedGraph is set; modified in place)
    :return: None. Any exception is caught and reported on stdout.
    """
    try:
        # load beta file:
        data = load_beta_data(beta_path)

        # reduce to blocks:
        # Shift the last boundary on a private copy. The same bins are
        # typically reused for several beta files, and decrementing the
        # caller's array in place would move that boundary again each call.
        bins = np.array(blocks_bins)
        bins[-1] -= 1
        # Sum each segment, pick the requested rows, drop the trailing one.
        reduced_data = np.add.reduceat(data, bins)[finds][:-1]

        # dump to file
        # Output name: <beta basename>_<blocks basename minus two extensions>.bin
        out_name = splitext(splitext(basename(args.blocks_file))[0])[0]
        out_name = splitext(basename(beta_path))[0] + '_' + out_name + '.bin'
        out_name = out_name.replace('_genome', '')
        out_name = op.join(args.out_dir, out_name)

        trim_to_uint8(reduced_data).tofile(out_name)
        print(out_name)

        if args.bedGraph:
            # Division by zero is expected here, so the numpy warnings are
            # silenced. The resulting NaNs are written as -1 (see na_rep).
            with np.errstate(divide='ignore', invalid='ignore'):
                beta_vals = reduced_data[:, 0] / reduced_data[:, 1]
                eprint(beta_vals.shape, df.shape)
            df['beta'] = beta_vals
            df.to_csv(out_name.replace('.bin', '.bedGraph'), sep='\t',
                      index=None, header=None, na_rep=-1,
                      float_format='%.2f')

    except Exception as e:
        # Best effort: one failing file is reported without raising.
        print('Failed with beta', beta_path)
        print('Exception:', e)
Esempio n. 4
0
def compare_all_paires(args):
    """
    Draw an N x N grid comparing every pair of beta files (lower triangle
    and diagonal only), then save and/or show the figure.
    :param args: namespace; reads args.betas, args.min_cov, args.outpath and
                 args.show, and is also passed to GenomicRegion
    """
    betas = args.betas
    sites = GenomicRegion(args).sites
    tables = [load_beta_data(b, sites) for b in betas]
    names = [op.splitext(op.basename(b))[0] for b in betas]
    # break names to lines
    # (chunks of k characters, so long names fit as axis labels)
    nnames = []
    k = 20
    for n in names:
        lst = [n[0 + i:k + i] for i in range(0, len(n), k)]
        nn = '\n'.join(lst)
        nnames.append(nn)

    N = len(tables)
    # squeeze=False keeps axs a 2-D array even for a single beta file.
    # Without it, a 1 x 1 grid yields a bare Axes object and the axs[i, j]
    # and axs.flat accesses below fail.
    fig, axs = plt.subplots(N, N, squeeze=False)
    for i in range(N):
        for j in range(i + 1):
            comp2(tables[i], tables[j], args.min_cov, axs[i, j])
        axs[i, 0].set_ylabel(nnames[i], fontsize=8)
    for j in range(N):
        axs[0, j].set_title(nnames[j], fontsize=8)

    # Only show tick labels on the outer edge of the grid.
    for ax in axs.flat:
        ax.label_outer()

    fig.tight_layout()

    if args.outpath is not None:
        plt.savefig(args.outpath)
        eprint(f'[wt cmp] dumped figure to {args.outpath}')

    # With no output path, showing the figure is the only way to see it.
    if args.show or args.outpath is None:
        plt.show()
Esempio n. 5
0
def compare_all_paires(betas, min_cov, sites):
    """
    Open one figure per unordered pair of beta files, then show them all.
    :param betas: beta file paths
    :param min_cov: forwarded to comp2 as its last argument
    :param sites: forwarded to load_beta_data for every file
    """
    tables = []
    for beta in betas:
        tables.append(load_beta_data(beta, sites))

    # File name without directory and without the last extension.
    names = []
    for beta in betas:
        stem, _ = op.splitext(op.basename(beta))
        names.append(stem)

    for first, second in combinations(range(len(tables)), 2):
        plt.figure()
        labels = (names[first], names[second])
        comp2(tables[first], tables[second], labels, min_cov)
    plt.show()
Esempio n. 6
0
def beta_cov(beta_path, sites=None, bed_wrapper=None, print_res=False):
    """
    Compute the mean of column 1 of a beta file.
    :param beta_path: beta file path
    :param sites: forwarded to load_beta_data (ignored when bed_wrapper is given)
    :param bed_wrapper: if truthy, the computation is delegated to beta_cov_by_bed
    :param print_res: if truthy, print "<pretty name>\\t<value>" to stdout
    :return: the computed value
    """
    if not bed_wrapper:
        table = load_beta_data(beta_path, sites)
        res = np.mean(table[:, 1])
    else:
        res = beta_cov_by_bed(beta_path, bed_wrapper)

    if print_res:
        print('{}\t{:.2f}'.format(pretty_name(beta_path), res))
    return res
Esempio n. 7
0
def beta_cov_by_bed(beta_path, bed_wrapper):
    """
    Mean of column 1 of a beta file, pooled over all regions of a bed wrapper.
    :param beta_path: beta file path
    :param bed_wrapper: object whose iter_grs() yields regions with a .sites attribute
    :return: pooled sum divided by pooled number of entries, or 0 when
             no entries were loaded at all
    """
    site_count, cov_sum = 0, 0
    for region in bed_wrapper.iter_grs():
        coverage = load_beta_data(beta_path, region.sites)[:, 1]
        cov_sum += coverage.sum()
        site_count += coverage.size

    # Nothing loaded: avoid dividing by zero.
    if not site_count:
        return 0
    return cov_sum / site_count
Esempio n. 8
0
def view_beta(beta_path, gr, opath):
    """
    Write the part of a beta file covered by a region as tab-separated text.
    :param beta_path: beta file path
    :param gr: a GenomicRegion object; its .sites is passed to load_beta_data
    :param opath: destination handed to np.savetxt
    """
    region_data = load_beta_data(beta_path, gr.sites)
    np.savetxt(opath, region_data, delimiter='\t', fmt='%s')
Esempio n. 9
0
def mult_pat2beta(pat_path, out_beta, nr_sites, args):
    """
    Run chr_thread once per chromosome in a process pool, then sum the
    per-chromosome beta files into a single output file.
    :param pat_path: forwarded to chr_thread
    :param out_beta: output beta path; per-chromosome files are named
                     <out_beta without extension>.<chrom>.beta
    :param nr_sites: number of rows of the merged table
    :param args: namespace; reads args.threads and args.genome
    :return: out_beta
    """
    jobs = []

    with Pool(args.threads) as pool:
        cpg_table = GenomeRefPaths(args.genome).get_chrom_cpg_size_table()
        for chrom in sorted(list(cpg_table['chr'])):
            chrom_beta = f'{op.splitext(out_beta)[0]}.{chrom}.beta'
            job = pool.apply_async(chr_thread,
                                   (chrom, pat_path, chrom_beta, nr_sites))
            jobs.append(job)
        # Wait for every worker before leaving the pool context.
        pool.close()
        pool.join()

    merged = np.zeros((nr_sites, 2), dtype=np.uint8)
    # Collect all results first; each one is used below as a file path.
    chrom_paths = [job.get() for job in jobs]
    for chrom_path in chrom_paths:
        merged += load_beta_data(chrom_path)
        # The per-chromosome file is temporary.
        os.remove(chrom_path)
    merged.tofile(out_beta)
    return out_beta
Esempio n. 10
0
def single_beta(beta_path, indices, cov_thresh):
    """
    Load selected rows of a beta file and convert them with beta2vec.
    :param beta_path: beta file path
    :param indices: row numbers; 1 is subtracted before indexing the loaded table
    :param cov_thresh: passed to beta2vec as min_cov
    :return: (file name without directory and last extension, float16 vector)
    """
    name = op.splitext(op.basename(beta_path))[0]
    rows = load_beta_data(beta_path)[indices - 1]
    vec = beta2vec(rows, min_cov=cov_thresh).astype(np.float16)
    return name, vec
Esempio n. 11
0
 def load_data(self):
     """
     Load the (self.start, self.end) range of every file in self.files.
     :return: float array of shape (number of files, self.nr_sites, 2)
     """
     tables = np.zeros((len(self.files), self.nr_sites, 2))
     # Each slot is a view into `tables`, so assigning into it fills the stack.
     for slot, path in zip(tables, self.files):
         slot[...] = load_beta_data(path, (self.start, self.end))
     return tables
Esempio n. 12
0
 def load_data(self):
     """
     Load the self.gr.sites range of every file in self.files.
     :return: float array of shape (number of files, self.nr_sites, 2)
     """
     stack_shape = (len(self.files), self.nr_sites, 2)
     stacked = np.zeros(stack_shape)
     for idx, beta_file in enumerate(self.files):
         stacked[idx, :, :] = load_beta_data(beta_file, self.gr.sites)
     return stacked
Esempio n. 13
0
 def load_beta(self, beta_path):
     """
     Load a beta file into a numpy array.
     In debug mode the fixed range (1, DEBUG_NR + 1) is loaded instead of
     self.gr.sites.
     :param beta_path: beta file path
     :return: the loaded table; it has as many rows as self.ref_dict
     """
     if self.debug:
         sites = (1, DEBUG_NR + 1)
     else:
         sites = self.gr.sites
     table = load_beta_data(beta_path, sites=sites)
     # Sanity check: the table must line up row-for-row with ref_dict.
     assert table.shape[0] == self.ref_dict.shape[0]
     return table