Beispiel #1
0
def definesets(opt):
    """Precompute or filter variant windows and write them to disk.

    The PLINK fileset ``opt.bfile`` is read, then one of two modes runs:

    * ``opt.sliding_wind``: windows are built from the bim table with
      ``sets_from_bim``.
    * ``opt.filter_sets``: windows loaded from ``opt.iwfile`` are passed
      through ``annotate_sets``.

    ``opt.sliding_wind`` takes precedence when both flags are set. The
    resulting table is written tab-separated to ``opt.wfile + '.wnd'``.

    Parameters
    ----------
    opt : object
        Options namespace. Attributes read: ``bfile``, ``wfile``,
        ``window_size``, ``sliding_wind``, ``filter_sets``, ``iwfile``,
        ``step``, ``chrom``, ``minSnps`` and ``maxSnps``. ``opt.wfile`` is
        filled in (with a warning) when it is None.

    Raises
    ------
    AssertionError
        If ``opt.bfile`` is None, or if ``opt.filter_sets`` is selected
        without ``opt.iwfile``.
    ValueError
        If neither ``opt.sliding_wind`` nor ``opt.filter_sets`` is set.
    """
    assert opt.bfile is not None, 'Please specify a bfile.'

    if opt.wfile is None:
        opt.wfile = os.path.split(opt.bfile)[-1] + '.%d' % opt.window_size
        warnings.warn('wfile not specifed, set to %s' % opt.wfile)

    # Validate the requested mode before the genotype files are read, so a
    # bad configuration fails immediately with a clear message.
    if not opt.sliding_wind:
        if not opt.filter_sets:
            raise ValueError(
                'Nothing to do: set either sliding_wind or filter_sets.')
        assert opt.iwfile is not None, 'Please specify a iwfile.'

    t0 = time.time()

    (bim, fam, G) = read_plink(opt.bfile, verbose=False)

    if opt.sliding_wind:
        print('Precomputing windows')
        sets = sets_from_bim(bim,
                             size=opt.window_size,
                             step=opt.step,
                             chrom=opt.chrom,
                             minSnps=opt.minSnps,
                             maxSnps=opt.maxSnps)
    else:
        print('Filtering windows')
        # Local import keeps this function self-contained.
        import pandas as pd

        # NOTE(review): assumes iwfile is a tab-separated table with a
        # header row, i.e. the layout this function writes to '.wnd' --
        # confirm against the files users actually pass in.
        sets0 = pd.read_csv(opt.iwfile, sep='\t')
        sets = annotate_sets(sets0,
                             bim,
                             minSnps=opt.minSnps,
                             maxSnps=opt.maxSnps)

    sets.to_csv(opt.wfile + '.wnd', sep='\t', index=False)

    print('Number of variants:', G.shape[0])
    print('Number of windows:', sets.shape[0])
    t1 = time.time()
    print('.. finished in %s seconds' % (t1 - t0))
Beispiel #2
0
 def _load(self):
     """Read the fileset at ``self._prefix`` and cache the results.

     ``read_plink`` is called once (non-verbose); the three objects it
     returns are stored, in order, as ``_snpinfo``, ``_ind_info`` and
     ``_geno``.
     """
     loaded = read_plink(self._prefix, verbose=False)
     snp_table, sample_table, genotypes = loaded
     self._snpinfo = snp_table
     self._ind_info = sample_table
     self._geno = genotypes
Beispiel #3
0
    # convert to pandas dataframe and export
    out = pd.DataFrame(out,
                       columns=['setid', 'chrom', 'start', 'end', 'nsnps'])
    return out


def calc_nsnps(pos, start, end):
    """Count the positions that fall inside each half-open window.

    For every pair ``(start[i], end[i])`` the number of entries of ``pos``
    satisfying ``start[i] <= pos < end[i]`` is stored at index ``i``.

    Returns an integer array with ``start.shape[0]`` entries; entries with
    no matching ``end`` value stay zero.
    """
    counts = np.zeros(start.shape[0], dtype=int)
    for idx, (lower, upper) in enumerate(zip(start, end)):
        inside = (pos >= lower) & (pos < upper)
        counts[idx] = inside.sum()
    return counts


if __name__ == '__main__':
    # Demo: fetch the example dataset if it is missing, then build windows
    # from the bim table and annotate them.

    import os
    import subprocess

    bedpath = 'data/chrom22_subsample20_maf0.10'

    if not os.path.exists(bedpath + '.bim'):
        # check_call raises CalledProcessError when wget or unzip fails, so
        # a broken download stops here instead of surfacing later as a
        # confusing read error. Argument lists avoid going through a shell.
        subprocess.check_call(
            ['wget', 'http://www.ebi.ac.uk/~casale/data.zip'])
        subprocess.check_call(['unzip', 'data.zip'])

    (bim, fam, G) = read_plink(bedpath)

    # sets_from_bim
    sets = sets_from_bim(bim)

    # annotate_sets
    sets1 = annotate_sets(sets, bim, minSnps=100, maxSnps=200)