def definesets(opt):
    """Precompute the windows (variant sets) for a PLINK fileset and save them.

    Two modes are supported, selected by flags on ``opt``:

    * ``opt.sliding_wind`` -- build windows from the bim table with
      ``sets_from_bim``.
    * ``opt.filter_sets`` -- read existing windows from ``opt.iwfile`` and
      pass them through ``annotate_sets``.

    If both flags are set, ``sliding_wind`` takes precedence. The resulting
    table is written tab-separated, without index, to ``opt.wfile + '.wnd'``.

    Parameters
    ----------
    opt : object
        Options holder. Attributes read here: ``bfile``, ``wfile``,
        ``window_size``, ``sliding_wind``, ``filter_sets``, ``iwfile``,
        ``step``, ``chrom``, ``minSnps`` and ``maxSnps``. ``opt.wfile`` is
        filled in (with a warning) when it is ``None``.

    Raises
    ------
    AssertionError
        If ``opt.bfile`` is ``None``, or if ``opt.filter_sets`` is selected
        and ``opt.iwfile`` is ``None``.
    ValueError
        If neither ``opt.sliding_wind`` nor ``opt.filter_sets`` is set.
    """
    # Asserts are kept (rather than raising another type) so that callers
    # relying on AssertionError keep working.
    assert opt.bfile is not None, 'Please specify a bfile.'

    # Validate the requested mode before doing any expensive I/O. Without
    # this check the function used to fail late with an unbound `sets`.
    if not opt.sliding_wind:
        if not opt.filter_sets:
            raise ValueError(
                'Nothing to do: set either sliding_wind or filter_sets.'
            )
        assert opt.iwfile is not None, 'Please specify a iwfile.'

    if opt.wfile is None:
        # Default output prefix: basename of the bfile plus the window size.
        opt.wfile = os.path.split(opt.bfile)[-1] + '.%d' % opt.window_size
        warnings.warn('wfile not specified, set to %s' % opt.wfile)

    t0 = time.time()

    bim, _fam, G = read_plink(opt.bfile, verbose=False)

    if opt.sliding_wind:
        print('Precomputing windows')
        sets = sets_from_bim(
            bim,
            size=opt.window_size,
            step=opt.step,
            chrom=opt.chrom,
            minSnps=opt.minSnps,
            maxSnps=opt.maxSnps,
        )
    else:
        print('Filtering windows')
        import pandas as pd

        # NOTE(review): the input windows were never loaded before (the name
        # `sets0` was undefined). This assumes iwfile uses the same
        # tab-separated layout that this function writes -- confirm.
        sets0 = pd.read_csv(opt.iwfile, sep='\t')
        sets = annotate_sets(
            sets0, bim, minSnps=opt.minSnps, maxSnps=opt.maxSnps
        )

    sets.to_csv(opt.wfile + '.wnd', sep='\t', index=False)

    print('Number of variants:', G.shape[0])
    print('Number of windows:', sets.shape[0])

    t1 = time.time()
    print('.. finished in %s seconds' % (t1 - t0))
def _load(self):
    """Call ``read_plink`` on ``self._prefix`` and keep the result on the instance.

    The three returned values are stored, in order, as ``self._snpinfo``,
    ``self._ind_info`` and ``self._geno``.
    """
    plink_parts = read_plink(self._prefix, verbose=False)
    # Order of the returned triple: bim, fam, bed.
    self._snpinfo, self._ind_info, self._geno = plink_parts
# convert to pandas dataframe and export out = pd.DataFrame(out, columns=['setid', 'chrom', 'start', 'end', 'nsnps']) return out


def calc_nsnps(pos, start, end):
    """Count, for every window, how many positions fall inside it.

    Window ``i`` is the half-open interval ``[start[i], end[i])``: the lower
    bound is inclusive and the upper bound exclusive.

    Parameters
    ----------
    pos : array-like
        Positions to be counted.
    start, end : array-like
        Lower and upper window bounds, paired element-wise. If ``end`` is
        shorter than ``start`` the unpaired windows keep a count of zero.

    Returns
    -------
    numpy.ndarray
        Integer array with one entry per element of ``start``.
    """
    # Coerce up front so plain lists/tuples are accepted as well as arrays.
    pos = np.asarray(pos)
    start = np.asarray(start)
    end = np.asarray(end)

    nsnps = np.zeros(start.shape[0], dtype=int)
    for i, (lower, upper) in enumerate(zip(start, end)):
        nsnps[i] = ((pos >= lower) & (pos < upper)).sum()
    return nsnps


if __name__ == '__main__':
    import os

    # Demo: fetch the example dataset if it is not available locally.
    bedpath = 'data/chrom22_subsample20_maf0.10'
    if not os.path.exists(bedpath + '.bim'):
        os.system('wget http://www.ebi.ac.uk/~casale/data.zip')
        os.system('unzip data.zip')
    (bim, fam, G) = read_plink(bedpath)

    # sets_from_bim
    sets = sets_from_bim(bim)

    # annotate_sets
    sets1 = annotate_sets(sets, bim, minSnps=100, maxSnps=200)