def simPheno(options):
    """Simulate phenotypes for one genomic region and export them.

    Steps, in order:

    1. Read the covariance matrix (key ``'K'``) from ``options.cfile``;
       when ``options.cfile`` is None it is set to ``options.bfile``.
    2. Seed ``SP.random`` with ``options.seed``, build a ``sim.CSimulator``
       and draw a region with ``getRegion``.
    3. Generate the phenotypes with ``genPhenoCube``.
    4. Save the phenotypes to ``<pfile>.phe`` and the region to
       ``<pfile>.phe.region`` with ``SP.savetxt``.

    When ``options.pfile`` is None, a basename is derived from the last path
    component of ``options.bfile`` plus a suffix encoding the simulation
    parameters, and the files land in the current working directory.

    Side effects: ``options.cfile`` and ``options.pfile`` are overwritten on
    the passed-in object when they were None.
    """
    print('importing covariance matrix')
    if options.cfile is None:
        options.cfile = options.bfile
    XX = readCovarianceMatrixFile(options.cfile, readEig=False)['K']

    print('simulating phenotypes')
    SP.random.seed(options.seed)
    simulator = sim.CSimulator(bfile=options.bfile, XX=XX, P=options.nTraits)
    Xr, region = simulator.getRegion(
        chrom_i=options.chrom,
        size=options.windowSize,
        min_nSNPs=options.nCausalR,
        pos_min=options.pos_min,
        pos_max=options.pos_max,
    )
    # The second return value of genPhenoCube is not used here.
    Y, _info = genPhenoCube(
        simulator,
        Xr,
        vTotR=options.vTotR,
        nCausalR=options.nCausalR,
        pCommonR=options.pCommonR,
        vTotBg=options.vTotBg,
        pHidd=options.pHidden,
        pCommon=options.pCommon,
    )

    print('exporting pheno file')
    if options.pfile is not None:
        outdir = os.path.split(options.pfile)[0]
        # A bare basename (e.g. "sim1") has an empty directory part, and
        # os.makedirs('') raises; only create a directory when one is named.
        if outdir and not os.path.exists(outdir):
            os.makedirs(outdir)
    else:
        identifier = (
            '_seed%d_nTraits%d_wndSize%d_vTotR%.2f_nCausalR%d_pCommonR%.2f'
            '_vTotBg%.2f_pHidden%.2f_pCommon%.2f'
            % (
                options.seed,
                options.nTraits,
                options.windowSize,
                options.vTotR,
                options.nCausalR,
                options.pCommonR,
                options.vTotBg,
                options.pHidden,
                options.pCommon,
            )
        )
        options.pfile = os.path.split(options.bfile)[-1] + identifier

    pfile = options.pfile + '.phe'
    rfile = options.pfile + '.phe.region'
    SP.savetxt(pfile, Y)
    SP.savetxt(rfile, region)
def analyze(options):
    """Run ``scan`` over a range of windows and print the elapsed time.

    Inputs are read from the files named on ``options``: covariance data
    (``cfile``, optional), phenotypes (``pfile``, column selection via
    ``trait_idx``), the null model (``nfile``), the windows (``wfile``) and,
    if ``ffile`` is set, covariates.

    ``options.i0`` defaults to 1 and ``options.i1`` to the number of rows of
    the windows table; both defaults are written back onto ``options``.

    The result path handed to ``scan`` is
    ``<resdir>/perm<perm_i>/<i0>_<i1>.res`` when ``options.perm_i`` is set
    and ``<resdir>/test/<i0>_<i1>.res`` otherwise, with ``i0``/``i1``
    zero-padded to the number of digits of the window count.

    Raises:
        AssertionError: if phenotypes and covariates differ in row count.
    """
    # load data
    print('import data')
    if options.cfile is None:
        cov = {'eval': None, 'evec': None}
        warnings.warn(
            'warning: cfile not specifed, a one variance compoenent model will be considered'
        )
    else:
        cov = readCovarianceMatrixFile(options.cfile, readCov=False)
    Y = readPhenoFile(options.pfile, idx=options.trait_idx)
    null = readNullModelFile(options.nfile)
    wnds = readWindowsFile(options.wfile)

    F = None
    if options.ffile:
        F = readCovariatesFile(options.ffile)
    if F is not None:
        assert Y.shape[0] == F.shape[0], 'dimensions mismatch'

    if options.i0 is None:
        options.i0 = 1
    if options.i1 is None:
        options.i1 = wnds.shape[0]

    # name of output file
    if options.perm_i is not None:
        res_dir = os.path.join(options.resdir, 'perm%d' % options.perm_i)
    else:
        res_dir = os.path.join(options.resdir, 'test')
    # Several jobs covering different window ranges may share this directory,
    # so a check-then-create sequence can race. Attempt the creation and only
    # fail if the directory is still missing afterwards.
    try:
        os.makedirs(res_dir)
    except OSError:
        if not os.path.isdir(res_dir):
            raise

    n_digits = len(str(wnds.shape[0]))
    fname = '%s_%s.res' % (
        str(options.i0).zfill(n_digits),
        str(options.i1).zfill(n_digits),
    )
    resfile = os.path.join(res_dir, fname)

    # analysis
    t0 = time.time()
    scan(
        options.bfile,
        Y,
        cov,
        null,
        wnds,
        options.minSnps,
        options.i0,
        options.i1,
        options.perm_i,
        resfile,
        F,
        options.colCovarType_r,
        options.rank_r,
    )
    t1 = time.time()
    print('... finished in %s seconds' % (t1 - t0))
def analyze(options):
    """Run ``scan`` over a range of windows and print the elapsed time.

    Inputs are read from the files named on ``options``: covariance data
    (``cfile``, optional), phenotypes (``pfile``, column selection via
    ``trait_idx``), the null model (``nfile``), the windows (``wfile``) and,
    if ``ffile`` is set, covariates.

    ``options.i0`` defaults to 1 and ``options.i1`` to the number of rows of
    the windows table; both defaults are written back onto ``options``.

    The result path handed to ``scan`` is
    ``<resdir>/perm<perm_i>/<i0>_<i1>.res`` when ``options.perm_i`` is set
    and ``<resdir>/test/<i0>_<i1>.res`` otherwise, with ``i0``/``i1``
    zero-padded to the number of digits of the window count.

    Raises:
        AssertionError: if phenotypes and covariates differ in row count.
    """
    # load data
    print('import data')
    if options.cfile is None:
        cov = {'eval': None, 'evec': None}
        warnings.warn(
            'warning: cfile not specifed, a one variance compoenent model will be considered'
        )
    else:
        cov = readCovarianceMatrixFile(options.cfile, readCov=False)
    Y = readPhenoFile(options.pfile, idx=options.trait_idx)
    null = readNullModelFile(options.nfile)
    wnds = readWindowsFile(options.wfile)

    F = None
    if options.ffile:
        F = readCovariatesFile(options.ffile)
    if F is not None:
        assert Y.shape[0] == F.shape[0], 'dimensions mismatch'

    if options.i0 is None:
        options.i0 = 1
    if options.i1 is None:
        options.i1 = wnds.shape[0]

    # name of output file
    if options.perm_i is not None:
        res_dir = os.path.join(options.resdir, 'perm%d' % options.perm_i)
    else:
        res_dir = os.path.join(options.resdir, 'test')
    # Several jobs covering different window ranges may share this directory,
    # so a check-then-create sequence can race. Attempt the creation and only
    # fail if the directory is still missing afterwards.
    try:
        os.makedirs(res_dir)
    except OSError:
        if not os.path.isdir(res_dir):
            raise

    n_digits = len(str(wnds.shape[0]))
    fname = '%s_%s.res' % (
        str(options.i0).zfill(n_digits),
        str(options.i1).zfill(n_digits),
    )
    resfile = os.path.join(res_dir, fname)

    # analysis
    t0 = time.time()
    scan(
        options.bfile,
        Y,
        cov,
        null,
        wnds,
        options.minSnps,
        options.i0,
        options.i1,
        options.perm_i,
        resfile,
        F,
        options.colCovarType_r,
        options.rank_r,
    )
    t1 = time.time()
    print('... finished in %s seconds' % (t1 - t0))
def preprocess(options):
    """Run the preprocessing stages that are switched on in ``options``.

    ``options.bfile`` is always required. Each stage is optional and they
    run in this order:

    * ``compute_cov``: ``computeCovarianceMatrix`` followed by
      ``eighCovarianceMatrix``; requires ``cfile``.
    * ``compute_PCs > 0``: ``computePCs``; requires ``ffile``.
    * ``fit_null``: reads phenotypes (and, when given, covariance data and
      covariates) and calls ``fit_null``; requires ``pfile``. ``nfile``
      defaults to the last path component of ``bfile``.
    * ``precompute_windows``: reads positions with ``readBimFile`` and
      splits them with ``splitGeno`` into ``<wfile>.wnd``. ``wfile``
      defaults to ``<basename of bfile>.<window_size>``. With
      ``plot_windows`` set, a histogram of SNPs per window is saved to
      ``<wfile>.wnd.pdf``.

    Side effects: ``options.nfile`` and ``options.wfile`` are overwritten on
    the passed-in object when their defaults are applied.

    Raises:
        AssertionError: on a missing required option or a row-count
            mismatch between phenotypes and covariance/covariates.
    """
    assert options.bfile is not None, 'Please specify a bfile.'

    # computing the covariance matrix
    if options.compute_cov:
        assert options.cfile is not None, 'Specify covariance matrix basename'
        print('Computing covariance matrix')
        t0 = time.time()
        computeCovarianceMatrix(
            options.plink_path, options.bfile, options.cfile, options.sim_type
        )
        t1 = time.time()
        print('... finished in %s seconds' % (t1 - t0))

        print('Computing eigenvalue decomposition')
        t0 = time.time()
        eighCovarianceMatrix(options.cfile)
        t1 = time.time()
        print('... finished in %s seconds' % (t1 - t0))

    # computing principal components
    if options.compute_PCs > 0:
        assert options.ffile is not None, 'Specify fix effects basename for saving PCs'
        t0 = time.time()
        computePCs(
            options.plink_path, options.compute_PCs, options.bfile, options.ffile
        )
        t1 = time.time()
        print('... finished in %s seconds' % (t1 - t0))

    # fitting the null model
    if options.fit_null:
        if options.nfile is None:
            options.nfile = os.path.split(options.bfile)[-1]
            warnings.warn('nfile not specifed, set to %s' % options.nfile)
        print('Fitting null model')
        assert options.pfile is not None, 'phenotype file needs to be specified'

        # read pheno
        Y = readPhenoFile(options.pfile, idx=options.trait_idx)

        # read covariance
        if options.cfile is None:
            cov = {'eval': None, 'evec': None}
            warnings.warn(
                'cfile not specifed, a one variance compoenent model will be considered'
            )
        else:
            cov = readCovarianceMatrixFile(options.cfile, readCov=False)
            # Only checked when a covariance file was read: in the other
            # branch cov['eval'] is None and has no shape.
            assert Y.shape[0] == cov['eval'].shape[0], 'dimension mismatch'

        # read covariates
        F = None
        if options.ffile is not None:
            F = readCovariatesFile(options.ffile)
            assert Y.shape[0] == F.shape[0], 'dimensions mismatch'

        t0 = time.time()
        fit_null(Y, cov['eval'], cov['evec'], options.nfile, F)
        t1 = time.time()
        print('.. finished in %s seconds' % (t1 - t0))

    # precomputing the windows
    if options.precompute_windows:
        if options.wfile is None:
            options.wfile = (
                os.path.split(options.bfile)[-1] + '.%d' % options.window_size
            )
            warnings.warn('wfile not specifed, set to %s' % options.wfile)
        print('Precomputing windows')
        t0 = time.time()
        pos = readBimFile(options.bfile)
        nWnds, nSnps = splitGeno(
            pos, size=options.window_size, out_file=options.wfile + '.wnd'
        )
        # One-element tuples keep the %-formatting safe for any value type.
        print('Number of variants: %s' % (pos.shape[0],))
        print('Number of windows: %s' % (nWnds,))
        print('Minimum number of snps: %s' % (nSnps.min(),))
        print('Maximum number of snps: %s' % (nSnps.max(),))
        t1 = time.time()
        print('.. finished in %s seconds' % (t1 - t0))

        # plot distribution of nSnps (needs nSnps from the step above)
        if options.plot_windows:
            print('Plotting ditribution of number of SNPs')
            plot_file = options.wfile + '.wnd.pdf'
            pl.subplot(1, 1, 1)
            pl.hist(nSnps, 30)
            pl.xlabel('Number of SNPs')
            pl.ylabel('Number of windows')
            pl.savefig(plot_file)
def preprocess(options):
    """Run the preprocessing stages that are switched on in ``options``.

    ``options.bfile`` is always required. Each stage is optional and they
    run in this order:

    * ``compute_cov``: ``computeCovarianceMatrix`` followed by
      ``eighCovarianceMatrix``; requires ``cfile``.
    * ``compute_PCs > 0``: ``computePCs``; requires ``ffile``.
    * ``fit_null``: reads phenotypes (and, when given, covariance data and
      covariates) and calls ``fit_null``; requires ``pfile``. ``nfile``
      defaults to the last path component of ``bfile``.
    * ``precompute_windows``: reads positions with ``readBimFile`` and
      splits them with ``splitGeno`` into ``<wfile>.wnd``. ``wfile``
      defaults to ``<basename of bfile>.<window_size>``. With
      ``plot_windows`` set, a histogram of SNPs per window is saved to
      ``<wfile>.wnd.pdf``.

    Side effects: ``options.nfile`` and ``options.wfile`` are overwritten on
    the passed-in object when their defaults are applied.

    Raises:
        AssertionError: on a missing required option or a row-count
            mismatch between phenotypes and covariance/covariates.
    """
    assert options.bfile is not None, 'Please specify a bfile.'

    # computing the covariance matrix
    if options.compute_cov:
        assert options.cfile is not None, 'Specify covariance matrix basename'
        print('Computing covariance matrix')
        t0 = time.time()
        computeCovarianceMatrix(
            options.plink_path, options.bfile, options.cfile, options.sim_type
        )
        t1 = time.time()
        print('... finished in %s seconds' % (t1 - t0))

        print('Computing eigenvalue decomposition')
        t0 = time.time()
        eighCovarianceMatrix(options.cfile)
        t1 = time.time()
        print('... finished in %s seconds' % (t1 - t0))

    # computing principal components
    if options.compute_PCs > 0:
        assert options.ffile is not None, 'Specify fix effects basename for saving PCs'
        t0 = time.time()
        computePCs(
            options.plink_path, options.compute_PCs, options.bfile, options.ffile
        )
        t1 = time.time()
        print('... finished in %s seconds' % (t1 - t0))

    # fitting the null model
    if options.fit_null:
        if options.nfile is None:
            options.nfile = os.path.split(options.bfile)[-1]
            warnings.warn('nfile not specifed, set to %s' % options.nfile)
        print('Fitting null model')
        assert options.pfile is not None, 'phenotype file needs to be specified'

        # read pheno
        Y = readPhenoFile(options.pfile, idx=options.trait_idx)

        # read covariance
        if options.cfile is None:
            cov = {'eval': None, 'evec': None}
            warnings.warn(
                'cfile not specifed, a one variance compoenent model will be considered'
            )
        else:
            cov = readCovarianceMatrixFile(options.cfile, readCov=False)
            # Only checked when a covariance file was read: in the other
            # branch cov['eval'] is None and has no shape.
            assert Y.shape[0] == cov['eval'].shape[0], 'dimension mismatch'

        # read covariates
        F = None
        if options.ffile is not None:
            F = readCovariatesFile(options.ffile)
            assert Y.shape[0] == F.shape[0], 'dimensions mismatch'

        t0 = time.time()
        fit_null(Y, cov['eval'], cov['evec'], options.nfile, F)
        t1 = time.time()
        print('.. finished in %s seconds' % (t1 - t0))

    # precomputing the windows
    if options.precompute_windows:
        if options.wfile is None:
            options.wfile = (
                os.path.split(options.bfile)[-1] + '.%d' % options.window_size
            )
            warnings.warn('wfile not specifed, set to %s' % options.wfile)
        print('Precomputing windows')
        t0 = time.time()
        pos = readBimFile(options.bfile)
        nWnds, nSnps = splitGeno(
            pos, size=options.window_size, out_file=options.wfile + '.wnd'
        )
        # One-element tuples keep the %-formatting safe for any value type.
        print('Number of variants: %s' % (pos.shape[0],))
        print('Number of windows: %s' % (nWnds,))
        print('Minimum number of snps: %s' % (nSnps.min(),))
        print('Maximum number of snps: %s' % (nSnps.max(),))
        t1 = time.time()
        print('.. finished in %s seconds' % (t1 - t0))

        # plot distribution of nSnps (needs nSnps from the step above)
        if options.plot_windows:
            print('Plotting ditribution of number of SNPs')
            plot_file = options.wfile + '.wnd.pdf'
            pl.subplot(1, 1, 1)
            pl.hist(nSnps, 30)
            pl.xlabel('Number of SNPs')
            pl.ylabel('Number of windows')
            pl.savefig(plot_file)