Esempio n. 1
0
def simPheno(options):
    """Simulate phenotypes and export them together with the sampled region.

    The covariance matrix is read from ``options.cfile`` (``options.bfile``
    is used as the basename when ``cfile`` is None), the random generator is
    seeded with ``options.seed``, a region is drawn through
    ``sim.CSimulator.getRegion`` and phenotypes are produced by
    ``genPhenoCube``.

    Two text files are written with ``SP.savetxt``:
    ``<pfile>.phe`` (phenotypes) and ``<pfile>.phe.region`` (region).

    Side effects on ``options``: ``cfile`` and ``pfile`` are filled in when
    they were None. When ``pfile`` is None the basename is derived from the
    basename of ``bfile`` plus an identifier built from the simulation
    settings, so the files land in the current working directory.
    """
    print('importing covariance matrix')
    if options.cfile is None:
        options.cfile = options.bfile
    XX = readCovarianceMatrixFile(options.cfile, readEig=False)['K']

    print('simulating phenotypes')
    SP.random.seed(options.seed)
    simulator = sim.CSimulator(bfile=options.bfile, XX=XX, P=options.nTraits)
    Xr, region = simulator.getRegion(
        chrom_i=options.chrom,
        size=options.windowSize,
        min_nSNPs=options.nCausalR,
        pos_min=options.pos_min,
        pos_max=options.pos_max
    )

    # Only the phenotype matrix is exported; the second return value is unused.
    Y, _ = genPhenoCube(
        simulator,
        Xr,
        vTotR=options.vTotR,
        nCausalR=options.nCausalR,
        pCommonR=options.pCommonR,
        vTotBg=options.vTotBg,
        pHidd=options.pHidden,
        pCommon=options.pCommon
    )

    print('exporting pheno file')
    if options.pfile is not None:
        outdir = os.path.split(options.pfile)[0]
        # A bare basename has an empty directory part; os.makedirs('') would
        # raise, so only create the directory when there is one to create.
        if outdir and not os.path.exists(outdir):
            os.makedirs(outdir)
    else:
        identifier = '_seed%d_nTraits%d_wndSize%d_vTotR%.2f_nCausalR%d_pCommonR%.2f_vTotBg%.2f_pHidden%.2f_pCommon%.2f'%(options.seed,options.nTraits,options.windowSize,options.vTotR,options.nCausalR,options.pCommonR,options.vTotBg,options.pHidden,options.pCommon)
        options.pfile = os.path.split(options.bfile)[-1] + identifier

    pfile = options.pfile + '.phe'
    rfile = options.pfile + '.phe.region'

    SP.savetxt(pfile, Y)
    SP.savetxt(rfile, region)
Esempio n. 2
0
def analyze(options):
    """Load the inputs named in ``options`` and run ``scan`` on them.

    Inputs read: optional covariance decomposition (``cfile``), phenotypes
    (``pfile``, restricted by ``trait_idx``), null model (``nfile``),
    windows (``wfile``) and optional covariates (``ffile``).

    ``options.i0`` defaults to 1 and ``options.i1`` to the number of rows of
    the windows array; both defaults are stored back on ``options``.

    The result path handed to ``scan`` is
    ``<resdir>/<perm%d|test>/<i0>_<i1>.res`` with both indices zero-padded
    to the width of the window count. The directory is created if missing.
    """
    print('import data')

    # Without a covariance file, pass an empty decomposition downstream.
    if options.cfile is not None:
        cov = readCovarianceMatrixFile(options.cfile, readCov=False)
    else:
        cov = {'eval': None, 'evec': None}
        warnings.warn(
            'warning: cfile not specifed, a one variance compoenent model will be considered'
        )

    Y = readPhenoFile(options.pfile, idx=options.trait_idx)
    null = readNullModelFile(options.nfile)
    wnds = readWindowsFile(options.wfile)

    F = readCovariatesFile(options.ffile) if options.ffile else None
    if F is not None:
        assert Y.shape[0] == F.shape[0], 'dimensions mismatch'

    # Default window range.
    if options.i0 is None:
        options.i0 = 1
    if options.i1 is None:
        options.i1 = wnds.shape[0]

    # Output location: one sub-directory per permutation, 'test' otherwise.
    if options.perm_i is None:
        subdir = 'test'
    else:
        subdir = 'perm%d' % options.perm_i
    res_dir = os.path.join(options.resdir, subdir)
    if not os.path.exists(res_dir):
        os.makedirs(res_dir)

    width = len(str(wnds.shape[0]))
    first = str(options.i0).zfill(width)
    last = str(options.i1).zfill(width)
    resfile = os.path.join(res_dir, first + '_' + last + '.res')

    # Run the scan and report the wall-clock time.
    started = time.time()
    scan(options.bfile, Y, cov, null, wnds, options.minSnps, options.i0,
         options.i1, options.perm_i, resfile, F)
    elapsed = time.time() - started
    print('... finished in %s seconds' % elapsed)
Esempio n. 3
0
def analyze(options):
    """Read phenotype, null-model, window and optional inputs, then call ``scan``.

    * ``cfile`` is optional; when it is None a dict with ``eval`` and
      ``evec`` set to None is used and a warning is emitted.
    * ``ffile`` is optional; when given, the covariates must have as many
      rows as the phenotype matrix.
    * ``i0`` / ``i1`` default to 1 and the number of windows and are
      written back to ``options``.

    The result file path passed to ``scan`` lives under
    ``<resdir>/perm<perm_i>`` (or ``<resdir>/test`` when ``perm_i`` is None)
    and is named ``<i0>_<i1>.res`` with zero-padded indices.
    """
    print('import data')

    cov = {'eval': None, 'evec': None}
    if options.cfile is None:
        warnings.warn('warning: cfile not specifed, a one variance compoenent model will be considered')
    else:
        cov = readCovarianceMatrixFile(options.cfile, readCov=False)

    Y = readPhenoFile(options.pfile, idx=options.trait_idx)
    null = readNullModelFile(options.nfile)
    wnds = readWindowsFile(options.wfile)

    # Covariates are optional.
    F = None
    if options.ffile:
        F = readCovariatesFile(options.ffile)
    if F is not None:
        assert Y.shape[0] == F.shape[0], 'dimensions mismatch'

    n_windows = wnds.shape[0]
    if options.i0 is None:
        options.i0 = 1
    if options.i1 is None:
        options.i1 = n_windows

    # Build the output path.
    label = 'perm%d' % options.perm_i if options.perm_i is not None else 'test'
    res_dir = os.path.join(options.resdir, label)
    if not os.path.exists(res_dir):
        os.makedirs(res_dir)
    pad = len(str(n_windows))
    bounds = [str(idx).zfill(pad) for idx in (options.i0, options.i1)]
    resfile = os.path.join(res_dir, '_'.join(bounds) + '.res')

    # Timed analysis.
    tic = time.time()
    scan(
        options.bfile, Y, cov, null, wnds, options.minSnps,
        options.i0, options.i1, options.perm_i, resfile, F
    )
    toc = time.time()
    print('... finished in %s seconds' % (toc - tic))
Esempio n. 4
0
def preprocess(options):
    """Run the preprocessing steps selected in ``options``.

    The steps are independent switches and run in this order:

    * ``compute_cov``: compute the covariance matrix for ``bfile`` into
      ``cfile`` and then its eigenvalue decomposition. Requires ``cfile``.
    * ``compute_PCs`` (> 0): compute that many principal components and
      save them under ``ffile``. Requires ``ffile``.
    * ``fit_null``: fit the null model on the phenotypes in ``pfile``,
      using the decomposition from ``cfile`` and covariates from ``ffile``
      when those are given. ``nfile`` defaults to the basename of ``bfile``.
    * ``precompute_windows``: split the variant positions of ``bfile`` into
      windows of ``window_size``; ``<wfile>.wnd`` is passed as the output
      file. ``wfile`` defaults to ``<basename of bfile>.<window_size>``.
    * ``plot_windows``: save a histogram of the number of SNPs per window
      to ``<wfile>.wnd.pdf``.

    Defaults chosen for ``nfile`` and ``wfile`` are stored back on
    ``options`` and announced with a warning.

    Raises:
        AssertionError: if ``bfile`` is missing, a step's required file
            option is None, or the loaded arrays disagree in their first
            dimension.
        ValueError: if ``plot_windows`` is requested without
            ``precompute_windows``; the per-window SNP counts only exist
            after the windows were computed in the same call.
    """
    assert options.bfile is not None, 'Please specify a bfile.'

    # Per-window SNP counts; only filled in by the window precomputation.
    nSnps = None

    # computing the covariance matrix
    if options.compute_cov:
        assert options.cfile is not None, 'Specify covariance matrix basename'
        print('Computing covariance matrix')
        t0 = time.time()
        computeCovarianceMatrix(options.plink_path, options.bfile,
                                options.cfile, options.sim_type)
        t1 = time.time()
        print('... finished in %s seconds' % (t1 - t0))
        print('Computing eigenvalue decomposition')
        t0 = time.time()
        eighCovarianceMatrix(options.cfile)
        t1 = time.time()
        print('... finished in %s seconds' % (t1 - t0))

    # computing principal components
    if options.compute_PCs > 0:
        assert options.ffile is not None, 'Specify fix effects basename for saving PCs'
        t0 = time.time()
        computePCs(options.plink_path, options.compute_PCs, options.bfile,
                   options.ffile)
        t1 = time.time()
        print('... finished in %s seconds' % (t1 - t0))

    # fitting the null model
    if options.fit_null:
        if options.nfile is None:
            options.nfile = os.path.split(options.bfile)[-1]
            warnings.warn('nfile not specifed, set to %s' % options.nfile)
        print('Fitting null model')
        assert options.pfile is not None, 'phenotype file needs to be specified'
        # read pheno
        Y = readPhenoFile(options.pfile, idx=options.trait_idx)
        # read covariance
        if options.cfile is None:
            cov = {'eval': None, 'evec': None}
            warnings.warn('cfile not specifed, a one variance compoenent model will be considered')
        else:
            cov = readCovarianceMatrixFile(options.cfile, readCov=False)
            assert Y.shape[0] == cov['eval'].shape[0], 'dimension mismatch'
        # read covariates
        F = None
        if options.ffile is not None:
            F = readCovariatesFile(options.ffile)
            assert Y.shape[0] == F.shape[0], 'dimensions mismatch'
        t0 = time.time()
        fit_null(Y, cov['eval'], cov['evec'], options.nfile, F)
        t1 = time.time()
        print('.. finished in %s seconds' % (t1 - t0))

    # precomputing the windows
    if options.precompute_windows:
        if options.wfile is None:
            options.wfile = os.path.split(options.bfile)[-1] + '.%d' % options.window_size
            warnings.warn('wfile not specifed, set to %s' % options.wfile)
        print('Precomputing windows')
        t0 = time.time()
        pos = readBimFile(options.bfile)
        nWnds, nSnps = splitGeno(pos, size=options.window_size,
                                 out_file=options.wfile + '.wnd')
        # Single-argument prints emit the same text as the comma form.
        print('Number of variants: %s' % pos.shape[0])
        print('Number of windows: %s' % nWnds)
        print('Minimum number of snps: %s' % nSnps.min())
        print('Maximum number of snps: %s' % nSnps.max())
        t1 = time.time()
        print('.. finished in %s seconds' % (t1 - t0))

    # plot distribution of nSnps
    if options.plot_windows:
        if nSnps is None:
            # Without this guard the histogram call would fail on an
            # undefined name; report what is actually missing instead.
            raise ValueError('plot_windows requires precompute_windows: the '
                             'number of SNPs per window is only available '
                             'after the windows have been computed')
        print('Plotting ditribution of number of SNPs')
        plot_file = options.wfile + '.wnd.pdf'
        PL.subplot(1, 1, 1)
        PL.hist(nSnps, 30)
        PL.xlabel('Number of SNPs')
        PL.ylabel('Number of windows')
        PL.savefig(plot_file)