def scan(bfile,Y,cov,null,wnds,minSnps,i0,i1,perm_i,resfile,F,colCovarType_r='lowrank',rank_r=1): if perm_i is not None: print 'Generating permutation (permutation %d)'%perm_i np.random.seed(perm_i) perm = np.random.permutation(Y.shape[0]) mtSet = limix.MTSet(Y=Y, S_R=cov['eval'], U_R=cov['evec'], F=F, rank=rank_r) mtSet.setNull(null) bim = plink_reader.readBIM(bfile,usecols=(0,1,2,3)) fam = plink_reader.readFAM(bfile,usecols=(0,1)) print 'fitting model' wnd_file = csv.writer(open(resfile,'wb'),delimiter='\t') for wnd_i in range(i0,i1): print '.. window %d - (%d, %d-%d) - %d snps'%(wnd_i,int(wnds[wnd_i,1]),int(wnds[wnd_i,2]),int(wnds[wnd_i,3]),int(wnds[wnd_i,-1])) if int(wnds[wnd_i,-1])<minSnps: print 'SKIPPED: number of snps lower than minSnps' continue #RV = bed.read(PositionRange(int(wnds[wnd_i,-2]),int(wnds[wnd_i,-1]))) RV = plink_reader.readBED(bfile, useMAFencoding=True, blocksize = 1, start = int(wnds[wnd_i,4]), nSNPs = int(wnds[wnd_i,5]), order = 'F',standardizeSNPs=False,ipos = 2,bim=bim,fam=fam) Xr = RV['snps'] if perm_i is not None: Xr = Xr[perm,:] rv = mtSet.optimize(Xr) line = np.concatenate([wnds[wnd_i,:],rv['LLR']]) wnd_file.writerow(line) pass
def computePCsPython(out_dir,k,bfile,ffile): """ reading in """ RV = plink_reader.readBED(bfile,useMAFencoding=True) X = RV['snps'] """ normalizing markers """ print 'Normalizing SNPs...' p_ref = X.mean(axis=0)/2. X -= 2*p_ref with warnings.catch_warnings(): warnings.simplefilter("ignore") X /= sp.sqrt(2*p_ref*(1-p_ref)) hasNan = sp.any(sp.isnan(X),axis=0) print '%d SNPs have a nan entry. Exluding them for computing the covariance matrix.'%hasNan.sum() X = X[:,~hasNan] """ computing prinicipal components """ U,S,Vt = ssl.svds(X,k=k) U -= U.mean(0) U /= U.std(0) U = U[:,::-1] """ saving to output """ np.savetxt(ffile, U, delimiter='\t',fmt='%.6f')
def computeCovarianceMatrixPython(out_dir, bfile, cfile, sim_type='RRM'): print "Using python to create covariance matrix. This might be slow. We recommend using plink instead." if sim_type is not 'RRM': raise Exception('sim_type %s is not known' % sim_type) """ loading data """ data = plink_reader.readBED(bfile, useMAFencoding=True) iid = data['iid'] X = data['snps'] N = X.shape[1] print '%d variants loaded.' % N print '%d people loaded.' % X.shape[0] """ normalizing markers """ print 'Normalizing SNPs...' p_ref = X.mean(axis=0) / 2. X -= 2 * p_ref with warnings.catch_warnings(): warnings.simplefilter("ignore") X /= sp.sqrt(2 * p_ref * (1 - p_ref)) hasNan = sp.any(sp.isnan(X), axis=0) print '%d SNPs have a nan entry. Exluding them for computing the covariance matrix.' % hasNan.sum( ) """ computing covariance matrix """ print 'Computing relationship matrix...' K = sp.dot(X[:, ~hasNan], X[:, ~hasNan].T) K /= 1. * N print 'Relationship matrix calculation complete' print 'Relationship matrix written to %s.cov.' % cfile print 'IDs written to %s.cov.id.' % cfile """ saving to output """ np.savetxt(cfile + '.cov', K, delimiter='\t', fmt='%.6f') np.savetxt(cfile + '.cov.id', iid, delimiter=' ', fmt='%s')
def getRegion(self,size=3e4,min_nSNPs=1,chrom_i=None,pos_min=None,pos_max=None):
    """
    Sample a region from the piece of genotype X, chrom, pos

    size:       physical size (bp) of the region; size==1 samples a single SNP
    min_nSNPs:  minimum number of SNPs contained in the region
    chrom_i:    restrict to this chromosome (random chromosome if None)
    pos_min, pos_max: optional position bounds

    Returns:
        Xr:     genotypes of the sampled region
        region: array [chrom, start_pos, end_pos] of the region
    """
    bim = None
    if (self.chrom is None) or (self.pos is None):
        bim = plink_reader.readBIM(self.bfile,usecols=(0,1,2,3))
        chrom = SP.array(bim[:,0],dtype=int)
        pos = SP.array(bim[:,3],dtype=int)
    else:
        chrom = self.chrom
        pos = self.pos

    if chrom_i is None:
        n_chroms = chrom.max()
        chrom_i = int(SP.ceil(SP.rand()*n_chroms))

    pos = pos[chrom==chrom_i]
    chrom = chrom[chrom==chrom_i]

    ipos = SP.ones(len(pos),dtype=bool)
    if pos_min is not None:
        ipos = SP.logical_and(ipos,pos_min<pos)
    if pos_max is not None:
        ipos = SP.logical_and(ipos,pos<pos_max)
    pos = pos[ipos]
    chrom = chrom[ipos]

    if size==1:
        # select single SNP
        # BUGFIX: floor (not ceil) so the index is uniform in
        # [0, n_snps) and can never be out of range
        idx = int(SP.floor(pos.shape[0]*SP.rand()))
        cis = SP.arange(pos.shape[0])==idx
        region = SP.array([chrom_i,pos[idx],pos[idx]])
    else:
        # rejection-sample a window start until it fits on the chromosome
        # and contains enough SNPs
        while 1:
            idx = int(SP.floor(pos.shape[0]*SP.rand()))
            posT1 = pos[idx]
            posT2 = pos[idx]+size
            if posT2<=pos.max():
                cis = chrom==chrom_i
                cis*= (pos>posT1)*(pos<posT2)
                if cis.sum()>min_nSNPs:
                    break
        region = SP.array([chrom_i,posT1,posT2])

    start = SP.nonzero(cis)[0].min()
    nSNPs = cis.sum()

    if self.X is None:
        # BUGFIX: bim was unbound here when self.chrom/self.pos were set
        # but self.X was not; read it lazily before use
        if bim is None:
            bim = plink_reader.readBIM(self.bfile,usecols=(0,1,2,3))
        rv = plink_reader.readBED(self.bfile,useMAFencoding=True,start = start, nSNPs = nSNPs,bim=bim)
        Xr = rv['snps']
    else:
        # BUGFIX: was 'nSnps' (undefined name) -> NameError on this path
        Xr = self.X[:,start:start+nSNPs]

    return Xr, region
def scan(bfile, Y, cov, null, wnds, minSnps, i0, i1, perm_i, resfile, F, colCovarType_r='lowrank', rank_r=1): if perm_i is not None: print 'Generating permutation (permutation %d)' % perm_i np.random.seed(perm_i) perm = np.random.permutation(Y.shape[0]) mtSet = limix.MTSet(Y=Y, S_R=cov['eval'], U_R=cov['evec'], F=F, rank=rank_r) mtSet.setNull(null) bim = plink_reader.readBIM(bfile, usecols=(0, 1, 2, 3)) fam = plink_reader.readFAM(bfile, usecols=(0, 1)) print 'fitting model' wnd_file = csv.writer(open(resfile, 'wb'), delimiter='\t') for wnd_i in range(i0, i1): print '.. window %d - (%d, %d-%d) - %d snps' % ( wnd_i, int(wnds[wnd_i, 1]), int(wnds[wnd_i, 2]), int( wnds[wnd_i, 3]), int(wnds[wnd_i, -1])) if int(wnds[wnd_i, -1]) < minSnps: print 'SKIPPED: number of snps lower than minSnps' continue #RV = bed.read(PositionRange(int(wnds[wnd_i,-2]),int(wnds[wnd_i,-1]))) RV = plink_reader.readBED(bfile, useMAFencoding=True, blocksize=1, start=int(wnds[wnd_i, 4]), nSNPs=int(wnds[wnd_i, 5]), order='F', standardizeSNPs=False, ipos=2, bim=bim, fam=fam) Xr = RV['snps'] if perm_i is not None: Xr = Xr[perm, :] rv = mtSet.optimize(Xr) line = np.concatenate([wnds[wnd_i, :], rv['LLR']]) wnd_file.writerow(line) pass
def _genBgTerm_fromSNPs(self,vTot=0.5,vCommon=0.1,pCausal=0.5,plot=False): """ generate """ if self.X is None: print 'Reading in all SNPs. This is slow.' rv = plink_reader.readBED(self.bfile,useMAFencoding=True) X = rv['snps'] else: X = self.X S = X.shape[1] vSpecific = vTot-vCommon # select causal SNPs nCausal = int(SP.floor(pCausal*S)) Ic = selectRnd(nCausal,S) X = X[:,Ic] # common effect Bc = SP.dot(self.genWeights(nCausal,self.P),self.genTraitEffect()) Yc = SP.dot(X,Bc) Yc *= SP.sqrt(vCommon/Yc.var(0).mean()) # indipendent effect Bi = SP.randn(nCausal,self.P) Yi = SP.dot(X,Bi) Yi *= SP.sqrt(vSpecific/Yi.var(0).mean()) if plot: import pylab as PL PL.ion() for p in range(self.P): PL.subplot(self.P,1,p+1) PL.plot(SP.arange(self.X.shape[1])[Ic],Bc[:,p],'o',color='y',alpha=0.05) PL.plot(SP.arange(self.X.shape[1])[Ic],Bi[:,p],'o',color='r',alpha=0.05) #PL.ylim(-2,2) PL.plot([0,Ic.shape[0]],[0,0],'k') return Yc, Yi
def computeCovarianceMatrixPython(out_dir,bfile,cfile,sim_type='RRM'): print "Using python to create covariance matrix. This might be slow. We recommend using plink instead." if sim_type is not 'RRM': raise Exception('sim_type %s is not known'%sim_type) """ loading data """ data = plink_reader.readBED(bfile,useMAFencoding=True) iid = data['iid'] X = data['snps'] N = X.shape[1] print '%d variants loaded.'%N print '%d people loaded.'%X.shape[0] """ normalizing markers """ print 'Normalizing SNPs...' p_ref = X.mean(axis=0)/2. X -= 2*p_ref with warnings.catch_warnings(): warnings.simplefilter("ignore") X /= sp.sqrt(2*p_ref*(1-p_ref)) hasNan = sp.any(sp.isnan(X),axis=0) print '%d SNPs have a nan entry. Exluding them for computing the covariance matrix.'%hasNan.sum() """ computing covariance matrix """ print 'Computing relationship matrix...' K = sp.dot(X[:,~hasNan],X[:,~hasNan].T) K/= 1.*N print 'Relationship matrix calculation complete' print 'Relationship matrix written to %s.cov.'%cfile print 'IDs written to %s.cov.id.'%cfile """ saving to output """ np.savetxt(cfile + '.cov', K, delimiter='\t',fmt='%.6f') np.savetxt(cfile + '.cov.id', iid, delimiter=' ',fmt='%s')
def computePCsPython(out_dir, k, bfile, ffile): """ reading in """ RV = plink_reader.readBED(bfile, useMAFencoding=True) X = RV['snps'] """ normalizing markers """ print 'Normalizing SNPs...' p_ref = X.mean(axis=0) / 2. X -= 2 * p_ref with warnings.catch_warnings(): warnings.simplefilter("ignore") X /= sp.sqrt(2 * p_ref * (1 - p_ref)) hasNan = sp.any(sp.isnan(X), axis=0) print '%d SNPs have a nan entry. Exluding them for computing the covariance matrix.' % hasNan.sum( ) X = X[:, ~hasNan] """ computing prinicipal components """ U, S, Vt = ssl.svds(X, k=k) U -= U.mean(0) U /= U.std(0) U = U[:, ::-1] """ saving to output """ np.savetxt(ffile, U, delimiter='\t', fmt='%.6f')