def getRegion(self, size=3e4, min_nSNPs=1, chrom_i=None, pos_min=None, pos_max=None):
    """
    Sample a random genomic region and load its genotypes.

    The BIM table of ``self.bfile`` is restricted to one chromosome and,
    optionally, to the open position interval (pos_min, pos_max). A region is
    then drawn at random among the remaining SNPs and read with
    ``plink_reader.readBED``.

    size:       length of the region, in the units of the BIM position
                column; ``size == 1`` selects a single random SNP instead
    min_nSNPs:  a sampled window is accepted only if it contains strictly
                more than this many SNPs (ignored when ``size == 1``)
    chrom_i:    chromosome to sample from; when None it is drawn at random
                between 1 and the largest chromosome number in the BIM table
    pos_min:    if given, keep only SNPs with position > pos_min
    pos_max:    if given, keep only SNPs with position < pos_max

    Returns (Xr, region):
    Xr:         the 'snps' entry of the readBED result for the region
    region:     array [chrom_i, first position, last position] of the region

    Raises IndexError if no SNP is left after the chromosome/position filter.
    """
    bim = plink_reader.readBIM(self.bfile, usecols=(0, 1, 2, 3))
    chrom = SP.array(bim[:, 0], dtype=int)
    pos = SP.array(bim[:, 3], dtype=int)

    if chrom_i is None:
        n_chroms = chrom.max()
        chrom_i = int(SP.ceil(SP.rand() * n_chroms))

    # Build one mask over the full BIM table so the row index of every
    # candidate SNP in the original file is kept alongside its position.
    keep = chrom == chrom_i
    if pos_min is not None:
        keep = SP.logical_and(keep, pos_min < pos)
    if pos_max is not None:
        keep = SP.logical_and(keep, pos < pos_max)
    bim_rows = SP.nonzero(keep)[0]
    pos = pos[keep]

    n_candidates = pos.shape[0]
    if n_candidates == 0:
        raise IndexError('no SNPs left on chromosome %s after position filtering' % chrom_i)

    if size == 1:
        # select single SNP; floor keeps the index inside [0, n_candidates)
        # (ceil produced 1..n_candidates and ran off the end of the array)
        idx = int(SP.floor(n_candidates * SP.rand()))
        cis = SP.arange(n_candidates) == idx
        region = SP.array([chrom_i, pos[idx], pos[idx]])
    else:
        # NOTE(review): this loop retries until a window qualifies and never
        # terminates if no window can satisfy both conditions.
        while 1:
            idx = int(SP.floor(n_candidates * SP.rand()))
            posT1 = pos[idx]
            posT2 = pos[idx] + size
            if posT2 <= pos.max():
                # SNPs strictly inside (posT1, posT2); the anchor SNP at
                # posT1 itself is not part of the window.
                cis = SP.logical_and(pos > posT1, pos < posT2)
                if cis.sum() > min_nSNPs:
                    break
        region = SP.array([chrom_i, posT1, posT2])

    # readBED receives the unfiltered bim, so `start` is presumably an index
    # into the full SNP order of the file (confirm against readBED); translate
    # the selection back from the filtered arrays to full-BIM row indices.
    start = bim_rows[cis].min()
    nSNPs = cis.sum()
    rv = plink_reader.readBED(self.bfile, useMAFencoding=True, start=start, nSNPs=nSNPs, bim=bim)
    Xr = rv['snps']
    return Xr, region
def scan(bfile, Y, cov, null, wnds, minSnps, i0, i1, perm_i, resfile, F):
    """
    Run the multi-trait set test on windows i0..i1-1 and write the results.

    For every tested window one tab-separated row is written to ``resfile``:
    the window's row of ``wnds`` followed by the 'LLR' entry returned by
    ``MultiTraitSetTest.optimize``.

    bfile:    basename of the PLINK files handed to plink_reader
    Y:        phenotype matrix passed to MultiTraitSetTest; Y.shape[0] is
              used as the number of samples when permuting
    cov:      dict providing 'eval' and 'evec' (passed as S_XX and U_XX)
    null:     null model info passed to ``mtSet.setNull``
    wnds:     2-d array with one row per window; column 4 is used as the
              start SNP index and column 5 as the number of SNPs to read,
              the last column is the SNP count compared against minSnps
              (columns 1-3 are only printed; presumably chrom/start/end)
    minSnps:  windows whose SNP count is below this value are skipped
    i0, i1:   half-open range of window indices to process
    perm_i:   if not None, seeds the RNG and permutes the rows of each
              window's genotype matrix with a fixed permutation
    resfile:  path of the output file (overwritten)
    F:        passed through to MultiTraitSetTest
    """
    if perm_i is not None:
        print('Generating permutation (permutation %d)' % perm_i)
        NP.random.seed(perm_i)
        perm = NP.random.permutation(Y.shape[0])

    mtSet = MTST.MultiTraitSetTest(Y, S_XX=cov['eval'], U_XX=cov['evec'], F=F)
    mtSet.setNull(null)
    bim = plink_reader.readBIM(bfile, usecols=(0, 1, 2, 3))
    fam = plink_reader.readFAM(bfile, usecols=(0, 1))

    print('fitting model')
    # The context manager guarantees the result file is flushed and closed,
    # also when a window fails part-way through the scan.
    with open(resfile, 'wb') as res_handle:
        wnd_file = csv.writer(res_handle, delimiter='\t')
        for wnd_i in range(i0, i1):
            print('.. window %d - (%d, %d-%d) - %d snps' % (
                wnd_i, int(wnds[wnd_i, 1]), int(wnds[wnd_i, 2]),
                int(wnds[wnd_i, 3]), int(wnds[wnd_i, -1])))
            if int(wnds[wnd_i, -1]) < minSnps:
                print('SKIPPED: number of snps lower than minSnps')
                continue
            RV = plink_reader.readBED(bfile, useMAFencoding=True, blocksize=1,
                                      start=int(wnds[wnd_i, 4]),
                                      nSNPs=int(wnds[wnd_i, 5]),
                                      order='F', standardizeSNPs=False,
                                      ipos=2, bim=bim, fam=fam)
            Xr = RV['snps']
            if perm_i is not None:
                # shuffle samples (rows) of the genotypes only
                Xr = Xr[perm, :]
            rv = mtSet.optimize(Xr)
            line = NP.concatenate([wnds[wnd_i, :], rv['LLR']])
            wnd_file.writerow(line)
def getRegion(self, size=3e4, min_nSNPs=1, chrom_i=None, pos_min=None, pos_max=None):
    """
    Sample a random genomic region and load its genotypes.

    The BIM table of ``self.bfile`` is restricted to one chromosome and,
    optionally, to the open position interval (pos_min, pos_max). A region is
    then drawn at random among the remaining SNPs and read with
    ``plink_reader.readBED``.

    size:       length of the region, in the units of the BIM position
                column; ``size == 1`` selects a single random SNP instead
    min_nSNPs:  a sampled window is accepted only if it contains strictly
                more than this many SNPs (ignored when ``size == 1``)
    chrom_i:    chromosome to sample from; when None it is drawn at random
                between 1 and the largest chromosome number in the BIM table
    pos_min:    if given, keep only SNPs with position > pos_min
    pos_max:    if given, keep only SNPs with position < pos_max

    Returns (Xr, region):
    Xr:         the "snps" entry of the readBED result for the region
    region:     array [chrom_i, first position, last position] of the region

    Raises IndexError if no SNP is left after the chromosome/position filter.
    """
    bim = plink_reader.readBIM(self.bfile, usecols=(0, 1, 2, 3))
    chrom = SP.array(bim[:, 0], dtype=int)
    pos = SP.array(bim[:, 3], dtype=int)

    if chrom_i is None:
        n_chroms = chrom.max()
        chrom_i = int(SP.ceil(SP.rand() * n_chroms))

    # Build one mask over the full BIM table so the row index of every
    # candidate SNP in the original file is kept alongside its position.
    selected = chrom == chrom_i
    if pos_min is not None:
        selected = SP.logical_and(selected, pos_min < pos)
    if pos_max is not None:
        selected = SP.logical_and(selected, pos < pos_max)
    file_rows = SP.nonzero(selected)[0]
    pos = pos[selected]

    n_snps_avail = pos.shape[0]
    if n_snps_avail == 0:
        raise IndexError('no SNPs left on chromosome %s after position filtering' % chrom_i)

    if size == 1:
        # select single SNP; floor keeps the index inside [0, n_snps_avail)
        # (ceil produced 1..n_snps_avail and ran off the end of the array)
        idx = int(SP.floor(n_snps_avail * SP.rand()))
        cis = SP.arange(n_snps_avail) == idx
        region = SP.array([chrom_i, pos[idx], pos[idx]])
    else:
        # NOTE(review): this loop retries until a window qualifies and never
        # terminates if no window can satisfy both conditions.
        while 1:
            idx = int(SP.floor(n_snps_avail * SP.rand()))
            posT1 = pos[idx]
            posT2 = pos[idx] + size
            if posT2 <= pos.max():
                # SNPs strictly inside (posT1, posT2); the anchor SNP at
                # posT1 itself is not part of the window.
                cis = SP.logical_and(pos > posT1, pos < posT2)
                if cis.sum() > min_nSNPs:
                    break
        region = SP.array([chrom_i, posT1, posT2])

    # readBED receives the unfiltered bim, so `start` is presumably an index
    # into the full SNP order of the file (confirm against readBED); translate
    # the selection back from the filtered arrays to full-BIM row indices.
    start = file_rows[cis].min()
    nSNPs = cis.sum()
    rv = plink_reader.readBED(self.bfile, useMAFencoding=True, start=start, nSNPs=nSNPs, bim=bim)
    Xr = rv["snps"]
    return Xr, region
def scan(bfile, Y, cov, null, wnds, minSnps, i0, i1, perm_i, resfile, F):
    """
    Run the multi-trait set test on windows i0..i1-1 and write the results.

    For every tested window one tab-separated row is written to ``resfile``:
    the window's row of ``wnds`` followed by the 'LLR' entry returned by
    ``MultiTraitSetTest.optimize``.

    bfile:    basename of the PLINK files handed to plink_reader
    Y:        phenotype matrix passed to MultiTraitSetTest; Y.shape[0] is
              used as the number of samples when permuting
    cov:      dict providing 'eval' and 'evec' (passed as S_XX and U_XX)
    null:     null model info passed to ``mtSet.setNull``
    wnds:     2-d array with one row per window; column 4 is used as the
              start SNP index and column 5 as the number of SNPs to read,
              the last column is the SNP count compared against minSnps
              (columns 1-3 are only printed; presumably chrom/start/end)
    minSnps:  windows whose SNP count is below this value are skipped
    i0, i1:   half-open range of window indices to process
    perm_i:   if not None, seeds the RNG and permutes the rows of each
              window's genotype matrix with a fixed permutation
    resfile:  path of the output file (overwritten)
    F:        passed through to MultiTraitSetTest
    """
    permute = perm_i is not None
    if permute:
        print('Generating permutation (permutation %d)' % perm_i)
        NP.random.seed(perm_i)
        perm = NP.random.permutation(Y.shape[0])

    mtSet = MTST.MultiTraitSetTest(Y, S_XX=cov['eval'], U_XX=cov['evec'], F=F)
    mtSet.setNull(null)
    bim = plink_reader.readBIM(bfile, usecols=(0, 1, 2, 3))
    fam = plink_reader.readFAM(bfile, usecols=(0, 1))

    print('fitting model')
    # The context manager guarantees the result file is flushed and closed,
    # also when a window fails part-way through the scan.
    with open(resfile, 'wb') as out_fh:
        wnd_file = csv.writer(out_fh, delimiter='\t')
        for wnd_i in range(i0, i1):
            wnd = wnds[wnd_i, :]
            print('.. window %d - (%d, %d-%d) - %d snps' % (
                wnd_i, int(wnd[1]), int(wnd[2]), int(wnd[3]), int(wnd[-1])))
            if int(wnd[-1]) < minSnps:
                print('SKIPPED: number of snps lower than minSnps')
                continue
            RV = plink_reader.readBED(bfile, useMAFencoding=True, blocksize=1,
                                      start=int(wnd[4]), nSNPs=int(wnd[5]),
                                      order='F', standardizeSNPs=False,
                                      ipos=2, bim=bim, fam=fam)
            Xr = RV['snps']
            if permute:
                # shuffle samples (rows) of the genotypes only
                Xr = Xr[perm, :]
            rv = mtSet.optimize(Xr)
            line = NP.concatenate([wnd, rv['LLR']])
            wnd_file.writerow(line)