def _read_pstdata(self):
    """Build an in-memory ``SnpData`` from the phenotype source in ``self.filename``.

    ``self.filename`` may be:

    * a ``str`` -- read with ``pstpheno.loadPhen`` using ``self.missing``;
    * ``None`` -- an empty (zero-column) phenotype over ``self._iid_if_none``;
    * anything else -- used as an already-loaded dictionary with the keys
      ``'header'``, ``'vals'`` and ``'iid'``.

    Returns a ``SnpData`` whose ``pos`` is an all-NaN array with three columns.
    """
    #LATER switch it, so the main code is here rather than in loadPhen
    source = self.filename
    if isinstance(source, str):
        pheno_input = pstpheno.loadPhen(source, missing=self.missing)
    elif source is None:
        assert self._iid_if_none is not None, "If input is None then iid_if_none be given"
        pheno_input = {
            'header': np.empty((0), dtype='str'),
            'vals': np.empty((len(self._iid_if_none), 0)),
            'iid': self._iid_if_none,
        }
    else:
        pheno_input = source

    # A 1-D value vector is promoted to a single column (in a fresh dict).
    if len(pheno_input['vals'].shape) == 1:
        pheno_input = {
            'header': pheno_input['header'],
            'vals': np.reshape(pheno_input['vals'], (-1, 1)),
            'iid': pheno_input['iid'],
        }

    # Generate "pheno<i>" names when the header is unnamed or absent.
    header = pheno_input['header']
    if len(header) > 0 and header[0] is None:
        pheno_input['header'] = ["pheno{0}".format(i) for i in range(len(header))] #LATER move to reader?
    elif len(header) == 0:
        pheno_input['header'] = ["pheno{0}".format(i) for i in range(pheno_input['vals'].shape[1])]

    sid = np.array(pheno_input['header'], dtype='str')
    pos = np.full((len(sid), 3), np.nan)
    return SnpData(iid=pheno_input['iid'], sid=sid, pos=pos, val=pheno_input['vals'])
def _read_pstdata(self):
    """Read the phenotype source in ``self.filename`` into a ``SnpData``.

    ``self.filename`` is interpreted as follows:

    * ``str``  -- loaded via ``pstpheno.loadPhen`` with ``missing=self.missing``.
    * ``None`` -- produces a phenotype with zero columns whose rows are
      ``self._iid_if_none`` (which must then not be ``None``).
    * otherwise -- treated as a dictionary with ``'header'``, ``'vals'`` and
      ``'iid'`` entries.

    A 1-D ``'vals'`` array is reshaped to one column.  If the header is empty,
    or its first entry is ``None``, column names ``pheno0``, ``pheno1``, ... are
    generated.  Note that in the dictionary case the generated header is
    written back into the dictionary that was passed in.

    Returns
    -------
    SnpData
        ``iid`` from the input, ``sid`` from the header, ``val`` from the
        values and ``pos`` filled with NaN (three columns per sid).
    """
    #LATER switch it, so the main code is here rather than in loadPhen
    if isinstance(self.filename, str):
        pheno_input = pstpheno.loadPhen(self.filename, missing=self.missing)
    elif self.filename is None:
        assert self._iid_if_none is not None, "If input is None then iid_if_none be given"
        pheno_input = {
            'header': np.empty((0), dtype='str'),
            'vals': np.empty((len(self._iid_if_none), 0)),
            'iid': self._iid_if_none
        }
    else:
        pheno_input = self.filename

    if len(pheno_input['vals'].shape) == 1:
        pheno_input = {
            'header': pheno_input['header'],
            'vals': np.reshape(pheno_input['vals'], (-1, 1)),
            'iid': pheno_input['iid']
        }

    # ``range`` rather than ``xrange``: the latter does not exist on Python 3,
    # and inside a list comprehension the result is the same on Python 2.
    if len(pheno_input['header']) > 0 and pheno_input['header'][0] is None:
        pheno_input['header'] = ["pheno{0}".format(i) for i in range(len(pheno_input['header']))] #LATER move to reader?
    elif len(pheno_input['header']) == 0:
        pheno_input['header'] = ["pheno{0}".format(i) for i in range(pheno_input['vals'].shape[1])]

    row = pheno_input['iid']
    col = np.array(pheno_input['header'], dtype='str')
    # No positional information exists for phenotypes, so pos is all NaN.
    col_property = np.empty((len(col), 3))
    col_property.fill(np.nan)
    val = pheno_input['vals']
    snpdata = SnpData(iid=row, sid=col, pos=col_property, val=val)
    return snpdata
def load_snp_data(snpreader, pheno_fn, cov_fn=None, offset=True, mpheno=0, standardizer=Unit()):
    """Load plink files

    Parameters
    ----------
    snpreader : snpreader object
        object to read in binary SNP file
    pheno_fn : str
        File name of phenotype file
    cov_fn : str
        File name of covariates file.  When ``None``, a single column of ones
        is used as the covariate matrix.
    offset : bool, default=True
        Adds offset to the covariates specified in cov_fn, if necessary
        (i.e. when no covariate column is constant)
    mpheno : int, default=0
        Passed through to ``pstpheno.loadOnePhen`` (presumably the index of
        the phenotype column to load -- confirm against that function).
    standardizer : standardizer object, default=Unit()
        Passed to ``.standardize`` on the SNP data that was read.

    Returns
    -------
    G : array, shape = [n_samples, n_features]
        SNP matrix
    X : array, shape = [n_samples, n_covariates]
        Matrix of covariates (e.g. age, gender)
    y : array, shape = [n_samples]
        Phenotype (target) vector
    """
    #TODO: completely remove this
    pheno = pstpheno.loadOnePhen(pheno_fn, mpheno, vectorize=True)
    geno = snpreader.read(order='C').standardize(standardizer)

    # load covariates or generate vector of ones (for bias)
    if cov_fn is None:
        cov = {'vals': np.ones((len(pheno['iid']), 1)), 'iid': pheno['iid']}
    else:
        cov = pstpheno.loadPhen(cov_fn)

    # Restrict phenotype, genotype and covariates to their common individuals.
    (y, yiid), G, (X, xiid) = pstutil.intersect_apply(
        [(pheno['vals'], pheno['iid']), geno, (cov['vals'], cov['iid'])],
        sort_by_dataset=False)
    G = G.read(order='C', view_ok=True)

    # add bias column if not present
    # The row count comes from X itself; the previous code used an undefined
    # name (``indarr``) here and raised NameError whenever this branch ran.
    if offset and np.all(X.std(0) != 0):
        bias = np.ones((len(X), 1))
        X = np.hstack((X, bias))
    return G, X, y
def load_snp_data(snpreader, pheno_fn, cov_fn=None, offset=True, mpheno=0, standardizer=Unit()):
    """Load plink files

    Parameters
    ----------
    snpreader : snpreader object
        object to read in binary SNP file
    pheno_fn : str
        File name of phenotype file
    cov_fn : str
        File name of covariates file.  When ``None``, a single column of ones
        is used as the covariate matrix.
    offset : bool, default=True
        Adds offset to the covariates specified in cov_fn, if necessary
        (i.e. when no covariate column is constant)
    mpheno : int, default=0
        Passed through to ``pstpheno.loadOnePhen`` (presumably the index of
        the phenotype column to load -- confirm against that function).
    standardizer : standardizer object, default=Unit()
        Passed to ``.standardize`` on the SNP data that was read.

    Returns
    -------
    G : array, shape = [n_samples, n_features]
        SNP matrix
    X : array, shape = [n_samples, n_covariates]
        Matrix of covariates (e.g. age, gender)
    y : array, shape = [n_samples]
        Phenotype (target) vector
    """
    #TODO: completely remove this
    pheno = pstpheno.loadOnePhen(pheno_fn, mpheno, vectorize=True)
    geno = snpreader.read(order='C').standardize(standardizer)

    # load covariates or generate vector of ones (for bias)
    if cov_fn is None:
        cov = {'vals': np.ones((len(pheno['iid']), 1)), 'iid': pheno['iid']}
    else:
        cov = pstpheno.loadPhen(cov_fn)

    # Keep only the individuals present in all three inputs.
    datasets = [(pheno['vals'], pheno['iid']), geno, (cov['vals'], cov['iid'])]
    (y, yiid), G, (X, xiid) = pstutil.intersect_apply(datasets, sort_by_dataset=False)
    G = G.read(order='C', view_ok=True)

    # add bias column if not present
    # ``len(X)`` replaces ``len(indarr)``: ``indarr`` is never defined in this
    # function, so the original line raised NameError when it was reached.
    if offset and np.all(X.std(0) != 0):
        X = np.hstack((X, np.ones((len(X), 1))))
    return G, X, y
def test_preload_files(self):
    # single_snp with phenotype and covariates loaded into memory up front
    # instead of being passed as file names; the result is handed to
    # self.compare_files under the tag "one".
    logging.info("TestSingleSnp test_preload_files")
    bed_fn = self.bedbase
    loaded_pheno = pstpheno.loadOnePhen(self.phen_fn, vectorize=True)
    loaded_covar = pstpheno.loadPhen(self.cov_fn)
    reader = Bed(bed_fn, count_A1=False)
    out_fn = self.file_name("preload_files")

    result = single_snp(
        test_snps=reader[:, :10],
        pheno=loaded_pheno,
        G0=bed_fn,
        mixing=0,
        leave_out_one_chrom=False,
        covar=loaded_covar,
        output_file_name=out_fn,
        count_A1=False,
    )
    self.compare_files(result, "one")
def load_covariates(self, pheno):
    """Load the covariate matrix and the individual ids that go with it.

    Parameters
    ----------
    pheno : dict
        Phenotype dictionary; only ``pheno['iid']`` is read, and only when
        ``self.cov_fn`` is ``None``.

    Returns
    -------
    X : array
        ``cov['vals']`` from ``pstpheno.loadPhen(self.cov_fn)``, or a single
        column of ones (one row per ``pheno['iid']`` entry) when
        ``self.cov_fn`` is ``None``.
    cov_iid : array-like
        The ids belonging to the rows of ``X``.

    Side effects
    ------------
    When ``self.offset`` is truthy and no column of ``X`` is constant,
    ``self.X`` is set to ``X`` with a column of ones appended.
    """
    if self.cov_fn is None:
        cov_iid = pheno['iid']
        X = np.ones((len(cov_iid), 1))
    else:
        cov = pstpheno.loadPhen(self.cov_fn)
        X = cov['vals']
        cov_iid = cov['iid']

    # add bias column if not present - #!! LATER -- Bug? should this test be done after intersection in case removing an iid makes it constant?
    if self.offset and np.all(X.std(0) != 0):
        bias = np.ones((len(X), 1))
        # NOTE(review): the augmented matrix is stored on ``self.X`` while the
        # un-augmented ``X`` is returned.  Kept as-is because callers may rely
        # on it -- confirm whether ``X`` itself was meant to be extended.
        self.X = np.hstack((X, bias))
    return X, cov_iid
def load_covariates(self, pheno):
    """Return ``(X, cov_iid)``: the covariate values and their individual ids.

    If ``self.cov_fn`` is ``None`` the covariates are a single column of ones
    with one row per entry of ``pheno['iid']``; otherwise they are read with
    ``pstpheno.loadPhen(self.cov_fn)``.

    Parameters
    ----------
    pheno : dict
        Phenotype dictionary; only its ``'iid'`` entry is used, and only in
        the ``self.cov_fn is None`` case.

    Returns
    -------
    X : array
        Covariate values (without any added bias column).
    cov_iid : array-like
        Ids for the rows of ``X``.

    Side effects
    ------------
    Sets ``self.X`` to ``X`` plus a trailing column of ones when
    ``self.offset`` is truthy and every column of ``X`` has non-zero
    standard deviation.
    """
    if self.cov_fn is None:
        cov_iid = pheno['iid']
        X = np.ones((len(cov_iid), 1))
    else:
        cov = pstpheno.loadPhen(self.cov_fn)
        X = cov['vals']
        cov_iid = cov['iid']

    # add bias column if not present - #!! LATER -- Bug? should this test be done after intersection in case removing an iid makes it constant?
    # NumPy is used directly: the ``scipy.all/ones/hstack`` aliases are
    # deprecated re-exports of the same NumPy functions.
    if self.offset and np.all(X.std(0) != 0):
        # NOTE(review): result goes to ``self.X``; the returned ``X`` is left
        # without the bias column.  Behaviour preserved -- confirm intent.
        self.X = np.hstack((X, np.ones((len(X), 1))))
    return X, cov_iid
def test_preload_files(self):
    # Phenotype and covariates are read into dictionaries first and then
    # passed to single_snp; the test SNPs are the first ten of the Bed file.
    logging.info("TestSingleSnp test_preload_files")
    from pysnptools.snpreader import Bed

    snp_fn = self.bedbase
    pheno_dict = pstpheno.loadOnePhen(self.phen_fn, vectorize=True)
    covar_dict = pstpheno.loadPhen(self.cov_fn)
    bed_reader = Bed(snp_fn)
    results_fn = self.file_name("preload_files")

    results = single_snp(
        test_snps=bed_reader[:, :10],
        pheno=pheno_dict,
        G0=snp_fn,
        mixing=0,
        covar=covar_dict,
        output_file_name=results_fn,
    )
    self.compare_files(results, "one")
def test_SNC(self):
    # Run single_snp on data in which one SNP column is constant.
    logging.info("TestSNC")
    bed_fn = self.bedbase
    pheno_dict = pstpheno.loadOnePhen(self.phen_fn, vectorize=True)
    covar_dict = pstpheno.loadPhen(self.cov_fn)

    snp_data = Bed(bed_fn, count_A1=False).read()
    constant_column = [0] * snp_data.iid_count
    snp_data.val[:, 2] = constant_column  # make SNP #2 have constant values (aka a SNC)

    out_fn = self.file_name("snc")
    result = single_snp(
        test_snps=snp_data[:, :10],
        pheno=pheno_dict,
        G0=snp_data,
        mixing=0,
        leave_out_one_chrom=False,
        covar=covar_dict,
        output_file_name=out_fn,
        count_A1=False,
    )
    self.compare_files(result, "snc")
def test_no_cov_b(self):
    # single_snp with a covariate dictionary that has rows but no columns.
    logging.info("TestSingleSnp test_no_cov_b")
    reader = Bed(self.bedbase, count_A1=False)
    pheno_fn = self.phen_fn
    out_fn = self.file_name("no_cov_b")

    empty_covar = pstpheno.loadPhen(self.cov_fn)
    # Remove all the columns, and the names that went with them.
    empty_covar['vals'] = np.delete(empty_covar['vals'], np.s_[:], 1)
    empty_covar['header'] = []

    result = single_snp(
        test_snps=reader[:, :10],
        pheno=pheno_fn,
        G0=reader,
        leave_out_one_chrom=False,
        covar=empty_covar,
        mixing=0,
        output_file_name=out_fn,
        count_A1=False,
    )
    self.compare_files(result, "no_cov")
def test_no_cov_b(self):
    # single_snp with a covariate dictionary whose value matrix has had every
    # column removed; the result is compared under the tag "no_cov".
    logging.info("TestSingleSnp test_no_cov_b")
    from pysnptools.snpreader import Bed
    test_snps = Bed(self.bedbase)
    pheno = self.phen_fn
    output_file_name = self.file_name("no_cov_b")

    covar = pstpheno.loadPhen(self.cov_fn)
    covar['vals'] = np.delete(covar['vals'], np.s_[:], 1) #Remove all the columns
    # Keep the dictionary self-consistent: with no value columns left there
    # must be no column names either, otherwise len(header) != vals.shape[1].
    covar['header'] = []

    frame = single_snp(test_snps=test_snps[:, :10], pheno=pheno, G0=test_snps,
                       covar=covar, mixing=0,
                       output_file_name=output_file_name)
    self.compare_files(frame, "no_cov")
def test_preload_files(self):
    # Preloaded phenotype/covariate dictionaries are passed to single_snp
    # together with a Bed reader sliced to its first ten SNPs.
    logging.info("TestSingleSnp test_preload_files")
    base_fn = self.bedbase
    y_dict = pstpheno.loadOnePhen(self.phen_fn, vectorize=True)
    x_dict = pstpheno.loadPhen(self.cov_fn)
    snp_reader = Bed(base_fn)
    target_fn = self.file_name("preload_files")

    table = single_snp(
        test_snps=snp_reader[:, :10],
        pheno=y_dict,
        G0=base_fn,
        mixing=0,
        leave_out_one_chrom=False,
        covar=x_dict,
        output_file_name=target_fn,
    )
    self.compare_files(table, "one")
def test_preload_files(self):
    # epistasis with phenotype and covariates loaded into memory beforehand.
    logging.info("TestEpistasis test_preload_files")
    from pysnptools.snpreader import Bed

    bed_fn = self.bedbase
    pheno_dict = pstpheno.loadOnePhen(self.phen_fn, vectorize=True)
    covar_dict = pstpheno.loadPhen(self.cov_fn)
    reader = Bed(bed_fn)
    out_fn = self.file_name("preload_files")

    result = epistasis(
        bed_fn,
        pheno_dict,
        G0=bed_fn,
        covar=covar_dict,
        sid_list_0=reader.sid[:10],    #first 10 snps
        sid_list_1=reader.sid[5:15],   #Skip 5 snps, use next 10
        output_file_name=out_fn,
    )

    # Pull the three compared columns out of the result frame as arrays.
    sid0 = np.array(result['SNP0'])
    sid1 = np.array(result['SNP1'])
    pvalue_list = np.array(result['PValue'])
    self.compare_files(sid0, sid1, pvalue_list, "one")
def _run_once(self):
    """Resolve file-name arguments into loaded objects; does nothing once done.

    On the first call, any of ``test_snps``, ``G0`` and ``G1_or_none`` given
    as a ``str`` is wrapped in ``Bed``; ``pheno`` and ``covar`` given as a
    ``str`` are loaded through ``pstpheno`` with ``missing='NaN'``.  Missing
    sid lists default to all sids of ``test_snps``, ``set_sid_sets`` is
    called, a column of ones is appended to the covariates, and the working
    directory name is derived from ``output_file_or_none``.
    """
    if self._ran_once:
        return
    self._ran_once = None

    if isinstance(self.test_snps, str):
        self.test_snps = Bed(self.test_snps)

    if isinstance(self.G0, str):
        self.G0 = Bed(self.G0)

    if isinstance(self.pheno, str):
        self.pheno = pstpheno.loadOnePhen(self.pheno, vectorize=True, missing='NaN')

    # isinstance() is already False for None, so no separate None test is needed.
    if isinstance(self.covar, str):
        self.covar = pstpheno.loadPhen(self.covar, missing='NaN')

    if isinstance(self.G1_or_none, str):
        self.G1_or_none = Bed(self.G1_or_none)

    if self.sid_list_0 is None:
        self.sid_list_0 = self.test_snps.sid

    if self.sid_list_1 is None:
        self.sid_list_1 = self.test_snps.sid

    self.set_sid_sets()

    #!!Should fix up to add only of no constant columns - will need to add a test case for this
    if self.covar is not None:
        self.covar = np.hstack(
            (self.covar['vals'], np.ones((self.test_snps.iid_count, 1))))
    else:
        self.covar = np.ones((self.test_snps.iid_count, 1))
    self.n_cov = self.covar.shape[1]

    if self.output_file_or_none is not None:
        self.__tempdirectory = self.output_file_or_none + ".working"
    else:
        self.__tempdirectory = ".working"

    self._ran_once = True
def test_no_cov_b(self):
    # epistasis with a covariate dictionary whose value matrix has had every
    # column removed; results are compared under the tag "no_cov".
    logging.info("TestEpistasis test_no_cov_b")
    from pysnptools.snpreader import Bed
    test_snps = Bed(self.bedbase)
    pheno = self.phen_fn
    output_file = self.file_name("no_cov_b")

    covar = pstpheno.loadPhen(self.cov_fn)
    covar['vals'] = np.delete(covar['vals'], np.s_[:], 1) #Remove all the columns
    # With zero value columns the header must be empty too, so that the
    # dictionary stays internally consistent (len(header) == vals.shape[1]).
    covar['header'] = []

    frame = epistasis(test_snps, pheno, G0=test_snps, covar=covar,
                      sid_list_0=test_snps.sid[:10],   #first 10 snps
                      sid_list_1=test_snps.sid[5:15],  #Skip 5 snps, use next 10
                      output_file_name=output_file)
    sid0, sid1, pvalue_list = np.array(frame['SNP0']), np.array(frame['SNP1']), np.array(frame['PValue'])
    self.compare_files(sid0, sid1, pvalue_list, "no_cov")
def test_no_cov_b(self):
    # Exercise single_snp when the covariate dictionary carries individuals
    # but no covariate columns at all.
    logging.info("TestSingleSnp test_no_cov_b")
    from pysnptools.snpreader import Bed
    test_snps = Bed(self.bedbase)
    pheno = self.phen_fn
    output_file_name = self.file_name("no_cov_b")

    covar = pstpheno.loadPhen(self.cov_fn)
    covar['vals'] = np.delete(covar['vals'], np.s_[:], 1) #Remove all the columns
    # Drop the column names as well; leaving them would make the header
    # longer than the (now zero-column) value matrix.
    covar['header'] = []

    frame = single_snp(test_snps=test_snps[:, :10], pheno=pheno, G0=test_snps,
                       covar=covar, mixing=0,
                       output_file_name=output_file_name)
    self.compare_files(frame, "no_cov")
def _run_once(self):
    """Perform the lazy one-time setup; later calls return immediately.

    String-valued ``test_snps``, ``G0`` and ``G1_or_none`` become ``Bed``
    readers, string-valued ``pheno`` / ``covar`` are loaded through
    ``pstpheno``, unset sid lists default to ``test_snps.sid``, a column of
    ones is appended to the covariates, and the temp-directory name is set.
    """
    if self._ran_once:
        return
    self._ran_once = None

    if isinstance(self.test_snps, str):
        self.test_snps = Bed(self.test_snps)

    if isinstance(self.G0, str):
        self.G0 = Bed(self.G0)

    if isinstance(self.pheno, str):
        self.pheno = pstpheno.loadOnePhen(self.pheno, vectorize=True) #!! what about missing=-9?

    # A None covar is never a str, so the isinstance test alone suffices.
    if isinstance(self.covar, str):
        self.covar = pstpheno.loadPhen(self.covar) #!! what about missing=-9?

    if isinstance(self.G1_or_none, str):
        self.G1_or_none = Bed(self.G1_or_none)

    if self.sid_list_0 is None:
        self.sid_list_0 = self.test_snps.sid

    if self.sid_list_1 is None:
        self.sid_list_1 = self.test_snps.sid

    self.set_sid_sets()

    #!!Should fix up to add only of no constant columns - will need to add a test case for this
    if self.covar is None:
        with_bias = np.ones((self.test_snps.iid_count, 1))
    else:
        with_bias = np.hstack((self.covar['vals'], np.ones((self.test_snps.iid_count, 1))))
    self.covar = with_bias
    self.n_cov = self.covar.shape[1]

    if self.output_file_or_none is None:
        self.__tempdirectory = ".working"
    else:
        self.__tempdirectory = self.output_file_or_none + ".working"

    self._ran_once = True
def test_SNC(self):
    # single_snp on in-memory SNP data where column 2 has been zeroed out.
    logging.info("TestSNC")
    base_fn = self.bedbase
    y_dict = pstpheno.loadOnePhen(self.phen_fn, vectorize=True)
    x_dict = pstpheno.loadPhen(self.cov_fn)
    reader = Bed(base_fn, count_A1=False)

    in_memory = reader.read()
    in_memory.val[:, 2] = 0  # make SNP #2 have constant values (aka a SNC)

    target_fn = self.file_name("snc")
    table = single_snp(
        test_snps=in_memory[:, :10],
        pheno=y_dict,
        G0=in_memory,
        mixing=0,
        leave_out_one_chrom=False,
        covar=x_dict,
        output_file_name=target_fn,
        count_A1=False,
    )
    self.compare_files(table, "snc")
def test_no_cov_b(self):
    # Covariates are loaded and then stripped of every column (and of their
    # header) before being given to single_snp.
    logging.info("TestSingleSnp test_no_cov_b")
    snp_reader = Bed(self.bedbase)
    pheno_fn = self.phen_fn
    target_fn = self.file_name("no_cov_b")

    stripped = pstpheno.loadPhen(self.cov_fn)
    stripped['vals'] = np.delete(stripped['vals'], np.s_[:], 1)  #Remove all the columns
    stripped['header'] = []

    table = single_snp(
        test_snps=snp_reader[:, :10],
        pheno=pheno_fn,
        G0=snp_reader,
        leave_out_one_chrom=False,
        covar=stripped,
        mixing=0,
        output_file_name=target_fn,
    )
    self.compare_files(table, "no_cov")
def _pheno_fixup(pheno_input, iid_source_if_none=None): if isinstance(pheno_input, str): return pstpheno.loadPhen(pheno_input) #!!what about missing=-9? if pheno_input is None: ret = { 'header': [], 'vals': np.empty((iid_source_if_none['vals'].shape[0], 0)), 'iid': iid_source_if_none['iid'] } return ret if len(pheno_input['vals'].shape) == 1: ret = { 'header': pheno_input['header'], 'vals': np.reshape(pheno_input['vals'], (-1, 1)), 'iid': pheno_input['iid'] } return ret return pheno_input
def _pheno_fixup(pheno_input, iid_source_if_none=None): if isinstance(pheno_input, str): return pstpheno.loadPhen(pheno_input) #!!what about missing=-9? if pheno_input is None: ret = { 'header':[], 'vals': np.empty((iid_source_if_none['vals'].shape[0], 0)), 'iid':iid_source_if_none['iid'] } return ret if len(pheno_input['vals'].shape) == 1: ret = { 'header' : pheno_input['header'], 'vals' : np.reshape(pheno_input['vals'],(-1,1)), 'iid' : pheno_input['iid'] } return ret return pheno_input
def test_SNC(self):
    # Read the Bed file into memory, overwrite SNP column 2 with zeros and
    # run single_snp on the first ten SNPs of the modified data.
    logging.info("TestSNC")
    from pysnptools.snpreader import Bed

    snp_fn = self.bedbase
    pheno_dict = pstpheno.loadOnePhen(self.phen_fn, vectorize=True)
    covar_dict = pstpheno.loadPhen(self.cov_fn)
    bed_reader = Bed(snp_fn)

    modified = bed_reader.read()
    zeros = [0] * modified.iid_count
    modified.val[:, 2] = zeros  # make SNP #2 have constant values (aka a SNC)

    results_fn = self.file_name("snc")
    results = single_snp(
        test_snps=modified[:, :10],
        pheno=pheno_dict,
        G0=modified,
        mixing=0,
        covar=covar_dict,
        output_file_name=results_fn,
    )
    self.compare_files(results, "snc")
def loadCovars(bed, covarFile):
    """Load covariates from ``covarFile`` and return their value matrix.

    The loaded covariates are first validated against ``bed`` with
    ``checkIntersection`` (``checkSuperSet=True``) and then passed, together
    with ``bed``, through ``pstutil.intersect_apply``; the ``'vals'`` entry of
    the resulting covariate dictionary is returned.
    """
    loaded = phenoUtils.loadPhen(covarFile)
    checkIntersection(bed, loaded, 'covariates', checkSuperSet=True)
    # Only the covariate half of the intersected pair is needed.
    _bed_unused, intersected = pstutil.intersect_apply([bed, loaded])
    return intersected['vals']