def setUpClass(self):
    """Load the mouse test dataset and cache read-only arrays on ``self``.

    Sets the following attributes:
        snp_fn   -- path prefix of the SNP (Bed) data set
        pheno_fn -- path of the phenotype file
        G        -- SNP values read in C order, passed through the Unit standardizer
        y        -- first column of the phenotype values
        G_cov    -- a single column of ones with one row per entry of ``y``

    ``G``, ``y`` and ``G_cov`` are flagged non-writeable so that code using
    this fixture cannot modify the shared data in place.
    """
    # Data files are located relative to the directory of this source file.
    here = os.path.dirname(os.path.realpath(__file__))
    self.snp_fn = here + "/../../tests/datasets/mouse/alldata"
    self.pheno_fn = here + "/../../tests/datasets/mouse/pheno_10_causals.txt"

    # load data
    ###################################################################
    bed = Bed(self.snp_fn)
    phenotype = pstpheno.loadOnePhen(self.pheno_fn)

    # intersect sample ids
    bed, phenotype = pysnptools.util.intersect_apply([bed, phenotype])

    genotypes = bed.read(order='C').val
    self.G = stdizer.Unit().standardize(genotypes)
    self.G.flags.writeable = False

    self.y = phenotype['vals'][:, 0]
    self.y.flags.writeable = False

    # No covariate file is loaded here; a column of ones is used instead.
    n_samples = len(self.y)
    self.G_cov = np.ones((n_samples, 1))
    self.G_cov.flags.writeable = False
def main():
    """
    example that compares output to fastlmmc

    Runs WindowingGwas on the toy data set, runs GwasTest on the same files
    with the same delta, plots the two sets of log p-values against each
    other, and asserts that they agree to 3 decimals. Finally draws a
    Manhattan plot of the WindowingGwas p-values.

    Note: pylab.show() is called before the assertion, so the comparison is
    only checked after the plot window has been handled.
    """

    # set up data
    phen_fn = "../feature_selection/examples/toydata.phe"
    snp_fn = "../feature_selection/examples/toydata.5chrom.bed"

    # load data
    ###################################################################
    snp_reader = Bed(snp_fn)
    pheno = pstpheno.loadOnePhen(phen_fn)

    # no covariates are used in this example
    cov = None

    # intersect sample ids
    snp_reader, pheno, cov = intersect_apply([snp_reader, pheno, cov])

    G = snp_reader.read(order='C').val
    G = stdizer.Unit().standardize(G)
    G.flags.writeable = False

    y = pheno['vals'][:, 0]
    # Make the phenotype read-only, like G above. The original statement
    # `y.flags.writeable` was a bare attribute access and had no effect.
    y.flags.writeable = False

    delta = 2.0
    gwas = WindowingGwas(G, y, delta=delta)
    pv = gwas.run_gwas()

    # reference implementation, run on the same files with the same delta
    from fastlmm.association.tests.test_gwas import GwasTest
    REML = False
    snp_pos_sim = snp_reader.sid
    snp_pos_test = snp_reader.sid

    # set before GwasTest is constructed and run
    os.environ["FastLmmUseAnyMklLib"] = "1"
    gwas_c = GwasTest(snp_fn, phen_fn, snp_pos_sim, snp_pos_test, delta,
                      REML=REML, excludeByPosition=0)
    gwas_c.run_gwas()

    # scatter of log p-values from both implementations plus the diagonal
    import pylab
    pylab.plot(np.log(pv), np.log(gwas_c.p_values), "+")
    pylab.plot(np.arange(-18, 0), np.arange(-18, 0), "-k")
    pylab.show()

    np.testing.assert_array_almost_equal(np.log(pv), np.log(gwas_c.p_values), decimal=3)

    simple_manhattan_plot(pv)
def load_intersect(snp_reader, pheno_fn_or_none, snp_set=AllSnps()):
    """
    load SNPs and phenotype, intersect ids
    ----------------------------------------------------------------------
    Input:
    snp_reader        : SnpReader object (e.g. BedReader)
    pheno_fn_or_none  : str, file name of phenotype file, or None to load
                        no phenotype
    snp_set           : SNP selection passed to snp_reader.read
                        (default: AllSnps())
    ----------------------------------------------------------------------
    Output:
    G          : SNP data (geno['snps']) after Unit standardization
    y          : first column of the phenotype values, or None when
                 pheno_fn_or_none is None
    snp_names  : geno['rs']
    chr_ids    : first column of geno['pos']
    ----------------------------------------------------------------------
    Raises:
    AssertionError : if the ids shared by the phenotype file and the SNP
                     reader are not at the same positions in both
    ----------------------------------------------------------------------
    """

    standardizer = stdizer.Unit()

    geno = snp_reader.read(order='C', snp_set=snp_set)
    G = standardizer.standardize(geno['snps'])

    snp_names = geno['rs']
    chr_ids = geno['pos'][:, 0]

    if pheno_fn_or_none is None:
        return G, None, snp_names, chr_ids

    # load phenotype
    pheno = pstpheno.loadOnePhen(pheno_fn_or_none, 0)
    y = pheno['vals'][:, 0]

    # intersect ids
    import warnings
    warnings.warn(
        "This intersect_ids is deprecated. Pysnptools includes newer versions of intersect_ids",
        DeprecationWarning)
    indarr = util.intersect_ids([pheno['iid'], snp_reader.original_iids])

    # This function does not reorder G or y, so mismatched positions are an
    # error. Raised explicitly (instead of `assert False`) so the check still
    # runs under `python -O`; the exception type is unchanged.
    # NOTE(review): equal index columns alone do not show that both inputs
    # have the same number of samples -- confirm callers guarantee that.
    if not (indarr[:, 0] == indarr[:, 1]).all():
        raise AssertionError(
            "ERROR: this code assumes the same order for snp and phen file")

    return G, y, snp_names, chr_ids
def standardize(snps, blocksize=None, standardizer=stdizer.Unit(), force_python_only=False):
    '''Does in-place standardization.
            Will use C++ if possible (for single and double, unit and beta, order="F" and order="C")

    snps              : array to standardize; snps.shape[1] is taken as the number of SNPs
    blocksize         : optional block size forwarded to the standardizer; treated as
                        None when it is at least the number of SNPs
    standardizer      : a standardizer object, or a string naming one
    force_python_only : forwarded unchanged to the standardizer

    Returns whatever standardizer.standardize(...) returns.
    '''
    #!!warnings.warn("This standardizer is deprecated. Pysnptools includes newer versions of standardization", DeprecationWarning)
    if isinstance(standardizer, str):
        # Resolve a standardizer given by name. The original called
        # `standardizer.factor(...)` on the string itself, which always raises
        # AttributeError because str has no such method.
        # NOTE(review): assumes the stdizer module exposes factory(name) -- confirm.
        standardizer = stdizer.factory(standardizer)

    # If blocksize is larger than the # of snps, set it to None
    if blocksize is not None and blocksize >= snps.shape[1]:
        blocksize = None

    return standardizer.standardize(snps, blocksize=blocksize, force_python_only=force_python_only)
def load_GWAS_data(genotype_file, phenotype_file, covariates_file=None, exms=-1, permute=True):
    """Read genotype, phenotype and optional covariates for a GWAS run.

    genotype_file   : passed to Bed(...)
    phenotype_file  : passed to plink.loadOnePhen(...)
    covariates_file : passed to plink.loadPhen(...) when not None
    exms            : when > 0, keep only this many individuals of the SNP reader
    permute         : when subsetting, pick the individuals from a random
                      permutation (the chosen indices are also written to
                      'inds.txt'); otherwise keep the first `exms`

    Returns the tuple (X, y): X is the SNP matrix (with the covariates stacked
    in front when given), standardized in place with the Unit standardizer and
    flagged read-only; y is the first column of the phenotype values.

    NOTE(review): only the SNP reader is subset when exms > 0; y is still taken
    from the full phenotype -- confirm callers account for this.
    """
    import pysnptools.pysnptools.util.util
    from pysnptools.pysnptools.snpreader.bed import Bed
    import fastlmm.util.standardizer as stdizer
    import fastlmm.pyplink.plink as plink

    reader = Bed(genotype_file)
    pheno = plink.loadOnePhen(phenotype_file)
    covs = plink.loadPhen(covariates_file) if covariates_file is not None else None

    # align all inputs on their common individuals
    aligned = pysnptools.pysnptools.util.util.intersect_apply([reader, pheno, covs])
    reader, pheno, covs = aligned

    # subset number individuals
    if exms > 0:
        if not permute:
            reader = reader[0:exms, :]
        else:
            print(reader.iid_count)
            order = np.random.permutation(range(reader.iid_count))
            np.savetxt('inds.txt', order[0:exms])
            reader = reader[order[0:exms], :]

    # read the SNPs
    X = reader.read(order='C').val
    if covs is not None:
        X = np.hstack((covs, X))

    # the return value of standardize is not used; X itself is returned
    stdizer.Unit().standardize(X)
    X.flags.writeable = False

    y = pheno['vals'][:, 0]

    print("done reading")
    return (X, y)