def divideData(self, filename, num=5, mph=3, delet=True):
    """Estimate heritability separately on ``num`` partitions of the data.

    The individuals of the fileset ``filename`` are reordered with
    ``perm(n)`` (presumably a random permutation -- confirm against its
    definition), cut into ``num`` consecutive, nearly equal chunks, and
    ``self.VarCalc.RealVar`` is evaluated on every chunk.

    Args:
        filename: Path handed to ``Bed``; phenotypes are read from
            ``filename + ".fam"`` (column 3 of the ``Pheno`` values).
        num: Number of partitions.
        mph: Unused; kept so existing callers keep working.
        delet: Unused; kept so existing callers keep working.

    Returns:
        list: one ``RealVar`` result per partition, in partition order.
    """
    # A single-argument print(...) behaves the same on Python 2 and 3.
    print("Estimating heritability using " + str(num) + " components")

    sFil = Bed(filename)
    yFil = Pheno(filename + ".fam")
    n = sFil.iid_count

    # Apply the SAME reordering to phenotypes and genotypes so that row i of
    # one still belongs to the same individual as row i of the other.
    reOrd = perm(n)
    yFil = yFil[reOrd, :]
    sFil = sFil[reOrd, :]
    y = yFil.read().val[:, 3]

    # Chunk boundaries: div[i]..div[i + 1] is the i-th partition.
    div = [int(math.ceil(i * n / float(num))) for i in range(num + 1)]

    varEsts = []
    for i in range(num):
        print("For component " + str(i))
        lo, hi = div[i], div[i + 1]
        # Slice the reordered reader. Slicing self.BED here (as the code used
        # to) pairs reordered phenotypes with un-reordered genotypes.
        sFilTemp = sFil[lo:hi, :]
        Xtemp = sFilTemp.read().standardize().val
        ytemp = y[lo:hi]
        varEsts.append(self.VarCalc.RealVar(ytemp, Xtemp))
    return varEsts
def test_two(self):  #!!! rather a big test case
    # Runs single_snp_all_plus_select on the chr5 SNPs of the synth dataset,
    # once for each GB_goal in (None, 2), and checks every result against the
    # "two" reference through self.compare_files.
    from pysnptools.util.mapreduce1.runner import Local, LocalMultiProc

    logging.info("TestSingleSnpAllPlusSelect test_two")
    do_plot = False

    bed_fn = self.pythonpath + "/tests/datasets/synth/all.bed"
    pheno_fn = self.pythonpath + "/tests/datasets/synth/pheno_10_causals.txt"
    cov_fn = self.pythonpath + "/tests/datasets/synth/cov.txt"

    # partition snps on chr5 vs rest
    test_chr = 5
    snp_reader = Bed(bed_fn, count_A1=False)
    on_test_chr = snp_reader.pos[:, 0] == test_chr
    test_snps = snp_reader[:, on_test_chr]

    mf_name = "lmpl"  #"lmpl" "local", "coreP", "nodeP", "socketP", "nodeE", "lmp"
    runner_factory = mf_to_runner_function(mf_name)
    runner = runner_factory(20)
    output_file_name = self.file_name("two")

    for GB_goal in [None, 2]:
        candidate_ks = [int(k) for k in np.logspace(0, 7, base=2, num=7)]
        results = single_snp_all_plus_select(
            test_snps=test_snps,
            G=bed_fn,
            pheno=pheno_fn,
            covar=cov_fn,
            k_list=candidate_ks,
            n_folds=7,
            seed=42,
            do_plot=do_plot,
            GB_goal=GB_goal,
            output_file_name=output_file_name,
            runner=runner,
            count_A1=False,
        )
        logging.info(results.head())
        self.compare_files(results, "two")
def test_intersection(self):
    # Checks what intersect_apply and fancy indexing return for a SnpKernel.
    from pysnptools.standardizer import Unit
    from pysnptools.kernelreader import SnpKernel
    from pysnptools.snpreader import Pheno
    from pysnptools.kernelreader._subset import _KernelSubset
    from pysnptools.snpreader._subset import _SnpSubset
    from pysnptools.util import intersect_apply

    all_snps = Bed(self.currentFolder + "/../examples/toydata.5chrom.bed",
                   count_A1=False)
    kernel = SnpKernel(all_snps, stdizer.Identity())

    full_pheno = Pheno(self.currentFolder + "/../examples/toydata.phe")
    # To test intersection we remove a iid from pheno
    pheno = full_pheno[1:, :]

    # SnpKernel is special because it standardizes AFTER intersecting.
    kernel_common, pheno = intersect_apply([kernel, pheno])
    assert isinstance(kernel_common.snpreader, _SnpSubset)
    assert not isinstance(kernel_common, _KernelSubset)

    # What happens with fancy selection?
    every_other = kernel[::2]
    assert isinstance(every_other, SnpKernel)

    logging.info("Done with test_intersection")
def __init__(self,args):
    """Load one genotype file, filter its SNPs and compute the scores.

    Args:
        args: Namespace-like object. Attributes read directly here are
            ``window_type``, ``bfile``, ``maf``, ``from_bp``, ``to_bp`` and
            ``extract``; ``args`` is also forwarded unchanged to
            ``get_allele_frequency``, ``get_windows`` and ``compute``.

    Raises:
        ValueError: If ``args.window_type`` is neither ``'KBP'`` nor ``'SNP'``.
    """
    if args.window_type not in ['KBP','SNP']:
        raise ValueError('Window type not supported')
    bed_1 = Bed(args.bfile,count_A1=False)
    af1 = self.get_allele_frequency(bed_1,args)
    print(len(af1), "SNPs in file 1")
    # Keep SNPs whose value from get_allele_frequency lies strictly inside
    # (maf, 1 - maf).
    snps_1 = (af1>args.maf)&(af1<1-args.maf)
    print(np.sum(snps_1), "SNPs in file 1 after MAF filter")
    if (args.from_bp is not None) and (args.to_bp is not None):
        # Optional open-interval window on column 2 of bed_1.pos.
        k = (bed_1.pos[:,2]>args.from_bp)&(bed_1.pos[:,2]<args.to_bp)
        snps_1 = snps_1&k
    snps_to_use = bed_1.sid[snps_1]
    if args.extract is not None:
        # One SNP id per line. The context manager closes the file as soon
        # as it has been read instead of leaving that to garbage collection.
        with open(args.extract,'r') as extract_file:
            keep = np.array([line.strip() for line in extract_file])
        snps_to_use = np.intersect1d(snps_to_use,keep)
        print(len(snps_to_use),"SNPs remaining after extraction")
    # Sorted indices so the selected SNPs keep their order in the bed file.
    bed_1_index = np.sort(bed_1.sid_to_index(snps_to_use))
    pos = bed_1.pos[bed_1_index]
    bim_1=pd.read_table(bed_1.filename+'.bim',header=None,
                        names=['chm','id','pos_mb','pos_bp','a1','a2'])
    af = af1[bed_1_index]
    # Reading args.afile is currently disabled; a1 is always None.
    # if args.afile is not None:
    #     a1 = pd.read_table(args.afile,header=None,sep='\s*',
    #                        names=['id1','id2','theta'])
    # else:
    a1 = None
    self.af = af
    self.M = len(bed_1_index)
    self.windows = self.get_windows(pos,args)
    self.chr = pos[:,0]
    self.pos = pos[:,2]
    self.id = bed_1.sid[bed_1_index]
    self.A1 = bim_1['a1'].loc[bed_1_index]
    self.A2 = bim_1['a2'].loc[bed_1_index]
    self.scores = self.compute(bed_1,bed_1_index,af,a1,args)
def test_notebook(self):
    # Runs single_snp_all_plus_select with the chr5 SNPs of the synth dataset
    # as test SNPs and the whole dataset as G, then compares the result with
    # the "notebook" reference.
    do_plot = False
    mf_name = "lmp"  #"local", "coreP", "nodeP", "socketP", "nodeE", "lmp"
    runner = mf_to_runner_function(mf_name)(4)
    output_file_name = self.file_name("notebook")

    # The log line names this test so log output can be matched to it.
    logging.info("TestSingleSnpAllPlusSelect test_notebook")

    # define file names
    snp_reader = Bed(self.pythonpath + "/tests/datasets/synth/all",
                     count_A1=False)
    pheno_fn = self.pythonpath + "/tests/datasets/synth/pheno_10_causals.txt"
    cov_fn = self.pythonpath + "/tests/datasets/synth/cov.txt"

    # find the chr5 SNPs
    test_snps = snp_reader[:, snp_reader.pos[:, 0] == 5]

    #select the 2nd kernel and run GWAS
    results = single_snp_all_plus_select(test_snps=test_snps,
                                         G=snp_reader,
                                         pheno=pheno_fn,
                                         GB_goal=2,
                                         do_plot=do_plot,
                                         output_file_name=output_file_name,
                                         runner=runner,
                                         count_A1=False)

    self.compare_files(results, "notebook")
def merge_ld(bfile, ld_dir):
    """Stitch the per-part matrices in ``ld_dir`` together and invert them.

    Reads ``part.info`` (tab-separated ``start-end`` row and column spans),
    copies ``part_<k>.npy`` into the matching slice of one square matrix sized
    by the SNP count of ``bfile``, divides the matrix by the square roots of
    its diagonal along both axes, and writes the pseudo-inverse to
    ``inv_ld.npy`` and its rank to ``rank.txt`` inside ``ld_dir``.
    """
    snp_count = Bed(bfile, count_A1=False).col_count
    full_cov = np.zeros([snp_count, snp_count])

    layout = pd.read_table(join(ld_dir, 'part.info'),
                           header=None,
                           sep='\t',
                           names=['row', 'col'])
    for idx in layout.index:
        r_lo, r_hi = (int(tok) for tok in layout['row'][idx].split('-'))
        c_lo, c_hi = (int(tok) for tok in layout['col'][idx].split('-'))
        # Part files are numbered from 1 while the table index starts at 0.
        block = np.load(join(ld_dir, 'part_{}.npy'.format(idx + 1)))
        full_cov[r_lo:r_hi, c_lo:c_hi] = block

    # Scale rows, then columns, by the square root of the diagonal.
    scale = np.sqrt(np.diag(full_cov))
    full_cov /= scale[:, None]
    full_cov /= scale[None, :]

    inverse, rank = linalg.pinvh(full_cov, return_rank=True)
    np.save(join(ld_dir, 'inv_ld.npy'), inverse)
    with open(join(ld_dir, 'rank.txt'), 'w') as rank_file:
        rank_file.write(str(rank))
def test_one(self):
    # Runs epistasis on two overlapping SNP lists and checks both the frame
    # it returns and a re-written copy against the "one" reference.
    logging.info("TestEpistasis test_one")
    from pysnptools.snpreader import Bed

    snps = Bed(self.bedbase, count_A1=False)
    pheno_path = self.phen_fn
    covar_path = self.cov_fn
    out_path = self.file_name("one")

    first_ten = snps.sid[:10]  #first 10 snps
    next_ten = snps.sid[5:15]  #Skip 5 snps, use next 10
    frame = epistasis(snps,
                      pheno_path,
                      G0=snps,
                      covar=covar_path,
                      sid_list_0=first_ten,
                      sid_list_1=next_ten,
                      output_file_name=out_path,
                      count_A1=False)

    sid0 = np.array(frame['SNP0'])
    sid1 = np.array(frame['SNP1'])
    pvalue_list = np.array(frame['PValue'])

    #Check the output file
    self.compare_files(sid0, sid1, pvalue_list, "one")

    #Check the values returned
    rewritten_path = self.file_name("one_again")
    write(sid0, sid1, pvalue_list, rewritten_path)
    self.compare_files(sid0, sid1, pvalue_list, "one")
def test_linreg(self):
    # single_snp with an identity kernel and single_snp_linreg are both
    # expected to reproduce the "linreg" reference on the first 10 SNPs.
    logging.info("TestSingleSnp test_linreg")
    snps = Bed(self.bedbase, count_A1=False)
    pheno = self.phen_fn
    covar = self.cov_fn
    output_file = self.file_name("linreg")

    identity_kernel = KernelIdentity(iid=snps.iid)
    lmm_frame = single_snp(test_snps=snps[:, :10],
                           pheno=pheno,
                           mixing=0,
                           leave_out_one_chrom=False,
                           G0=identity_kernel,
                           covar=covar,
                           output_file_name=output_file,
                           count_A1=False)
    # Only these columns are compared for the single_snp result.
    kept_columns = ['sid_index', 'SNP', 'Chr', 'GenDist', 'ChrPos', 'PValue']
    lmm_frame = lmm_frame[kept_columns]
    self.compare_files(lmm_frame, "linreg")

    linreg_frame = single_snp_linreg(test_snps=snps[:, :10],
                                     pheno=pheno,
                                     covar=covar,
                                     output_file_name=output_file)
    self.compare_files(linreg_frame, "linreg")
def test_gb_goal(self):
    # Runs single_snp with GB_goal=0 and then GB_goal=.12; both results are
    # compared with the "one" reference.
    logging.info("TestSingleSnp test_gb_goal")
    snps = Bed(self.bedbase, count_A1=False)
    pheno = self.phen_fn
    covar = self.cov_fn

    for file_tag, gb_goal in (("gb_goal", 0), ("gb_goal2", .12)):
        output_file = self.file_name(file_tag)
        frame = single_snp(test_snps=snps[:, :10],
                           pheno=pheno,
                           mixing=0,
                           leave_out_one_chrom=False,
                           G0=snps,
                           covar=covar,
                           GB_goal=gb_goal,
                           output_file_name=output_file,
                           count_A1=False)
        self.compare_files(frame, "one")
def __init__(self, prefix, case_file):
    """Load ``<prefix>.bed`` and split its individuals into cases and controls.

    Args:
        prefix: Path prefix of the bed fileset; ``f"{prefix}.bed"`` is opened.
        case_file: Text file readable by ``np.loadtxt`` whose first two
            columns identify the case individuals (compared against the
            reader's ``iid`` rows).

    Side effects:
        Rewrites columns 0 and 1 of ``self.snpreader.pos`` in place, reads
        all genotypes into memory and prints the case ids.
    """
    self.prefix = prefix
    self.case_file = case_file
    self.snpreader = Bed(f"{prefix}.bed", count_A1=False)
    if self.snpreader.pos.dtype != 'int64':
        # Map column 0 through the module-level `replace` helper.
        self.snpreader.pos[:,0] = np.vectorize(replace)(self.snpreader.pos[:,0])
    # Column 1 becomes a single sortable key: column 0 * 1e11 + column 2.
    self.snpreader.pos[:,1] = self.snpreader.pos[:,0] * 100000000000 + self.snpreader.pos[:,2]
    self.snpdata = self.snpreader.read()
    print('SNP data loaded.')

    self.chr_list = list(set(self.snpreader.pos[:,0]))
    self.Chr = self.snpreader.pos[:,0]
    self.Position = self.snpreader.pos[:,1]
    self.bp = self.snpreader.pos[:,2]
    self.SNPID = self.snpreader.sid

    # ndmin=2 keeps the result two-dimensional even when the file holds a
    # single case; otherwise the [:, :2] slice raises IndexError.
    self.case = np.loadtxt(case_file, dtype=self.snpreader.iid.dtype, ndmin=2)[:,:2]
    self.case_list = list(self.case)
    self.all_list = [tuple(x) for x in self.snpreader.iid]
    self.caseset = {tuple(x) for x in self.case}
    self.control_list = [list(x) for x in self.all_list if x not in self.caseset]

    self.numSNP = self.snpreader.sid_count
    self.numSample = len(self.all_list)
    self.numCase = len(self.case_list)
    self.numControl = len(self.control_list)
    self.case_geno = self.snpdata.val[self.snpreader.iid_to_index(self.case)]

    # Ids may be bytes or str depending on the reader's iid dtype; only
    # bytes need decoding.
    case_names = [
        entry[1].decode('utf-8') if isinstance(entry[1], bytes) else str(entry[1])
        for entry in self.case_list
    ]
    self.case_list_print = '\n'.join(case_names)
    print('Case individuals are: \n')
    print(self.case_list_print)
    print('\n')
def test_leave_one_out_with_prekernels(self):
    # Pre-computes one kernel per chromosome (built from all other
    # chromosomes), passes the mapping as K0 to single_snp and compares the
    # result with the "one_looc" reference.
    logging.info(
        "TestSingleSnpLeaveOutOneChrom test_leave_one_out_with_prekernels")
    from pysnptools.kernelstandardizer import DiagKtoN

    snps = Bed(self.bedbase, count_A1=False)
    pheno = self.phen_fn
    covar = self.cov_fn

    kernel_by_chrom = {}
    with patch.dict('os.environ', {'ARRAY_MODULE': 'numpy'}) as _:
        for chrom in np.unique(snps.pos[:, 0]):
            not_this_chrom = snps.pos[:, 0] != chrom
            #Create a kernel from the SNPs not used in testing
            raw_kernel = snps[:, not_this_chrom].read_kernel(
                standardizer=Unit(), block_size=500)
            #improves the kernel numerically by making its diagonal sum to iid_count
            kernel_by_chrom[chrom] = raw_kernel.standardize(DiagKtoN())

        output_file = self.file_name("one_looc_prekernel")
        frame = single_snp(snps,
                           pheno,
                           covar=covar,
                           K0=kernel_by_chrom,
                           output_file_name=output_file,
                           count_A1=False)

    self.compare_files(frame, "one_looc")
def too_slow_test_notebook(self):
    # Same scenario as the notebook test, but run on a LocalMultiProc runner
    # with one process per CPU; compared with the "notebook" reference.
    do_plot = False
    cpu_total = multiprocessing.cpu_count()
    runner = LocalMultiProc(cpu_total, mkl_num_threads=2)
    output_file_name = self.file_name("notebook")

    logging.info("TestSingleSnpAllPlusSelect test_notebook")

    # define file names
    snp_reader = Bed(self.pythonpath + "/tests/datasets/synth/all.bed",
                     count_A1=False)
    pheno_fn = self.pythonpath + "/tests/datasets/synth/pheno_10_causals.txt"
    cov_fn = self.pythonpath + "/tests/datasets/synth/cov.txt"

    # find the chr5 SNPs
    is_chr5 = snp_reader.pos[:, 0] == 5
    test_snps = snp_reader[:, is_chr5]

    #select the 2nd kernel and run GWAS
    results = single_snp_all_plus_select(
        test_snps=test_snps,
        G=snp_reader,
        pheno=pheno_fn,
        GB_goal=2,
        do_plot=do_plot,
        output_file_name=output_file_name,
        runner=runner,
        count_A1=False,
    )

    self.compare_files(results, "notebook")
def test_snp_dist2(self):
    # Builds a dist reader from the toy bed file, exercises its str(), and
    # feeds it to the existence checker.
    logging.info("in test_snp_dist2")
    bed_path = self.currentFolder + "/../examples/toydata.5chrom.bed"
    reader = Bed(bed_path, count_A1=False)
    dist_reader = reader.as_dist(max_weight=2)
    s = str(dist_reader)  # only checks that str() does not raise
    checker = _fortesting_JustCheckExists()
    checker.input(dist_reader)
def __init__(self,filename,snpfile="",params="",n0=-1,n1=-1):
    """Load genotypes and phenotypes and select the SNP list.

    Args:
        filename: Path handed to ``Bed``; phenotypes come from
            ``filename + ".fam"`` (column 3 of the ``Pheno`` values, minus 1).
        snpfile: Optional text file with one SNP id per line. When empty, all
            ids of the bed file are used.
        params: Stored on ``self.params`` before ``setUp`` runs.
        n0: When positive, keep at most ``n0`` individuals whose phenotype is
            0.0 and ``n1`` whose phenotype is 1.0.
        n1: See ``n0``.

    The process exits through ``sys.exit()`` if the SNP list cannot be loaded.
    """
    self.BED=Bed(filename)
    self.pheno=Pheno(filename+".fam")
    self.y=self.pheno.read().val[:,3]
    self.y=self.y-1.0
    self.params=params
    n=len(self.y)
    if n0>0:
        # A single-argument print(...) behaves the same on Python 2 and 3.
        print("Initiate with n0")
        I0=[i for i in range(0,n) if self.y[i]==0.0]
        I0=I0[:n0]
        I1=[i for i in range(0,n) if self.y[i]==1.0]
        # NOTE(review): with the default n1=-1 this slice drops the last
        # matching individual -- confirm that is intended.
        I1=I1[:n1]
        I0.extend(I1)
        self.y=self.y[I0]
        self.BED=self.BED[I0,:]
    try:
        if len(snpfile)>0:
            # The context manager closes the file even if reading fails.
            with open(snpfile) as fil:
                self.snps=[l.strip() for l in fil.readlines()]
        else:
            self.snps=self.BED.sid
    except Exception:
        # Catch ordinary errors only, so KeyboardInterrupt/SystemExit are
        # not reported as a SNP-loading failure.
        print("Error loading SNPs!")
        sys.exit()
    self.setUp()
    self.n=len(self.y)
    print("Number of individuals: "+str(self.n))
    self.Cov=[]
    # NOTE(review): this overwrites the `params` argument stored above --
    # confirm the reset after setUp() is intentional.
    self.params=""
def readFiles(self):
    """Read genotypes, phenotypes and marker names, dropping NaN phenotypes.

    ``self.fileType`` selects the input format:

    * ``'plink'``: genotypes and marker ids come from ``fileName + '.bed'``
      and phenotypes from ``self.famReader(fileName + '.fam')``.
    * ``'csv'``: genotypes from ``fileName + '.geno.csv'``, phenotypes from
      ``fileName + '.pheno.csv'``; marker names from
      ``fileName + '.marker.csv'`` when it can be loaded, otherwise the
      generated names ``'geno 1'``, ``'geno 2'``, ...

    The genotypes go through ``self.imputation`` when ``self.imputationFlag``
    is true and through ``self.simpleImputation`` otherwise.

    Returns:
        tuple: ``(X, y, Xname)`` restricted to the rows whose phenotype is
        not NaN.
    """
    # A single-argument print(...) behaves the same on Python 2 and 3.
    print('Reading Data ...')
    X = None
    y = None
    Xname = None
    if self.fileType == 'plink':
        from pysnptools.snpreader import Bed
        snpreader = Bed(self.fileName + '.bed')
        snpdata = snpreader.read()
        X = snpdata.val
        Xname = snpdata.sid

        # from pysnptools.snpreader import Pheno
        # phenoreader = Pheno(self.fileName+".fam")
        # phenodata = phenoreader.read()
        # y = phenodata.val[:,-1]
        y = self.famReader(self.fileName + ".fam")

    if self.fileType == 'csv':
        X = np.loadtxt(self.fileName + '.geno.csv', delimiter=',')
        y = np.loadtxt(self.fileName + '.pheno.csv', delimiter=',')
        try:
            Xname = np.loadtxt(self.fileName + '.marker.csv', delimiter=',')
        except Exception:
            # Best effort: a missing or unparsable marker file falls back to
            # generated names.
            Xname = ['geno ' + str(i + 1) for i in range(X.shape[1])]

    if self.imputationFlag:
        X = self.imputation(X)
        # `~` negates the boolean mask; `True - mask` raises TypeError on
        # current NumPy releases.
        keep = ~np.isnan(y)
        return X[keep, :], y[keep], Xname
    else:
        X = self.simpleImputation(X)
        keep = (y == y)  # NaN != NaN, so this is False exactly for NaN
        return X[keep, :], y[keep], Xname
def test_G0_has_reader(self):
    # Two ways of supplying the same SNPs as the kernel source -- as G0 with
    # mixing=0, and as G1 (next to an identity G0) with mixing=1 -- are both
    # compared with the "one" reference.
    logging.info("TestSingleSnp test_G0_has_reader")
    snps = Bed(self.bedbase, count_A1=False)
    pheno = self.phen_fn
    covar = self.cov_fn
    output_file_name = self.file_name("G0_has_reader")

    via_g0 = single_snp(test_snps=snps[:, :10],
                        pheno=pheno,
                        G0=snps,
                        leave_out_one_chrom=False,
                        covar=covar,
                        mixing=0,
                        output_file_name=output_file_name,
                        count_A1=False)
    self.compare_files(via_g0, "one")

    identity_kernel = KernelIdentity(snps.iid)
    via_g1 = single_snp(test_snps=snps[:, :10],
                        pheno=pheno,
                        G0=identity_kernel,
                        G1=snps,
                        leave_out_one_chrom=False,
                        covar=covar,
                        mixing=1,
                        output_file_name=output_file_name,
                        count_A1=False)
    self.compare_files(via_g1, "one")
def test_linreg(self):
    # Calls single_snp_linreg twice on the first 10 SNPs; the first result is
    # reduced to a fixed column subset before comparison, the second is
    # compared as returned. Both use the "linreg" reference.
    logging.info("TestSingleSnpLinReg test_linreg")
    snps = Bed(self.bedbase, count_A1=False)
    pheno = self.phen_fn
    covar = self.cov_fn
    output_file = self.file_name("linreg")

    def run_linreg():
        # One single_snp_linreg call with this test's fixed arguments.
        return single_snp_linreg(test_snps=snps[:, :10],
                                 pheno=pheno,
                                 covar=covar,
                                 output_file_name=output_file,
                                 count_A1=False)

    kept_columns = ['sid_index', 'SNP', 'Chr', 'GenDist', 'ChrPos', 'PValue']
    trimmed = run_linreg()[kept_columns]
    self.compare_files(trimmed, "linreg")

    untrimmed = run_linreg()
    self.compare_files(untrimmed, "linreg")
def test_file_cache(self):
    # Runs the same single_snp call twice with a cache_file: the first run
    # starts without the cache file, the second finds the one left behind.
    # Both results are compared with the "G1" reference.
    logging.info("TestSingleSnp test_file_cache")
    snps = Bed(self.bedbase, count_A1=False)
    pheno = self.phen_fn
    covar = self.cov_fn
    output_file_name = self.file_name("G1")

    cache_file = self.file_name("cache_file") + ".npz"
    # Start from a clean slate so the first run cannot reuse an old cache.
    if os.path.exists(cache_file):
        os.remove(cache_file)

    for _ in range(2):
        frame = single_snp(test_snps=snps[:, :10],
                           pheno=pheno,
                           G0=snps[:, 10:100],
                           leave_out_one_chrom=False,
                           covar=covar,
                           G1=snps[:, 100:200],
                           mixing=.5,
                           output_file_name=output_file_name,
                           cache_file=cache_file,
                           count_A1=False)
        self.compare_files(frame, "G1")
def cut_ld(bfile, chunk_size, ld_dir):
    """Write ``part.info``, the tiling of the SNP-by-SNP matrix into blocks.

    The SNPs of ``bfile`` are cut into chunks of ``chunk_size``; every
    (row chunk, column chunk) pair becomes one line of
    ``<ld_dir>/part.info`` holding the tab-separated spans
    ``row_start-row_end`` and ``col_start-col_end``, in row-major order.
    Span ends are plain multiples of ``chunk_size`` and are not clipped to
    the SNP count.
    """
    snp_total = Bed(bfile, count_A1=False).col_count

    # number of chunks snps devides into
    chunks_per_side = int(np.ceil(float(snp_total) / chunk_size))

    # number of parts of the covariance matrix that needs to be computed
    part_total = chunks_per_side * chunks_per_side

    row_spans = []
    col_spans = []
    for part_index in range(part_total):
        row_chunk, col_chunk = divmod(part_index, chunks_per_side)
        row_spans.append('{}-{}'.format(row_chunk * chunk_size,
                                        (row_chunk + 1) * chunk_size))
        col_spans.append('{}-{}'.format(col_chunk * chunk_size,
                                        (col_chunk + 1) * chunk_size))

    layout = pd.DataFrame({'row': row_spans, 'col': col_spans},
                          columns=['row', 'col'])
    layout.to_csv(join(ld_dir, 'part.info'),
                  index=False,
                  header=False,
                  sep='\t')
def test_some_std(self):
    # Checks that kernels built from the in-memory data and from the toy bed
    # file agree, then walks several standardizers over float64 and float32
    # inputs.
    k0 = self.snpdata.read_kernel(standardizer=Unit()).val
    from pysnptools.kernelreader import SnpKernel
    k1 = self.snpdata.read_kernel(standardizer=Unit())
    np.testing.assert_array_almost_equal(k0, k1.val, decimal=10)

    from pysnptools.snpreader import SnpData
    snpdata2 = SnpData(iid=self.snpdata.iid,
                       sid=self.snpdata.sid,
                       pos=self.snpdata.pos,
                       val=np.array(self.snpdata.val))
    s = str(snpdata2)
    snpdata2.standardize()
    s = str(snpdata2)

    snpreader = Bed(self.currentFolder + "/examples/toydata", count_A1=False)
    k2 = snpreader.read_kernel(standardizer=Unit(), block_size=500).val
    np.testing.assert_array_almost_equal(k0, k2, decimal=10)

    from pysnptools.standardizer.identity import Identity
    from pysnptools.standardizer.diag_K_to_N import DiagKtoN

    # Use the NumPy dtypes directly: the `sp.float64` / `sp.float32` aliases
    # are deprecated re-exports of the same NumPy objects.
    for dtype in [np.float64, np.float32]:
        for std in [Unit(), Beta(1, 25), Identity(), DiagKtoN()]:
            s = str(std)
            np.random.seed(0)
            x = np.array(np.random.randint(3, size=[60, 100]), dtype=dtype)
            x2 = x[:, ::2]
            x2b = np.array(x2)
            #LATER what's this about? It doesn't do non-contiguous?
            #assert not x2.flags['C_CONTIGUOUS'] and not x2.flags['F_CONTIGUOUS'] #set up to test non contiguous
            #assert x2b.flags['C_CONTIGUOUS'] or x2b.flags['F_CONTIGUOUS'] #set up to test non contiguous
            #a,b = std.standardize(x2b),std.standardize(x2)
            #np.testing.assert_array_almost_equal(a,b)
    logging.info("done")
def setUpClass(self):
    # Loads the mouse dataset once and stores read-only arrays on the
    # receiver: G (unit-standardized genotypes), y (first phenotype column)
    # and G_cov (a single column of ones).
    here = os.path.dirname(os.path.realpath(__file__))
    self.snp_fn = here + "/../../tests/datasets/mouse/alldata"
    self.pheno_fn = here + "/../../tests/datasets/mouse/pheno_10_causals.txt"
    #self.cov_fn = currentFolder + "/examples/toydata.cov"

    # load data
    ###################################################################
    reader = Bed(self.snp_fn)
    pheno = pstpheno.loadOnePhen(self.pheno_fn)
    #cov = pstpheno.loadPhen(self.cov_fn)

    # intersect sample ids
    reader, pheno = pysnptools.util.intersect_apply([reader, pheno])

    raw_genotypes = reader.read(order='C').val
    self.G = stdizer.Unit().standardize(raw_genotypes)
    self.G.flags.writeable = False

    self.y = pheno['vals'][:, 0]
    self.y.flags.writeable = False

    # load pcs
    #self.G_cov = cov['vals']
    sample_total = len(self.y)
    self.G_cov = np.ones((sample_total, 1))
    self.G_cov.flags.writeable = False
def test_write_x_x_cpp(self):
    # Round-trips the toy data through Bed.write / Bed.read for every
    # combination of count_A1, memory order and dtype, with one value set to
    # NaN, and checks the values survive.
    for count_A1 in [False, True]:
        reader = Bed(self.currentFolder + "/examples/toydata",
                     count_A1=count_A1)
        for order in ['C', 'F']:
            for dtype in [np.float32, np.float64]:
                written = reader.read(order=order, dtype=dtype)
                written.val[-1, 0] = float("NAN")

                width_tag = "32" if dtype == np.float32 else "64"
                output = "tempdir/toydata.{0}{1}.cpp".format(order, width_tag)
                create_directory_if_necessary(output)

                Bed.write(output, written, count_A1=count_A1)
                reread = Bed(output, count_A1=count_A1).read()
                np.testing.assert_array_almost_equal(written.val,
                                                     reread.val,
                                                     decimal=10)
def test1(self):
    # SnpGen must be repeatable for a fixed seed, match the stored reference
    # bed file, and give the same values with a cache file (both when the
    # cache is first written and when it is reused).
    logging.info("in TestSnpGen test1")
    seed = 0
    columns = [0, 1, 200, 2200, 10]

    generator = SnpGen(seed=seed,
                       iid_count=1000,
                       sid_count=1000 * 1000,
                       block_size=1000)
    snpdata = generator[:, columns].read()
    snpdata2 = generator[:, columns].read()
    assert (snpdata.allclose(snpdata2))

    from pysnptools.snpreader import Bed
    ref_path = os.path.dirname(
        os.path.realpath(__file__)) + '/../../tests/datasets/snpgen.bed'
    ref = Bed(ref_path, count_A1=False).read()
    assert (snpdata.allclose(ref, equal_nan=True))

    cache_file = 'tempdir/cache_file_test1.npz'
    if os.path.exists(cache_file):
        os.remove(cache_file)

    # First pass runs without a cache file on disk, second pass with the one
    # left behind by the first.
    for _ in range(2):
        cached_generator = SnpGen(seed=seed,
                                  iid_count=1000,
                                  sid_count=1000 * 1000,
                                  block_size=1000,
                                  cache_file=cache_file)
        cached_data = cached_generator[::10, columns].read()
        assert (cached_data.allclose(snpdata2[::10, :].read()))
def __init__(self, path_or_bed, blocksize, LOCO_chrom_id=None, forcelowrank=False):
    """Constructor.

    Args:
        path_or_bed: Either a path to a bed file (opened with
            ``Bed(..., count_A1=True)``) or an existing ``SnpReader``
            instance, which is used as is.
        blocksize: Stored on ``self.blocksize``.
        LOCO_chrom_id: Forwarded to ``self._get_LOCO_SNV_indices`` to pick
            ``self.variants_to_include``.
        forcelowrank: Stored on ``self.forcelowrank``; only for testing
            purposes.

    Raises:
        AssertionError: If ``path_or_bed`` is neither a ``str`` nor a
            ``SnpReader``.
    """
    self.forcelowrank = forcelowrank  # only for testing purposes!

    if isinstance(path_or_bed, str):
        self.bed = Bed(path_or_bed, count_A1=True)
    else:
        assert isinstance(
            path_or_bed, SnpReader
        ), 'path_or_bed must either be a path to a bed-file, or an instance of SnpReader.'
        # Without this assignment self.bed is never set on this branch and
        # the next statement fails with AttributeError.
        self.bed = path_or_bed

    self.bed.pos[:, 0] = self.bed.pos[:, 0].astype(
        'str')  # chromosome should be str, stored positions are 1-based

    self.iid_fid = pd.DataFrame(self.bed.iid,
                                index=self.bed.iid[:, 1].astype(str),
                                columns=['fid', 'iid'])
    self.variants_to_include = self._get_LOCO_SNV_indices(LOCO_chrom_id)
    self.blocksize = blocksize

    # Filled in later by other methods of the class.
    self.nb_ind = None
    self.nb_SNVs_unf = None
    self.G0 = None
    self.K0 = None
    self.nb_SNVs_f = None
    self.samples_overlapped = False
def test_three(self):  #!!! rather a big test case
    # Runs single_snp_all_plus_select with the whole synth dataset as test
    # SNPs (no separate G) and compares the result with the "three"
    # reference.
    from pysnptools.util.mapreduce1.runner import Local, LocalMultiProc

    logging.info("TestSingleSnpAllPlusSelect test_three")

    bed_path = self.pythonpath + "/tests/datasets/synth/all.bed"
    all_snps = Bed(bed_path, count_A1=False)
    pheno_fn = self.pythonpath + "/tests/datasets/synth/pheno_10_causals.txt"
    cov_fn = self.pythonpath + "/tests/datasets/synth/cov.txt"

    mf_name = "lmp"  #"local", "coreP", "nodeP", "socketP", "nodeE", "lmp"
    runner_factory = mf_to_runner_function(mf_name)
    runner = runner_factory(4)

    output_file_name = self.file_name("three")
    candidate_ks = [int(k) for k in np.logspace(0, 7, base=2, num=7)]
    results = single_snp_all_plus_select(
        test_snps=all_snps,
        pheno=pheno_fn,
        covar=cov_fn,
        k_list=candidate_ks,
        n_folds=7,
        seed=42,
        do_plot=False,
        GB_goal=2,
        output_file_name=output_file_name,
        runner=runner,
        count_A1=False,
    )

    logging.info(results)
    self.compare_files(results, "three")
def test_pheno1(self):
    # A phenotype generated from gen2.bed with fixed settings (seed=5) must
    # equal the stored reference pheno1.snp.npz.
    from pysnptools.snpreader import Bed, SnpData, SnpNpz

    source_snps = Bed(
        self.currentFolder + "/../../tests/datasets/generate/gen2.bed",
        count_A1=False).read()

    pheno_column = _generate_phenotype(source_snps,
                                       10,
                                       genetic_var=.5,
                                       noise_var=.5,
                                       seed=5).reshape(-1, 1)
    generated = SnpData(iid=source_snps.iid, sid=["pheno"], val=pheno_column)

    #SnpNpz.write(r'c:\deldir\pheno1.snp.npz',gen_snpdata)
    expected = SnpNpz(
        self.currentFolder +
        "/../../tests/datasets/generate/pheno1.snp.npz").read()

    assert generated == expected
def _snp_fixup(snp_input, iid_source_if_none=None): if isinstance(snp_input, str): return Bed(snp_input) elif snp_input is None: return iid_source_if_none[:, 0:0] #return snpreader with no snps else: return snp_input
def hess_h2g(self):
    """Compute one h2g estimate per simulation and report summary statistics.

    For each of ``self.num_sim`` simulations the genotype subset selected by
    the matching columns of ``./sample/indv_idx.txt`` and
    ``./sample/snp_idx.txt`` is standardized, marginal effects are formed
    with the matching row of ``./phe/phe_gene_std.npy``, and the estimate
    ``(n * b' V^+ b - q) / (n - q)`` is computed with ``V`` loaded from
    ``./ld/ld_f<sim>.txt`` and ``q = trace(V^+ V)``.

    Writes all estimates to ``h2g_hess.txt`` and prints their nan-mean,
    nan-variance and nan-median, followed by a variance expression built
    from ``self.h2g``, ``n`` and ``p``.
    """
    geno = Bed(self.bfile, count_A1=False)
    indv_idx = np.loadtxt("./sample/indv_idx.txt", delimiter=",", dtype=int)
    snp_idx = np.loadtxt("./sample/snp_idx.txt", delimiter=",", dtype=int)
    phe = np.load("./phe/phe_gene_std.npy")

    n = self.num_indv
    p = self.num_snp
    h2g_est = np.zeros(self.num_sim)

    for sim_i in range(self.num_sim):
        rows = indv_idx[:, sim_i]
        cols = snp_idx[:, sim_i]
        geno_val = geno[rows, cols].read().val

        # Per-SNP frequency, then centre and scale each column.
        f = np.sum(geno_val, axis=0) / (2 * self.num_indv)
        X = (geno_val - 2 * f) / np.sqrt(2 * f * (1 - f))

        y = phe[sim_i, :]
        beta_est = np.matmul(np.transpose(X), y) / n

        V = np.loadtxt('./ld/ld_f{}.txt'.format(sim_i), delimiter=",")
        V_pinv = np.linalg.pinv(V)
        q = np.trace(np.matmul(V_pinv, V))

        quad_form = np.linalg.multi_dot([beta_est, V_pinv, beta_est])
        h2g_est[sim_i] = (n * quad_form - q) / (n - q)

    np.savetxt("h2g_hess.txt", h2g_est, delimiter=",")
    print("HESS:")
    print(np.nanmean(h2g_est), np.nanvar(h2g_est), np.nanmedian(h2g_est))

    var = (n / (n - p))**2 * (2 * p * (
        (1 - self.h2g) / n) + 4 * self.h2g) * ((1 - self.h2g) / n)
    print(var)
def test_snp_kernel2(self):
    # Wraps the toy bed file in a SnpKernel with a Beta(1, 25) standardizer,
    # exercises its str(), and feeds it to the existence checker.
    logging.info("in test_snp_kernel2")
    bed_path = self.currentFolder + "/../examples/toydata.5chrom.bed"
    reader = Bed(bed_path, count_A1=False)
    kernel = SnpKernel(reader, standardizer=stdizer.Beta(1, 25))
    s = str(kernel)  # only checks that str() does not raise
    checker = _fortesting_JustCheckExists()
    checker.input(kernel)
def test_file_cache(self):
    # First run supplies G0/G1 and a cache_file that does not exist yet; the
    # second run passes G0=None and G1=None with the same cache_file. Both
    # results are compared with the "G1" reference.
    logging.info("TestSingleSnp test_file_cache")
    from pysnptools.snpreader import Bed

    snps = Bed(self.bedbase)
    pheno = self.phen_fn
    covar = self.cov_fn
    output_file_name = self.file_name("G1")

    cache_file = self.file_name("cache_file") + ".npz"
    # Make sure the first run cannot pick up a stale cache.
    if os.path.exists(cache_file):
        os.remove(cache_file)

    with_kernels = single_snp(test_snps=snps[:, :10],
                              pheno=pheno,
                              G0=snps[:, 10:100],
                              covar=covar,
                              G1=snps[:, 100:200],
                              mixing=.5,
                              output_file_name=output_file_name,
                              cache_file=cache_file)
    self.compare_files(with_kernels, "G1")

    from_cache = single_snp(test_snps=snps[:, :10],
                            pheno=pheno,
                            G0=None,
                            covar=covar,
                            G1=None,
                            mixing=.5,
                            output_file_name=output_file_name,
                            cache_file=cache_file)
    self.compare_files(from_cache, "G1")