def test_old_sel_plus_pc(self): #!!! rather a big test case
    """Regression test for the legacy fastlmmc-based select-plus-PCs pipeline.

    Computes PC covariates with ``compute_auto_pcs`` (written to
    ``pcs_cov.txt``), then calls ``runLMMSELECT`` twice -- once with
    ``optLogdelta=0`` ("h2IsHalf") and once with ``optLogdelta=None``
    ("h2Search"). After each call the ``<name>.LMMSELECT.out.txt`` table is
    read from ``self.tempout_dir`` and passed to ``self.compare_files``.
    """
    logging.info("TestSingleSnpSelect old_sel_plus_pc")

    from fastlmm.util import compute_auto_pcs

    # define file names
    bed_fn = self.pythonpath + "/tests/datasets/synth/all"
    cov_fn = "pcs_cov.txt"

    # run PCgeno
    #TODO: rename to auto_pcs
    result = compute_auto_pcs(bed_fn, output_file_name=cov_fn)
    logging.info("selected number of PCs: {0}".format(result["vals"].shape[1]))

    # import algorithms
    from fastlmm.util.run_fastlmmc import runLMMSELECT

    # set some file paths for fastlmmc
    phen_fn = self.pythonpath + "/tests/datasets/synth/pheno_10_causals.txt"
    out_dir = self.tempout_dir
    fastlmm_path = self.pythonpath + "/external/fastlmmc"

    # consists of two fastlmmc calls, one that does feature selection and one that runs GWAS
    for suffix, logdelta in [("h2IsHalf", 0), ("h2Search", None)]:
        # `suffix` already names the mode, so it is used directly instead of
        # re-deriving it with an identity test (`logdelta is 0`) that only
        # worked through CPython's small-integer caching.
        result_file_name = "sel_plus_pc_old_{0}".format(suffix)
        runLMMSELECT(bed_fn, phen_fn, out_dir, result_file_name,
                     bfileSim=bed_fn, covar=cov_fn,
                     fastlmm_path=fastlmm_path,
                     autoSelectCriterionMSE=False,
                     excludeByGeneticDistance=1000,
                     optLogdelta=logdelta)

        # compare sel_plus_pc_old_h2*.LMMSELECT.out.txt
        short = result_file_name + ".LMMSELECT.out"
        # Raw string: '\s' is a regex for pandas, not a Python escape sequence.
        results = pd.read_csv(self.tempout_dir + "/" + short + ".txt",
                              delimiter=r'\s', comment=None, engine='python')
        results['PValue'] = results.Pvalue #add a new column with different capitalization
        self.compare_files(results, short)
def _sel_plus_pc(self, h2, force_low_rank, force_full_rank, count_A1=None):
    """Run ``single_snp_select`` on the synth data set with PC covariates.

    h2 -- forwarded to ``single_snp_select``; the value .5 selects the
        "h2IsHalf" result name, any other value selects "h2Search".
    force_low_rank, force_full_rank -- forwarded to ``single_snp_select``.
    count_A1 -- forwarded to ``compute_auto_pcs`` only; the
        ``single_snp_select`` call always receives ``count_A1=False``.

    The resulting table is logged (head only) and handed to
    ``self.compare_files`` together with the result name.
    """
    do_plot = False  # not read anywhere in this method
    use_cache = False

    # define file names
    bed_fn = self.pythonpath + "/tests/datasets/synth/all.bed"
    phen_fn = self.pythonpath + "/tests/datasets/synth/pheno_10_causals.txt"
    pcs_fn = os.path.join(self.tempout_dir, "sel_plus_pc.pcs.txt")

    have_cached_pcs = use_cache and os.path.exists(pcs_fn)
    if have_cached_pcs:
        logging.info("Using top pcs's cache")
        covar = Pheno(pcs_fn)
    else:
        # Compute the PCs and store them in pcs_fn.
        from fastlmm.util import compute_auto_pcs
        covar = compute_auto_pcs(bed_fn, count_A1=count_A1)
        pc_count = covar["vals"].shape[1]
        logging.info("selected number of PCs: {0}".format(pc_count))
        pcs_as_snpdata = SnpData(iid=covar['iid'],
                                 sid=covar['header'],
                                 val=covar['vals'])
        Pheno.write(pcs_fn, pcs_as_snpdata)

    mf_name = "lmp"  #"lmpl" "local", "coreP", "nodeP", "socketP", "nodeE", "lmp"
    make_runner = mf_to_runner_function(mf_name)
    runner = make_runner(20)  # built but not handed to single_snp_select below

    logging.info(
        "Working on h2={0},force_low_rank={1},force_full_rank={2}".format(
            h2, force_low_rank, force_full_rank))

    if h2 == .5:
        h2_label = "h2IsHalf"
    else:
        h2_label = "h2Search"
    result_file_name = "sel_plus_pc_{0}".format(h2_label)
    output_file_name = os.path.join(self.tempout_dir, result_file_name) + ".txt"

    # 0..10 in steps of one, followed by the coarser grid up to 1000.
    k_candidates = list(range(11)) + [
        20, 30, 40, 50, 60, 70, 80, 90, 100,
        125, 160, 200, 250, 320, 400, 500, 630, 800, 1000,
    ]
    folds_fn = self.pythonpath + "/tests/datasets/synth/DebugEmitFolds.txt"

    results = single_snp_select(
        test_snps=bed_fn,
        G=bed_fn,
        pheno=phen_fn,
        k_list=k_candidates,
        h2=h2,
        n_folds=folds_fn,
        covar=covar,
        output_file_name=output_file_name,
        force_low_rank=force_low_rank,
        force_full_rank=force_full_rank,
        GB_goal=2,
        count_A1=False
        #runner = runner
    )

    logging.info(results.head())
    self.compare_files(results, result_file_name)
def _sel_plus_pc(self,h2,force_low_rank,force_full_rank,count_A1=None):
    """Shared driver: PCs as covariates, then ``single_snp_select``, then compare.

    ``h2``, ``force_low_rank`` and ``force_full_rank`` are forwarded to
    ``single_snp_select``. ``count_A1`` is forwarded to ``compute_auto_pcs``
    only; ``single_snp_select`` is always called with ``count_A1=False``.
    The result name is "sel_plus_pc_h2IsHalf" when ``h2 == .5`` and
    "sel_plus_pc_h2Search" otherwise; it is what ``self.compare_files``
    receives alongside the results.
    """
    do_plot = False  # never read in this method
    use_cache = False

    # define file names
    bed_fn = self.pythonpath + "/tests/datasets/synth/all.bed"
    phen_fn = self.pythonpath + "/tests/datasets/synth/pheno_10_causals.txt"
    pcs_fn = os.path.join(self.tempout_dir,"sel_plus_pc.pcs.txt")

    if use_cache and os.path.exists(pcs_fn):
        logging.info("Using top pcs's cache")
        covar = Pheno(pcs_fn)
    else:
        from fastlmm.util import compute_auto_pcs
        covar = compute_auto_pcs(bed_fn,count_A1=count_A1)
        logging.info("selected number of PCs: {0}".format(covar["vals"].shape[1]))
        # Persist the PCs to pcs_fn.
        Pheno.write(
            pcs_fn,
            SnpData(iid=covar['iid'],sid=covar['header'],val=covar['vals']),
        )

    mf_name = "lmp" #"lmpl" "local", "coreP", "nodeP", "socketP", "nodeE", "lmp"
    runner = mf_to_runner_function(mf_name)(20)  # created but not passed on (see below)

    message = "Working on h2={0},force_low_rank={1},force_full_rank={2}".format(
        h2,force_low_rank,force_full_rank)
    logging.info(message)

    result_file_name = "sel_plus_pc_{0}".format("h2IsHalf" if h2 == .5 else "h2Search")
    output_file_name = os.path.join(self.tempout_dir,result_file_name)+".txt"

    # Keyword arguments for single_snp_select, gathered in one place.
    select_args = {
        "test_snps": bed_fn,
        "G": bed_fn,
        "pheno": phen_fn,
        "k_list": [0,1,2,3,4,5,6,7,8,9,10,
                   20,30,40,50,60,70,80,90,100,
                   125,160,200,250,320,400,500,630,800,1000],
        "h2": h2,
        "n_folds": self.pythonpath + "/tests/datasets/synth/DebugEmitFolds.txt",
        "covar": covar,
        "output_file_name": output_file_name,
        "force_low_rank": force_low_rank,
        "force_full_rank": force_full_rank,
        "GB_goal": 2,
        "count_A1": False,
        #"runner": runner
    }
    results = single_snp_select(**select_args)

    logging.info(results.head())
    self.compare_files(results,result_file_name)
f.write(' '.join([str(pc) for pc in X_fit[iid_index, :]])) f.write('\n') result = {'iid':sp.array(snpreader.iid),'vals':X_fit} return result def _snp_fixup(snp_input): if isinstance(snp_input, str): return Bed(snp_input) else: return snp_input if __name__ == "__main__": import doctest doctest.testmod() import logging from pysnptools.snpreader import Bed from fastlmm.util import compute_auto_pcs logging.basicConfig(level=logging.INFO) #file_name = "../../tests/datasets/mouse/alldata" file_name = "../feature_selection/examples/toydata" #file_name = r"c:\deldir\N4000S50000c500h0.50s0.00p0.50F0.0050FH0.2000v0.30_3" result = compute_auto_pcs(file_name) #print result print "done"