예제 #1
0
    def test_old_sel_plus_pc(self): #!!! rather a big test case
        """Run the legacy fastlmmc LMMSELECT path with auto-selected PCs as covariates.

        Steps, as performed below:
          1. ``compute_auto_pcs`` is run on the synth dataset and writes its
             output to ``pcs_cov.txt`` (a relative path, i.e. the current
             working directory).
          2. ``runLMMSELECT`` is invoked twice, once with ``optLogdelta=0``
             ("h2IsHalf") and once with ``optLogdelta=None`` ("h2Search").
          3. Each ``*.LMMSELECT.out.txt`` result in ``self.tempout_dir`` is
             loaded with pandas and handed to ``self.compare_files``.
        """
        logging.info("TestSingleSnpSelect old_sel_plus_pc")

        from pysnptools.snpreader import Bed
        from fastlmm.util import compute_auto_pcs

        # define file names
        bed_fn = self.pythonpath + "/tests/datasets/synth/all"
        cov_fn = "pcs_cov.txt"

        # run PCgeno
        #TODO: rename to auto_pcs
        result = compute_auto_pcs(bed_fn, output_file_name=cov_fn)
        logging.info("selected number of PCs: {0}".format(result["vals"].shape[1]))

        # import algorithms
        from fastlmm.util.run_fastlmmc import runFASTLMM, runLMMSELECT

        # set some file paths for fastlmmc
        phen_fn = self.pythonpath + "/tests/datasets/synth/pheno_10_causals.txt"
        out_dir = self.tempout_dir
        fastlmm_path = self.pythonpath + "/external/fastlmmc"

        # consists of two fastlmmc calls, one that does feature selection and one that runs GWAS
        for suffix, logdelta in [("h2IsHalf", 0), ("h2Search", None)]:
            # The suffix is taken straight from the loop tuple instead of being
            # re-derived with an `is 0` identity test on an int literal.
            result_file_name = "sel_plus_pc_old_{0}".format(suffix)
            runLMMSELECT(bed_fn, phen_fn, out_dir, result_file_name,
                         bfileSim=bed_fn,
                         covar=cov_fn,
                         fastlmm_path=fastlmm_path,
                         autoSelectCriterionMSE=False,
                         excludeByGeneticDistance=1000,
                         optLogdelta=logdelta)
            # compare sel_plus_pc_old_h2*.LMMSELECT.out.txt
            short = result_file_name + ".LMMSELECT.out"
            # Raw string: '\s' is a regex, and a non-raw '\s' is an invalid
            # string escape on newer Python versions.
            results = pd.read_csv(self.tempout_dir + "/" + short + ".txt",
                                  delimiter=r'\s', comment=None, engine='python')
            results['PValue'] = results.Pvalue #add a new column with different capitalization
            self.compare_files(results, short)
예제 #2
0
    def test_old_sel_plus_pc(self): #!!! rather a big test case
        """Run the legacy fastlmmc LMMSELECT path with auto-selected PCs as covariates.

        Steps, as performed below:
          1. ``compute_auto_pcs`` is run on the synth dataset and writes its
             output to ``pcs_cov.txt`` (a relative path, i.e. the current
             working directory).
          2. ``runLMMSELECT`` is invoked twice, once with ``optLogdelta=0``
             ("h2IsHalf") and once with ``optLogdelta=None`` ("h2Search").
          3. Each ``*.LMMSELECT.out.txt`` result in ``self.tempout_dir`` is
             loaded with pandas and handed to ``self.compare_files``.
        """
        logging.info("TestSingleSnpSelect old_sel_plus_pc")

        from pysnptools.snpreader import Bed
        from fastlmm.util import compute_auto_pcs

        # define file names
        bed_fn = self.pythonpath + "/tests/datasets/synth/all"
        cov_fn = "pcs_cov.txt"

        # run PCgeno
        #TODO: rename to auto_pcs
        result = compute_auto_pcs(bed_fn, output_file_name=cov_fn)
        logging.info("selected number of PCs: {0}".format(result["vals"].shape[1]))

        # import algorithms
        from fastlmm.util.run_fastlmmc import runFASTLMM, runLMMSELECT

        # set some file paths for fastlmmc
        phen_fn = self.pythonpath + "/tests/datasets/synth/pheno_10_causals.txt"
        out_dir = self.tempout_dir
        fastlmm_path = self.pythonpath + "/external/fastlmmc"

        # consists of two fastlmmc calls, one that does feature selection and one that runs GWAS
        for suffix, logdelta in [("h2IsHalf", 0), ("h2Search", None)]:
            # The suffix is taken straight from the loop tuple instead of being
            # re-derived with an `is 0` identity test on an int literal.
            result_file_name = "sel_plus_pc_old_{0}".format(suffix)
            runLMMSELECT(bed_fn, phen_fn, out_dir, result_file_name,
                         bfileSim=bed_fn,
                         covar=cov_fn,
                         fastlmm_path=fastlmm_path,
                         autoSelectCriterionMSE=False,
                         excludeByGeneticDistance=1000,
                         optLogdelta=logdelta)
            # compare sel_plus_pc_old_h2*.LMMSELECT.out.txt
            short = result_file_name + ".LMMSELECT.out"
            # Raw string: '\s' is a regex, and a non-raw '\s' is an invalid
            # string escape on newer Python versions.
            results = pd.read_csv(self.tempout_dir + "/" + short + ".txt",
                                  delimiter=r'\s', comment=None, engine='python')
            results['PValue'] = results.Pvalue #add a new column with different capitalization
            self.compare_files(results, short)
예제 #3
0
    def _sel_plus_pc(self, h2, force_low_rank, force_full_rank, count_A1=None):
        """Run ``single_snp_select`` on the synth dataset with auto-selected PCs as covariates.

        The covariates come from ``compute_auto_pcs`` and are also written to
        ``sel_plus_pc.pcs.txt`` in ``self.tempout_dir``. When the local
        ``use_cache`` switch is on and that file already exists, it is loaded
        with ``Pheno`` instead. The resulting frame is logged and passed to
        ``self.compare_files``.

        :param h2: forwarded to ``single_snp_select``; ``.5`` selects the
            "h2IsHalf" result name, any other value "h2Search".
        :param force_low_rank: forwarded to ``single_snp_select``.
        :param force_full_rank: forwarded to ``single_snp_select``.
        :param count_A1: forwarded to ``compute_auto_pcs`` only; the
            ``single_snp_select`` call below always passes ``count_A1=False``.
        """
        do_plot = False
        use_cache = False

        # input and intermediate files
        bed_fn = self.pythonpath + "/tests/datasets/synth/all.bed"
        phen_fn = self.pythonpath + "/tests/datasets/synth/pheno_10_causals.txt"
        pcs_fn = os.path.join(self.tempout_dir, "sel_plus_pc.pcs.txt")

        if use_cache and os.path.exists(pcs_fn):
            logging.info("Using top pcs's cache")
            covar = Pheno(pcs_fn)
        else:
            from fastlmm.util import compute_auto_pcs
            covar = compute_auto_pcs(bed_fn, count_A1=count_A1)
            pc_count = covar["vals"].shape[1]
            logging.info("selected number of PCs: {0}".format(pc_count))
            pcs_data = SnpData(iid=covar['iid'],
                               sid=covar['header'],
                               val=covar['vals'])
            Pheno.write(pcs_fn, pcs_data)

        # The runner is constructed but not handed to single_snp_select below.
        mf_name = "lmp"  #"lmpl" "local", "coreP", "nodeP", "socketP", "nodeE", "lmp"
        runner_factory = mf_to_runner_function(mf_name)
        runner = runner_factory(20)

        logging.info(
            "Working on h2={0},force_low_rank={1},force_full_rank={2}".format(
                h2, force_low_rank, force_full_rank))

        if h2 == .5:
            h2_tag = "h2IsHalf"
        else:
            h2_tag = "h2Search"
        result_file_name = "sel_plus_pc_{0}".format(h2_tag)
        output_file_name = os.path.join(self.tempout_dir,
                                        result_file_name) + ".txt"

        candidate_ks = [
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
            20, 30, 40, 50, 60, 70, 80, 90, 100,
            125, 160, 200, 250, 320, 400, 500, 630, 800, 1000,
        ]
        folds_fn = self.pythonpath + "/tests/datasets/synth/DebugEmitFolds.txt"

        results = single_snp_select(
            test_snps=bed_fn,
            G=bed_fn,
            pheno=phen_fn,
            k_list=candidate_ks,
            h2=h2,
            n_folds=folds_fn,
            covar=covar,
            output_file_name=output_file_name,
            force_low_rank=force_low_rank,
            force_full_rank=force_full_rank,
            GB_goal=2,
            count_A1=False,
        )
        logging.info(results.head())
        self.compare_files(results, result_file_name)
    def _sel_plus_pc(self,h2,force_low_rank,force_full_rank,count_A1=None):
        """Run ``single_snp_select`` on the synth dataset with auto-selected PCs as covariates.

        The covariates come from ``compute_auto_pcs`` and are also written to
        ``sel_plus_pc.pcs.txt`` in ``self.tempout_dir``. When the local
        ``use_cache`` switch is on and that file already exists, it is loaded
        with ``Pheno`` instead. The resulting frame is logged and passed to
        ``self.compare_files``.

        :param h2: forwarded to ``single_snp_select``; ``.5`` selects the
            "h2IsHalf" result name, any other value "h2Search".
        :param force_low_rank: forwarded to ``single_snp_select``.
        :param force_full_rank: forwarded to ``single_snp_select``.
        :param count_A1: forwarded to ``compute_auto_pcs`` only; the
            ``single_snp_select`` call below always passes ``count_A1=False``.
        """
        do_plot = False
        use_cache = False

        # input and intermediate files
        bed_fn = self.pythonpath + "/tests/datasets/synth/all.bed"
        phen_fn = self.pythonpath + "/tests/datasets/synth/pheno_10_causals.txt"
        pcs_fn = os.path.join(self.tempout_dir,"sel_plus_pc.pcs.txt")

        cache_hit = use_cache and os.path.exists(pcs_fn)
        if cache_hit:
            logging.info("Using top pcs's cache")
            covar = Pheno(pcs_fn)
        else:
            from fastlmm.util import compute_auto_pcs
            covar = compute_auto_pcs(bed_fn,count_A1=count_A1)
            n_pcs = covar["vals"].shape[1]
            logging.info("selected number of PCs: {0}".format(n_pcs))
            pcs_as_snpdata = SnpData(iid=covar['iid'],sid=covar['header'],val=covar['vals'])
            Pheno.write(pcs_fn,pcs_as_snpdata)

        # The runner is constructed but not handed to single_snp_select below.
        mf_name = "lmp" #"lmpl" "local", "coreP", "nodeP", "socketP", "nodeE", "lmp"
        make_runner = mf_to_runner_function(mf_name)
        runner = make_runner(20)

        progress_msg = "Working on h2={0},force_low_rank={1},force_full_rank={2}".format(h2,force_low_rank,force_full_rank)
        logging.info(progress_msg)

        if h2 == .5:
            mode_name = "h2IsHalf"
        else:
            mode_name = "h2Search"
        result_file_name = "sel_plus_pc_{0}".format(mode_name)
        output_file_name = os.path.join(self.tempout_dir,result_file_name)+".txt"

        ks = [0,1,2,3,4,5,6,7,8,9,10,20,30,40,50,60,70,80,90,100,125,160,200,250,320,400,500,630,800,1000]
        folds_file = self.pythonpath + "/tests/datasets/synth/DebugEmitFolds.txt"
        select_kwargs = dict(test_snps=bed_fn, G=bed_fn, pheno=phen_fn,
                             k_list=ks,
                             h2=h2,
                             n_folds=folds_file,
                             covar=covar,
                             output_file_name=output_file_name,
                             force_low_rank=force_low_rank,force_full_rank=force_full_rank,
                             GB_goal=2,
                             count_A1=False)
        results = single_snp_select(**select_kwargs)
        logging.info(results.head())
        self.compare_files(results,result_file_name)
예제 #5
0
                f.write(' '.join([str(pc) for pc in X_fit[iid_index, :]]))
                f.write('\n')

    result = {'iid':sp.array(snpreader.iid),'vals':X_fit}
    return result

def _snp_fixup(snp_input):
    if isinstance(snp_input, str):
        return Bed(snp_input)
    else:
        return snp_input


if __name__ == "__main__":
    # Script entry point: run this module's doctests, then call
    # compute_auto_pcs once on a sample dataset.
    import doctest
    doctest.testmod()

    import logging
    from pysnptools.snpreader import Bed
    from fastlmm.util import compute_auto_pcs
    logging.basicConfig(level=logging.INFO)
    # Alternative inputs used during development:
    #file_name = "../../tests/datasets/mouse/alldata"
    #file_name = r"c:\deldir\N4000S50000c500h0.50s0.00p0.50F0.0050FH0.2000v0.30_3"
    file_name = "../feature_selection/examples/toydata"
    result = compute_auto_pcs(file_name)

    # Call form of print: same output on Python 2, and valid syntax on Python 3.
    print("done")