def test_old(self):
    """Run the legacy two-kernel single-SNP pipeline on the synthetic data.

    SNPs on chromosome 5 are the ones tested; every other SNP forms the
    background kernel G0.  In-sample feature selection chooses which G0
    columns make up the second kernel G1, ``single_snp`` is run with both
    kernels, and the resulting frame is handed to ``self.compare_files``
    under the tag "old".
    """
    from fastlmm.feature_selection.feature_selection_two_kernel import FeatureSelectionInSample
    from pysnptools.util import intersect_apply

    # Manual switch: set to True to display the feature-selection curves.
    show_plot = False

    logging.info("TestSingleSnpAllPlusSelect test_old")

    # Locations of the synthetic inputs.
    bed_fn = self.pythonpath + "/tests/datasets/synth/all.bed"
    pheno_fn = self.pythonpath + "/tests/datasets/synth/pheno_10_causals.txt"
    cov_fn = self.pythonpath + "/tests/datasets/synth/cov.txt"

    # Open the three readers and restrict them to the shared sample ids.
    readers = [Bed(bed_fn, count_A1=False), Pheno(pheno_fn), Pheno(cov_fn)]
    snp_reader, pheno, cov = intersect_apply(readers)

    # Split the SNPs: chromosome 5 is tested, the rest is background.
    test_chr = 5
    on_test_chr = snp_reader.pos[:, 0] == test_chr
    G0 = snp_reader[:, ~on_test_chr].read(order='C').standardize()
    test_snps = snp_reader[:, on_test_chr].read(order='C').standardize()

    # First phenotype column, centred and scaled in place.
    y = pheno.read().val[:, 0]
    y -= y.mean()
    y /= y.std()

    # Covariates are made read-only before being passed on.
    X_cov = cov.read().val
    X_cov.flags.writeable = False

    # Feature selection decides which background SNPs build G1.
    logging.info(
        "running feature selection conditioned on background kernel")
    selector = FeatureSelectionInSample(
        max_log_k=7,
        n_folds=7,
        order_by_lmm=True,
        measure="ll",
        random_state=42,
    )
    background = G0.val
    selection = selector.run_select(background, background, y, cov=X_cov)
    best_k, feat_idx, best_mix, best_delta = selection

    if show_plot:
        selector.plot_results(measure="ll")

    # Report what the selection step chose.
    logging.info("best_k:{0}".format(best_k))
    logging.info("best_mix:{0}".format(best_mix))
    logging.info("best_delta:{0}".format(best_delta))
    logging.info(feat_idx)

    # Second kernel: only the selected columns of the background SNPs.
    G1 = G0[:, feat_idx]

    output_file_name = self.file_name("old")
    results_df = single_snp(
        test_snps,
        pheno,
        G0=G0,
        G1=G1,
        mixing=best_mix,
        h2=None,
        leave_out_one_chrom=False,
        output_file_name=output_file_name,
        count_A1=False,
    )

    logging.info("results:")
    logging.info("#" * 40)
    logging.info(results_df.head())
    self.compare_files(results_df, "old")
def test_old(self):
    """Exercise the legacy two-kernel single-SNP workflow on synthetic data.

    Steps:
      1. Load SNPs, phenotype and covariates and intersect their samples.
      2. Use chromosome 5 as the test SNPs and all other SNPs as the
         background kernel G0.
      3. Run in-sample feature selection to pick the G0 columns that
         form the second kernel G1.
      4. Run ``single_snp`` with both kernels and pass the result to
         ``self.compare_files`` under the tag "old".
    """
    do_plot = False  # set to True by hand to show the selection plot

    from fastlmm.feature_selection.feature_selection_two_kernel import FeatureSelectionInSample
    from pysnptools.util import intersect_apply

    logging.info("TestSingleSnpAllPlusSelect test_old")

    bed_fn = self.pythonpath + "/tests/datasets/synth/all.bed"
    pheno_fn = self.pythonpath + "/tests/datasets/synth/pheno_10_causals.txt"
    cov_fn = self.pythonpath + "/tests/datasets/synth/cov.txt"

    # Load data.  count_A1 is pinned explicitly instead of relying on the
    # library default.
    # NOTE(review): pysnptools documents that default as transitional and
    # warns when it is left unset -- confirm against the pinned version.
    snp_reader = Bed(bed_fn, count_A1=False)
    pheno = Pheno(pheno_fn)
    cov = Pheno(cov_fn)

    # Keep only the samples present in all three sources.
    snp_reader, pheno, cov = intersect_apply([snp_reader, pheno, cov])

    # Partition SNPs: chromosome 5 is tested, everything else is background.
    test_chr = 5
    G0 = snp_reader[:, snp_reader.pos[:, 0] != test_chr].read(order='C').standardize()
    test_snps = snp_reader[:, snp_reader.pos[:, 0] == test_chr].read(order='C').standardize()

    # First phenotype column, centred and scaled in place.
    y = pheno.read().val[:, 0]
    y -= y.mean()
    y /= y.std()

    # Covariates are marked read-only before being handed to the selector.
    X_cov = cov.read().val
    X_cov.flags.writeable = False

    # Feature selection learns which background SNPs to use to build G1.
    logging.info("running feature selection conditioned on background kernel")
    select = FeatureSelectionInSample(max_log_k=7, n_folds=7, order_by_lmm=True,
                                      measure="ll", random_state=42)
    best_k, feat_idx, best_mix, best_delta = select.run_select(G0.val, G0.val, y, cov=X_cov)

    if do_plot:
        select.plot_results(measure="ll")

    logging.info("best_k:{0}".format(best_k))
    logging.info("best_mix:{0}".format(best_mix))
    logging.info("best_delta:{0}".format(best_delta))

    # Build G1 from the selected columns of G0.
    logging.info(feat_idx)
    G1 = G0[:, feat_idx]

    output_file_name = self.file_name("old")
    # count_A1 is passed here as well so the allele-counting convention is
    # stated wherever the API accepts it.
    results_df = single_snp(test_snps, pheno, G0=G0, G1=G1, mixing=best_mix, h2=None,
                            leave_out_one_chrom=False,
                            output_file_name=output_file_name,
                            count_A1=False)

    logging.info("results:")
    logging.info("#" * 40)
    logging.info(results_df.head())
    self.compare_files(results_df, "old")