def mapper_single_snp_2K_given_chrom(test_chr):
    """Run a two-kernel GWAS on one chromosome.

    K0 is built from this chromosome's kernel SNPs; K1 is the single best
    SNP previously selected for the chromosome (looked up via
    ``chrom_index_to_best_sid``). Returns the ``single_snp`` result frame.
    """
    logging.info("Working on chr={0}".format(test_chr))
    # Restrict the test SNPs to the current chromosome.
    snps_this_chrom = test_snps[:, test_snps.pos[:, 0] == test_chr]
    chrom_snpreader = _K_per_chrom(G, test_chr, G.iid).snpreader
    # Look up the best SNP already chosen for this chromosome and use it
    # as the (single-column) second kernel.
    sid_best = chrom_index_to_best_sid[chrom_list.index(test_chr)]
    second_kernel = chrom_snpreader[:, chrom_snpreader.sid_to_index(sid_best)]
    return single_snp(test_snps=snps_this_chrom,
                      K0=chrom_snpreader,
                      K1=second_kernel,
                      pheno=pheno,
                      covar=covar,
                      leave_out_one_chrom=False,
                      GB_goal=GB_goal,
                      force_full_rank=force_full_rank,
                      force_low_rank=force_low_rank,
                      mixing=mixing,
                      h2=h2)
def mapper_find_best_given_chrom(test_chr):
    """For one chromosome, pick the best number of top SNPs (k) for a second
    kernel via k-fold cross-validation, then return that chromosome's top-k
    SNP ids (taken from the all-data pass).

    NOTE(review): this chunk of the file contains a second, near-identical
    definition of this function below (it additionally passes ``count_A1``
    to ``single_snp`` and uses ``h2raw=`` instead of ``h2=`` in
    ``FastLMM.fit``). If both live in the same scope the later one wins --
    confirm which version is intended.
    """
    # Kernel SNPs for this chromosome (per the _K_per_chrom scheme).
    G_for_chrom = _K_per_chrom(G, test_chr, G.iid).snpreader

    def mapper_gather_lots(i_fold_and_pair):
        # One unit of work: a single CV fold, or the final "all" pass where
        # i_fold == n_folds (see _kfold(..., end_with_all=True)). Returns
        # (k_list_in, top_snps, k_index_to_nLL); each part may be None
        # depending on the fold's role -- the reducer relies on that.
        i_fold, (train_idx, test_idx) = i_fold_and_pair
        logging.info("Working on GWAS_1K and k search, chrom={0}, i_fold={1}".format(test_chr, i_fold))

        G_train = G_for_chrom[train_idx,:]

        # Precompute whole x whole kernel, standardized on the training rows.
        from fastlmm.association.single_snp import _internal_determine_block_size, _block_size_from_GB_goal
        min_count = _internal_determine_block_size(G_for_chrom, None, None, force_full_rank, force_low_rank)
        block_size = _block_size_from_GB_goal(GB_goal, G_for_chrom.iid_count, min_count)

        K_whole_unittrain = _SnpWholeWithTrain(whole=G_for_chrom,train_idx=train_idx, standardizer=Unit(), block_size=block_size).read()
        assert np.array_equal(K_whole_unittrain.iid,G_for_chrom.iid),"real assert"
        K_train = K_whole_unittrain[train_idx]

        # GWAS on the training rows to rank this chromosome's SNPs.
        single_snp_result = single_snp(test_snps=G_train, K0=K_train, pheno=pheno, #iid intersection means we can give the whole covariate and pheno
                                    covar=covar, leave_out_one_chrom=False,
                                    GB_goal=GB_goal, force_full_rank=force_full_rank, force_low_rank=force_low_rank, mixing=mixing, h2=h2)

        # With multiple folds the extra pass at i_fold == n_folds is the
        # all-data pass; with a single fold the one pass plays both roles.
        is_all = (i_fold == n_folds) if n_folds > 1 else True

        # Candidate k values: always include 0, plus any requested k that is
        # strictly between 0 and the number of ranked SNPs.
        k_list_in = [0] + [int(k) for k in k_list if 0 < k and k < len(single_snp_result)]

        if is_all:
            top_snps = list(single_snp_result.SNP[:max_k])
        else:
            top_snps = None
        if i_fold == n_folds:
            # The all-data pass contributes top SNPs only, no nLL scores.
            k_index_to_nLL = None
        else:
            # Score every candidate k on this fold's held-out samples.
            k_index_to_nLL = []
            for k in k_list_in:
                top_k = G_for_chrom[:,G_for_chrom.sid_to_index(single_snp_result.SNP[:k])]
                logging.info("Working on chr={0}, i_fold={1}, and K_{2}".format(test_chr,i_fold,k))

                # For k == 0 there is no second kernel.
                top_k_train = top_k[train_idx,:] if k > 0 else None
                fastlmm = FastLMM(force_full_rank=force_full_rank, force_low_rank=force_low_rank,GB_goal=GB_goal)
                fastlmm.fit(K0_train=K_train, K1_train=top_k_train, X=covar, y=pheno,mixing=mixing,h2=h2) #iid intersection means we can give the whole covariate and pheno

                top_k_test = top_k[test_idx,:] if k > 0 else None
                K0_whole_test = K_whole_unittrain[:,test_idx]
                nLL = fastlmm.score(K0_whole_test=K0_whole_test,K1_whole_test=top_k_test,X=covar,y=pheno) #iid intersection means we can give the whole covariate and pheno
                k_index_to_nLL.append(nLL)

        if i_fold > 0:
            # Only fold 0 reports the candidate k list, so the reducer
            # receives it exactly once.
            k_list_in = None
        return k_list_in, top_snps, k_index_to_nLL

    def reducer_find_best(top_snps_and_k_index_to_nLL_sequence):
        # Starts: fold_index+all -> k_index -> nll
        # Need:   k_index -> sum(fold_index -> nll)
        k_index_to_sum_nll = None
        top_snps_all = None
        k_list_in_all = None
        for i_fold, (k_list_in, top_snps, k_index_to_nLL) in enumerate(top_snps_and_k_index_to_nLL_sequence):
            if k_list_in is not None:
                # Exactly one mapper (fold 0) reports the candidate k list.
                assert k_list_in_all is None, "real assert"
                k_list_in_all = k_list_in
                k_index_to_sum_nll = np.zeros(len(k_list_in))
            if top_snps is not None:
                # Exactly one mapper (the all-data pass) reports top SNPs.
                assert top_snps_all is None, "real assert"
                top_snps_all = top_snps
            if k_index_to_nLL is not None:
                assert i_fold < n_folds or n_folds == 1, "real assert"
                for k_index, nLL in enumerate(k_index_to_nLL):
                    k_index_to_sum_nll[k_index] += nLL
        # Best k = smallest summed negative log-likelihood across folds.
        best_k = k_list_in_all[np.argmin(k_index_to_sum_nll)]
        logging.info("For chrom={0}, best_k={1}".format(test_chr,best_k))
        if do_plot: _nll_plot(k_list_in_all, k_index_to_sum_nll)
        # Return the top best_k SNPs from the all-data pass.
        result = top_snps_all[:best_k]
        return result

    i_fold_index_to_top_snps_and_k_index_to_nLL = map_reduce(
        _kfold(G_for_chrom.iid_count, n_folds, seed, end_with_all=True),
        mapper=mapper_gather_lots,
        reducer=reducer_find_best)
    return i_fold_index_to_top_snps_and_k_index_to_nLL
def mapper_find_best_given_chrom(test_chr):
    """For one chromosome, pick the best number of top SNPs (k) for a second
    kernel via k-fold cross-validation, then return that chromosome's top-k
    SNP ids (taken from the all-data pass).

    NOTE(review): this chunk of the file also contains an earlier,
    near-identical definition of this function above; this version
    additionally passes ``count_A1`` to ``single_snp`` and uses ``h2raw=``
    (instead of ``h2=``) in ``FastLMM.fit``. If both live in the same scope
    this later one wins -- confirm which version is intended.
    """
    # Kernel SNPs for this chromosome (per the _K_per_chrom scheme).
    G_for_chrom = _K_per_chrom(G, test_chr, G.iid).snpreader

    def mapper_gather_lots(i_fold_and_pair):
        # One unit of work: a single CV fold, or the final "all" pass where
        # i_fold == n_folds (see _kfold(..., end_with_all=True)). Returns
        # (k_list_in, top_snps, k_index_to_nLL); each part may be None
        # depending on the fold's role -- the reducer relies on that.
        i_fold, (train_idx, test_idx) = i_fold_and_pair
        logging.info(
            "Working on GWAS_1K and k search, chrom={0}, i_fold={1}".
            format(test_chr, i_fold))

        G_train = G_for_chrom[train_idx, :]

        # Precompute whole x whole kernel, standardized on the training rows.
        from fastlmm.association.single_snp import _internal_determine_block_size, _block_size_from_GB_goal
        min_count = _internal_determine_block_size(
            G_for_chrom, None, None, force_full_rank, force_low_rank)
        block_size = _block_size_from_GB_goal(GB_goal,
                                              G_for_chrom.iid_count,
                                              min_count)

        K_whole_unittrain = _SnpWholeWithTrain(
            whole=G_for_chrom,
            train_idx=train_idx,
            standardizer=Unit(),
            block_size=block_size).read()
        assert np.array_equal(K_whole_unittrain.iid,
                              G_for_chrom.iid), "real assert"
        K_train = K_whole_unittrain[train_idx]

        # GWAS on the training rows to rank this chromosome's SNPs.
        single_snp_result = single_snp(
            test_snps=G_train,
            K0=K_train,
            pheno=
            pheno,  #iid intersection means we can give the whole covariate and pheno
            covar=covar,
            leave_out_one_chrom=False,
            GB_goal=GB_goal,
            force_full_rank=force_full_rank,
            force_low_rank=force_low_rank,
            mixing=mixing,
            h2=h2,
            count_A1=count_A1)

        # With multiple folds the extra pass at i_fold == n_folds is the
        # all-data pass; with a single fold the one pass plays both roles.
        is_all = (i_fold == n_folds) if n_folds > 1 else True

        # Candidate k values: always include 0, plus any requested k that is
        # strictly between 0 and the number of ranked SNPs.
        k_list_in = [0] + [
            int(k) for k in k_list if 0 < k and k < len(single_snp_result)
        ]

        if is_all:
            top_snps = list(single_snp_result.SNP[:max_k])
        else:
            top_snps = None
        if i_fold == n_folds:
            # The all-data pass contributes top SNPs only, no nLL scores.
            k_index_to_nLL = None
        else:
            # Score every candidate k on this fold's held-out samples.
            k_index_to_nLL = []
            for k in k_list_in:
                top_k = G_for_chrom[:, G_for_chrom.sid_to_index(
                    single_snp_result.SNP[:k])]
                logging.info(
                    "Working on chr={0}, i_fold={1}, and K_{2}".format(
                        test_chr, i_fold, k))

                # For k == 0 there is no second kernel.
                top_k_train = top_k[train_idx, :] if k > 0 else None
                fastlmm = FastLMM(force_full_rank=force_full_rank,
                                  force_low_rank=force_low_rank,
                                  GB_goal=GB_goal)
                fastlmm.fit(
                    K0_train=K_train,
                    K1_train=top_k_train,
                    X=covar,
                    y=pheno,
                    mixing=mixing,
                    h2raw=h2
                )  #iid intersection means we can give the whole covariate and pheno
                top_k_test = top_k[test_idx, :] if k > 0 else None
                K0_whole_test = K_whole_unittrain[:, test_idx]
                nLL = fastlmm.score(
                    K0_whole_test=K0_whole_test,
                    K1_whole_test=top_k_test,
                    X=covar,
                    y=pheno
                )  #iid intersection means we can give the whole covariate and pheno
                k_index_to_nLL.append(nLL)

        if i_fold > 0:
            # Only fold 0 reports the candidate k list, so the reducer
            # receives it exactly once.
            k_list_in = None
        return k_list_in, top_snps, k_index_to_nLL

    def reducer_find_best(top_snps_and_k_index_to_nLL_sequence):
        # Starts: fold_index+all -> k_index -> nll
        # Need:   k_index -> sum(fold_index -> nll)
        k_index_to_sum_nll = None
        top_snps_all = None
        k_list_in_all = None
        for i_fold, (k_list_in, top_snps, k_index_to_nLL) in enumerate(
                top_snps_and_k_index_to_nLL_sequence):
            if k_list_in is not None:
                # Exactly one mapper (fold 0) reports the candidate k list.
                assert k_list_in_all is None, "real assert"
                k_list_in_all = k_list_in
                k_index_to_sum_nll = np.zeros(len(k_list_in))
            if top_snps is not None:
                # Exactly one mapper (the all-data pass) reports top SNPs.
                assert top_snps_all is None, "real assert"
                top_snps_all = top_snps
            if k_index_to_nLL is not None:
                assert i_fold < n_folds or n_folds == 1, "real assert"
                for k_index, nLL in enumerate(k_index_to_nLL):
                    k_index_to_sum_nll[k_index] += nLL
        # Best k = smallest summed negative log-likelihood across folds.
        best_k = k_list_in_all[np.argmin(k_index_to_sum_nll)]
        logging.info("For chrom={0}, best_k={1}".format(
            test_chr, best_k))
        if do_plot: _nll_plot(k_list_in_all, k_index_to_sum_nll)
        # Return the top best_k SNPs from the all-data pass.
        result = top_snps_all[:best_k]
        return result

    i_fold_index_to_top_snps_and_k_index_to_nLL = map_reduce(
        _kfold(G_for_chrom.iid_count, n_folds, seed, end_with_all=True),
        mapper=mapper_gather_lots,
        reducer=reducer_find_best)
    return i_fold_index_to_top_snps_and_k_index_to_nLL