def mapper_gather_lots(i_fold_and_pair):
                i_fold, (train_idx, test_idx) = i_fold_and_pair
                logging.info("Working on GWAS_1K and k search, chrom={0}, i_fold={1}".format(test_chr, i_fold))

                G_train = G_for_chrom[train_idx,:]

                #Precompute whole x whole standardized on train
                from fastlmm.association.single_snp import _internal_determine_block_size, _block_size_from_GB_goal
                min_count = _internal_determine_block_size(G_for_chrom, None, None, force_full_rank, force_low_rank)
                block_size = _block_size_from_GB_goal(GB_goal, G_for_chrom.iid_count, min_count)
                K_whole_unittrain = _SnpWholeWithTrain(whole=G_for_chrom,train_idx=train_idx, standardizer=Unit(), block_size=block_size).read()

                assert np.array_equal(K_whole_unittrain.iid,G_for_chrom.iid),"real assert"
                K_train = K_whole_unittrain[train_idx]
                    
                single_snp_result = single_snp(test_snps=G_train, K0=K_train, pheno=pheno, #iid intersection means when can give the whole covariate and pheno
                             covar=covar, leave_out_one_chrom=False,
                             GB_goal=GB_goal,  force_full_rank=force_full_rank, force_low_rank=force_low_rank, mixing=mixing, h2=h2)

                is_all = (i_fold == n_folds) if n_folds > 1 else True

                k_list_in =  [0] + [int(k) for k in k_list if 0 < k and k < len(single_snp_result)]

                if is_all:
                    top_snps = list(single_snp_result.SNP[:max_k])
                else:
                    top_snps = None

                if i_fold == n_folds:
                    k_index_to_nLL = None
                else:
                    k_index_to_nLL = []
                    for k in k_list_in:
                        top_k = G_for_chrom[:,G_for_chrom.sid_to_index(single_snp_result.SNP[:k])]
                        logging.info("Working on chr={0}, i_fold={1}, and K_{2}".format(test_chr,i_fold,k))

                        top_k_train = top_k[train_idx,:] if k > 0 else None
                        fastlmm = FastLMM(force_full_rank=force_full_rank, force_low_rank=force_low_rank,GB_goal=GB_goal)
                        fastlmm.fit(K0_train=K_train, K1_train=top_k_train, X=covar, y=pheno,mixing=mixing,h2=h2) #iid intersection means when can give the whole covariate and pheno
    
                        top_k_test = top_k[test_idx,:] if k > 0 else None
                        K0_whole_test = K_whole_unittrain[:,test_idx]
                        nLL = fastlmm.score(K0_whole_test=K0_whole_test,K1_whole_test=top_k_test,X=covar,y=pheno) #iid intersection means when can give the whole covariate and pheno
                        k_index_to_nLL.append(nLL)

                if i_fold > 0:
                    k_list_in = None
    
                return k_list_in, top_snps, k_index_to_nLL
Exemple #2
0
            def mapper_gather_lots(i_fold_and_pair):
                i_fold, (train_idx, test_idx) = i_fold_and_pair
                logging.info("Working on GWAS_1K and k search, chrom={0}, i_fold={1}".format(test_chr, i_fold))

                G_train = G_for_chrom[train_idx,:]

                #Precompute whole x whole standardized on train
                from fastlmm.association.single_snp import _internal_determine_block_size, _block_size_from_GB_goal
                min_count = _internal_determine_block_size(G_for_chrom, None, None, force_full_rank, force_low_rank)
                block_size = _block_size_from_GB_goal(GB_goal, G_for_chrom.iid_count, min_count)
                K_whole_unittrain = _SnpWholeWithTrain(whole=G_for_chrom,train_idx=train_idx, standardizer=Unit(), block_size=block_size).read()

                assert np.array_equal(K_whole_unittrain.iid,G_for_chrom.iid),"real assert"
                K_train = K_whole_unittrain[train_idx]
                    
                single_snp_result = single_snp(test_snps=G_train, K0=K_train, pheno=pheno, #iid intersection means when can give the whole covariate and pheno
                             covar=covar, leave_out_one_chrom=False,
                             GB_goal=GB_goal,  force_full_rank=force_full_rank, force_low_rank=force_low_rank, mixing=mixing, h2=h2)

                is_all = (i_fold == n_folds) if n_folds > 1 else True

                k_list_in =  [0] + [int(k) for k in k_list if 0 < k and k < len(single_snp_result)]

                if is_all:
                    top_snps = list(single_snp_result.SNP[:max_k])
                else:
                    top_snps = None

                if i_fold == n_folds:
                    k_index_to_nLL = None
                else:
                    k_index_to_nLL = []
                    for k in k_list_in:
                        top_k = G_for_chrom[:,G_for_chrom.sid_to_index(single_snp_result.SNP[:k])]
                        logging.info("Working on chr={0}, i_fold={1}, and K_{2}".format(test_chr,i_fold,k))

                        top_k_train = top_k[train_idx,:] if k > 0 else None
                        fastlmm = FastLMM(force_full_rank=force_full_rank, force_low_rank=force_low_rank,GB_goal=GB_goal)
                        fastlmm.fit(K0_train=K_train, K1_train=top_k_train, X=covar, y=pheno,mixing=mixing,h2raw=h2) #iid intersection means when can give the whole covariate and pheno
    
                        top_k_test = top_k[test_idx,:] if k > 0 else None
                        K0_whole_test = K_whole_unittrain[:,test_idx]
                        nLL = fastlmm.score(K0_whole_test=K0_whole_test,K1_whole_test=top_k_test,X=covar,y=pheno) #iid intersection means when can give the whole covariate and pheno
                        k_index_to_nLL.append(nLL)

                if i_fold > 0:
                    k_list_in = None
    
                return k_list_in, top_snps, k_index_to_nLL