def mapper_single_snp_2K_given_chrom(test_chr):
    """Run the two-kernel ``single_snp`` scan for a single chromosome.

    The SNPs under test are the columns of ``test_snps`` whose first ``pos``
    entry equals ``test_chr``.  ``K0`` is the SNP reader that ``_K_per_chrom``
    yields for this chromosome, and ``K1`` is the selection from that reader
    named by the entry of ``chrom_index_to_best_sid`` for this chromosome.

    All other inputs (``G``, ``test_snps``, ``pheno``, ``covar``, ...) are
    taken from the enclosing scope.

    :param test_chr: chromosome identifier; must be present in ``chrom_list``.
    :return: whatever ``single_snp`` returns for this chromosome.
    """
    logging.info("Working on chr={0}".format(test_chr))

    # Keep only the test SNPs located on this chromosome.
    on_this_chrom = test_snps.pos[:, 0] == test_chr
    snps_to_test = test_snps[:, on_this_chrom]

    background_snps = _K_per_chrom(G, test_chr, G.iid).snpreader

    # The pre-selected sid(s) are stored by position in chrom_list.
    selected_sid = chrom_index_to_best_sid[chrom_list.index(test_chr)]
    selected_columns = background_snps.sid_to_index(selected_sid)
    selected_snps = background_snps[:, selected_columns]

    return single_snp(
        test_snps=snps_to_test,
        K0=background_snps,
        K1=selected_snps,
        pheno=pheno,
        covar=covar,
        leave_out_one_chrom=False,
        GB_goal=GB_goal,
        force_full_rank=force_full_rank,
        force_low_rank=force_low_rank,
        mixing=mixing,
        h2=h2)
Ejemplo n.º 2
0
     def mapper_single_snp_2K_given_chrom(test_chr):
         """Two-kernel ``single_snp`` scan restricted to one chromosome.

         Tests the columns of ``test_snps`` whose first ``pos`` entry equals
         ``test_chr``.  ``K0`` is the SNP reader obtained from
         ``_K_per_chrom`` for this chromosome; ``K1`` is the part of that
         reader selected by this chromosome's entry in
         ``chrom_index_to_best_sid``.  Everything except ``test_chr`` comes
         from the enclosing scope.

         :param test_chr: chromosome identifier; must be present in ``chrom_list``.
         :return: the result of the ``single_snp`` call.
         """
         logging.info("Working on chr={0}".format(test_chr))

         # Restrict the SNPs under test to this chromosome.
         chrom_mask = test_snps.pos[:, 0] == test_chr
         chrom_test_snps = test_snps[:, chrom_mask]

         kernel_snps = _K_per_chrom(G, test_chr, G.iid).snpreader

         # chrom_index_to_best_sid is indexed by the position of the
         # chromosome inside chrom_list.
         position_in_list = chrom_list.index(test_chr)
         chosen_sid = chrom_index_to_best_sid[position_in_list]
         chosen_snps = kernel_snps[:, kernel_snps.sid_to_index(chosen_sid)]

         scan = single_snp(
             test_snps=chrom_test_snps,
             K0=kernel_snps,
             K1=chosen_snps,
             pheno=pheno,
             covar=covar,
             leave_out_one_chrom=False,
             GB_goal=GB_goal,
             force_full_rank=force_full_rank,
             force_low_rank=force_low_rank,
             mixing=mixing,
             h2=h2)
         return scan
        def mapper_find_best_given_chrom(test_chr):
            """Pick the top SNPs for one chromosome via a fold-wise search over k.

            Every entry yielded by ``_kfold`` is processed by
            ``mapper_gather_lots``; ``reducer_find_best`` then sums the
            per-fold scores for each candidate k, takes the candidate with the
            smallest sum and returns that many leading SNP ids from the ranking
            reported by the "all" entry.

            :param test_chr: chromosome identifier handed to ``_K_per_chrom``.
            :return: the value produced by ``map_reduce``, i.e. the reducer's
                list of selected SNP ids.
            """
            chrom_snps = _K_per_chrom(G, test_chr, G.iid).snpreader

            def mapper_gather_lots(i_fold_and_pair):
                """Rank SNPs on one fold and score each candidate k on it.

                :param i_fold_and_pair: ``(i_fold, (train_idx, test_idx))``.
                :return: ``(k_list_in, top_snps, k_index_to_nLL)``.
                    ``k_list_in`` is only reported for fold 0, ``top_snps``
                    only for the "all" entry, and ``k_index_to_nLL`` is
                    ``None`` when ``i_fold == n_folds``.
                """
                i_fold, (train_idx, test_idx) = i_fold_and_pair
                logging.info("Working on GWAS_1K and k search, chrom={0}, i_fold={1}".format(test_chr, i_fold))

                train_snps = chrom_snps[train_idx, :]

                # Kernel over every individual, standardized with the
                # training rows only.
                from fastlmm.association.single_snp import _internal_determine_block_size, _block_size_from_GB_goal
                min_count = _internal_determine_block_size(
                    chrom_snps, None, None, force_full_rank, force_low_rank)
                block_size = _block_size_from_GB_goal(
                    GB_goal, chrom_snps.iid_count, min_count)
                whole_kernel = _SnpWholeWithTrain(
                    whole=chrom_snps,
                    train_idx=train_idx,
                    standardizer=Unit(),
                    block_size=block_size).read()

                assert np.array_equal(whole_kernel.iid, chrom_snps.iid), "real assert"
                train_kernel = whole_kernel[train_idx]

                # The full pheno/covar can be passed as-is: the iids are
                # intersected with those of the training data.
                ranking = single_snp(
                    test_snps=train_snps,
                    K0=train_kernel,
                    pheno=pheno,
                    covar=covar,
                    leave_out_one_chrom=False,
                    GB_goal=GB_goal,
                    force_full_rank=force_full_rank,
                    force_low_rank=force_low_rank,
                    mixing=mixing,
                    h2=h2)

                is_all = n_folds <= 1 or i_fold == n_folds

                # Candidate values of k: always 0, plus every requested k
                # that is positive and smaller than the number of result rows.
                n_ranked = len(ranking)
                k_list_in = [0] + [int(k) for k in k_list if 0 < k < n_ranked]

                top_snps = list(ranking.SNP[:max_k]) if is_all else None

                def score_candidate(k):
                    """Fit with the k leading SNPs as second kernel and score the test rows."""
                    top_k = chrom_snps[:, chrom_snps.sid_to_index(ranking.SNP[:k])]
                    logging.info("Working on chr={0}, i_fold={1}, and K_{2}".format(test_chr, i_fold, k))

                    # k == 0 means "no second kernel".
                    if k > 0:
                        top_k_train = top_k[train_idx, :]
                    else:
                        top_k_train = None
                    model = FastLMM(force_full_rank=force_full_rank,
                                    force_low_rank=force_low_rank,
                                    GB_goal=GB_goal)
                    # As above, whole covar/pheno are fine thanks to the
                    # iid intersection.
                    model.fit(K0_train=train_kernel, K1_train=top_k_train,
                              X=covar, y=pheno, mixing=mixing, h2=h2)

                    if k > 0:
                        top_k_test = top_k[test_idx, :]
                    else:
                        top_k_test = None
                    whole_by_test = whole_kernel[:, test_idx]
                    return model.score(K0_whole_test=whole_by_test,
                                       K1_whole_test=top_k_test,
                                       X=covar, y=pheno)

                # No scores are produced when i_fold equals n_folds.
                if i_fold == n_folds:
                    k_index_to_nLL = None
                else:
                    k_index_to_nLL = [score_candidate(k) for k in k_list_in]

                # Only fold 0 reports the candidate list; the reducer asserts
                # that it receives it exactly once.
                reported_k_list = None if i_fold > 0 else k_list_in

                return reported_k_list, top_snps, k_index_to_nLL

            def reducer_find_best(top_snps_and_k_index_to_nLL_sequence):
                """Sum the per-fold scores for each k and return the best prefix of top SNPs.

                Input, per fold (including the "all" entry):
                ``(k_list_in, top_snps, k_index_to_nLL)``.  The scores are
                accumulated per k index across folds, the k with the smallest
                total is chosen, and the first ``best_k`` entries of the
                reported ``top_snps`` are returned.
                """
                nll_totals = None
                top_snps_all = None
                k_list_in_all = None

                for fold_number, fold_output in enumerate(top_snps_and_k_index_to_nLL_sequence):
                    fold_k_list, fold_top_snps, fold_nlls = fold_output

                    if fold_k_list is not None:
                        assert k_list_in_all is None, "real assert"
                        k_list_in_all = fold_k_list
                        nll_totals = np.zeros(len(fold_k_list))

                    if fold_top_snps is not None:
                        assert top_snps_all is None, "real assert"
                        top_snps_all = fold_top_snps

                    if fold_nlls is None:
                        continue
                    assert fold_number < n_folds or n_folds == 1, "real assert"
                    for position, nll in enumerate(fold_nlls):
                        nll_totals[position] += nll

                # The candidate with the smallest summed score wins.
                best_position = np.argmin(nll_totals)
                best_k = k_list_in_all[best_position]
                logging.info("For chrom={0}, best_k={1}".format(test_chr, best_k))
                if do_plot:
                    _nll_plot(k_list_in_all, nll_totals)

                # Leading best_k SNPs of the reported ranking.
                return top_snps_all[:best_k]

            folds = _kfold(chrom_snps.iid_count, n_folds, seed, end_with_all=True)
            return map_reduce(folds,
                              mapper=mapper_gather_lots,
                              reducer=reducer_find_best)
Ejemplo n.º 4
0
        def mapper_find_best_given_chrom(test_chr):
            """Search, for one chromosome, how many top SNPs to keep and return them.

            The entries yielded by ``_kfold`` are mapped through
            ``mapper_gather_lots`` and combined by ``reducer_find_best``: the
            per-fold scores of every candidate k are summed, the k with the
            smallest sum is selected, and that many leading SNP ids of the
            ranking reported by the "all" entry are returned.

            :param test_chr: chromosome identifier handed to ``_K_per_chrom``.
            :return: the value produced by ``map_reduce``, i.e. the reducer's
                list of selected SNP ids.
            """
            snps_for_chrom = _K_per_chrom(G, test_chr, G.iid).snpreader

            def mapper_gather_lots(i_fold_and_pair):
                """Rank SNPs on one fold's training rows and score each candidate k.

                :param i_fold_and_pair: ``(i_fold, (train_idx, test_idx))``.
                :return: ``(k_list_in, top_snps, k_index_to_nLL)``.
                    ``k_list_in`` is only reported for fold 0, ``top_snps``
                    only for the "all" entry, and ``k_index_to_nLL`` is
                    ``None`` when ``i_fold == n_folds``.
                """
                i_fold, (train_idx, test_idx) = i_fold_and_pair
                logging.info("Working on GWAS_1K and k search, chrom={0}, i_fold={1}".format(test_chr, i_fold))

                snps_train = snps_for_chrom[train_idx, :]

                # Whole-by-whole kernel, standardized on the training rows.
                from fastlmm.association.single_snp import _internal_determine_block_size, _block_size_from_GB_goal
                min_count = _internal_determine_block_size(
                    snps_for_chrom, None, None, force_full_rank, force_low_rank)
                block_size = _block_size_from_GB_goal(
                    GB_goal, snps_for_chrom.iid_count, min_count)
                kernel_reader = _SnpWholeWithTrain(
                    whole=snps_for_chrom,
                    train_idx=train_idx,
                    standardizer=Unit(),
                    block_size=block_size)
                kernel_whole = kernel_reader.read()

                assert np.array_equal(kernel_whole.iid, snps_for_chrom.iid), "real assert"
                kernel_train = kernel_whole[train_idx]

                # Because iids are intersected, the complete pheno and covar
                # can be supplied here.
                gwas_on_train = single_snp(
                    test_snps=snps_train,
                    K0=kernel_train,
                    pheno=pheno,
                    covar=covar,
                    leave_out_one_chrom=False,
                    GB_goal=GB_goal,
                    force_full_rank=force_full_rank,
                    force_low_rank=force_low_rank,
                    mixing=mixing,
                    h2=h2,
                    count_A1=count_A1)

                is_all = n_folds <= 1 or i_fold == n_folds

                # 0 is always a candidate; requested values are kept when
                # positive and below the number of result rows.
                result_count = len(gwas_on_train)
                k_list_in = [0] + [int(k) for k in k_list if 0 < k < result_count]

                top_snps = list(gwas_on_train.SNP[:max_k]) if is_all else None

                def nll_for(k):
                    """Fit with the k leading SNPs as second kernel, then score on the test rows."""
                    leading_sids = gwas_on_train.SNP[:k]
                    top_k = snps_for_chrom[:, snps_for_chrom.sid_to_index(leading_sids)]
                    logging.info("Working on chr={0}, i_fold={1}, and K_{2}".format(test_chr, i_fold, k))

                    # With k == 0 no second kernel is used.
                    if k > 0:
                        top_k_train = top_k[train_idx, :]
                    else:
                        top_k_train = None
                    predictor = FastLMM(force_full_rank=force_full_rank,
                                        force_low_rank=force_low_rank,
                                        GB_goal=GB_goal)
                    # Complete covar/pheno are acceptable (iid intersection).
                    predictor.fit(K0_train=kernel_train,
                                  K1_train=top_k_train,
                                  X=covar,
                                  y=pheno,
                                  mixing=mixing,
                                  h2raw=h2)

                    if k > 0:
                        top_k_test = top_k[test_idx, :]
                    else:
                        top_k_test = None
                    kernel_whole_by_test = kernel_whole[:, test_idx]
                    return predictor.score(K0_whole_test=kernel_whole_by_test,
                                           K1_whole_test=top_k_test,
                                           X=covar,
                                           y=pheno)

                # No scores are produced when i_fold equals n_folds.
                if i_fold == n_folds:
                    k_index_to_nLL = None
                else:
                    k_index_to_nLL = [nll_for(k) for k in k_list_in]

                # The candidate list is reported by fold 0 only; the reducer
                # asserts that it sees it exactly once.
                k_list_to_report = None if i_fold > 0 else k_list_in

                return k_list_to_report, top_snps, k_index_to_nLL

            def reducer_find_best(top_snps_and_k_index_to_nLL_sequence):
                """Accumulate scores per k across folds and return the winning top SNPs.

                Each input item is ``(k_list_in, top_snps, k_index_to_nLL)``
                for one fold (or the "all" entry).  Scores are summed per k
                index; the k with the smallest sum determines how many leading
                entries of the reported ``top_snps`` are returned.
                """
                summed_nll = None
                top_snps_all = None
                k_list_in_all = None

                for fold_number, item in enumerate(top_snps_and_k_index_to_nLL_sequence):
                    item_k_list, item_top_snps, item_nlls = item

                    if item_k_list is not None:
                        assert k_list_in_all is None, "real assert"
                        k_list_in_all = item_k_list
                        summed_nll = np.zeros(len(item_k_list))

                    if item_top_snps is not None:
                        assert top_snps_all is None, "real assert"
                        top_snps_all = item_top_snps

                    if item_nlls is None:
                        continue
                    assert fold_number < n_folds or n_folds == 1, "real assert"
                    for slot, nll in enumerate(item_nlls):
                        summed_nll[slot] += nll

                # Choose the number of top SNPs with the smallest summed score.
                winner = np.argmin(summed_nll)
                best_k = k_list_in_all[winner]
                logging.info("For chrom={0}, best_k={1}".format(test_chr, best_k))
                if do_plot:
                    _nll_plot(k_list_in_all, summed_nll)

                # Take that many SNPs from the reported ranking.
                return top_snps_all[:best_k]

            fold_sequence = _kfold(snps_for_chrom.iid_count, n_folds, seed,
                                   end_with_all=True)
            return map_reduce(fold_sequence,
                              mapper=mapper_gather_lots,
                              reducer=reducer_find_best)