def precompute_pca(self):
        """
        Compute the principal components of the training SNPs and cache them.

        The work is skipped when ``self.eigen_fn`` already exists on disk,
        unless ``self.force_recompute`` is set. Otherwise the standardized
        SNP matrix is restricted to the "rest" columns returned by
        ``split_data_helper.split_chr1_chr2_rest``, projected with a default
        ``sklearn.decomposition.PCA`` and stored in ``self.eigen_fn`` under
        the key ``"pcs"``.
        """

        logging.info("computing PCA on train set")
        started = time.time()

        # guard clause: reuse the cached file unless a recompute is forced
        if os.path.isfile(self.eigen_fn) and not self.force_recompute:
            logging.info("pc file already exists: %s", self.eigen_fn)
            return

        snps = self.snp_reader.read(order='C').standardize().val
        # freeze the matrix so later steps cannot modify it in place
        snps.flags.writeable = False

        # only the third index set ("rest") is used for the PCA
        _, _, train_cols = split_data_helper.split_chr1_chr2_rest(
            self.snp_reader.pos)
        train_snps = snps.take(train_cols, axis=1)

        from sklearn.decomposition import PCA
        projected = PCA().fit_transform(train_snps)

        logging.info("saving eigendecomp to file %s", self.eigen_fn)
        save(self.eigen_fn, {"pcs": projected})

        elapsed = time.time() - started
        logging.info("time taken for pc computation: " + str(elapsed))
    def precompute_pca(self):
        """
        Build the PC cache file used by the benchmark, if it is needed.

        When ``self.eigen_fn`` is already a file and ``self.force_recompute``
        is false, only a log message is emitted. In every other case the
        SNPs are read, standardized, reduced to the "rest" column indices
        from ``split_data_helper.split_chr1_chr2_rest``, run through
        ``PCA().fit_transform`` and saved as ``{"pcs": ...}`` to
        ``self.eigen_fn``.
        """

        logging.info("computing PCA on train set")
        start_time = time.time()

        cache_usable = (os.path.isfile(self.eigen_fn)
                        and not self.force_recompute)
        if cache_usable:
            logging.info("pc file already exists: %s", self.eigen_fn)
            return

        standardized = self.snp_reader.read(order='C').standardize()
        genotypes = standardized.val
        # mark read-only to protect the array from accidental modification
        genotypes.flags.writeable = False

        # the first two index sets (chr1, chr2) are not needed here
        index_sets = split_data_helper.split_chr1_chr2_rest(
            self.snp_reader.pos)
        _chr1, _chr2, conditioning_idx = index_sets

        training = genotypes.take(conditioning_idx, axis=1)

        from sklearn.decomposition import PCA
        model = PCA()
        components = model.fit_transform(training)

        logging.info("saving eigendecomp to file %s", self.eigen_fn)

        payload = {"pcs": components}
        save(self.eigen_fn, payload)

        logging.info("time taken for pc computation: " +
                     str(time.time() - start_time))
def compute_core(input_tuple):
    """
    Run a single simulation of the leave-two-chromosome-out scheme.

    Leave-two-chromosome-out evaluation scheme:
    Chr1: no causals, used for T1-error evaluation
    Chr2: has causals, not conditioned on, used for power evaluation
    Rest: has causals, conditioned on

      T1   Pow  [     cond     ]
    ===== ===== ===== .... =====
            x x   x x      xx

    Parameters
    ----------
    input_tuple : tuple
        ``(methods, snp_fn, eigen_fn, num_causal, num_pcs, seed, sim_id)``:
        ``methods`` is an iterable of callables invoked as
        ``method(test_snps, pheno, G0, covar)`` and returning a pair of
        dicts; ``snp_fn`` is the bed file name; ``eigen_fn`` is the file
        holding the precomputed PCs under key ``"pcs"``; ``num_causal`` is
        the number of causal SNPs to draw; ``num_pcs`` is the number of
        leading PC columns used as covariates; ``seed`` seeds numpy's
        global RNG (``None`` leaves it untouched); ``sim_id`` is not used
        in this function but is returned as part of ``input_tuple``.

    Returns
    -------
    (result, indices) : tuple of dict
        ``result`` merges the first dict returned by every method;
        ``indices`` holds ``causal_idx``, ``chr1_idx``, ``chr2_idx``, the
        original ``input_tuple`` and the merged second dicts of the
        methods under ``fs_result``.
    """

    methods, snp_fn, eigen_fn, num_causal, num_pcs, seed, sim_id = input_tuple

    # partially load bed file
    from pysnptools.snpreader import Bed
    snp_reader = Bed(snp_fn)

    # determine indices for generation and evaluation
    ##################################################################
    chr1_idx, chr2_idx, rest_idx = split_data_helper.split_chr1_chr2_rest(
        snp_reader.pos)

    # causal SNPs are drawn from chr2 and the rest, never from chr1
    causal_candidates_idx = np.concatenate((chr2_idx, rest_idx))
    # only compute t1-error (condition on all chr with causals on them)
    #causal_candidates_idx = rest_idx
    test_idx = np.concatenate((chr1_idx, chr2_idx))

    if seed is not None:
        # np.random.seed only accepts integers in [0, 2**32 - 1]. The
        # previous modulus, sys.maxint, no longer exists in Python 3 and on
        # 64-bit Python 2 builds let out-of-range values through, which
        # raises ValueError. Seeds below 2**31 - 1 map to the same value
        # as before.
        np.random.seed(int(seed % (2 ** 32)))

    # shuffle the candidates and keep the first num_causal of them
    causal_idx = np.random.permutation(causal_candidates_idx)[0:num_causal]

    # generate phenotype
    ###################################################################
    genetic_var = 0.5
    noise_var = 0.5

    y = generate_phenotype(
        Bed(snp_fn).read(order='C').standardize(), causal_idx, genetic_var,
        noise_var)
    # freeze the phenotype so the methods cannot modify it in place
    y.flags.writeable = False

    ############### only alter part until here --> modularize this

    # load pcs
    ###################################################################
    logging.info("loading eigendecomp from file %s", eigen_fn)
    eig_dec = load(eigen_fn)
    G_pc = eig_dec["pcs"]
    G_pc.flags.writeable = False

    # keep the first num_pcs columns; standardize a copy because the
    # loaded array was just made read-only
    G_pc_ = G_pc[:, 0:num_pcs]
    G_pc_norm = DiagKtoN(G_pc_.shape[0]).standardize(G_pc_.copy())
    G_pc_norm.flags.writeable = False

    # run feature selection
    #########################################################

    # generate pheno data structure
    pheno = {"iid": snp_reader.iid, "vals": y, "header": []}
    covar = {"iid": snp_reader.iid, "vals": G_pc_norm, "header": []}

    # subset readers
    G0 = snp_reader[:, rest_idx]
    test_snps = snp_reader[:, test_idx]

    result = {}
    fs_result = {}

    # additional methods can be defined and included in the benchmark
    # NOTE: results are merged with dict.update, so methods that return
    # the same key overwrite each other (last one wins)
    for method_function in methods:
        result_, fs_result_ = method_function(test_snps, pheno, G0, covar)
        result.update(result_)
        fs_result.update(fs_result_)

    # save indices
    indices = {
        "causal_idx": causal_idx,
        "chr1_idx": chr1_idx,
        "chr2_idx": chr2_idx,
        "input_tuple": input_tuple,
        "fs_result": fs_result,
    }
    #test_idx

    return result, indices
def compute_core(input_tuple):
    """
    Run a single simulation of the leave-two-chromosome-out scheme.

    Leave-two-chromosome-out evaluation scheme:
    Chr1: no causals, used for T1-error evaluation
    Chr2: has causals, not conditioned on, used for power evaluation
    Rest: has causals, conditioned on

      T1   Pow  [     cond     ]
    ===== ===== ===== .... =====
            x x   x x      xx

    Parameters
    ----------
    input_tuple : tuple
        ``(methods, snp_fn, eigen_fn, num_causal, num_pcs, seed, sim_id)``:
        ``methods`` is an iterable of callables invoked as
        ``method(test_snps, pheno, G0, covar)`` and returning a pair of
        dicts; ``snp_fn`` is the bed file name; ``eigen_fn`` is the file
        holding the precomputed PCs under key ``"pcs"``; ``num_causal`` is
        the number of causal SNPs to draw; ``num_pcs`` is the number of
        leading PC columns used as covariates; ``seed`` seeds numpy's
        global RNG (``None`` leaves it untouched); ``sim_id`` is not used
        in this function but is returned as part of ``input_tuple``.

    Returns
    -------
    (result, indices) : tuple of dict
        ``result`` merges the first dict returned by every method;
        ``indices`` holds ``causal_idx``, ``chr1_idx``, ``chr2_idx``, the
        original ``input_tuple`` and the merged second dicts of the
        methods under ``fs_result``.
    """

    methods, snp_fn, eigen_fn, num_causal, num_pcs, seed, sim_id = input_tuple

    # partially load bed file
    from pysnptools.snpreader import Bed
    snp_reader = Bed(snp_fn)

    # determine indices for generation and evaluation
    ##################################################################
    chr1_idx, chr2_idx, rest_idx = split_data_helper.split_chr1_chr2_rest(
        snp_reader.pos)

    # causal SNPs are drawn from chr2 and the rest, never from chr1
    causal_candidates_idx = np.concatenate((chr2_idx, rest_idx))
    # only compute t1-error (condition on all chr with causals on them)
    #causal_candidates_idx = rest_idx
    test_idx = np.concatenate((chr1_idx, chr2_idx))

    if seed is not None:
        # np.random.seed only accepts integers in [0, 2**32 - 1]. The
        # previous modulus, sys.maxint, no longer exists in Python 3 and on
        # 64-bit Python 2 builds let out-of-range values through, which
        # raises ValueError. Seeds below 2**31 - 1 map to the same value
        # as before.
        max_seed_exclusive = 2 ** 32
        np.random.seed(int(seed % max_seed_exclusive))

    # shuffle the candidates and keep the first num_causal of them
    causal_idx = np.random.permutation(causal_candidates_idx)[0:num_causal]

    # generate phenotype
    ###################################################################
    genetic_var = 0.5
    noise_var = 0.5

    y = generate_phenotype(
        Bed(snp_fn).read(order='C').standardize(), causal_idx, genetic_var,
        noise_var)
    # freeze the phenotype so the methods cannot modify it in place
    y.flags.writeable = False

    ############### only alter part until here --> modularize this

    # load pcs
    ###################################################################
    logging.info("loading eigendecomp from file %s", eigen_fn)
    eig_dec = load(eigen_fn)
    G_pc = eig_dec["pcs"]
    G_pc.flags.writeable = False

    # keep the first num_pcs columns; standardize a copy because the
    # loaded array was just made read-only
    G_pc_ = G_pc[:, 0:num_pcs]
    G_pc_norm = DiagKtoN(G_pc_.shape[0]).standardize(G_pc_.copy())
    G_pc_norm.flags.writeable = False

    # run feature selection
    #########################################################

    # generate pheno data structure
    pheno = {"iid": snp_reader.iid, "vals": y, "header": []}
    covar = {"iid": snp_reader.iid, "vals": G_pc_norm, "header": []}

    # subset readers
    G0 = snp_reader[:, rest_idx]
    test_snps = snp_reader[:, test_idx]

    result = {}
    fs_result = {}

    # additional methods can be defined and included in the benchmark
    # NOTE: results are merged with dict.update, so methods that return
    # the same key overwrite each other (last one wins)
    for method_function in methods:
        result_, fs_result_ = method_function(test_snps, pheno, G0, covar)
        result.update(result_)
        fs_result.update(fs_result_)

    # save indices
    indices = {
        "causal_idx": causal_idx,
        "chr1_idx": chr1_idx,
        "chr2_idx": chr2_idx,
        "input_tuple": input_tuple,
        "fs_result": fs_result,
    }
    #test_idx

    return result, indices