예제 #1
0
    def generate_and_analyze(seed,
                             N,
                             do_shuffle,
                             just_testing=True,
                             map_function=None,
                             cache_folder=None):
        """Simulate SNPs, locations and a phenotype, then run the spatial analysis.

        Steps, in order (the order matters because the global NumPy RNG is
        re-seeded several times and every draw depends on the preceding seed):

        1. Generate SNP data for ``N`` individuals with ``snp_gen`` and build a
           standardized kernel from it.
        2. Place every individual around one of two centers (picked by the
           first field of its iid, "0" or "1") plus isotropic Gaussian jitter,
           and turn the coordinates into a standardized location kernel.
        3. Draw three phenotype components: one with the SNP kernel as
           covariance, one of independent standard-normal noise, and one with
           the location kernel as covariance.  When ``do_shuffle`` is true the
           location component is permuted across individuals.
        4. Sum the components and pass everything to
           ``heritability_spatial_correction``.

        Args:
            seed: Integer seed used for ``snp_gen`` and for the NumPy RNG
                (it is XOR-ed with constants for two of the draws, so it must
                support ``^``).
            N: Number of individuals to generate.
            do_shuffle: If true, permute the location-driven phenotype
                component across individuals.
            just_testing: Forwarded to ``heritability_spatial_correction``;
                when true only a single alpha value is analyzed instead of
                the 100-value log-spaced grid.
            map_function: Forwarded to ``heritability_spatial_correction``.
            cache_folder: Forwarded to ``heritability_spatial_correction``.

        Returns:
            Whatever ``heritability_spatial_correction`` returns (presumably a
            pandas DataFrame -- confirm against that function); it is also
            logged at INFO level.
        """
        # --- SNPs and the kernel derived from them -------------------------
        snps = snp_gen(fst=.1,
                       dfr=0,
                       iid_count=N,
                       sid_count=1000,
                       chr_count=10,
                       label_with_pop=True,
                       seed=seed)
        kernel_causal = snps.read_kernel(Unit()).standardize()

        # --- Geo-spatial coordinates and the location kernel ---------------
        distance_between_centers = 2500000
        jitter_sd = distance_between_centers / 4.
        center_by_label = {
            "0": (distance_between_centers * 0.5, distance_between_centers),
            "1": (distance_between_centers * 1.5, distance_between_centers),
        }

        spatial_iid = snps.iid
        # The first field of each iid selects the center
        # (presumably the population label, given label_with_pop=True).
        center_rows = []
        for iid_entry in spatial_iid:
            center_rows.append(center_by_label[iid_entry[0]])
        centers = np.array(center_rows)

        np.random.seed(seed)
        logging.info("Generating positions for seed {0}".format(seed))
        jitter = np.random.multivariate_normal([0, 0], [[1, 0], [0, 1]],
                                               size=len(centers))
        spatial_coor = SnpData(iid=snps.iid,
                               sid=["x", "y"],
                               val=centers + jitter * jitter_sd,
                               parent_string="'spatial_coor_gen_original'")

        alpha = distance_between_centers
        similarity = spatial_similarity(spatial_coor.val, alpha, power=2)
        kernel_loc = KernelData(iid=snps.iid, val=similarity).standardize()

        # --- Phenotype components -------------------------------------------
        iid = kernel_causal.iid
        iid_count = kernel_causal.iid_count
        zero_mean = np.zeros(iid_count)

        # Component whose covariance is the SNP kernel.
        np.random.seed(seed)
        causal_draw = np.random.multivariate_normal(zero_mean,
                                                    kernel_causal.val)
        pheno_causal = SnpData(iid=iid,
                               sid=["causal"],
                               val=causal_draw.reshape(-1, 1),
                               parent_string="causal")

        # Independent standard-normal noise (distinct seed stream).
        np.random.seed(seed ^ 998372)
        noise_draw = np.random.normal(size=iid_count)
        pheno_noise = SnpData(iid=iid,
                              sid=["noise"],
                              val=noise_draw.reshape(-1, 1),
                              parent_string="noise")

        # Component whose covariance is the location kernel
        # (another distinct seed stream).
        np.random.seed(seed ^ 12230302)
        loc_draw = np.random.multivariate_normal(np.zeros(iid_count),
                                                 kernel_loc.val)
        pheno_loc_original = SnpData(iid=iid,
                                     sid=["loc_original"],
                                     val=loc_draw.reshape(-1, 1),
                                     parent_string="loc_original")

        if not do_shuffle:
            pheno_loc = pheno_loc_original
        else:
            order = np.arange(iid_count)
            np.random.seed(seed)
            np.random.shuffle(order)
            # A view is enough here: the fancy-indexed assignment below
            # allocates fresh memory, so the original values stay untouched.
            pheno_loc = pheno_loc_original.read(view_ok=True)
            pheno_loc.val = pheno_loc.val[order, :]

        total_val = pheno_causal.val + pheno_noise.val
        total_val = total_val + pheno_loc.val
        pheno = SnpData(iid=iid, sid=["pheno_all"], val=total_val)

        # --- Analysis -------------------------------------------------------
        # 100 log-spaced alphas from 1e2 to 1e10, truncated to integers.
        alpha_grid = list(
            map(int, np.logspace(np.log10(100), np.log10(1e10), 100)))
        if just_testing:
            alphas_to_try = [alpha]
        else:
            alphas_to_try = alpha_grid

        result = heritability_spatial_correction(
            snps,
            spatial_coor.val,
            spatial_iid,
            alpha_list=alphas_to_try,
            pheno=pheno,
            alpha_power=2,
            jackknife_count=0,
            permute_plus_count=0,
            permute_times_count=0,
            just_testing=just_testing,
            map_function=map_function,
            cache_folder=cache_folder)

        logging.info(result)
        return result
예제 #2
0
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    do_plot = False
    from pysnptools.util import snp_gen
    from pysnptools.standardizer import Unit

    seed = 0
    N = 5000

    #Generate SNPs
    snpdata = snp_gen(fst=.1,
                      dfr=0,
                      iid_count=N,
                      sid_count=1000,
                      chr_count=10,
                      label_with_pop=True,
                      seed=seed)
    K_causal = snpdata.read_kernel(Unit()).standardize()

    if do_plot:
        pylab.suptitle("$K_{causal}$")
        pylab.imshow(K_causal.val, cmap=pylab.gray(), vmin=0, vmax=1)
        pylab.show()

    import numpy as np
    from pysnptools.snpreader import SnpData

    distance_between_centers = 2500000
    x0 = distance_between_centers * 0.5