def generate_and_analyze(seed, N, do_shuffle, just_testing=True, map_function=None, cache_folder=None):
    """Simulate SNPs, positions and a three-part phenotype, then analyze them.

    The phenotype is the sum of three columns drawn in this function:
    a draw whose covariance is the SNP kernel, a standard-normal noise
    draw, and a draw whose covariance is the spatial kernel.  The result
    is handed to ``heritability_spatial_correction``.

    Args:
        seed: Passed to ``snp_gen`` and used (directly or XOR-ed with a
            constant) to reseed NumPy's global random state before each draw.
        N: Number of individuals requested from ``snp_gen`` (``iid_count``).
        do_shuffle: When true, the rows of the spatial phenotype column are
            permuted before being added to the other components.
        just_testing: When true, only the single generating alpha is passed
            as ``alpha_list``; otherwise a 100-point log-spaced grid is used.
            Also forwarded unchanged to ``heritability_spatial_correction``.
        map_function: Forwarded unchanged to ``heritability_spatial_correction``.
        cache_folder: Forwarded unchanged to ``heritability_spatial_correction``.

    Returns:
        Whatever ``heritability_spatial_correction`` returns (logged at INFO
        level before being returned).

    Note:
        This function reseeds ``np.random``'s global state several times.
    """
    # --- SNPs and the kernel derived from them ---------------------------
    snpdata = snp_gen(
        fst=.1,
        dfr=0,
        iid_count=N,
        sid_count=1000,
        chr_count=10,
        label_with_pop=True,
        seed=seed,
    )
    kernel_causal = snpdata.read_kernel(Unit()).standardize()

    # --- Positions and the kernel derived from them ----------------------
    center_gap = 2500000
    spread = center_gap / 4.
    spatial_iid = snpdata.iid
    # Two centers at the same y, one center_gap apart in x.  The lookup key
    # is the first field of each iid row -- presumably the population label
    # written by snp_gen(label_with_pop=True); confirm against snp_gen.
    center_by_label = {
        "0": (center_gap * 0.5, center_gap),
        "1": (center_gap * 1.5, center_gap),
    }
    centers = np.array([center_by_label[row[0]] for row in spatial_iid])

    np.random.seed(seed)
    logging.info("Generating positions for seed {0}".format(seed))
    jitter = np.random.multivariate_normal(
        [0, 0], [[1, 0], [0, 1]], size=len(centers))
    spatial_coor = SnpData(
        iid=snpdata.iid,
        sid=["x", "y"],
        val=centers + jitter * spread,
        parent_string="'spatial_coor_gen_original'",
    )

    alpha = center_gap
    similarity = spatial_similarity(spatial_coor.val, alpha, power=2)
    kernel_loc = KernelData(iid=snpdata.iid, val=similarity).standardize()

    # --- Phenotype components --------------------------------------------
    iid = kernel_causal.iid
    iid_count = kernel_causal.iid_count
    zero_mean = np.zeros(iid_count)

    # Each draw is preceded by its own reseed, as in the original.
    np.random.seed(seed)
    causal_column = np.random.multivariate_normal(
        zero_mean, kernel_causal.val).reshape(-1, 1)
    pheno_causal = SnpData(
        iid=iid, sid=["causal"], val=causal_column, parent_string="causal")

    np.random.seed(seed ^ 998372)
    noise_column = np.random.normal(size=iid_count).reshape(-1, 1)
    pheno_noise = SnpData(
        iid=iid, sid=["noise"], val=noise_column, parent_string="noise")

    np.random.seed(seed ^ 12230302)
    loc_column = np.random.multivariate_normal(
        np.zeros(iid_count), kernel_loc.val).reshape(-1, 1)
    pheno_loc_original = SnpData(
        iid=iid,
        sid=["loc_original"],
        val=loc_column,
        parent_string="loc_original",
    )

    pheno_loc = pheno_loc_original
    if do_shuffle:
        order = np.arange(iid_count)
        np.random.seed(seed)
        np.random.shuffle(order)
        # A view is enough here: the fancy-indexing below allocates a new
        # array, so the original values are never modified in place.
        pheno_loc = pheno_loc_original.read(view_ok=True)
        pheno_loc.val = pheno_loc.val[order, :]

    combined = pheno_causal.val + pheno_noise.val + pheno_loc.val
    pheno = SnpData(iid=iid, sid=["pheno_all"], val=combined)

    # --- Analysis ---------------------------------------------------------
    alpha_grid = [
        int(value)
        for value in np.logspace(np.log10(100), np.log10(1e10), 100)
    ]
    if just_testing:
        alphas_to_try = [alpha]
    else:
        alphas_to_try = alpha_grid

    dataframe = heritability_spatial_correction(
        snpdata,
        spatial_coor.val,
        spatial_iid,
        alpha_list=alphas_to_try,
        pheno=pheno,
        alpha_power=2,
        jackknife_count=0,
        permute_plus_count=0,
        permute_times_count=0,
        just_testing=just_testing,
        map_function=map_function,
        cache_folder=cache_folder,
    )

    logging.info(dataframe)
    return dataframe
if __name__ == "__main__": logging.basicConfig(level=logging.INFO) do_plot = False from pysnptools.util import snp_gen from pysnptools.standardizer import Unit seed = 0 N = 5000 #Generate SNPs snpdata = snp_gen(fst=.1, dfr=0, iid_count=N, sid_count=1000, chr_count=10, label_with_pop=True, seed=seed) K_causal = snpdata.read_kernel(Unit()).standardize() if do_plot: pylab.suptitle("$K_{causal}$") pylab.imshow(K_causal.val, cmap=pylab.gray(), vmin=0, vmax=1) pylab.show() import numpy as np from pysnptools.snpreader import SnpData distance_between_centers = 2500000 x0 = distance_between_centers * 0.5