def run_bootstrap_sim(grid, parallel=True, rerun=False):
    """Return the bootstrap simulation table backed by the cache.

    The results cached under ``hb_gaussian_bootstrap`` are loaded first.
    When there is nothing left to simulate and ``rerun`` is ``False`` the
    cached object is returned unchanged.  Otherwise ``sim_bootstrap`` is run
    for every pending setup, the new rows are merged with the cached ones via
    ``combine_simulations``, written back to the cache and returned.

    Args:
        grid: Experimental setups.  NOTE(review): currently unused because
            the pending list is hard-wired to be empty (see below).
        parallel: Run the setups through ``ProgressParallel`` (8 jobs) when
            true, otherwise in a plain loop with a ``tqdm`` progress bar.
        rerun: Only the exact value ``False`` enables the early return from
            the cache.

    Returns:
        The cached results, or the combined (new + cached) results.
    """
    cache_name = "hb_gaussian_bootstrap"
    cached = load_cache(cache_name)

    # NOTE(review): filtering the grid against the cache, i.e.
    # ``filter_complete_exps(grid, cached)``, is switched off here, so no
    # setup is ever pending and ``rerun=True`` simulates nothing.  Confirm
    # whether this is intentional before re-enabling it.
    pending = []

    if len(pending) == 0 and rerun is False:
        print(f"Loaded all sims from cache [{cache_name}]")
        return cached

    print(f"Simulating [{len(pending)}] Experimental Setups")

    def setup_args(p):
        # positional arguments expected by ``sim_bootstrap``
        return (p.eps, p.std, p.D, p.T, p.B, p.size)

    if parallel:
        runner = ProgressParallel(True, len(pending), n_jobs=8)
        job = delayed(sim_bootstrap)
        results = runner(job(*setup_args(p)) for p in pending)
    else:
        results = [sim_bootstrap(*setup_args(p)) for p in tqdm(pending)]

    table = combine_simulations(pd.DataFrame(results), cached)
    store_cache(table, cache_name)
    return table
def run_frame_v_mean_sim(grid, parallel=True, rerun=False):
    """Run the frame-vs-mean simulations that are not in the cache yet.

    The cache ``hb_gaussian_fvm`` is loaded, ``grid`` is passed through
    ``filter_grid_field(grid, 'B')`` and then reduced by
    ``filter_complete_exps`` against the cached results.  The remaining
    setups are simulated with ``sim_frame_v_mean``, merged with the cached
    rows through ``combine_simulations``, stored and returned.

    Args:
        grid: Experimental setups; each one must expose ``eps``, ``std``,
            ``D``, ``T`` and ``size``.
        parallel: Use ``ProgressParallel`` with 8 jobs when true, otherwise a
            sequential loop.
        rerun: Only the exact value ``False`` enables the early return when
            no setup is pending.

    Returns:
        The cached results when nothing is pending and ``rerun`` is
        ``False``; otherwise the combined (new + cached) results.
    """
    cache_name = "hb_gaussian_fvm"
    cached = load_cache(cache_name)

    # filter on field 'B' first, then keep what the cache does not cover
    pending = filter_complete_exps(filter_grid_field(grid, 'B'), cached)

    if len(pending) == 0 and rerun is False:
        return cached

    print(
        f"Simulating [{len(pending)}] Experimental Setups with Cache [{cache_name}]"
    )

    def setup_args(p):
        # positional arguments expected by ``sim_frame_v_mean``
        return (p.eps, p.std, p.D, p.T, p.size)

    if parallel:
        runner = ProgressParallel(True, len(pending), n_jobs=8)
        job = delayed(sim_frame_v_mean)
        results = runner(job(*setup_args(p)) for p in pending)
    else:
        results = [sim_frame_v_mean(*setup_args(p)) for p in pending]

    table = combine_simulations(pd.DataFrame(results), cached)
    store_cache(table, cache_name)
    return table
def get_standard_sims(pgrid, parallel=True):
    """Return the "standard" simulation table, computing it on a cache miss.

    If ``load_cache("standard")`` yields something other than ``None`` it is
    returned directly.  Otherwise one simulation is run per entry of
    ``pgrid`` and the collected rows are stored under ``"standard"`` as a
    ``pandas.DataFrame`` and returned.

    Args:
        pgrid: Parameter setups; each one must expose ``D``, ``mu2``,
            ``std``, ``T`` and ``size``.
        parallel: Use ``ProgressParallel`` with 8 jobs when true, otherwise a
            sequential loop.

    Returns:
        The cached object, or the freshly simulated ``DataFrame`` with the
        columns ``est_mean``, ``est_std``, ``mu2``, ``std``, ``D`` and ``T``.
    """
    cached = load_cache("standard")
    force_rerun = False  # developer switch: set to True to ignore the cache
    if cached is not None and not force_rerun:
        return cached

    def sim_standard_single(D, mu2, std, T, reps):
        """Simulate one setup and return its summary as a plain dict."""

        def sim_v1(D, mu2, std, T, reps):
            # Normal + gamma draws; the draw order (x, y, z) is kept fixed.
            shape = (T, reps)
            gaussian_std = math.sqrt(4 * mu2 * std**2) / D
            gamma_shape = D / 2
            gamma_scale = 2 * (std**2) / D
            x = npr.normal(loc=mu2, scale=gaussian_std, size=shape)
            y = npr.gamma(gamma_shape, scale=gamma_scale, size=shape)
            z = npr.gamma(gamma_shape, scale=gamma_scale, size=shape)
            # compare the per-repetition means taken over the first axis
            return np.mean(z, axis=0) < np.mean(y + x, axis=0)

        def sim_v2(D, mu2, std, T, reps):
            # Alternative based on squared normal draws (currently unused).
            D = int(D)
            shape = (D, T, reps)
            left = npr.normal(loc=0, scale=std, size=shape)**2
            left = np.mean(np.mean(left, axis=0), axis=0)
            right = npr.normal(loc=math.sqrt(mu2), scale=std, size=shape)**2
            right = np.mean(np.mean(right, axis=0), axis=0)
            return left < right

        # the simulation itself is run with twice the nominal std
        outcome = sim_v1(D, mu2, 2 * std, T, reps)

        record = edict()
        record.est_mean = np.mean(outcome)
        record.est_std = np.std(outcome)
        # -- include parameters --
        record.mu2 = mu2
        record.std = std
        record.D = D
        record.T = T
        return dict(record)

    if parallel:
        runner = ProgressParallel(True, len(pgrid), n_jobs=8)
        rows = runner(
            delayed(sim_standard_single)(p.D, p.mu2, p.std, p.T, p.size)
            for p in pgrid)
    else:
        rows = [
            sim_standard_single(p.D, p.mu2, p.std, p.T, p.size)
            for p in pgrid
        ]

    table = pd.DataFrame(rows)
    store_cache(table, "standard")
    return table
def get_proposed_sims(pgrid, parallel=True, rerun=False):
    """Return the "proposed" simulation table backed by the cache.

    The results cached under ``proposed`` are loaded first.  When no setup is
    pending and ``rerun`` is ``False`` the cached object is returned
    unchanged.  Otherwise every pending setup is simulated, the new rows are
    merged with the cached ones via ``combine_simulations``, written back to
    the cache and returned.

    Args:
        pgrid: Parameter setups; each one must expose ``D``, ``pmis``,
            ``ub``, ``std``, ``T``, ``B`` and ``size``.  NOTE(review):
            currently unused because the pending list is hard-wired to be
            empty (see below).
        parallel: Use ``ProgressParallel`` with 8 jobs when true, otherwise a
            sequential loop.
        rerun: Only the exact value ``False`` enables the early return from
            the cache.

    Returns:
        The cached results, or the combined (new + cached) results.
    """
    cache_name = "proposed"
    lsims = load_cache(cache_name)

    # Debug output of the cached ``T`` values; skipped when nothing is cached
    # so that a missing cache does not crash on ``None['T']``.
    if lsims is not None:
        print(lsims['T'].unique())

    # NOTE(review): filtering the grid against the cache, i.e.
    # ``filter_complete_exps(pgrid, lsims)``, is switched off here, so no
    # setup is ever pending and ``rerun=True`` simulates nothing.  Confirm
    # whether this is intentional before re-enabling it.
    f_pgrid = []

    if rerun is False and len(f_pgrid) == 0:
        return lsims
    print(f"Simulating [{len(f_pgrid)}] Experimental Setups")

    def sim_proposed_single(D, pmis, ub, std, T, B, size):
        """Simulate one setup and return its summary as a plain dict."""

        def numba_mat_count_uniques_bs(mat, n_uniques):
            # Fill ``n_uniques[b, s]`` with ``numba_unique`` applied to the
            # length-T slice ``mat[b, :, s]``; ``q`` is its 256-slot scratch
            # buffer.
            n_boot, _, n_size = mat.shape
            for b in range(n_boot):
                for s in range(n_size):
                    q = np.zeros(256, dtype=int)
                    n_uniques[b, s] = numba_unique(mat[b, :, s], q)

        def sim_gaussian2(D, mu2, std):
            # Element-wise sum of a normal and a gamma draw.
            gaussian_std = 2 * std * np.sqrt(mu2) / D
            x = npr.normal(loc=mu2, scale=gaussian_std)
            gamma_shape = D / 2
            gamma_scale = 2 * (std**2) / D
            y = npr.gamma(gamma_shape, scale=gamma_scale)
            return x + y

        def sim_v1(D, pmis, ub, std, T, B, size):
            # -- 1.) simulate misalignment --
            nmis = int(T * (pmis / 100.))
            if nmis == 0 and not np.isclose(pmis, 0):
                raise ValueError("At least one should be misaligned.")
            mis = npr.uniform(0, ub, (nmis, D, size))
            pix = np.zeros((T, D, size))
            pix[:nmis] = mis

            # -- setup refs --
            zerosB = np.zeros((B, size))
            pix_mean = np.mean(pix, axis=0)
            pix_smean = np.zeros((D, size))
            muB = np.zeros((B, size))
            n_uniques_B = np.zeros((B, size))

            #
            # -- 2.) simulate \hat{MSE}(\bar{X}) --
            #

            # -- (a) create subsets for each B and size --
            # ``np.int`` was only an alias of the builtin ``int`` and has
            # been removed from NumPy; the builtin gives the same dtype.
            subset_B = npr.choice(T, (B, T, size)).astype(int)

            # -- (b) count unique for each trial along dim T --
            numba_mat_count_uniques_bs(subset_B, n_uniques_B)

            # -- (c) use # of unique to compute std of sample "b" --
            std_B = std / n_uniques_B + std / T

            # -- (d) simulate "aligned" --
            samples_zero = np.mean(sim_gaussian2(D, zerosB, std_B), axis=0)

            # -- (e) simulate "misaligned" --
            # presumably fills ``muB`` in place (it is passed in zeroed and
            # read right after) -- confirm against ``numba_compute_muB``
            numba_compute_muB(muB, pix, pix_smean, pix_mean, subset_B)
            samples_pix = np.mean(sim_gaussian2(D, muB, std_B), axis=0)

            cond = samples_zero < samples_pix
            return cond, muB

        cond, mu2_samples = sim_v1(D, pmis, ub, std, T, B, size)

        sim = edict()
        sim.est_mean = np.mean(cond)
        sim.est_std = np.std(cond)
        sim.est_mu2_mean = np.mean(mu2_samples)
        sim.est_mu2_std = np.std(mu2_samples)

        # -- include parameters --
        sim.pmis = pmis
        sim.ub = ub
        sim.std = std
        sim.D = D
        sim.T = T
        sim.B = B
        return dict(sim)

    if parallel:
        pParallel = ProgressParallel(True, len(f_pgrid), n_jobs=8)
        delayed_fxn = delayed(sim_proposed_single)
        sims = pParallel(
            delayed_fxn(p.D, p.pmis, p.ub, p.std, p.T, p.B, p.size)
            for p in f_pgrid)
    else:
        sims = [
            sim_proposed_single(p.D, p.pmis, p.ub, p.std, p.T, p.B, p.size)
            for p in f_pgrid
        ]

    sims = pd.DataFrame(sims)
    sims = combine_simulations(sims, lsims)
    store_cache(sims, cache_name)
    return sims