def marginal(accepted, C_limits, num_bin_kde, num_bin_raw, folder, mirror=False):
    """Compute and store raw and KDE-smoothed posterior estimates.

    :param accepted: accepted parameter samples
    :param C_limits: parameter bounds, indexed as ``C_limits[:, 0]`` (lower)
        and ``C_limits[:, 1]`` (upper)
    :param num_bin_kde: number of bins per dimension passed to the KDE
    :param num_bin_raw: number of bins per dimension for the raw histogram;
        a falsy value skips the raw-histogram step
    :param folder: output directory (created if missing)
    :param mirror: (boolean) if True, mirror the data with
        ``pp.mirror_data_for_kde`` and use the SciPy KDE instead of KDEpy
    """
    if not os.path.isdir(folder):
        os.makedirs(folder)

    # ---- raw (histogram based) estimates ---------------------------------
    if num_bin_raw:
        logging.info(
            '2D raw marginals with {} bins per dimension'.format(num_bin_raw))
        H, C_final_joint = pp.calc_raw_joint_pdf(accepted, num_bin_raw, C_limits)
        # Only the joint estimate is needed; drop the histogram right away.
        del H
        raw_map_file = os.path.join(folder, 'C_final_joint{}'.format(num_bin_raw))
        np.savetxt(raw_map_file, C_final_joint)
        pp.calc_marginal_pdf_raw(accepted, num_bin_raw, C_limits, folder)

    # ---- smooth (KDE based) estimates ------------------------------------
    logging.info(
        '2D smooth marginals with {} bins per dimension'.format(num_bin_kde))
    lower = C_limits[:, 0]
    upper = C_limits[:, 1]
    if not mirror:
        Z = kdepy_fftkde(accepted, lower, upper, num_bin_kde)
    else:
        mirrored_data, _ = pp.mirror_data_for_kde(accepted, lower, upper)
        print(
            f"{len(mirrored_data) - len(accepted)} points were added to {len(accepted)} points"
        )
        Z = gaussian_kde_scipy(mirrored_data, lower, upper, num_bin_kde)

    C_final_smooth = find_MAP_kde(Z, lower, upper)
    smooth_map_file = os.path.join(folder, 'C_final_smooth' + str(num_bin_kde))
    np.savetxt(smooth_map_file, C_final_smooth)
    logging.info(
        'Estimated parameters from joint pdf: {}'.format(C_final_smooth))

    # ---- persist the joint density and derived pdfs ----------------------
    np.savez(os.path.join(folder, 'Z.npz'), Z=Z)
    pp.calc_marginal_pdf_smooth(Z, num_bin_kde, C_limits, folder)
    pp.calc_conditional_pdf_smooth(Z, folder)
    make_pdfs.make_1d_pdf(accepted, num_bin_raw, C_limits, num_bin_kde, folder)
    del Z
def test_kde2d_symmetric(self):
    """SciPy and KDEpy estimators must agree on an isotropic 2D Gaussian sample."""
    n_bins = 100
    sample = np.random.multivariate_normal((3, 3), [[0.8, 0], [0, 0.8]], 100)
    lower, upper = [0, 0], [6, 6]
    # The grid itself is not inspected here; the call is only exercised.
    _grid, _ = kde.grid_for_kde(lower, upper, n_bins)

    density_scipy = kde.gaussian_kde_scipy(sample, lower, upper, n_bins)
    density_kdepy = kde.kdepy_fftkde(sample, lower, upper, n_bins)

    # Rescale both densities so that their entries sum up to 1.
    density_scipy = density_scipy / np.sum(density_scipy)
    density_kdepy = density_kdepy / np.sum(density_kdepy)

    # Locate the maximum a posteriori point of each estimate.
    map_scipy = kde.find_MAP_kde(density_scipy, lower, upper)
    map_kdepy = kde.find_MAP_kde(density_kdepy, lower, upper)

    np.testing.assert_array_almost_equal(density_scipy, density_kdepy,
                                         decimal=4)
    np.testing.assert_array_almost_equal(map_scipy, map_kdepy)
def update_prior(S_init, C_limits, num_bin_update):
    """Rebuild the prior from the parameters accepted during calibration.

    A KDE of the first ``len(C_limits)`` columns of ``S_init`` is evaluated
    within ``C_limits``. The density and its MAP estimate are written to the
    calibration folder (``prior.npz`` and ``C_final_smooth``), and an
    interpolator over the density is stored in ``g.prior_interpolator``.

    :param S_init: accepted calibration samples; only the first
        ``len(C_limits)`` columns are used
    :param C_limits: array of parameter bounds, one ``(lower, upper)`` row
        per parameter
    :param num_bin_update: number of bins per dimension for the KDE
    :return: None
    """
    N_params = len(C_limits)
    lower, upper = C_limits[:, 0], C_limits[:, 1]

    prior = kde.kdepy_fftkde(data=np.array(S_init)[:, :N_params],
                             a=lower,
                             b=upper,
                             num_bin_joint=num_bin_update)
    map_calibration = kde.find_MAP_kde(prior, lower, upper)
    logging.info('Estimated parameter after calibration step is {}'.format(
        map_calibration))

    np.savez(os.path.join(g.path['calibration'], 'prior.npz'), Z=prior)
    np.savetxt(os.path.join(g.path['calibration'], 'C_final_smooth'),
               map_calibration)

    # The interpolation grid must span the same bounds the density was
    # evaluated on, so it is built from the ``C_limits`` argument rather than
    # from the global ``g.C_limits``.
    prior_grid = tuple(
        np.linspace(limits[0], limits[1], num_bin_update + 1)
        for limits in C_limits)
    g.prior_interpolator = RegularGridInterpolator(prior_grid,
                                                   prior,
                                                   bounds_error=False)
def test_kde1d(self):
    """Compare SciPy and KDEpy 1D estimates on a two-component Gaussian mixture."""
    n_bins = 100
    lower, upper = [-4.5], [3.5]

    # 1600 draws from N(-1, 1) and 400 draws from N(1, 0.3), as a column.
    wide_part = norm(-1, 1.).rvs(1600)
    narrow_part = norm(1, 0.3).rvs(400)
    sample = np.concatenate([wide_part, narrow_part]).reshape((-1, 1))

    x_grid = np.linspace(lower[0], upper[0], n_bins + 1)
    pdf_true = (0.8 * norm(-1, 1).pdf(x_grid) +
                0.2 * norm(1, 0.3).pdf(x_grid))
    expected_map = x_grid[np.argmax(pdf_true)]

    density_scipy = kde.gaussian_kde_scipy(sample, lower, upper, n_bins)
    density_kdepy = kde.kdepy_fftkde(sample, lower, upper, n_bins)

    # Rescale so that each density integrates to 1 over the grid.
    density_scipy = density_scipy / np.trapz(density_scipy, x=x_grid)
    density_kdepy = density_kdepy / np.trapz(density_kdepy, x=x_grid)

    # Maximum a posteriori location of each estimate.
    map_scipy = kde.find_MAP_kde(density_scipy, lower, upper)[0]
    map_kdepy = kde.find_MAP_kde(density_kdepy, lower, upper)[0]

    np.testing.assert_array_almost_equal(density_scipy, density_kdepy,
                                         decimal=3)
    np.testing.assert_array_almost_equal(map_scipy, expected_map)
    np.testing.assert_array_almost_equal(map_kdepy, expected_map)
    np.testing.assert_array_almost_equal(map_scipy, map_kdepy)
def main(args):
    """Post-process classic ABC output for a single parameter.

    Loads every ``classic_abc*.npz`` file from the output folder and then,
    for each fraction ``x`` in ``x_list``:

    * computes a tolerance with ``define_eps``, keeps the samples whose
      distance is below it, and stores a raw histogram and a KDE of them;
    * takes the ``x`` fraction of samples with the smallest distance, calls
      ``regression`` on them (once with the distance and once with the full
      summary statistics as features) and stores a KDE of the result.

    :param args: command-line style argument list; ``args[1]``, when
        present, is the path to the YAML parameter file
    :raises FileNotFoundError: if no ``classic_abc*.npz`` file is found
    """
    # Initialization
    if len(args) > 1:
        input_path = args[1]
    else:
        input_path = os.path.join('../rans_ode', 'params.yml')
    # NOTE: yaml.load() without an explicit Loader is rejected by PyYAML >= 6
    # and may construct arbitrary objects on older versions, so safe_load is
    # used; the file handle is closed by the context manager.
    with open(input_path, 'r') as params_file:
        params = yaml.safe_load(params_file)

    # Paths
    path = {
        'output': os.path.join('../', 'output/'),
        'valid_data': '../rans_ode/valid_data/'
    }
    print(path)
    logging.basicConfig(format="%(levelname)s: %(name)s: %(message)s",
                        handlers=[
                            logging.FileHandler("{0}/{1}.log".format(
                                path['output'], 'ABClog_postprocess0005')),
                            logging.StreamHandler()
                        ],
                        level=logging.DEBUG)
    logging.info('\n############# POSTPROCESSING ############')

    x_list = [0.005, 0.01, 0.03, 0.05, 0.1, 0.3]
    num_bin_kde = 100
    num_bin_raw = 20
    # With a single parameter the limits are used as a (lower, upper) pair.
    C_limits = np.loadtxt(os.path.join(path['output'], 'C_limits_init'))
    N_params = 1

    files_abc = glob.glob1(path['output'], "classic_abc*.npz")
    files = [os.path.join(path['output'], i) for i in files_abc]
    if not files:
        raise FileNotFoundError(
            'no classic_abc*.npz files found in {}'.format(path['output']))

    accepted = np.empty((0, N_params))
    dist = np.empty((0, 1))
    sum_stat = None
    logging.info('Loading data')
    for file_path in files:
        logging.debug('loading {}'.format(file_path))
        # Open every archive once and close it when done.
        with np.load(file_path) as npz:
            file_sum_stat = npz['sumstat']
            if sum_stat is None:
                # Width of the summary statistic is taken from the first file.
                sum_stat = np.empty((0, len(file_sum_stat[0])))
            accepted = np.vstack((accepted, npz['C'][:, :N_params]))
            sum_stat = np.vstack((sum_stat, file_sum_stat))
            dist = np.vstack((dist, npz['dist'].reshape((-1, 1))))
    data = np.hstack((accepted, dist)).tolist()
    np.savetxt(os.path.join(path['output'], '1d_dist_scatter'), data)

    logging.info('\n############# Classic ABC ############')
    for x in x_list:
        logging.info('\n')
        folder = os.path.join(path['output'], 'x_{}'.format(x * 100))
        os.makedirs(folder, exist_ok=True)
        print(folder)
        print('min dist = ', np.min(dist))
        eps = define_eps(data, x)
        np.savetxt(os.path.join(folder, 'eps'), [eps])
        # Row indices of the samples whose distance is below the tolerance.
        rows = np.where(dist < eps)[0]
        abc_accepted = accepted[rows]
        logging.info('x = {}, eps = {}, N accepted = {} (total {})'.format(
            x, eps, len(abc_accepted), len(dist)))
        print(sum_stat.shape, sum_stat[rows, :].shape)
        np.savez(os.path.join(path['output'], '1d_dist_scatter_{}'.format(x)),
                 C=abc_accepted,
                 dist=dist[rows],
                 sumstat=sum_stat[rows, :])

        logging.info(
            '1D raw histogram with {} bins per dimension'.format(num_bin_raw))
        hist_x, hist_y = utils.pdf_from_array_with_x(abc_accepted,
                                                     bins=num_bin_raw,
                                                     range=C_limits)
        np.savetxt(os.path.join(folder, 'histogram'), [hist_x, hist_y])
        np.savetxt(os.path.join(folder, 'C_final_raw{}'.format(num_bin_raw)),
                   [hist_x[np.argmax(hist_y)]])

        logging.info('2D smooth marginals with {} bins per dimension'.format(
            num_bin_kde))
        Z = kdepy_fftkde(abc_accepted, [C_limits[0]], [C_limits[1]],
                         num_bin_kde)
        C_final_smooth = find_MAP_kde(Z, C_limits[0], C_limits[1])
        grid = np.linspace(C_limits[0] - 1e-10, C_limits[1] + 1e-10,
                           num_bin_kde + 1)
        np.savetxt(os.path.join(folder, 'C_final_smooth'), C_final_smooth)
        logging.info(
            'Estimated parameters from joint pdf: {}'.format(C_final_smooth))
        np.savez(os.path.join(folder, 'Z.npz'), Z=Z, grid=grid)

    logging.info('\n############# Regression ############')
    path['regression_dist'] = os.path.join(path['output'], 'regression_dist')
    path['regression_full'] = os.path.join(path['output'], 'regression_full')
    os.makedirs(path['regression_dist'], exist_ok=True)
    os.makedirs(path['regression_full'], exist_ok=True)
    Truth = sumstat.TruthData(valid_folder=path['valid_data'],
                              case=params['case'])
    # Sort everything by distance so that the first n rows are the closest.
    ind = np.argsort(dist[:, 0])
    accepted = accepted[ind]
    sum_stat = sum_stat[ind]
    dist = dist[ind]
    for x in x_list:
        logging.info('\n')
        n = int(x * len(accepted))
        print('{} samples are taken for regression ({}% of {})'.format(
            n, x * 100, len(accepted)))
        samples = accepted[:n, :N_params]
        dist_reg = dist[:n, -1].reshape((-1, 1))

        logging.info('Regression with distance')
        _regression_kde_1d(
            samples, dist_reg, dist_reg,
            os.path.join(path['regression_dist'], 'x_{}'.format(x * 100)),
            N_params)

        logging.info('Regression with full summary statistics')
        _regression_kde_1d(
            samples, sum_stat[:n] - Truth.sumstat_true, dist_reg,
            os.path.join(path['regression_full'], 'x_{}'.format(x * 100)),
            N_params)
    logging.info('\n#############Done############')


def _regression_kde_1d(samples, features, dist_reg, folder, n_params,
                       num_bin_kde_reg=20):
    """Run one regression variant and store its limits, KDE and MAP in ``folder``."""
    os.makedirs(folder, exist_ok=True)
    new_samples, _ = regression(samples, features, dist_reg, 1, folder)

    # New bounds are the range of the adjusted samples, widened when the
    # range is numerically degenerate.
    limits = np.empty((n_params, 2))
    for i in range(n_params):
        limits[i, 0] = np.min(new_samples[:, i])
        limits[i, 1] = np.max(new_samples[:, i])
        if limits[i, 1] - limits[i, 0] < 1e-8:
            logging.warning('too small new range')
            limits[i, 0] -= 0.001
            limits[i, 1] += 0.001
    print('new limits = ', limits)
    np.savetxt(os.path.join(folder, 'reg_limits'), limits)

    logging.info('2D smooth marginals with {} bins per dimension'.format(
        num_bin_kde_reg))
    Z = kdepy_fftkde(new_samples, [limits[:, 0]], [limits[:, 1]],
                     num_bin_kde_reg)
    C_final_smooth = find_MAP_kde(Z, [limits[:, 0]], [limits[:, 1]])
    grid = np.linspace(limits[0, 0] - 1e-10, limits[0, 1] + 1e-10,
                       num_bin_kde_reg + 1)
    np.savetxt(os.path.join(folder, 'C_final_smooth'), C_final_smooth[0])
    logging.info('Estimated parameters from joint pdf: {}'.format(
        C_final_smooth[0]))
    np.savez(os.path.join(folder, 'Z.npz'), Z=Z, grid=grid)
def main():
    """Post-process classic ABC output with a regression on the distance.

    Loads distances and parameters from the ``classic_abc*.npz`` files in the
    output folder, sorts them by distance and, for every fraction ``x`` in
    ``x_list``, calls ``regression`` on the ``x`` fraction of samples with the
    smallest distance. For each fraction the new limits, the KDE of the
    adjusted samples, its MAP estimate and the marginal/conditional pdfs are
    written to ``<output>/<regression type>/x_<100*x>``.

    :raises NotImplementedError: for a regression type other than
        ``'regression_dist'``
    """
    path = {
        'output': os.path.join('../', 'rans_output/'),
        'valid_data': '../rans_ode/valid_data/'
    }
    print(path)
    logging.basicConfig(format="%(levelname)s: %(name)s: %(message)s",
                        handlers=[
                            logging.FileHandler("{0}/{1}.log".format(
                                path['output'], 'ABClog_postprocess0005')),
                            logging.StreamHandler()
                        ],
                        level=logging.DEBUG)
    logging.info('\n############# POSTPROCESSING ############')

    x_list = [0.3, 0.1, 0.05, 0.03, 0.01]
    num_bin_kde = 20
    C_limits = np.loadtxt(os.path.join(path['output'], 'C_limits_init'))
    # np.loadtxt returns a 1-D array when there is a single row of limits.
    if len(np.array(C_limits).shape) < 2:
        N_params = 1
    else:
        N_params = len(C_limits)

    files_abc = glob.glob1(path['output'], "classic_abc*.npz")
    files = [os.path.join(path['output'], i) for i in files_abc]

    logging.info('Loading data')
    dist_unsorted = load_dist(files)
    # Sort by distance so that the first n rows are the closest samples.
    ind = np.argsort(dist_unsorted[:, 0])
    samples = load_c(files, N_params)[ind]
    N_total, _ = samples.shape
    dist = dist_unsorted[ind]

    logging.info('\n############# Regression ############')
    regression_type = ['regression_dist']
    for reg_type in regression_type:
        path[reg_type] = os.path.join(path['output'], reg_type)
        os.makedirs(path[reg_type], exist_ok=True)
        for x in x_list:
            n = int(x * N_total)
            folder = os.path.join(path[reg_type], 'x_{}'.format(x * 100))
            os.makedirs(folder, exist_ok=True)
            logging.info('\n')
            logging.info('Regression {}'.format(reg_type))
            logging.info(
                '{} samples are taken for regression ({}% of {})'.format(
                    n, x * 100, N_total))
            # Slice into new names: the full sorted arrays must stay intact
            # so that every fraction (in any order) and every regression
            # type starts from the complete data set.
            samples_x = samples[:n, :N_params]
            dist_x = dist[:n]

            if reg_type == 'regression_dist':
                new_samples, _ = regression(samples_x,
                                            dist_x,
                                            dist_x,
                                            x=1,
                                            folder=folder)
            else:
                raise NotImplementedError(
                    'unsupported regression type: {}'.format(reg_type))

            limits = new_limits(new_samples, N_params)
            np.savetxt(os.path.join(folder, 'reg_limits'), limits)
            Z = kdepy_fftkde(new_samples, limits[:, 0], limits[:, 1],
                             num_bin_kde)
            # The MAP must be located on the grid Z was evaluated on, i.e.
            # the new regression limits, not the initial C_limits (which are
            # also 1-D, hence not indexable as [:, 0], when N_params == 1).
            C_final_smooth = find_MAP_kde(Z, limits[:, 0], limits[:, 1])
            logging.info('Estimated parameters from joint pdf: {}'.format(
                C_final_smooth))
            np.savetxt(
                os.path.join(folder, 'C_final_smooth' + str(num_bin_kde)),
                C_final_smooth)
            np.savez(os.path.join(folder, 'Z.npz'), Z=Z)
            pp.calc_marginal_pdf_smooth(Z, num_bin_kde, limits, folder)
            pp.calc_conditional_pdf_smooth(Z, folder)
            del Z
    logging.info('\n#############Done############')
def main(args):
    """Post-process MCMC-ABC chains: raw and KDE-smoothed posterior estimates.

    Reads the YAML parameter file, loads every ``chain<k>_*.npz`` file of the
    ``parallel_threads`` chains from the output folder and writes the raw
    joint/marginal estimates and the KDE-based joint density, MAP estimate
    and marginal/conditional pdfs to ``<output>/chains``.

    :param args: command-line style argument list; ``args[1]``, when
        present, is the path to the YAML parameter file
    :raises FileNotFoundError: if no chain file is found
    """
    # Initialization
    if len(args) > 1:
        input_path = args[1]
    else:
        input_path = os.path.join('../rans_output/', 'params.yml')
    # NOTE: yaml.load() without an explicit Loader is rejected by PyYAML >= 6
    # and may construct arbitrary objects on older versions, so safe_load is
    # used; the file handle is closed by the context manager.
    with open(input_path, 'r') as params_file:
        input = yaml.safe_load(params_file)

    # Paths
    basefolder = '../'
    path = {
        'output': os.path.join(basefolder, 'rans_output/'),
        'plots': os.path.join(basefolder, 'rans_plots/'),
        'valid_data': os.path.join(basefolder, 'rans_ode', 'valid_data')
    }
    path['calibration'] = os.path.join(path['output'], 'calibration/')
    N_chains = input['parallel_threads']

    logging.basicConfig(format="%(levelname)s: %(name)s: %(message)s",
                        handlers=[
                            logging.FileHandler("{0}/{1}.log".format(
                                path['output'], 'ABClog_postprocess')),
                            logging.StreamHandler()
                        ],
                        level=logging.DEBUG)
    logging.info('\n############# POSTPROCESSING CHAINS ############')

    C_limits = np.loadtxt(os.path.join(path['calibration'], 'C_limits'))
    N_params = len(C_limits)

    # Collect the files chain by chain, keeping the per-chain glob order.
    files = []
    for chain in range(N_chains):
        files_onechain = glob.glob1(path['output'],
                                    "chain{}_*.npz".format(chain))
        files.extend(os.path.join(path['output'], i) for i in files_onechain)
    if not files:
        raise FileNotFoundError(
            'no chain*_*.npz files found in {}'.format(path['output']))

    accepted = np.empty((0, N_params))
    dist = np.empty((0, 1))
    sum_stat = None
    logging.info('Loading data')
    for file in files:
        logging.debug('loading {}'.format(file))
        # Open every archive once and close it when done.
        with np.load(file) as npz:
            file_sum_stat = npz['sumstat']
            if sum_stat is None:
                # Width of the summary statistic is taken from the first file.
                sum_stat = np.empty((0, len(file_sum_stat[0])))
            accepted = np.vstack((accepted, npz['C']))
            sum_stat = np.vstack((sum_stat, file_sum_stat))
            dist = np.vstack((dist, npz['dist'].reshape((-1, 1))))
    print(accepted.shape, sum_stat.shape, dist.shape)

    logging.info('\n')
    logging.info(
        '\n############# MCMC-ABC ({} chains) ############'.format(N_chains))
    folder = os.path.join(path['output'], 'chains')
    os.makedirs(folder, exist_ok=True)
    num_bin_kde = 50
    num_bin_raw = 20

    # Raw (histogram based) estimates
    logging.info(
        '2D raw marginals with {} bins per dimension'.format(num_bin_raw))
    H, C_final_joint = pp.calc_raw_joint_pdf(accepted, num_bin_raw, C_limits)
    np.savetxt(os.path.join(folder, 'C_final_joint{}'.format(num_bin_raw)),
               C_final_joint)
    pp.calc_marginal_pdf_raw(accepted, num_bin_raw, C_limits, folder)

    # Smooth (KDE based) estimates
    logging.info(
        '2D smooth marginals with {} bins per dimension'.format(num_bin_kde))
    Z = kdepy_fftkde(accepted, C_limits[:, 0], C_limits[:, 1], num_bin_kde)
    C_final_smooth = find_MAP_kde(Z, C_limits[:, 0], C_limits[:, 1])
    np.savetxt(os.path.join(folder, 'C_final_smooth' + str(num_bin_kde)),
               C_final_smooth)
    logging.info(
        'Estimated parameters from joint pdf: {}'.format(C_final_smooth))
    np.savez(os.path.join(folder, 'Z.npz'), Z=Z)
    pp.calc_marginal_pdf_smooth(Z, num_bin_kde, C_limits, folder)
    pp.calc_conditional_pdf_smooth(Z, folder)