def run_conditional_estimation(args, i_cv):
    """Run one cross-validation fold of the conditional estimation pipeline.

    Builds the data generators, trains or reloads the model, evaluates it on
    validation data, then calls ``run_conditional_estimation_iter`` once per
    test configuration and writes the concatenated output to
    ``conditional_estimations.csv`` inside ``model.results_path``.

    Args:
        args: Options object. ``args.cuda`` and ``args.retrain`` are read
            here; the whole object is forwarded to ``build_model``.
        i_cv: Fold index. It offsets the seeds and is stored in the output.

    Returns:
        The result of ``pd.concat`` over the per-configuration outputs, with
        an ``i_cv`` column set to the fold index.
    """
    log = logging.getLogger()
    print_line()
    log.info('Running iter n°{}'.format(i_cv))
    print_line()

    base_row = {'i_cv': i_cv}

    # Data generators: train / valid / test use consecutive seeds.
    log.info('Set up data generator')
    cfg = Config()
    fold_seed = SEED + i_cv * 5
    torch_generator = GeneratorTorch(fold_seed, cuda=args.cuda)
    train_generator = TrainGenerator(torch_generator, cuda=args.cuda)
    valid_generator = Generator(fold_seed + 1)
    test_generator = Generator(fold_seed + 2)

    # Model
    log.info('Set up classifier')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(log)

    # Training (or loading a previously trained model)
    train_or_load_neural_net(model, train_generator, retrain=args.retrain)

    # Sanity checks on validation data
    log.info('Generate validation data')
    valid_sample = valid_generator.generate(
        *cfg.CALIBRATED, n_samples=cfg.N_VALIDATION_SAMPLES)
    X_valid, y_valid, w_valid = valid_sample
    base_row.update(evaluate_neural_net(model, prefix='valid'))
    base_row.update(
        evaluate_classifier(model, X_valid, y_valid, w_valid, prefix='valid'))

    # Measurement: one call per test configuration
    evaluate_summary_computer(model, X_valid, y_valid, w_valid,
                              n_bins=N_BINS, prefix='valid_', suffix='')
    per_config = []
    for i, test_config in enumerate(cfg.iter_test_config()):
        per_config.append(
            run_conditional_estimation_iter(
                model, base_row, i, test_config,
                valid_generator, test_generator, n_bins=N_BINS))

    conditional_estimate = pd.concat(per_config)
    conditional_estimate['i_cv'] = i_cv
    conditional_estimate.to_csv(
        os.path.join(model.results_path, "conditional_estimations.csv"))
    log.info('DONE')
    return conditional_estimate
def run(args, i_cv):
    """Run one cross-validation fold of the regressor pipeline.

    Sets up the generators, trains or reloads the model, evaluates it, then
    calls ``run_iter`` for every test configuration. The collected rows are
    saved to ``results.csv`` in ``model.results_path`` and one plot is made
    per name in ``config.PARAM_NAMES``.

    Args:
        args: Options object. ``args.retrain`` is read here; the whole object
            is forwarded to ``build_model``.
        i_cv: Fold index. It offsets the seeds and is stored in each row.

    Returns:
        pandas.DataFrame built from the values returned by ``run_iter``.
    """
    log = logging.getLogger()
    print_line()
    log.info('Running iter n°{}'.format(i_cv))
    print_line()

    base_row = {'i_cv': i_cv}

    # Data generators: train / valid / test use consecutive seeds.
    log.info('Set up data generator')
    cfg = Config()
    fold_seed = SEED + i_cv * 5
    raw_train_generator = Generator(fold_seed)
    valid_generator = Generator(fold_seed + 1)
    test_generator = Generator(fold_seed + 2)
    train_generator = TrainGenerator(param_generator, raw_train_generator)

    # Model
    log.info('Set up regressor')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(log)

    # Training (or loading a previously trained model)
    train_or_load_neural_net(model, train_generator, retrain=args.retrain)

    # Sanity checks; the validation sample itself is not used further here.
    log.info('Generate validation data')
    X_valid, y_valid, w_valid = valid_generator.generate(
        *cfg.CALIBRATED, n_samples=cfg.N_VALIDATION_SAMPLES)
    base_row.update(evaluate_neural_net(model, prefix='valid'))
    evaluate_regressor(model, prefix='valid')

    # Measurement: one row per test configuration
    base_row['nfcn'] = NCALL
    rows = []
    for i, test_config in enumerate(cfg.iter_test_config()):
        rows.append(run_iter(model, base_row, i, test_config,
                             valid_generator, test_generator))
    result_table = pd.DataFrame(rows)
    result_table.to_csv(os.path.join(model.results_path, 'results.csv'))

    log.info('Plot params')
    for name in cfg.PARAM_NAMES:
        plot_params(name, result_table,
                    title=model.full_name, directory=model.results_path)

    log.info('DONE')
    return result_table
def main():
    """Estimate statistical and systematic variance of the mu estimator.

    Draws 25 nuisance-parameter sets from ``param_generator``. For each set,
    ``N_ITER`` classifiers are trained on data generated with those nuisance
    values, and a minimizer is run on the resulting NLL. The mean and
    variance of the ``'mu'`` results are recorded per nuisance set, then
    combined as:

    * ``V_stat``: mean over nuisance sets of the per-set variance,
    * ``V_syst``: variance over nuisance sets of the per-set mean,
    * ``V_total``: their sum.

    All three are logged; nothing is returned or written to disk.
    """
    log = set_logger()
    args = GB_parse_args(
        main_description="Training launcher for Gradient boosting on S3D2 benchmark")
    log.info(args)
    flush(log)

    # Configuration and fixed test sample
    config = Config()
    config.TRUE = Parameter(r=0.1, lam=2.7, mu=0.1)

    train_generator = Generator(SEED)
    valid_generator = Generator(SEED+1)
    test_generator = Generator(SEED+2)
    X_test, y_test, w_test = test_generator.generate(
        *config.TRUE, n_samples=config.N_TESTING_SAMPLES)

    # for nuisance in p(nuisance | data)
    nuisance_param_sample = []
    for _ in range(25):
        nuisance_param_sample.append(param_generator().nuisance_parameters)

    average_list = []
    variance_list = []
    all_results = []
    nuisance_names = config.TRUE.nuisance_parameters_names
    for nuisance_params in nuisance_param_sample:
        log.info(f"nuisance_params = {nuisance_params}")
        estimator_values = []
        results = dict(zip(nuisance_names, nuisance_params))

        for i_cv in range(N_ITER):
            clf = build_model(args, i_cv)
            parameters = Parameter(
                *nuisance_params, config.CALIBRATED.interest_parameters)
            print(parameters)
            X_train, y_train, w_train = train_generator.generate(
                *parameters, n_samples=config.N_TRAINING_SAMPLES)
            log.info(f"Training {clf.full_name}")
            # TODO : is it OK to provide w_train to the classifier or useless ?
            clf.fit(X_train, y_train, w_train)

            compute_summaries = ClassifierSummaryComputer(clf, n_bins=10)
            nll_computer = NLLComputer(
                compute_summaries, valid_generator, X_test, w_test,
                config=config)

            # Nuisance values are frozen; only mu is left free for the fit.
            def compute_nll(mu):
                return nll_computer(*nuisance_params, mu)

            minimizer = get_minimizer(compute_nll)
            results.update(
                evaluate_minuit(minimizer, [config.TRUE.interest_parameters]))
            all_results.append(results.copy())
            # TODO : Add results to some csv
            estimator_values.append(results['mu'])

        average_list.append(np.mean(estimator_values))
        variance_list.append(np.var(estimator_values))

    log.info(f"average_list {average_list}")
    log.info(f"variance_list {variance_list}")
    v_stat = np.mean(variance_list)
    v_syst = np.var(average_list)
    v_total = v_stat + v_syst
    log.info(f"V_stat = {v_stat}")
    log.info(f"V_syst = {v_syst}")
    log.info(f"V_total = {v_total}")
def main():
    """Train (or load) one regressor and evaluate it on every test config.

    Single-run launcher with the fold index fixed to 0. It writes
    ``config_table.csv`` and ``results.csv`` into ``model.results_directory``
    and plots the parameter named by ``CALIB_PARAM_NAME``.
    """
    log = set_logger()
    args = REG_parse_args(
        main_description="Training launcher for Regressor on S3D2 benchmark")
    log.info(args)
    flush(log)

    # Model
    log.info("Setup model")
    model = build_model(args, 0)
    os.makedirs(model.results_directory, exist_ok=True)

    # Data
    log.info("Setup data")
    cfg = Config()
    config_table = evaluate_config(cfg)
    config_table.to_csv(
        os.path.join(model.results_directory, 'config_table.csv'))
    base_seed = SEED + 99999
    train_generator = TrainGenerator(param_generator, Generator(base_seed))
    valid_generator = Generator(base_seed + 1)
    test_generator = Generator(base_seed + 2)

    i_cv = 0
    base_row = {'i_cv': i_cv}

    # Training (or loading a previously trained model)
    train_or_load_neural_net(model, train_generator, retrain=args.retrain)

    # Sanity checks
    base_row.update(evaluate_neural_net(model, prefix='valid'))
    evaluate_regressor(model, prefix='valid')
    print_line()

    # Measurement: one row per test configuration
    rows = []
    for i, test_config in enumerate(cfg.iter_test_config()):
        rows.append(run_iter(model, base_row, i, test_config,
                             valid_generator, test_generator))
    result_table = pd.DataFrame(rows)
    result_table.to_csv(
        os.path.join(model.results_directory, 'results.csv'))

    log.info('Plot params')
    plot_params(CALIB_PARAM_NAME, result_table,
                title=model.full_name, directory=model.results_directory)

    log.info('DONE')
def run_estimation(args, i_cv):
    """Run one cross-validation fold of the estimation pipeline.

    Trains or reloads the model through ``train_or_load_pivot``, evaluates it
    on validation data, loads the ``r`` and ``lam`` calibrations, then calls
    ``run_estimation_iter`` for every test configuration. Rows are saved to
    ``estimations.csv`` in ``model.results_path`` and one plot is made per
    name in ``config.PARAM_NAMES``.

    Args:
        args: Options object. ``args.retrain`` is read here; the whole object
            is forwarded to ``build_model``.
        i_cv: Fold index. It offsets the seeds and is stored in each row.

    Returns:
        pandas.DataFrame built from the values returned by
        ``run_estimation_iter``.
    """
    log = logging.getLogger()
    print_line()
    log.info('Running iter n°{}'.format(i_cv))
    print_line()

    base_row = {'i_cv': i_cv}

    # Data generators: train / valid / test use consecutive seeds.
    log.info('Set up data generator')
    cfg = Config()
    fold_seed = SEED + i_cv * 5
    raw_train_generator = Generator(fold_seed)
    train_generator = TrainGenerator(param_generator, raw_train_generator)
    valid_generator = Generator(fold_seed+1)
    test_generator = Generator(fold_seed+2)

    # Model
    log.info('Set up classifier')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(log)

    # Training (or loading a previously trained model)
    n_train = cfg.N_TRAINING_SAMPLES*N_AUGMENT
    train_or_load_pivot(model, train_generator, n_train, retrain=args.retrain)

    # Sanity checks on validation data
    log.info('Generate validation data')
    X_valid, y_valid, w_valid = valid_generator.generate(
        *cfg.CALIBRATED, n_samples=cfg.N_VALIDATION_SAMPLES)
    base_row.update(evaluate_neural_net(model, prefix='valid'))
    base_row.update(
        evaluate_classifier(model, X_valid, y_valid, w_valid, prefix='valid'))

    # Measurement: one row per test configuration
    calib_r = load_calib_r(DATA_NAME, BENCHMARK_NAME)
    calib_lam = load_calib_lam(DATA_NAME, BENCHMARK_NAME)
    evaluate_summary_computer(model, X_valid, y_valid, w_valid,
                              n_bins=N_BINS, prefix='valid_', suffix='')
    rows = []
    for i, test_config in enumerate(cfg.iter_test_config()):
        rows.append(
            run_estimation_iter(model, base_row, i, test_config,
                                valid_generator, test_generator,
                                calib_r, calib_lam, n_bins=N_BINS))
    result_table = pd.DataFrame(rows)
    result_table.to_csv(os.path.join(model.results_path, 'estimations.csv'))

    log.info('Plot params')
    for name in cfg.PARAM_NAMES:
        plot_params(name, result_table,
                    title=model.full_name, directory=model.results_path)

    log.info('DONE')
    return result_table
def main():
    """Launch ``N_ITER`` regressor folds and evaluate the combined results.

    A model is instantiated only to obtain ``model.results_directory``; the
    folds themselves are executed by ``run``. Writes ``results.csv`` and
    ``evaluation.csv`` there, prints the evaluation table, and finally calls
    ``gather_images`` on the directory.
    """
    log = set_logger()
    args = REG_parse_args(
        main_description="Training launcher for Regressor on S3D2 benchmark")
    log.info(args)
    flush(log)

    # Network and optimizer are attached to args before building the model.
    args.net = AR5R5E(n_in=3, n_out=2, n_extra=2)
    args.optimizer = get_optimizer(args)
    model = get_model(args, Regressor)
    model.set_info(DATA_NAME, BENCHMARK_NAME, -1)
    pb_config = S3D2Config()

    # Run every fold and stack the per-fold tables.
    fold_tables = []
    for i_cv in range(N_ITER):
        fold_tables.append(run(args, i_cv))
    results = pd.concat(fold_tables, ignore_index=True)
    results.to_csv(os.path.join(model.results_directory, 'results.csv'))

    # Evaluation
    eval_table = evaluate_estimator(pb_config.INTEREST_PARAM_NAME, results)
    print_line()
    print_line()
    print(eval_table)
    print_line()
    print_line()
    eval_table.to_csv(os.path.join(model.results_directory, 'evaluation.csv'))
    gather_images(model.results_directory)
def run(args, i_cv):
    """Run the likelihood fit of one fold over every test configuration.

    Output goes to ``DIRECTORY/cv_<i_cv>``: the rows returned by ``run_iter``
    are saved as ``estimations.csv`` and one plot is produced per name in
    ``config.PARAM_NAMES``.

    Args:
        args: Unused in this function; kept for the common ``run`` signature.
        i_cv: Fold index. It determines the output folder and the test seed.

    Returns:
        pandas.DataFrame built from the values returned by ``run_iter``.
    """
    log = logging.getLogger()
    print_line()
    log.info('Running iter n°{}'.format(i_cv))
    print_line()

    out_dir = os.path.join(DIRECTORY, f'cv_{i_cv}')
    os.makedirs(out_dir, exist_ok=True)

    cfg = S3D2Config()
    fold_seed = SEED + i_cv * 5
    test_seed = fold_seed + 2

    rows = []
    for i, test_config in enumerate(cfg.iter_test_config()):
        rows.append(run_iter(i_cv, i, test_config, test_seed, out_dir))
    result_table = pd.DataFrame(rows)
    result_table.to_csv(os.path.join(out_dir, 'estimations.csv'))

    log.info('Plot params')
    for name in cfg.PARAM_NAMES:
        plot_params(name, result_table,
                    title='Likelihood fit', directory=out_dir)

    return result_table
def run(args, i_cv):
    """Run one fold using histogram summaries fitted on a training sample.

    A ``HistogramSummaryComputer`` with 10 bins is fitted on data generated
    at the calibrated parameters, then ``run_iter`` is called for every test
    configuration. Output goes to ``DIRECTORY/cv_<i_cv>``: ``results.csv``
    plus one plot per name in ``config.PARAM_NAMES``.

    Args:
        args: Unused in this function; kept for the common ``run`` signature.
        i_cv: Fold index. It determines the output folder and the seeds.

    Returns:
        pandas.DataFrame built from the values returned by ``run_iter``.
    """
    log = logging.getLogger()
    print_line()
    log.info('Running iter n°{}'.format(i_cv))
    print_line()

    out_dir = os.path.join(DIRECTORY, f'cv_{i_cv}')
    os.makedirs(out_dir, exist_ok=True)

    cfg = Config()
    fold_seed = SEED + i_cv * 5
    train_generator = Generator(fold_seed)
    valid_generator = Generator(fold_seed+1)
    test_generator = Generator(fold_seed+2)

    n_bins = 10
    X_train, y_train, w_train = train_generator.generate(
        *cfg.CALIBRATED, n_samples=cfg.N_TRAINING_SAMPLES)
    summary_computer = HistogramSummaryComputer(n_bins=n_bins)
    compute_summaries = summary_computer.fit(X_train)

    rows = []
    for i, test_config in enumerate(cfg.iter_test_config()):
        rows.append(run_iter(compute_summaries, i_cv, i, test_config,
                             valid_generator, test_generator, out_dir))
    result_table = pd.DataFrame(rows)
    result_table.to_csv(os.path.join(out_dir, 'results.csv'))

    log.info('Plot params')
    for name in cfg.PARAM_NAMES:
        plot_params(name, result_table,
                    title='Likelihood fit', directory=out_dir)

    return result_table
def run(args, i_cv):
    """Run one fold where training and validation happen elsewhere.

    Only the test generator and the model are created here; per the
    disabled steps below, training and validation are not performed in this
    function. ``run_iter`` is called for every test configuration, the rows
    are saved to ``results.csv`` in ``model.results_path``, and one plot is
    produced per name in ``config.PARAM_NAMES``.

    Args:
        args: Options object forwarded to ``build_model`` and ``run_iter``.
        i_cv: Fold index. It offsets the seed and is stored in each row.

    Returns:
        pandas.DataFrame built from the values returned by ``run_iter``.
    """
    log = logging.getLogger()
    print_line()
    log.info('Running iter n°{}'.format(i_cv))
    print_line()

    base_row = {'i_cv': i_cv}

    # Data: only the test generator is needed in this variant.
    log.info('Set up data generator')
    cfg = Config()
    fold_seed = SEED + i_cv * 5
    test_generator = Generator(fold_seed+2)

    # Model (the training / loading step is disabled in this variant)
    model = build_model(args, i_cv)

    # The validation evaluation is disabled; only the log message remains.
    log.info('Generate validation data')

    # Measurement: one row per test configuration
    n_bins = 10
    rows = []
    for i, test_config in enumerate(cfg.iter_test_config()):
        rows.append(run_iter(model, base_row, i, i_cv, args, test_config,
                             test_generator, n_bins=n_bins))
    result_table = pd.DataFrame(rows)
    result_table.to_csv(os.path.join(model.results_path, 'results.csv'))

    log.info('Plot params')
    # NOTE(review): plots go to model.path while the CSV above goes to
    # model.results_path - confirm the two locations are meant to differ.
    for name in cfg.PARAM_NAMES:
        plot_params(name, result_table,
                    title=model.full_name, directory=model.path)

    log.info('DONE')
    return result_table
def main():
    """Run ``N_ITER`` folds and evaluate the stacked results.

    Creates ``DIRECTORY``, runs ``run(None, i_cv)`` for every fold, then
    writes ``results.csv`` and ``evaluation.csv`` there. The evaluation
    table is also printed, and ``gather_images`` is called on the directory.
    """
    log = set_logger()
    log.info("Hello world !")
    os.makedirs(DIRECTORY, exist_ok=True)
    set_plot_config()
    args = None  # this launcher has no command-line options

    cfg = S3D2Config()
    fold_tables = []
    for i_cv in range(N_ITER):
        fold_tables.append(run(args, i_cv))
    results = pd.concat(fold_tables, ignore_index=True)
    results.to_csv(os.path.join(DIRECTORY, 'results.csv'))

    # Evaluation
    eval_table = evaluate_estimator(
        cfg.TRUE.interest_parameters_names, results)
    print_line()
    print_line()
    print(eval_table)
    print_line()
    print_line()
    eval_table.to_csv(os.path.join(DIRECTORY, 'evaluation.csv'))
    gather_images(DIRECTORY)
def param_generator():
    """Draw one parameter set for training-data generation.

    ``r`` and ``lam`` are held at the calibrated values read from
    ``S3D2Config``; ``mu`` is drawn uniformly from ``[0, 1)`` using NumPy's
    global random state.

    Returns:
        ``Parameter(r, lam, mu)``.
    """
    pb_config = S3D2Config()

    # Nuisance parameters are not sampled: they stay at their calibration.
    r = pb_config.CALIBRATED_R
    lam = pb_config.CALIBRATED_LAMBDA

    # mu covers the whole unit interval. Bounds derived from TRUE_MU_RANGE
    # used to be computed here but never fed the draw, so that dead
    # computation was dropped.
    mu = np.random.uniform(0, 1)

    return Parameter(r, lam, mu)
def likelihood_fit():
    """Fit (r, lam, mu) by NLL minimization for each true mu value.

    For every value in ``config.TRUE_MU_RANGE`` except the first, a fresh
    ``S3D2`` generator seeded with ``SEED`` samples 80 000 events. A
    minimizer is run on the generator's NLL, and the fitted parameters are
    recorded next to the true ones. All rows are saved to
    ``DIRECTORY/results.csv`` and one plot is made per name in
    ``config.PARAM_NAMES``.
    """
    print("Hello world !")
    set_plot_config()
    cfg = S3D2Config()
    n_data = 80_000

    rows = []
    for true_mu in cfg.TRUE_MU_RANGE[1:]:
        row = {}
        cfg.TRUE_MU = true_mu
        generator = S3D2(SEED)
        data, label = generator.sample_event(
            cfg.TRUE.r, cfg.TRUE.lam, cfg.TRUE_MU, size=n_data)
        n_sig = np.sum(label==1)
        n_bkg = np.sum(label==0)
        print(f"nb of signal      = {n_sig}")
        print(f"nb of backgrounds = {n_bkg}")

        # Argument names are kept as (r, lam, mu) in case the minimizer
        # inspects the signature.
        def compute_nll(r, lam, mu):
            return generator.nll(data, r, lam, mu)

        print('Prepare minuit minimizer')
        minimizer = get_minimizer(compute_nll, cfg)
        fmin, params = estimate(minimizer)

        params_truth = [cfg.TRUE_R, cfg.TRUE_LAMBDA, cfg.TRUE_MU]
        my_print_params(params, params_truth)
        register_params(params, params_truth, row)
        row['is_mingrad_valid'] = minimizer.migrad_ok()
        row.update(fmin)
        rows.append(row.copy())

    result_table = pd.DataFrame(rows)
    result_table.to_csv(os.path.join(DIRECTORY, 'results.csv'))

    print('Plot params')
    for name in cfg.PARAM_NAMES:
        my_plot_params(name, result_table)
def run(args, i_cv):
    """Train (or load) a regressor for one fold and measure mu on test data.

    The model is loaded from ``model.model_path`` unless ``args.retrain`` is
    set; if loading fails, this fold falls back to training. For every value
    in ``pb_config.TRUE_MU_RANGE``, test data is generated at the calibrated
    ``r`` / ``lam`` and the model's prediction and uncertainty are recorded.

    Args:
        args: Options object. ``args.retrain`` is read, and ``args.net`` and
            ``args.optimizer`` are set here before building the model.
            ``args.retrain`` is no longer modified, so a failed load in one
            fold does not force retraining in the following folds.
        i_cv: Fold index. It offsets the seeds and is stored in each row.

    Returns:
        pandas.DataFrame with one row per true mu value.
    """
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()

    result_row = {'i_cv': i_cv}
    result_table = []

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    pb_config = S3D2Config()
    # NOTE(review): `config` is not defined in this function; presumably a
    # module-level import exposing SEED - confirm.
    seed = config.SEED + i_cv * 5
    train_generator = S3D2(seed)
    valid_generator = S3D2(seed + 1)  # currently unused below
    test_generator = S3D2(seed + 2)

    # SET MODEL
    logger.info('Set up rergessor')
    args.net = AR5R5E(n_in=3, n_out=2, n_extra=2)
    args.optimizer = get_optimizer(args)
    model = get_model(args, Regressor)
    model.set_info(BENCHMARK_NAME, i_cv)
    model.param_generator = param_generator
    flush(logger)

    # TRAINING / LOADING
    # A local flag keeps the load-failure fallback from leaking into the
    # shared args object.
    retrain = args.retrain
    if not retrain:
        try:
            logger.info('loading from {}'.format(model.model_path))
            model.load(model.model_path)
        except Exception as e:
            # Best effort: any load failure falls back to training.
            logger.warning(e)
            retrain = True
    if retrain:
        logger.info('Training {}'.format(model.get_name()))
        model.fit(train_generator)
        logger.info('Training DONE')

        # SAVE MODEL
        save_model(model)

    # CHECK TRAINING
    logger.info('Plot losses')
    plot_REG_losses(model)
    plot_REG_log_mse(model)
    result_row['loss'] = model.losses[-1]
    result_row['mse_loss'] = model.mse_losses[-1]

    # MEASUREMENT
    name = pb_config.INTEREST_PARAM_NAME
    for mu in pb_config.TRUE_MU_RANGE:
        pb_config.TRUE_MU = mu
        logger.info('Generate testing data')
        test_generator.reset()
        # Test data uses the calibrated nuisance values, not the true ones.
        X_test, y_test, w_test = test_generator.generate(
            pb_config.CALIBRATED_R,
            pb_config.CALIBRATED_LAMBDA,
            pb_config.TRUE_MU,
            n_samples=pb_config.N_TESTING_SAMPLES)

        p_test = np.array(
            (pb_config.CALIBRATED_R, pb_config.CALIBRATED_LAMBDA))

        pred, sigma = model.predict(X_test, w_test, p_test)
        result_row[name] = pred
        result_row[name + _ERROR] = sigma
        result_row[name + _TRUTH] = pb_config.TRUE_MU
        logger.info('{} =vs= {} +/- {}'.format(pb_config.TRUE_MU, pred, sigma))
        # Copy because result_row is reused and overwritten every iteration.
        result_table.append(result_row.copy())
    result_table = pd.DataFrame(result_table)

    logger.info('Plot params')
    plot_params(name, result_table,
                title=model.full_name, directory=model.results_path)

    logger.info('DONE')
    return result_table
def _plot_nll_scan(grid, nll_values, true_value, short_name, axis_label,
                   file_name):
    """Plot a 1D NLL scan with the true value and the scan minimum marked.

    Args:
        grid: Scanned parameter values (indexable by ``np.argmin`` output).
        nll_values: NLL evaluated at each grid point.
        true_value: True parameter value, drawn as an orange vertical line.
        short_name: Parameter name used in the legend labels.
        axis_label: Text used for the x-axis label and inside the title.
        file_name: Output file name, saved inside ``DIRECTORY``.
    """
    best_value = grid[np.argmin(nll_values)]
    plt.plot(grid, nll_values, label=f"nll({short_name})")
    plt.axvline(true_value, c="orange", label=f"true {short_name}")
    plt.axvline(best_value, c="red", label="min nll")
    plt.xlabel(axis_label)
    plt.ylabel("NLL")
    plt.title(f"NLL according to {axis_label} param")
    plt.legend()
    plt.savefig(os.path.join(DIRECTORY, file_name))
    plt.clf()


def explore():
    """Sample events at the true parameters and plot exploratory figures.

    Saves into ``DIRECTORY``:

    * ``pairgrid.png``: pair plot of the three features, colored by label,
    * ``NLL_r.png``, ``NLL_lambda.png``, ``NLL_mu.png``: NLL scanned along
      one parameter at a time, the two others held at their true values.
    """
    print("Hello master !")
    set_plot_config()
    config = S3D2Config()
    N_SAMPLES = 10_000
    R_MIN = -0.3
    R_MAX = 0.3
    LAM_MIN = 2
    LAM_MAX = 4
    MU_MIN = 0.0
    MU_MAX = 1.0

    generator = S3D2(SEED)
    X, label = generator.sample_event(
        config.TRUE.r, config.TRUE.lam, config.TRUE.mu, size=N_SAMPLES)
    n_sig = np.sum(label==1)
    n_bkg = np.sum(label==0)
    print(f"nb of signal      = {n_sig}")
    print(f"nb of backgrounds = {n_bkg}")

    # Pair plot of the features
    df = pd.DataFrame(X, columns=["x1","x2","x3"])
    df['label'] = label
    g = sns.PairGrid(df, vars=["x1","x2","x3"], hue='label')
    g = g.map_upper(sns.scatterplot)
    g = g.map_diag(sns.kdeplot)
    g = g.map_lower(sns.kdeplot, n_levels=6)
    g = g.add_legend()
    g.savefig(os.path.join(DIRECTORY, 'pairgrid.png'))
    plt.clf()

    nll = generator.nll(X, config.TRUE.r, config.TRUE.lam, config.TRUE.mu)
    print(f"NLL = {nll}")

    # One-dimensional NLL scans. Raw strings are required for the LaTeX
    # labels: "\l" and "\m" are invalid escape sequences otherwise.
    R_RANGE = np.linspace(R_MIN, R_MAX, 30)
    nll = [generator.nll(X, r, config.TRUE.lam, config.TRUE.mu)
           for r in R_RANGE]
    _plot_nll_scan(R_RANGE, nll, config.TRUE.r, "r", "r", 'NLL_r.png')

    LAM_RANGE = np.linspace(LAM_MIN, LAM_MAX, 30)
    nll = [generator.nll(X, config.TRUE.r, lam, config.TRUE.mu)
           for lam in LAM_RANGE]
    _plot_nll_scan(LAM_RANGE, nll, config.TRUE.lam, "lam", r"$\lambda$",
                   'NLL_lambda.png')

    MU_RANGE = np.linspace(MU_MIN, MU_MAX, 30)
    nll = [generator.nll(X, config.TRUE.r, config.TRUE.lam, mu)
           for mu in MU_RANGE]
    _plot_nll_scan(MU_RANGE, nll, config.TRUE.mu, "mu", r"$\mu$",
                   'NLL_mu.png')
def main():
    """Compute a grid posterior over (r, lam, mu) and report on mu.

    Samples 8 000 events at the true parameters, evaluates the summed log
    density on a 101 x 102 x 103 grid, adds uniform log-priors and passes
    the result through ``softmax`` (presumably normalizing over the whole
    grid - confirm against the imported ``softmax``). It then prints
    marginals, expectation / variance of mu and several statistical /
    systematic uncertainty estimates, and saves the three marginal plots
    into ``DIRECTORY``.
    """
    print("Hello world !")
    set_plot_config()
    config = S3D2Config()
    DATA_N_SAMPLES = 8_000
    R_MIN = -0.3
    R_MAX = 0.3
    LAM_MIN = 2
    LAM_MAX = 4
    MU_MIN = 0.1
    MU_MAX = 0.3

    # Distinct sizes per axis so that the shape asserts below can catch a
    # sum along the wrong axis.
    R_N_SAMPLES = 101
    LAM_N_SAMPLES = 102
    MU_N_SAMPLES = 103

    prior_r = stats.uniform(loc=R_MIN, scale=R_MAX-R_MIN)
    prior_lam = stats.uniform(loc=LAM_MIN, scale=LAM_MAX-LAM_MIN)
    prior_mu = stats.uniform(loc=MU_MIN, scale=MU_MAX-MU_MIN)

    r_grid = np.linspace(R_MIN, R_MAX, R_N_SAMPLES)
    lam_grid = np.linspace(LAM_MIN, LAM_MAX, LAM_N_SAMPLES)
    mu_grid = np.linspace(MU_MIN, MU_MAX, MU_N_SAMPLES)

    data_generator = S3D2(SEED)
    data, label = data_generator.sample_event(
        config.TRUE.r, config.TRUE.lam, config.TRUE.mu, size=DATA_N_SAMPLES)
    n_sig = np.sum(label==1)
    n_bkg = np.sum(label==0)
    print(f"nb of signal      = {n_sig}")
    print(f"nb of backgrounds = {n_bkg}")

    shape = (R_N_SAMPLES, LAM_N_SAMPLES, MU_N_SAMPLES)
    n_elements = np.prod(shape)
    print(f"3D grid has {n_elements} elements")

    # The prior factorizes per axis, so it is evaluated once per grid axis
    # and broadcast to the full grid instead of being recomputed per cell.
    log_prior_proba = (prior_r.logpdf(r_grid)[:, None, None]
                       + prior_lam.logpdf(lam_grid)[None, :, None]
                       + prior_mu.logpdf(mu_grid)[None, None, :])

    log_likelihood = np.zeros(shape)
    grid_indices = itertools.product(
        range(R_N_SAMPLES), range(LAM_N_SAMPLES), range(MU_N_SAMPLES))
    for i, j, k in tqdm(grid_indices, total=n_elements):
        log_likelihood[i, j, k] = data_generator.log_proba_density(
            data, r_grid[i], lam_grid[j], mu_grid[k]).sum()

    log_joint = log_likelihood + log_prior_proba
    element_min = log_joint.min()
    print("min element = ", element_min)
    posterior_r_lam_mu = softmax(log_joint)
    n_zeros = (posterior_r_lam_mu == 0).sum()
    n_elements = np.prod(posterior_r_lam_mu.shape)
    print()
    print(f"number of zeros in posterior = {n_zeros}/{n_elements} ({n_zeros/n_elements*100:2.3f} %)")

    marginal_r = posterior_r_lam_mu.sum(axis=2).sum(axis=1)
    marginal_lam = posterior_r_lam_mu.sum(axis=2).sum(axis=0)
    marginal_mu = posterior_r_lam_mu.sum(axis=1).sum(axis=0)
    marginal_r_lam = posterior_r_lam_mu.sum(axis=2)
    assert marginal_r.shape == r_grid.shape, "sum along the wrong axis for marginal r"
    assert marginal_lam.shape == lam_grid.shape, "sum along the wrong axis for marginal lam"
    assert marginal_mu.shape == mu_grid.shape, "sum along the wrong axis for marginal mu"
    assert marginal_r_lam.shape == (R_N_SAMPLES, LAM_N_SAMPLES), "sum along the wrong axis for marginal (r, lam)"

    # marginal_r_lam must stay last: the n_elements it leaves behind is
    # reused by the normalization print below.
    marginals = (
        ("marginal r", marginal_r),
        ("marginal lam", marginal_lam),
        ("marginal mu", marginal_mu),
        ("marginal r_lam", marginal_r_lam),
    )
    for name, marginal in marginals:
        n_zeros = (marginal == 0).sum()
        n_elements = np.prod(marginal.shape)
        print(f"number of zeros in {name} = {n_zeros}/{n_elements} ({n_zeros/n_elements*100:2.3f} %)")

    # p(mu | r, lam, x): cells with zero joint posterior are left at 0 so
    # that no division by a zero marginal happens.
    posterior_mu = np.divide(
        posterior_r_lam_mu,
        marginal_r_lam.reshape(R_N_SAMPLES, LAM_N_SAMPLES, 1),
        out=np.zeros_like(posterior_r_lam_mu),
        where=(posterior_r_lam_mu!=0))

    print("probability densities should sum to one")
    # TODO : posterior_mu sum to SOME_N_SAMPLES. is it ok ?
    # TODO : with new division policy posterior_mu/ALPHA_N sums to 1-zero_ration in marginal_y
    #        ... It does not look good
    print(np.sum(posterior_mu)/n_elements, np.sum(posterior_r_lam_mu), np.sum(marginal_r), np.sum(marginal_lam))
    print(np.sum(marginal_r_lam))

    print()
    print("True mu value =", config.TRUE.mu)
    sig_ratio = n_sig/DATA_N_SAMPLES
    print("Sig ratio =", sig_ratio)
    expect_mu = expectancy(mu_grid, marginal_mu)
    print("E[mu|x] =", expect_mu)
    full_var = variance(mu_grid, marginal_mu)
    print("Var[mu|x] =", full_var)
    std_mu = np.sqrt(full_var)
    print("sqrt(Var[mu|x]) =", std_mu)
    print("argmax_mu p(mu|x) =", mu_grid[np.argmax(marginal_mu)])

    i_max, j_max, k_max = np.unravel_index(
        np.argmax(log_likelihood), log_likelihood.shape)
    assert np.max(log_likelihood) == log_likelihood[i_max, j_max, k_max], "max and argmax should point to the same value"
    print("argmax_r_lam_mu logp(x|r, lam, mu) =", r_grid[i_max], lam_grid[j_max], mu_grid[k_max])

    stat_err = stat_uncertainty(mu_grid, posterior_mu, marginal_r_lam)
    print("stat_uncertainty=", stat_err)
    stat_err = stat_uncertainty2(mu_grid, posterior_mu, marginal_r_lam)
    print("stat_uncertainty=", stat_err)
    stat_err = stat_uncertainty3(mu_grid, posterior_mu, marginal_r_lam)
    print("stat_uncertainty=", stat_err)
    # Uses the last stat_err computed above (stat_uncertainty3).
    print("syst_uncertainty=", full_var - stat_err)
    syst_err = syst_uncertainty(mu_grid, posterior_mu, marginal_r_lam, marginal_mu)
    print("syst_uncertainty=", syst_err)
    syst_err = syst_uncertainty2(mu_grid, posterior_mu, marginal_r_lam)
    print("syst_uncertainty=", syst_err)
    syst_err = syst_uncertainty3(mu_grid, posterior_mu, marginal_r_lam)
    print("syst_uncertainty=", syst_err)

    print()
    print("check marginals")
    print("mu ", marginal_mu.min(), marginal_mu.max())
    print("lam ", marginal_lam.min(), marginal_lam.max())
    print("r ", marginal_r.min(), marginal_r.max())
    print("check posterior")
    print("p(y|x) ", posterior_mu.min(), posterior_mu.max())
    print("p(y|x,a)", posterior_r_lam_mu.min(), posterior_r_lam_mu.max())

    # Marginal posterior of mu
    plt.axvline(config.TRUE.mu, c="orange", label="true mu")
    plt.axvline(config.TRUE.mu-std_mu, c="orange", label="true mu - std(mu)")
    plt.axvline(config.TRUE.mu+std_mu, c="orange", label="true mu + std(mu)")
    plt.axvline(sig_ratio, c="red", label="signal ratio")
    plt.axvline(expect_mu, c="green", label="E[mu|x]")
    plt.plot(mu_grid, marginal_mu, label="posterior")
    plt.xlabel("mu")
    plt.ylabel("proba density")
    plt.title("posterior marginal proba of mu vs mu values")
    plt.legend()
    plt.savefig(os.path.join(DIRECTORY, 'marginal_mu.png'))
    plt.clf()

    # Marginal posterior of lam
    plt.plot(lam_grid, marginal_lam, label="posterior")
    plt.axvline(config.TRUE.lam, c="orange", label="true lambda")
    plt.xlabel("lambda")
    plt.ylabel("proba density")
    plt.title("posterior marginal proba of lam vs lam values")
    plt.legend()
    plt.savefig(os.path.join(DIRECTORY, 'marginal_lam.png'))
    plt.clf()

    # Marginal posterior of r
    plt.plot(r_grid, marginal_r, label="posterior")
    plt.axvline(config.TRUE.r, c="orange", label="true r")
    plt.xlabel("r")
    plt.ylabel("proba density")
    plt.title("posterior marginal proba of r vs r values")
    plt.legend()
    plt.savefig(os.path.join(DIRECTORY, 'marginal_r.png'))
    plt.clf()