def run_conditional_estimation(args, i_cv):
    """Run one cross-validation iteration of the conditional-estimation study.

    Trains (or loads) a classifier, evaluates it on a validation sample, then
    runs the conditional-estimation loop over every test configuration and
    writes the concatenated results to CSV in the model's results directory.
    Returns the concatenated DataFrame of conditional estimates.
    """
    log = logging.getLogger()
    print_line()
    log.info('Running iter n°{}'.format(i_cv))
    print_line()

    row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    log.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator = Generator(seed)
    valid_generator = Generator(seed + 1)
    test_generator = Generator(seed + 2)

    # SET MODEL
    log.info('Set up classifier')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(log)

    # TRAINING / LOADING
    train_or_load_classifier(model, train_generator, config.CALIBRATED,
                             config.N_TRAINING_SAMPLES, retrain=args.retrain)

    # CHECK TRAINING
    log.info('Generate validation data')
    X_valid, y_valid, w_valid = valid_generator.generate(
        *config.CALIBRATED, n_samples=config.N_VALIDATION_SAMPLES)
    row.update(evaluate_classifier(model, X_valid, y_valid, w_valid,
                                   prefix='valid'))

    # MEASUREMENT
    evaluate_summary_computer(model, X_valid, y_valid, w_valid,
                              n_bins=N_BINS, prefix='valid_', suffix='')
    per_config_frames = []
    for i, test_config in enumerate(config.iter_test_config()):
        per_config_frames.append(
            run_conditional_estimation_iter(model, row, i, test_config,
                                            valid_generator, test_generator,
                                            n_bins=N_BINS))
    conditional_estimate = pd.concat(per_config_frames)
    conditional_estimate['i_cv'] = i_cv
    fname = os.path.join(model.results_path, "conditional_estimations.csv")
    conditional_estimate.to_csv(fname)
    log.info('DONE')
    return conditional_estimate
def run(args, i_cv):
    """Scan the classifier decision threshold and record selection counts.

    For each test configuration and each of 500 thresholds in [0, 1]:
      * on a validation sample, count selected background (beta) and
        signal (gamma) events;
      * on a test sample, count selected events (n), background (b) and
        signal (s), plus the significance proxies s/sqrt(n) and s/sqrt(b).

    Returns a pandas DataFrame with one row per (config, threshold).
    """
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()
    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator = Generator(seed)
    valid_generator = Generator(seed + 1)
    test_generator = Generator(seed + 2)
    # SET MODEL
    logger.info('Set up classifier')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)
    # TRAINING / LOADING
    train_or_load_classifier(model, train_generator, config.CALIBRATED,
                             config.N_TRAINING_SAMPLES, retrain=args.retrain)
    # MEASUREMENT
    # Hoisted out of the loops: the threshold grid is the same for every
    # test configuration.
    thresholds = np.linspace(0, 1, 500)
    results = []
    for test_config in config.iter_test_config():
        logger.info(
            f"Running test set : {test_config.TRUE}, {test_config.N_TESTING_SAMPLES} samples"
        )
        for threshold in thresholds:
            result_row = {'i_cv': i_cv, 'threshold': threshold}
            result_row.update(test_config.TRUE.to_dict(prefix='true_'))
            result_row['n_test_samples'] = test_config.N_TESTING_SAMPLES
            # NOTE(review): a fresh sample is drawn for every threshold; if
            # the generators return the same sample per call this is 500x
            # redundant work — confirm statefulness before hoisting.
            X, y, w = valid_generator.generate(
                *config.TRUE, n_samples=config.N_VALIDATION_SAMPLES)
            decision = model.predict_proba(X)[:, 1]
            selected = decision > threshold
            result_row['beta'] = np.sum(y[selected] == 0)
            result_row['gamma'] = np.sum(y[selected] == 1)
            X, y, w = test_generator.generate(
                *config.TRUE, n_samples=config.N_VALIDATION_SAMPLES)
            decision = model.predict_proba(X)[:, 1]
            selected = decision > threshold
            n_selected = np.sum(selected)
            n_selected_bkg = np.sum(y[selected] == 0)
            n_selected_sig = np.sum(y[selected] == 1)
            result_row['n'] = n_selected
            result_row['b'] = n_selected_bkg
            result_row['s'] = n_selected_sig
            result_row['s_sqrt_n'] = n_selected_sig / np.sqrt(n_selected)
            # BUG FIX: previously divided by sqrt(n_selected) (copy-paste of
            # the line above); s/sqrt(b) must use the background count.
            result_row['s_sqrt_b'] = n_selected_sig / np.sqrt(n_selected_bkg)
            results.append(result_row.copy())
    results = pd.DataFrame(results)
    print(results)
    return results
def run_estimation(args, i_cv):
    """Run one cross-validation iteration of the inferno estimation study.

    Trains (or loads) an inferno model on a torch generator, evaluates it on
    validation data, then runs the estimation loop over every test
    configuration, saving the estimation table and per-parameter plots in the
    model's results directory.  Returns the estimation DataFrame.
    """
    log = logging.getLogger()
    print_line()
    log.info('Running iter n°{}'.format(i_cv))
    print_line()

    row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    log.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator = GeneratorTorch(seed, cuda=args.cuda)
    valid_generator = Generator(seed + 1)
    test_generator = Generator(seed + 2)

    # SET MODEL
    log.info('Set up inferno model')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(log)

    # TRAINING / LOADING
    train_or_load_inferno(model, train_generator, retrain=args.retrain)

    # CHECK TRAINING
    log.info('Generate validation data')
    X_valid, y_valid, w_valid = valid_generator.generate(
        *config.CALIBRATED, n_samples=config.N_VALIDATION_SAMPLES)
    row.update(evaluate_neural_net(model, prefix='valid'))
    evaluate_inferno(model, prefix='valid')

    # MEASUREMENT
    calib_r = load_calib_r(DATA_NAME, BENCHMARK_NAME)
    calib_lam = load_calib_lam(DATA_NAME, BENCHMARK_NAME)
    evaluate_summary_computer(model, X_valid, y_valid, w_valid,
                              n_bins=N_BINS, prefix='valid_', suffix='')
    rows = []
    for i, test_config in enumerate(config.iter_test_config()):
        rows.append(run_estimation_iter(model, row, i, test_config,
                                        valid_generator, test_generator,
                                        calib_r, calib_lam, n_bins=N_BINS))
    result_table = pd.DataFrame(rows)
    result_table.to_csv(os.path.join(model.results_path, 'estimations.csv'))

    log.info('Plot params')
    for name in config.PARAM_NAMES:
        plot_params(name, result_table, title=model.full_name,
                    directory=model.results_path)
    log.info('DONE')
    return result_table
def run_iter(model, result_row, i_iter, config, valid_generator,
             test_generator, calib_rescale, n_bins=10):
    """Run one test-configuration iteration of the estimation loop.

    Generates a test sample, calibrates the `rescale` nuisance with
    `calib_rescale`, measures the stat/syst variance without nuisance
    parameters, then minimizes the NLL with minuit.

    NOTE: `config.CALIBRATED` and `config.CALIBRATED_ERROR` are REPLACED in
    place with calibrated `Parameter` objects — the caller's `config` object
    is mutated.  `result_row` is also mutated (keys added) before a copy is
    returned.

    Returns a tuple `(result_row_copy, conditional_estimate_dataframe)`.
    """
    logger = logging.getLogger()
    logger.info('-' * 45)
    logger.info(f'iter : {i_iter}')
    flush(logger)
    # Each iteration gets its own sub-directory for plots and CSV output.
    iter_directory = os.path.join(model.results_path, f'iter_{i_iter}')
    os.makedirs(iter_directory, exist_ok=True)
    result_row['i'] = i_iter
    result_row['n_test_samples'] = config.N_TESTING_SAMPLES
    # Suffix used to tag plot/file names with the true parameter values.
    suffix = f'-mu={config.TRUE.mu:1.2f}_rescale={config.TRUE.rescale}'
    logger.info('Generate testing data')
    test_generator.reset()
    X_test, y_test, w_test = test_generator.generate(
        *config.TRUE, n_samples=config.N_TESTING_SAMPLES)
    # PLOT SUMMARIES
    evaluate_summary_computer(model, X_test, y_test, w_test, n_bins=n_bins,
                              prefix='', suffix=suffix,
                              directory=iter_directory)
    # CALIBRATION: estimate the rescale nuisance from the test sample and
    # overwrite the calibrated parameters with the measured value/error.
    rescale_mean, rescale_sigma = calib_rescale.predict(X_test, w_test)
    logger.info('rescale = {} =vs= {} +/- {}'.format(
        config.TRUE.rescale, rescale_mean, rescale_sigma))
    config.CALIBRATED = Parameter(rescale_mean,
                                  config.CALIBRATED.interest_parameters)
    config.CALIBRATED_ERROR = Parameter(
        rescale_sigma, config.CALIBRATED_ERROR.interest_parameters)
    # Record the calibrated values and their errors in the result row.
    for name, value in config.CALIBRATED.items():
        result_row[name + "_calib"] = value
    for name, value in config.CALIBRATED_ERROR.items():
        result_row[name + "_calib_error"] = value
    logger.info('Set up NLL computer')
    compute_summaries = ClassifierSummaryComputer(model, n_bins=n_bins)
    compute_nll = NLLComputer(compute_summaries, valid_generator, X_test,
                              w_test, config=config)
    # NLL PLOTS
    plot_nll_around_min(compute_nll, config.TRUE, iter_directory, suffix)
    # MEASURE STAT/SYST VARIANCE (nuisance parameters fixed)
    logger.info('MEASURE STAT/SYST VARIANCE')
    conditional_results = make_conditional_estimation(compute_nll, config)
    fname = os.path.join(iter_directory, "no_nuisance.csv")
    conditional_estimate = pd.DataFrame(conditional_results)
    conditional_estimate['i'] = i_iter
    conditional_estimate.to_csv(fname)
    # MINIMIZE NLL — uses the just-calibrated parameters as starting point.
    logger.info('Prepare minuit minimizer')
    minimizer = get_minimizer(compute_nll, config.CALIBRATED,
                              config.CALIBRATED_ERROR)
    result_row.update(evaluate_minuit(minimizer, config.TRUE))
    # Copy so later mutations of result_row don't alias this iteration's row.
    return result_row.copy(), conditional_estimate
def run(args, i_cv):
    """Run one cross-validation iteration of the rescale-calibrated study.

    Trains (or loads) a classifier, validates it, then runs `run_iter` for
    every test configuration.  Saves the estimation table, per-parameter
    plots and the concatenated conditional estimations to the model's
    results directory.  Returns `(result_table, conditional_estimate)`.
    """
    log = logging.getLogger()
    print_line()
    log.info('Running iter n°{}'.format(i_cv))
    print_line()

    row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    log.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator = Generator(seed)
    valid_generator = Generator(seed + 1)
    test_generator = Generator(seed + 2)

    # SET MODEL
    log.info('Set up classifier')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(log)

    # TRAINING / LOADING
    train_or_load_classifier(model, train_generator, config.CALIBRATED,
                             config.N_TRAINING_SAMPLES, retrain=args.retrain)

    # CHECK TRAINING
    log.info('Generate validation data')
    X_valid, y_valid, w_valid = valid_generator.generate(
        *config.CALIBRATED, n_samples=config.N_VALIDATION_SAMPLES)
    row.update(evaluate_classifier(model, X_valid, y_valid, w_valid,
                                   prefix='valid'))

    # MEASUREMENT
    calib_rescale = load_calib_rescale()
    N_BINS = 10
    evaluate_summary_computer(model, X_valid, y_valid, w_valid,
                              n_bins=N_BINS, prefix='valid_', suffix='')
    # Collect the per-iteration rows and conditional-estimate frames in
    # parallel lists instead of unzipping afterwards.
    estimation_rows = []
    conditional_frames = []
    for i, test_config in enumerate(config.iter_test_config()):
        iter_row, iter_conditional = run_iter(
            model, row, i, test_config, valid_generator, test_generator,
            calib_rescale, n_bins=N_BINS)
        estimation_rows.append(iter_row)
        conditional_frames.append(iter_conditional)

    result_table = pd.DataFrame(estimation_rows)
    result_table.to_csv(os.path.join(model.results_path, 'estimations.csv'))

    log.info('Plot params')
    for name in config.PARAM_NAMES:
        plot_params(name, result_table, title=model.full_name,
                    directory=model.results_path)

    conditional_estimate = pd.concat(conditional_frames)
    conditional_estimate['i_cv'] = i_cv
    fname = os.path.join(model.results_path, "conditional_estimations.csv")
    conditional_estimate.to_csv(fname)
    log.info('DONE')
    return result_table, conditional_estimate
def run(args, i_cv):
    """Run one cross-validation iteration of the S3D2 inferno mu-scan.

    Trains (or loads) an inferno model, then for each true mu in
    `pb_config.TRUE_MU_RANGE` generates a test sample, plots summaries and
    the NLL around its minimum, and fits (r, lambda, mu) with minuit.
    Returns a DataFrame with one row per scanned mu value.
    """
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()
    result_row = {'i_cv': i_cv}
    result_table = []
    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    pb_config = Config()
    # NOTE(review): `config` here is presumably the imported config module
    # (distinct from the local `pb_config` instance) — verify the import.
    seed = config.SEED + i_cv * 5
    train_generator = Synthetic3DGeneratorTorch(seed)
    valid_generator = S3D2(seed + 1)
    test_generator = S3D2(seed + 2)
    # SET MODEL
    logger.info('Set up inferno')
    model = build_model(args, i_cv)
    flush(logger)
    # TRAINING / LOADING
    train_or_load_inferno(model, train_generator, retrain=args.retrain)
    # CHECK TRAINING
    result_row.update(evaluate_neural_net(model))
    logger.info('Generate validation data')
    X_valid, y_valid, w_valid = valid_generator.generate(
        pb_config.CALIBRATED_R,
        pb_config.CALIBRATED_LAMBDA,
        pb_config.CALIBRATED_MU,
        n_samples=pb_config.N_VALIDATION_SAMPLES)
    # MEASUREMENT
    N_BINS = args.n_bins
    compute_summaries = model.compute_summaries
    # Scan the true signal strength mu; r and lambda stay at their true
    # values from the config.
    for mu in pb_config.TRUE_MU_RANGE:
        true_params = Parameter(pb_config.TRUE.r, pb_config.TRUE.lam, mu)
        # Suffix tags plot/file names with the scanned true parameters.
        suffix = f'-mu={true_params.mu:1.2f}_r={true_params.r}_lambda={true_params.lam}'
        logger.info('Generate testing data')
        X_test, y_test, w_test = test_generator.generate(
            *true_params, n_samples=pb_config.N_TESTING_SAMPLES)
        # PLOT SUMMARIES
        evaluate_summary_computer(model, X_valid, y_valid, w_valid,
                                  X_test, w_test, n_bins=N_BINS,
                                  prefix='', suffix=suffix)
        logger.info('Set up NLL computer')
        compute_nll = S3D2NLL(compute_summaries, valid_generator,
                              X_test, w_test)
        # NLL PLOTS
        plot_nll_around_min(compute_nll, true_params, model.path, suffix)
        # MINIMIZE NLL — starting from the calibrated parameter values.
        logger.info('Prepare minuit minimizer')
        minimizer = get_minimizer(compute_nll, pb_config.CALIBRATED,
                                  pb_config.CALIBRATED_ERROR)
        fmin, params = estimate(minimizer)
        # result_row is reused across iterations; evaluate_minuit overwrites
        # its keys each pass and a copy is appended per mu value.
        result_row.update(evaluate_minuit(minimizer, fmin, params,
                                          true_params))
        result_table.append(result_row.copy())
    result_table = pd.DataFrame(result_table)
    logger.info('Plot params')
    param_names = pb_config.PARAM_NAMES
    for name in param_names:
        plot_params(name, result_table, title=model.full_name,
                    directory=model.path)
    logger.info('DONE')
    return result_table
def run_estimation(args, i_cv):
    """Run one cross-validation iteration of the multi-calibration study.

    Builds torch-backed generators (wrapped for CPU use), trains or loads a
    neural net, loads the tes/jes/les calibrations, then runs the estimation
    loop over every test configuration.  Saves the estimation table and
    per-parameter plots, and returns the estimation DataFrame.
    """
    log = logging.getLogger()
    print_line()
    log.info('Running iter n°{}'.format(i_cv))
    print_line()

    row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    log.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator, valid_generator, test_generator = get_generators_torch(
        seed, cuda=args.cuda, GeneratorClass=GeneratorClass)
    # Move the generators to CPU; the training one is additionally wrapped
    # with the nuisance-parameter sampler.
    train_generator = TrainGenerator(param_generator,
                                     GeneratorCPU(train_generator))
    valid_generator = GeneratorCPU(valid_generator)
    test_generator = GeneratorCPU(test_generator)

    # SET MODEL
    log.info('Set up classifier')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(log)

    # TRAINING / LOADING
    train_or_load_neural_net(model, train_generator, retrain=args.retrain)

    # CHECK TRAINING
    log.info('Generate validation data')
    X_valid, y_valid, w_valid = valid_generator.generate(
        *config.CALIBRATED, n_samples=config.N_VALIDATION_SAMPLES,
        no_grad=True)
    row.update(evaluate_neural_net(model, prefix='valid'))
    evaluate_regressor(model, prefix='valid')

    # MEASUREMENT
    calibs = {
        'tes': load_calib_tes(DATA_NAME, BENCHMARK_NAME),
        'jes': load_calib_jes(DATA_NAME, BENCHMARK_NAME),
        'les': load_calib_les(DATA_NAME, BENCHMARK_NAME),
    }
    row['nfcn'] = NCALL
    rows = []
    for i, test_config in enumerate(config.iter_test_config()):
        rows.append(run_estimation_iter(model, row, i, test_config,
                                        valid_generator, test_generator,
                                        calibs))
    result_table = pd.DataFrame(rows)
    result_table.to_csv(os.path.join(model.results_path, 'estimations.csv'))

    log.info('Plot params')
    for name in config.PARAM_NAMES:
        plot_params(name, result_table, title=model.full_name,
                    directory=model.results_path)
    log.info('DONE')
    return result_table
def main():
    """End-to-end Synthetic3D pipeline: train, save, plot, and fit the NLL.

    Generates training/test data, fits the model, saves it, plots training
    diagnostics and the NLL around the minimum, then minimizes the NLL with
    minuit and prints the fitted (r, lam, mu) against the true values.
    """
    # BASIC SETUP
    logger = set_logger()
    args = parse_args()
    logger.info(args)
    flush(logger)
    # SET MODEL
    model = get_model(args)
    # LOAD/GENERATE DATA
    logger.info('Generating data ...')
    pb_config = Config()
    # NOTE(review): `config` below appears to be the imported config module
    # (SEED, SAVING_DIR), distinct from the local `pb_config` — confirm.
    generator = Synthetic3D(seed=config.SEED, n_expected_events=1050)
    generator.N_SIG = pb_config.N_SIG
    generator.N_BKG = pb_config.N_BKG
    D_train = generator.train_sample(pb_config.CALIBRATED_R,
                                     pb_config.CALIBRATED_LAMBDA,
                                     pb_config.CALIBRATED_MU,
                                     n_samples=pb_config.N_TRAINING_SAMPLES)
    D_test = generator.test_sample(pb_config.CALIBRATED_R,
                                   pb_config.CALIBRATED_LAMBDA,
                                   pb_config.CALIBRATED_MU)
    X_train, y_train, w_train = split_data_label_weights(D_train)
    X_test, y_test, w_test = split_data_label_weights(D_test)
    # TRAINING
    model.fit(X_train, y_train, w_train)
    # SAVE MODEL — `i` is a hard-coded run index used in the saved name.
    i = 99
    model_name = '{}-{}'.format(model.get_name(), i)
    model_path = os.path.join(config.SAVING_DIR, model_name)
    logger.info("Saving in {}".format(model_path))
    os.makedirs(model_path, exist_ok=True)
    model.save(model_path)
    # CHECK TRAINING
    plot_test_distrib(model, model_name, model_path, X_test, y_test)
    plot_summaries(model, model_name, model_path, X_test, y_test, w_test)
    # NLL
    summary_computer = lambda X, w: compute_summaries(model, X, w, n_bins=10)
    D_final = generator.final_sample(pb_config.TRUE_R,
                                     pb_config.TRUE_LAMBDA,
                                     pb_config.TRUE_MU)
    X_final, y_final, w_final = split_data_label_weights(D_final)
    compute_nll = Synthetic3DNLL(summary_computer, generator,
                                 X_final, w_final)
    # NLL PLOTS
    plot_R_around_min(compute_nll, model_path)
    plot_LAMBDA_around_min(compute_nll, model_path)
    plot_MU_around_min(compute_nll, model_path)
    # MINIMIZE NLL — uses the iminuit <2.0 keyword API (error_*/limit_*
    # constructor arguments and tuple-returning migrad()).
    minimizer = iminuit.Minuit(
        compute_nll,
        errordef=ERRORDEF_NLL,
        r=pb_config.CALIBRATED_R,
        error_r=pb_config.CALIBRATED_R_ERROR,
        #limit_r=(0, None),
        lam=pb_config.CALIBRATED_LAMBDA,
        error_lam=pb_config.CALIBRATED_LAMBDA_ERROR,
        limit_lam=(0, None),
        mu=pb_config.CALIBRATED_MU,
        error_mu=pb_config.CALIBRATED_MU_ERROR,
        limit_mu=(0, 1),
    )
    minimizer.print_param()
    # migrad() returns (fmin, params) in iminuit 1.x; hesse() re-computes
    # the parameter errors and returns the updated parameter list.
    fmin, param = minimizer.migrad()
    param = minimizer.hesse()
    for name, (value, err) in {p['name']: (p['value'], p['error'])
                               for p in param}.items():
        print('{name:3} = {value} ({err})'.format(**locals()))
    print('true_r', pb_config.TRUE_R)
    print('true_lam', pb_config.TRUE_LAMBDA)
    print('true_mu', pb_config.TRUE_MU)
    # param[2] is mu; scale by the 1050 expected events to get event counts.
    print(param[2]['value'] * 1050, 'signal events estimated')
    print(param[2]['error'] * 1050, 'error on # estimated sig event')
    print('Done.')