def run(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator = GeneratorTorch(seed, cuda=args.cuda)
    valid_generator = Generator(seed + 1)
    test_generator = Generator(seed + 2)

    # SET MODEL
    logger.info('Set up classifier')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_neural_net(model, train_generator, retrain=args.retrain)

    # MEASUREMENT
    results = []
    for test_config in config.iter_test_config():
        logger.info(f"Running test set : {test_config.TRUE}, {test_config.N_TESTING_SAMPLES} samples")
        for threshold in np.linspace(0, 1, 500):
            result_row = {'i_cv': i_cv, 'threshold': threshold}
            result_row.update(test_config.TRUE.to_dict(prefix='true_'))
            result_row['n_test_samples'] = test_config.N_TESTING_SAMPLES

            # expected yields from the validation set at the test parameters
            X, y, w = valid_generator.generate(*test_config.TRUE, n_samples=config.N_VALIDATION_SAMPLES)
            proba = model.predict_proba(X)
            decision = proba[:, 1]
            selected = decision > threshold
            beta = np.sum(y[selected] == 0)
            gamma = np.sum(y[selected] == 1)
            result_row['beta'] = beta
            result_row['gamma'] = gamma

            # observed counts on the test set at the same parameters
            X, y, w = test_generator.generate(*test_config.TRUE, n_samples=test_config.N_TESTING_SAMPLES)
            proba = model.predict_proba(X)
            decision = proba[:, 1]
            selected = decision > threshold
            n_selected = np.sum(selected)
            n_selected_bkg = np.sum(y[selected] == 0)
            n_selected_sig = np.sum(y[selected] == 1)
            result_row['n'] = n_selected
            result_row['b'] = n_selected_bkg
            result_row['s'] = n_selected_sig
            # significance proxies: s/sqrt(n) and s/sqrt(b)
            result_row['s_sqrt_n'] = n_selected_sig / np.sqrt(n_selected)
            result_row['s_sqrt_b'] = n_selected_sig / np.sqrt(n_selected_bkg)
            results.append(result_row.copy())
    results = pd.DataFrame(results)
    print(results)
    return results
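# Illustrative follow-up (not part of the original script), operating on the
# DataFrame returned by run() above: select the working point that maximises
# the s/sqrt(b) significance proxy from the threshold scan.
def best_threshold(results):
    best = results.loc[results['s_sqrt_b'].idxmax()]
    return best['threshold'], best['s_sqrt_b']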
def __init__(self, X_test, w_test, i_cv, args, config=None, n_bins=10):
    self.X_test = X_test
    self.w_test = w_test
    self.args = args
    self.i_cv = i_cv
    self.config = Config() if config is None else config
    self.n_bins = n_bins
def main():
    logger = set_logger()
    directory = os.path.join(DIRECTORY, "nll_contour")
    os.makedirs(directory, exist_ok=True)
    args = parse_args()
    train_generator, valid_generator, test_generator = get_generators()
    config = Config()
    model = load_some_NN(cuda=args.cuda)
    compute_nll = get_nll_computer(model, config, valid_generator, test_generator)

    nll = compute_nll(*config.CALIBRATED)
    logger.info(f"calib nll = {nll}")
    nll = compute_nll(*config.TRUE)
    logger.info(f"true nll = {nll}")

    f = lambda xk: compute_nll(*xk)
    xk = np.array(list(config.TRUE))
    print(xk)
    EPSILON = 1e-8
    epsilon = np.array([EPSILON] * 2)
    grad = approx_fprime(xk, f, epsilon)
    print(grad, grad.dot(grad))

    logger.info("Running BFGS on the NLL")
    x_0 = np.array(list(config.CALIBRATED))
    print(fmin_bfgs(f, x_0))
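# Self-contained toy check (illustrative, not part of the original script) of the
# same recipe on a quadratic with known minimum at (1, 2): finite-difference
# gradient via scipy's approx_fprime, then BFGS minimisation.
def _toy_bfgs_check():
    import numpy as np
    from scipy.optimize import approx_fprime, fmin_bfgs
    toy_nll = lambda xk: (xk[0] - 1.0) ** 2 + (xk[1] - 2.0) ** 2
    x_start = np.array([0.0, 0.0])
    grad = approx_fprime(x_start, toy_nll, np.array([1e-8, 1e-8]))
    print(grad)                         # approx. [-2., -4.]
    print(fmin_bfgs(toy_nll, x_start))  # approx. [1., 2.]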
def _make_rescale_plot_clf(true_rescale, true_mu):
    config = Config()
    test_generator = Generator(seed=SEED)
    X_test, y_test, w_test = test_generator.generate(true_rescale, true_mu, n_samples=config.N_TESTING_SAMPLES)
    i_cv = 0
    model = load_some_NN(i_cv=i_cv, cuda=False)
    compute_summaries = model.summary_computer(n_bins=N_BINS)
    valid_generator = Generator(seed=SEED - 1)
    compute_nll = NLLComputer(compute_summaries, valid_generator, X_test, w_test, config=config)
    rescale_array = np.linspace(0.5, 3, 50)
    nll_array = [compute_nll(rescale, true_mu) for rescale in rescale_array]
    param_name = 'rescale'
    p = plt.plot(rescale_array, nll_array, label=f'NLL {param_name}={true_rescale}')
    plt.axvline(x=true_rescale, linestyle='--', color=p[0].get_color(), label='true value')
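# Possible caller (illustrative; the true rescale values below are hypothetical):
# overlay several NLL profiles produced by _make_rescale_plot_clf, then save.
def make_rescale_plots():
    for true_rescale in (0.8, 1.0, 1.2):
        _make_rescale_plot_clf(true_rescale, true_mu=1.0)
    plt.xlabel('rescale')
    plt.ylabel('NLL')
    plt.legend()
    plt.savefig(os.path.join(DIRECTORY, 'rescale_nll.png'))
    plt.clf()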
def main():
    # BASIC SETUP
    logger = set_logger()
    args = REG_parse_args(main_description="Training launcher for Gradient boosting on S3D2 benchmark")
    logger.info(args)
    flush(logger)

    # INFO
    model = build_model(args, -1)
    os.makedirs(model.results_directory, exist_ok=True)
    config = Config()
    config_table = evaluate_config(config)
    config_table.to_csv(os.path.join(model.results_directory, 'config_table.csv'))

    # RUN
    eval_table = get_eval_table(args, model.results_directory)

    # EVALUATION
    print_line()
    print_line()
    print(eval_table)
    print_line()
    print_line()
    eval_table.to_csv(os.path.join(model.results_directory, 'evaluation.csv'))
    gather_images(model.results_directory)
def main():
    # BASIC SETUP
    logger = set_logger()
    args = REG_parse_args(main_description="Training launcher for Gradient boosting on S3D2 benchmark")
    logger.info(args)
    flush(logger)

    # INFO
    model = build_model(args, -1)
    os.makedirs(model.results_directory, exist_ok=True)
    config = Config()
    config_table = evaluate_config(config)
    config_table.to_csv(os.path.join(model.results_directory, 'config_table.csv'))

    # RUN
    results = [run(args, i_cv) for i_cv in range(N_ITER)]
    results = pd.concat(results, ignore_index=True)
    results.to_csv(os.path.join(model.results_directory, 'estimations.csv'))

    # EVALUATION
    eval_table = evaluate_estimator(config.INTEREST_PARAM_NAME, results)
    print_line()
    print_line()
    print(eval_table)
    print_line()
    print_line()
    eval_table.to_csv(os.path.join(model.results_directory, 'evaluation.csv'))
    gather_images(model.results_directory)
def run(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()
    directory = os.path.join(DIRECTORY, f'cv_{i_cv}')
    os.makedirs(directory, exist_ok=True)
    config = Config()
    seed = SEED + i_cv * 5
    train_generator = Generator(seed)
    valid_generator = Generator(seed + 1)
    test_generator = Generator(seed + 2)

    N_BINS = 10
    X_train, y_train, w_train = train_generator.generate(*config.CALIBRATED, n_samples=config.N_TRAINING_SAMPLES)
    compute_summaries = HistogramSummaryComputer(n_bins=N_BINS).fit(X_train)

    result_table = [run_iter(compute_summaries, i_cv, i, test_config, valid_generator, test_generator, directory)
                    for i, test_config in enumerate(config.iter_test_config())]
    result_table = pd.DataFrame(result_table)
    result_table.to_csv(os.path.join(directory, 'results.csv'))

    logger.info('Plot params')
    param_names = config.PARAM_NAMES
    for name in param_names:
        plot_params(name, result_table, title='Likelihood fit', directory=directory)
    return result_table
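# Hedged sketch of the summary-computer interface used above (an assumption for
# illustration; the real HistogramSummaryComputer may bin differently): fit bin
# edges on training data, then reduce any weighted sample to per-bin counts.
class SimpleHistogramSummary:
    def __init__(self, n_bins=10):
        self.n_bins = n_bins

    def fit(self, X):
        # bin edges spanning the first feature's range on the training set
        self.edges = np.linspace(X[:, 0].min(), X[:, 0].max(), self.n_bins + 1)
        return self

    def __call__(self, X, w):
        counts, _ = np.histogram(X[:, 0], bins=self.edges, weights=w)
        return counts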
def main():
    logger = set_logger()
    root_directory = os.path.join(DIRECTORY, "nll_contour")
    os.makedirs(root_directory, exist_ok=True)
    args = parse_args()

    N_CV = 3
    # FIXME : remove lili and STEP to use all iterations!
    STEP = 1
    lili = list(Config().iter_test_config())[::STEP]
    N_ITER = len(lili)
    logger.info(f"{N_CV} cv and {N_ITER} iterations ({N_ITER * N_CV} loops)")
    data = []
    for i_cv in range(N_CV):
        model = load_some_NN(i_cv=i_cv, cuda=args.cuda)
        model.to_double()
        # model = load_some_GB(i_cv=i_cv)
        for i_iter, config in enumerate(lili):
            i_iter = i_iter * STEP
            values = run_cv_iter(args, i_cv, i_iter, config, model, root_directory)
            data.append(values)
    data = pd.DataFrame(data)
    fname = os.path.join(root_directory, "data.csv")
    data.to_csv(fname)
def main():
    # BASIC SETUP
    logger = set_logger()
    args = FF_parse_args(main_description="Training launcher for Feature Filter on GG benchmark")
    logger.info(args)
    flush(logger)

    # INFO
    model = build_model(args, -1)
    os.makedirs(model.results_directory, exist_ok=True)
    config = Config()
    config_table = evaluate_config(config)
    config_table.to_csv(os.path.join(model.results_directory, 'config_table.csv'))

    # RUN
    if not args.conditional_only:
        eval_table = get_eval_table(args, model.results_directory)
    if not args.estimate_only:
        eval_conditional = get_eval_conditional(args, model.results_directory)
    if not args.estimate_only and not args.conditional_only:
        eval_table = pd.concat([eval_table, eval_conditional], axis=1)

    # EVALUATION
    print_line()
    print_line()
    print(eval_table)
    print_line()
    print_line()
    eval_table.to_csv(os.path.join(model.results_directory, 'evaluation.csv'))
    gather_images(model.results_directory)
def run(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()
    result_row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator = Generator(seed)
    valid_generator = Generator(seed + 1)
    # test_generator = Generator(seed + 2)

    results = []
    for n_train_samples in N_TRAIN_RANGE:
        result_row['n_train_samples'] = n_train_samples

        # SET MODEL
        logger.info('Set up classifier')
        model = build_model(args, i_cv)
        os.makedirs(model.results_path, exist_ok=True)
        flush(logger)

        # TRAINING / LOADING
        X_train, y_train, w_train = train_generator.generate(*config.CALIBRATED, n_samples=n_train_samples)
        model.fit(X_train, y_train, w_train)

        # CHECK TRAINING
        logger.info('Generate validation data')
        X_valid, y_valid, w_valid = valid_generator.generate(*config.CALIBRATED, n_samples=config.N_VALIDATION_SAMPLES)

        some_eval = evaluate_classifier(model, X_valid, y_valid, w_valid, prefix='valid', suffix=f'-{n_train_samples}')
        result_row['valid_auc'] = some_eval[f'valid_auc-{n_train_samples}']
        result_row['valid_accuracy'] = some_eval[f'valid_accuracy-{n_train_samples}']

        N_BINS = 10
        evaluate_summary_computer(model, X_valid, y_valid, w_valid, n_bins=N_BINS, prefix='valid_', suffix=f'{n_train_samples}')
        results.append(result_row.copy())
    result_table = pd.DataFrame(results)
    return result_table
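# Illustrative follow-up (not part of the original script): plot the learning curve
# from the table returned by run(); the column names match the ones filled above.
def plot_learning_curve(result_table, directory):
    # average over cross-validation iterations for each training-set size
    curve = result_table.groupby('n_train_samples')['valid_auc'].mean()
    plt.plot(curve.index, curve.values, 'o-')
    plt.xlabel('n_train_samples')
    plt.ylabel('valid AUC')
    plt.title('Classifier learning curve')
    plt.savefig(os.path.join(directory, 'learning_curve.png'))
    plt.clf()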
def run(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()
    result_row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator = Generator(seed)
    valid_generator = Generator(seed + 1)
    test_generator = Generator(seed + 2)
    train_generator = TrainGenerator(param_generator, train_generator)

    # SET MODEL
    logger.info('Set up regressor')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_neural_net(model, train_generator, retrain=args.retrain)

    # CHECK TRAINING
    logger.info('Generate validation data')
    X_valid, y_valid, w_valid = valid_generator.generate(*config.CALIBRATED, n_samples=config.N_VALIDATION_SAMPLES)

    result_row.update(evaluate_neural_net(model, prefix='valid'))
    evaluate_regressor(model, prefix='valid')

    # MEASUREMENT
    result_row['nfcn'] = NCALL
    iter_results = [run_iter(model, result_row, i, test_config, valid_generator, test_generator)
                    for i, test_config in enumerate(config.iter_test_config())]
    result_table = [e0 for e0, e1 in iter_results]
    result_table = pd.DataFrame(result_table)
    result_table.to_csv(os.path.join(model.results_path, 'estimations.csv'))

    logger.info('Plot params')
    param_names = config.PARAM_NAMES
    for name in param_names:
        plot_params(name, result_table, title=model.full_name, directory=model.results_path)

    conditional_estimate = pd.concat([e1 for e0, e1 in iter_results])
    conditional_estimate['i_cv'] = i_cv
    fname = os.path.join(model.results_path, "conditional_estimations.csv")
    conditional_estimate.to_csv(fname)
    logger.info('DONE')
    return result_table, conditional_estimate
def generate(self, n_samples):
    if n_samples is not None:
        # Draw a fresh parameter set, generate a batch, and expose the interest
        # parameters as the regression target (nuisance parameters returned apart).
        params = self.param_generator()
        X, y, w = self.data_generator.generate(*params, n_samples)
        return X, params.interest_parameters, w, params.nuisance_parameters
    else:
        # Fallback: a fixed calibrated training set of default size.
        config = Config()
        X, y, w = self.data_generator.generate(*config.CALIBRATED, n_samples=config.N_TRAINING_SAMPLES)
        return X, y, w, 1
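# Usage sketch (illustrative): with an integer n_samples the wrapper draws fresh
# parameters and returns the interest parameters as the regression target plus the
# nuisance parameters; with n_samples=None it falls back to a fixed calibrated set.
#
#   train_generator = TrainGenerator(param_generator, Generator(SEED))
#   X, target, w, nuisance = train_generator.generate(10000)  # random parameters
#   X, y, w, _ = train_generator.generate(None)               # calibrated fallback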
def run(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()
    result_row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    # train_generator = Generator(seed)
    # valid_generator = Generator(seed + 1)
    test_generator = Generator(seed + 2)

    # SET MODEL
    # logger.info('Set up classifier')
    model = build_model(args, i_cv)
    # flush(logger)

    # TRAINING / LOADING
    # train_or_load_classifier(model, train_generator, config.CALIBRATED, config.N_TRAINING_SAMPLES, retrain=args.retrain)

    # CHECK TRAINING
    logger.info('Generate validation data')
    # X_valid, y_valid, w_valid = valid_generator.generate(*config.CALIBRATED, n_samples=config.N_VALIDATION_SAMPLES)
    # result_row.update(evaluate_classifier(model, X_valid, y_valid, w_valid, prefix='valid'))

    # MEASUREMENT
    N_BINS = 10
    # evaluate_summary_computer(model, X_valid, y_valid, w_valid, n_bins=N_BINS, prefix='valid_', suffix='')
    result_table = [run_iter(model, result_row, i, i_cv, args, test_config, test_generator, n_bins=N_BINS)
                    for i, test_config in enumerate(config.iter_test_config())]
    result_table = pd.DataFrame(result_table)
    result_table.to_csv(os.path.join(model.results_path, 'results.csv'))

    logger.info('Plot params')
    param_names = config.PARAM_NAMES
    for name in param_names:
        plot_params(name, result_table, title=model.full_name, directory=model.path)
    logger.info('DONE')
    return result_table
def main():
    # BASIC SETUP
    logger = set_logger()
    args = REG_parse_args(main_description="Training launcher for Regressor on S3D2 benchmark")
    logger.info(args)
    flush(logger)

    # Setup model
    logger.info("Setup model")
    model = build_model(args, 0)
    os.makedirs(model.results_directory, exist_ok=True)

    # Setup data
    logger.info("Setup data")
    config = Config()
    config_table = evaluate_config(config)
    config_table.to_csv(os.path.join(model.results_directory, 'config_table.csv'))
    seed = SEED + 99999
    train_generator = TrainGenerator(param_generator, Generator(seed))
    valid_generator = Generator(seed + 1)
    test_generator = Generator(seed + 2)

    i_cv = 0
    result_row = {'i_cv': i_cv}

    # TRAINING / LOADING
    train_or_load_neural_net(model, train_generator, retrain=args.retrain)

    # CHECK TRAINING
    result_row.update(evaluate_neural_net(model, prefix='valid'))
    evaluate_regressor(model, prefix='valid')
    print_line()

    result_table = [run_iter(model, result_row, i, test_config, valid_generator, test_generator)
                    for i, test_config in enumerate(config.iter_test_config())]
    result_table = pd.DataFrame(result_table)
    result_table.to_csv(os.path.join(model.results_directory, 'results.csv'))

    logger.info('Plot params')
    param_names = [CALIB_PARAM_NAME]
    for name in param_names:
        plot_params(name, result_table, title=model.full_name, directory=model.results_directory)
    logger.info('DONE')
def explore_distribs():
    config = Config()
    generator = Generator()
    data, label = generator.sample_event(*config.TRUE, size=config.N_TESTING_SAMPLES)
    prior_rescale = stats.norm(loc=config.CALIBRATED.rescale, scale=config.CALIBRATED_ERROR.rescale)
    prior_mu = stats.uniform(loc=0, scale=1)
    plot_data_distrib(generator, config)
    plot_prior(prior_rescale, "rescale")
    plot_prior(prior_mu, "mu")
def main():
    # BASIC SETUP
    logger = set_logger()
    args = INFERNO_parse_args(main_description="Training launcher for Gradient boosting on S3D2 benchmark")
    logger.info(args)
    flush(logger)

    # INFO
    model = build_model(args, -1)
    os.makedirs(model.results_directory, exist_ok=True)
    config = Config()
    config_table = evaluate_config(config)
    config_table.to_csv(os.path.join(model.results_directory, 'config_table.csv'))

    # RUN
    if args.load_run:
        logger.info(f'Loading previous runs [{args.start_cv},{args.end_cv}[')
        directory = model.results_directory
        estimations = load_estimations(directory, start_cv=args.start_cv, end_cv=args.end_cv)
        conditional_estimations = load_conditional_estimations(directory, start_cv=args.start_cv, end_cv=args.end_cv)
    else:
        logger.info(f'Running runs [{args.start_cv},{args.end_cv}[')
        results = [run(args, i_cv) for i_cv in range(args.start_cv, args.end_cv)]
        estimations = [e0 for e0, e1 in results]
        estimations = pd.concat(estimations, ignore_index=True)
        conditional_estimations = [e1 for e0, e1 in results]
        conditional_estimations = pd.concat(conditional_estimations)
    estimations.to_csv(os.path.join(model.results_directory, 'estimations.csv'))
    conditional_estimations.to_csv(os.path.join(model.results_directory, 'conditional_estimations.csv'))

    # EVALUATION
    eval_table = evaluate_estimator(config.INTEREST_PARAM_NAME, estimations)
    eval_conditional = evaluate_conditional_estimation(conditional_estimations, interest_param_name=config.INTEREST_PARAM_NAME)
    eval_table = pd.concat([eval_table, eval_conditional], axis=1)
    print_line()
    print_line()
    print(eval_table)
    print_line()
    print_line()
    eval_table.to_csv(os.path.join(model.results_directory, 'evaluation.csv'))
    gather_images(model.results_directory)
def run(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator = Generator(seed)
    train_generator = TrainGenerator(param_generator, train_generator)
    valid_generator = Generator(seed + 1)
    test_generator = Generator(seed + 2)

    # SET MODEL
    logger.info('Set up classifier')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_pivot(model, train_generator, config.N_TRAINING_SAMPLES * N_AUGMENT, retrain=args.retrain)

    some_fisher = compute_fisher(*compute_bins(model, valid_generator, config, n_bins=3), config.TRUE.mu)
    some_fisher_bis = compute_fisher(*compute_bins(model, valid_generator, config, n_bins=3), config.TRUE.mu)
    assert some_fisher == some_fisher_bis, \
        f"Fisher info should be deterministic but found : {some_fisher} =/= {some_fisher_bis}"

    # MEASUREMENT
    results = []
    for test_config in config.iter_test_config():
        logger.info(f"Running test set : {test_config.TRUE}, {test_config.N_TESTING_SAMPLES} samples")
        for n_bins in range(1, 30):
            result_row = {'i_cv': i_cv}
            gamma_array, beta_array = compute_bins(model, valid_generator, test_config, n_bins=n_bins)
            fisher = compute_fisher(gamma_array, beta_array, test_config.TRUE.mu)
            result_row.update({f'gamma_{i}': gamma for i, gamma in enumerate(gamma_array, 1)})
            result_row.update({f'beta_{i}': beta for i, beta in enumerate(beta_array, 1)})
            result_row.update(test_config.TRUE.to_dict(prefix='true_'))
            result_row['n_test_samples'] = test_config.N_TESTING_SAMPLES
            result_row['fisher'] = fisher
            result_row['n_bins'] = n_bins
            results.append(result_row.copy())
    results = pd.DataFrame(results)
    print(results)
    return results
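# Sketch of a plausible compute_fisher for a Poisson counting model (an assumption
# added for illustration; the project's actual compute_fisher may differ). With
# expected bin contents E[n_i] = mu * gamma_i + beta_i, the Fisher information of
# the Poisson likelihood with respect to mu is sum_i gamma_i**2 / (mu*gamma_i + beta_i).
def compute_fisher_poisson(gamma_array, beta_array, mu):
    gamma_array = np.asarray(gamma_array, dtype=float)
    beta_array = np.asarray(beta_array, dtype=float)
    expected_counts = mu * gamma_array + beta_array
    return np.sum(gamma_array ** 2 / expected_counts)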
def run_conditional_estimation(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()
    result_row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator = Generator(seed)
    valid_generator = Generator(seed + 1)
    test_generator = Generator(seed + 2)

    # SET MODEL
    logger.info('Set up classifier')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_classifier(model, train_generator, config.CALIBRATED, config.N_TRAINING_SAMPLES, retrain=args.retrain)

    # CHECK TRAINING
    logger.info('Generate validation data')
    X_valid, y_valid, w_valid = valid_generator.generate(*config.CALIBRATED, n_samples=config.N_VALIDATION_SAMPLES)

    result_row.update(evaluate_classifier(model, X_valid, y_valid, w_valid, prefix='valid'))

    # MEASUREMENT
    evaluate_summary_computer(model, X_valid, y_valid, w_valid, n_bins=N_BINS, prefix='valid_', suffix='')
    iter_results = [run_conditional_estimation_iter(model, result_row, i, test_config, valid_generator, test_generator, n_bins=N_BINS)
                    for i, test_config in enumerate(config.iter_test_config())]
    conditional_estimate = pd.concat(iter_results)
    conditional_estimate['i_cv'] = i_cv
    fname = os.path.join(model.results_path, "conditional_estimations.csv")
    conditional_estimate.to_csv(fname)
    logger.info('DONE')
    return conditional_estimate
def run_conditional_estimation(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()
    result_row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator, valid_generator, test_generator = get_generators_torch(seed, cuda=args.cuda, GeneratorClass=GeneratorClass)
    train_generator = GeneratorCPU(train_generator)
    train_generator = TrainGenerator(param_generator, train_generator)
    valid_generator = GeneratorCPU(valid_generator)
    test_generator = GeneratorCPU(test_generator)

    # SET MODEL
    logger.info('Set up classifier')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_neural_net(model, train_generator, retrain=args.retrain)

    # CHECK TRAINING
    logger.info('Generate validation data')
    X_valid, y_valid, w_valid = valid_generator.generate(*config.CALIBRATED, n_samples=config.N_VALIDATION_SAMPLES, no_grad=True)

    # MEASUREMENT
    result_row['nfcn'] = NCALL
    iter_results = [run_conditional_estimation_iter(model, result_row, i, test_config, valid_generator, test_generator)
                    for i, test_config in enumerate(config.iter_test_config())]
    conditional_estimate = pd.concat(iter_results)
    conditional_estimate['i_cv'] = i_cv
    fname = os.path.join(model.results_path, "conditional_estimations.csv")
    conditional_estimate.to_csv(fname)
    logger.info('DONE')
    return conditional_estimate
def main():
    logger = set_logger()
    logger.info("Hello world !")
    os.makedirs(DIRECTORY, exist_ok=True)
    set_plot_config()
    args = None

    config = Config()
    results = [run(args, i_cv) for i_cv in range(N_ITER)]
    results = pd.concat(results, ignore_index=True)
    results.to_csv(os.path.join(DIRECTORY, 'results.csv'))

    # EVALUATION
    eval_table = evaluate_estimator(config.TRUE.interest_parameters_names, results)
    print_line()
    print_line()
    print(eval_table)
    print_line()
    print_line()
    eval_table.to_csv(os.path.join(DIRECTORY, 'evaluation.csv'))
    gather_images(DIRECTORY)
def explore_links():
    config = Config()
    generator = Generator()
    rescale_range = np.linspace(min(config.RANGE.rescale), max(config.RANGE.rescale), num=5)
    mu_range = np.linspace(min(config.RANGE.mu), max(config.RANGE.mu), num=15)
    for rescale in rescale_range:
        average_list = []
        target_list = []
        for mu in mu_range:
            data, label = generator.sample_event(rescale, mu, size=config.N_TESTING_SAMPLES)
            average_list.append(np.mean(data, axis=0))
            target_list.append(mu)
        plt.scatter(average_list, target_list, label=f'rescale={rescale}')
    plt.title('Link between mean(x) and mu')
    plt.ylabel('mu')
    plt.xlabel('mean(x)')
    plt.legend()
    plt.savefig(os.path.join(DIRECTORY, 'mean_link.png'))
    plt.clf()
def main():
    # BASIC SETUP
    logger = set_logger()
    args = GB_parse_args(main_description="Training launcher for Gradient boosting on S3D2 benchmark")
    logger.info(args)
    flush(logger)

    # Config
    config = Config()
    config.TRUE = Parameter(rescale=0.9, mu=0.1)
    train_generator = Generator(SEED)
    valid_generator = Generator(SEED + 1)
    test_generator = Generator(SEED + 2)
    X_test, y_test, w_test = test_generator.generate(*config.TRUE, n_samples=config.N_TESTING_SAMPLES)

    # for nuisance in p(nuisance | data)
    nuisance_param_sample = [param_generator().nuisance_parameters for _ in range(25)]
    average_list = []
    variance_list = []
    result_table = []
    for nuisance_params in nuisance_param_sample:
        logger.info(f"nuisance_params = {nuisance_params}")
        estimator_values = []
        for i_cv in range(N_ITER):
            clf = build_model(args, i_cv)
            parameters = Parameter(*nuisance_params, config.CALIBRATED.interest_parameters)
            print(parameters)
            n_samples = config.N_TRAINING_SAMPLES
            X_train, y_train, w_train = train_generator.generate(*parameters, n_samples=n_samples)
            logger.info(f"Training {clf.full_name}")
            clf.fit(X_train, y_train, w_train)
            compute_summaries = ClassifierSummaryComputer(clf, n_bins=10)
            nll_computer = NLLComputer(compute_summaries, valid_generator, X_test, w_test, config=config)
            compute_nll = lambda mu: nll_computer(*nuisance_params, mu)
            minimizer = get_minimizer(compute_nll)
            results = evaluate_minuit(minimizer, [config.TRUE.interest_parameters])
            estimator_values.append(results['mu'])
            results['i_cv'] = i_cv
            results.update(params_to_dict(parameters, suffix='true'))
            result_table.append(results.copy())
        average_list.append(np.mean(estimator_values))
        variance_list.append(np.var(estimator_values))

    model = build_model(args, 0)
    model.set_info(DATA_NAME, BENCHMARK_NAME, 0)
    save_directory = model.results_path
    os.makedirs(save_directory, exist_ok=True)
    result_table = pd.DataFrame(result_table)
    result_table.to_csv(os.path.join(save_directory, 'results.csv'))
    logger.info(f"average_list {average_list}")
    logger.info(f"variance_list {variance_list}")
    # Law of total variance: Var(mu_hat) = E_nu[Var(mu_hat | nu)] + Var_nu(E[mu_hat | nu]),
    # estimated over the sampled nuisance parameter sets.
    v_stat = np.mean(variance_list)   # statistical part: mean of per-nuisance variances
    v_syst = np.var(average_list)     # systematic part: variance of per-nuisance means
    v_total = v_stat + v_syst
    logger.info(f"V_stat = {v_stat}")
    logger.info(f"V_syst = {v_syst}")
    logger.info(f"V_total = {v_total}")
    eval_dict = {"V_stat": v_stat, "V_syst": v_syst, "V_total": v_total}
    eval_path = os.path.join(save_directory, 'info.json')
    with open(eval_path, 'w') as f:
        json.dump(eval_dict, f)
def run_estimation(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()
    result_row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator = GeneratorTorch(seed, cuda=args.cuda)
    valid_generator = Generator(seed + 1)
    test_generator = Generator(seed + 2)

    # SET MODEL
    logger.info('Set up regressor')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_inferno(model, train_generator, retrain=args.retrain)

    # CHECK TRAINING
    logger.info('Generate validation data')
    X_valid, y_valid, w_valid = valid_generator.generate(*config.CALIBRATED, n_samples=config.N_VALIDATION_SAMPLES)

    result_row.update(evaluate_neural_net(model, prefix='valid'))
    evaluate_inferno(model, prefix='valid')

    # MEASUREMENT
    evaluate_summary_computer(model, X_valid, y_valid, w_valid, n_bins=N_BINS, prefix='valid_', suffix='')
    iter_results = [run_estimation_iter(model, result_row, i, test_config, valid_generator, test_generator, n_bins=N_BINS)
                    for i, test_config in enumerate(config.iter_test_config())]
    result_table = pd.DataFrame(iter_results)
    result_table.to_csv(os.path.join(model.results_path, 'estimations.csv'))

    logger.info('Plot params')
    param_names = config.PARAM_NAMES
    for name in param_names:
        plot_params(name, result_table, title=model.full_name, directory=model.results_path)
    logger.info('DONE')
    return result_table