def run(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator, valid_generator, test_generator = get_generators_torch(seed, cuda=args.cuda)
    train_generator = GeneratorCPU(train_generator)
    train_generator = TrainGenerator(param_generator, train_generator)
    valid_generator = GeneratorCPU(valid_generator)
    test_generator = GeneratorCPU(test_generator)

    # SET MODEL
    logger.info('Set up classifier')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_pivot(model, train_generator, train_generator.n_samples * N_AUGMENT, retrain=args.retrain)

    # MEASUREMENT
    results = measurement(model, i_cv, config, valid_generator, test_generator)
    print(results)
    return results
def run(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()
    result_row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator = Generator(seed)
    train_generator = TrainGenerator(param_generator, train_generator)
    valid_generator = Generator(seed + 1)
    test_generator = Generator(seed + 2)

    # SET MODEL
    logger.info('Set up classifier')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_pivot(model, train_generator, config.N_TRAINING_SAMPLES * N_AUGMENT, retrain=args.retrain)

    # CHECK TRAINING
    logger.info('Generate validation data')
    X_valid, y_valid, w_valid = valid_generator.generate(*config.CALIBRATED, n_samples=config.N_VALIDATION_SAMPLES)
    result_row.update(evaluate_neural_net(model, prefix='valid'))
    result_row.update(evaluate_classifier(model, X_valid, y_valid, w_valid, prefix='valid'))

    # MEASUREMENT
    N_BINS = 10
    evaluate_summary_computer(model, X_valid, y_valid, w_valid, n_bins=N_BINS, prefix='valid_', suffix='')
    iter_results = [run_iter(model, result_row, i, test_config, valid_generator, test_generator, n_bins=N_BINS)
                    for i, test_config in enumerate(config.iter_test_config())]

    result_table = [e0 for e0, e1 in iter_results]
    result_table = pd.DataFrame(result_table)
    result_table.to_csv(os.path.join(model.results_path, 'estimations.csv'))

    logger.info('Plot params')
    param_names = config.PARAM_NAMES
    for name in param_names:
        plot_params(name, result_table, title=model.full_name, directory=model.results_path)

    conditional_estimate = pd.concat([e1 for e0, e1 in iter_results])
    conditional_estimate['i_cv'] = i_cv
    fname = os.path.join(model.results_path, "conditional_estimations.csv")
    conditional_estimate.to_csv(fname)
    logger.info('DONE')
    return result_table, conditional_estimate
def run(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator = Generator(seed)
    train_generator = TrainGenerator(param_generator, train_generator)
    valid_generator = Generator(seed + 1)
    test_generator = Generator(seed + 2)

    # SET MODEL
    logger.info('Set up classifier')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_pivot(model, train_generator, config.N_TRAINING_SAMPLES * N_AUGMENT, retrain=args.retrain)

    # Sanity check: the Fisher information should not depend on random state.
    some_fisher = compute_fisher(*compute_bins(model, valid_generator, config, n_bins=3), config.TRUE.mu)
    some_fisher_bis = compute_fisher(*compute_bins(model, valid_generator, config, n_bins=3), config.TRUE.mu)
    assert some_fisher == some_fisher_bis, \
        f"Fisher info should be deterministic but found : {some_fisher} =/= {some_fisher_bis}"

    # MEASUREMENT
    results = []
    for test_config in config.iter_test_config():
        logger.info(f"Running test set : {test_config.TRUE}, {test_config.N_TESTING_SAMPLES} samples")
        for n_bins in range(1, 30):
            result_row = {'i_cv': i_cv}
            gamma_array, beta_array = compute_bins(model, valid_generator, test_config, n_bins=n_bins)
            fisher = compute_fisher(gamma_array, beta_array, test_config.TRUE.mu)
            result_row.update({f'gamma_{i}': gamma for i, gamma in enumerate(gamma_array, 1)})
            result_row.update({f'beta_{i}': beta for i, beta in enumerate(beta_array, 1)})
            result_row.update(test_config.TRUE.to_dict(prefix='true_'))
            result_row['n_test_samples'] = test_config.N_TESTING_SAMPLES
            result_row['fisher'] = fisher
            result_row['n_bins'] = n_bins
            results.append(result_row.copy())
    results = pd.DataFrame(results)
    print(results)
    return results
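# The compute_bins / compute_fisher helpers used above are not shown in this section.
# As a hedged illustration only (not the repository's actual implementation): for a
# binned Poisson counting model with expected counts mu * gamma_i + beta_i per bin,
# the Fisher information on the signal strength mu is
#   I(mu) = sum_i gamma_i**2 / (mu * gamma_i + beta_i).
# The sketch below assumes that convention; the function name is hypothetical.
import numpy as np

def poisson_counting_fisher(gamma_array, beta_array, mu):
    """Fisher information on mu for independent Poisson bins with
    expected counts mu * gamma_i + beta_i (illustrative sketch only)."""
    gamma_array = np.asarray(gamma_array, dtype=float)
    beta_array = np.asarray(beta_array, dtype=float)
    expected = mu * gamma_array + beta_array
    return np.sum(gamma_array ** 2 / expected)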
def run_conditional_estimation(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()
    result_row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator, valid_generator, test_generator = get_generators_torch(seed, cuda=args.cuda, GeneratorClass=GeneratorClass)
    train_generator = GeneratorCPU(train_generator)
    train_generator = TrainGenerator(param_generator, train_generator)
    valid_generator = GeneratorCPU(valid_generator)
    test_generator = GeneratorCPU(test_generator)

    # SET MODEL
    logger.info('Set up classifier')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)

    # TRAINING / LOADING
    config.N_TRAINING_SAMPLES = train_generator.n_samples
    train_or_load_pivot(model, train_generator, config.N_TRAINING_SAMPLES * N_AUGMENT, retrain=args.retrain)

    # CHECK TRAINING
    logger.info('Generate validation data')
    X_valid, y_valid, w_valid = valid_generator.generate(*config.CALIBRATED, n_samples=config.N_VALIDATION_SAMPLES, no_grad=True)
    result_row.update(evaluate_classifier(model, X_valid, y_valid, w_valid, prefix='valid'))

    # MEASUREMENT
    evaluate_summary_computer(model, X_valid, y_valid, w_valid, n_bins=N_BINS, prefix='valid_', suffix='')
    iter_results = [run_conditional_estimation_iter(model, result_row, i, test_config, valid_generator, test_generator, n_bins=N_BINS)
                    for i, test_config in enumerate(config.iter_test_config())]

    conditional_estimate = pd.concat(iter_results)
    conditional_estimate['i_cv'] = i_cv
    fname = os.path.join(model.results_path, "conditional_estimations.csv")
    conditional_estimate.to_csv(fname)
    logger.info('DONE')
    return conditional_estimate
def run(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator = Generator(seed)
    train_generator = TrainGenerator(param_generator, train_generator)
    valid_generator = Generator(seed + 1)
    test_generator = Generator(seed + 2)

    # SET MODEL
    logger.info('Set up classifier')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_pivot(model, train_generator, config.N_TRAINING_SAMPLES * N_AUGMENT, retrain=args.retrain)

    # MEASUREMENT
    results = []
    for test_config in config.iter_test_config():
        logger.info(f"Running test set : {test_config.TRUE}, {test_config.N_TESTING_SAMPLES} samples")
        for threshold in np.linspace(0, 1, 500):
            result_row = {'i_cv': i_cv}
            result_row['threshold'] = threshold
            result_row.update(test_config.TRUE.to_dict(prefix='true_'))
            result_row['n_test_samples'] = test_config.N_TESTING_SAMPLES

            # Selected signal/background counts on the validation set
            X, y, w = valid_generator.generate(*config.TRUE, n_samples=config.N_VALIDATION_SAMPLES)
            proba = model.predict_proba(X)
            decision = proba[:, 1]
            selected = decision > threshold
            beta = np.sum(y[selected] == 0)
            gamma = np.sum(y[selected] == 1)
            result_row['beta'] = beta
            result_row['gamma'] = gamma

            # Selected signal/background counts on the test set
            X, y, w = test_generator.generate(*config.TRUE, n_samples=config.N_VALIDATION_SAMPLES)
            proba = model.predict_proba(X)
            decision = proba[:, 1]
            selected = decision > threshold
            n_selected = np.sum(selected)
            n_selected_bkg = np.sum(y[selected] == 0)
            n_selected_sig = np.sum(y[selected] == 1)
            result_row['n'] = n_selected
            result_row['b'] = n_selected_bkg
            result_row['s'] = n_selected_sig
            result_row['s_sqrt_n'] = n_selected_sig / np.sqrt(n_selected)
            result_row['s_sqrt_b'] = n_selected_sig / np.sqrt(n_selected_bkg)
            results.append(result_row.copy())
    results = pd.DataFrame(results)
    print(results)
    return results
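# Hedged usage sketch (not part of the original script): given the DataFrame returned
# by run() above, a simple way to pick an operating point is to take the threshold
# that maximises the s/sqrt(b) figure of merit recorded in the 's_sqrt_b' column.
# Column names follow the result_row keys filled in above; the helper name is hypothetical.
def best_threshold(results):
    """Return the row of `results` with the highest s/sqrt(b) value."""
    return results.loc[results['s_sqrt_b'].idxmax()]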
def run_estimation(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()
    result_row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator, valid_generator, test_generator = get_generators_torch(seed, cuda=args.cuda, GeneratorClass=GeneratorClass)
    train_generator = GeneratorCPU(train_generator)
    train_generator = TrainGenerator(param_generator, train_generator)
    valid_generator = GeneratorCPU(valid_generator)
    test_generator = GeneratorCPU(test_generator)

    # SET MODEL
    logger.info('Set up classifier')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)

    # TRAINING / LOADING
    config.N_TRAINING_SAMPLES = train_generator.n_samples
    train_or_load_pivot(model, train_generator, config.N_TRAINING_SAMPLES * N_AUGMENT, retrain=args.retrain)

    # CHECK TRAINING
    logger.info('Generate validation data')
    X_valid, y_valid, w_valid = valid_generator.generate(*config.CALIBRATED, n_samples=config.N_VALIDATION_SAMPLES, no_grad=True)
    result_row.update(evaluate_neural_net(model, prefix='valid'))
    result_row.update(evaluate_classifier(model, X_valid, y_valid, w_valid, prefix='valid'))

    # MEASUREMENT
    calibs = {}
    calibs['tes'] = load_calib_tes(DATA_NAME, BENCHMARK_NAME)
    calibs['jes'] = load_calib_jes(DATA_NAME, BENCHMARK_NAME)
    calibs['les'] = load_calib_les(DATA_NAME, BENCHMARK_NAME)
    evaluate_summary_computer(model, X_valid, y_valid, w_valid, n_bins=N_BINS, prefix='valid_', suffix='')
    iter_results = [run_estimation_iter(model, result_row, i, test_config, valid_generator, test_generator, calibs,
                                        n_bins=N_BINS, tolerance=args.tolerance)
                    for i, test_config in enumerate(config.iter_test_config())]

    result_table = pd.DataFrame(iter_results)
    result_table.to_csv(os.path.join(model.results_path, 'estimations.csv'))

    logger.info('Plot params')
    param_names = config.PARAM_NAMES
    for name in param_names:
        plot_params(name, result_table, title=model.full_name, directory=model.results_path)

    logger.info('DONE')
    return result_table
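# Hedged usage sketch (assumed driver, not shown in the source): the run_* functions
# above are typically looped over cross-validation seeds and their per-iteration
# tables concatenated for later aggregation. The iteration count and the output
# location (args.results_dir) are hypothetical.
def main_loop(args, n_iter=6):
    results = pd.concat([run_estimation(args, i_cv) for i_cv in range(n_iter)], ignore_index=True)
    results.to_csv(os.path.join(args.results_dir, 'all_estimations.csv'))
    return results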