def run_conditional_estimation(args, i_cv):
    """Train (or load) the classifier for one CV fold, check it on validation
    data and return the concatenated conditional estimations."""
    logger = logging.getLogger()
    print_line()
    logger.info(f'Running iter n°{i_cv}')
    print_line()
    result_row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    gen_train = GeneratorTorch(seed, cuda=args.cuda)
    gen_train = TrainGenerator(gen_train, cuda=args.cuda)
    gen_valid = Generator(seed + 1)
    gen_test = Generator(seed + 2)

    # SET MODEL
    logger.info('Set up classifier')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_neural_net(model, gen_train, retrain=args.retrain)

    # CHECK TRAINING
    logger.info('Generate validation data')
    X_valid, y_valid, w_valid = gen_valid.generate(
        *config.CALIBRATED, n_samples=config.N_VALIDATION_SAMPLES)

    result_row.update(evaluate_neural_net(model, prefix='valid'))
    result_row.update(
        evaluate_classifier(model, X_valid, y_valid, w_valid, prefix='valid'))

    # MEASUREMENT
    evaluate_summary_computer(model, X_valid, y_valid, w_valid,
                              n_bins=N_BINS, prefix='valid_', suffix='')
    per_config_estimates = []
    for i, test_config in enumerate(config.iter_test_config()):
        per_config_estimates.append(
            run_conditional_estimation_iter(model, result_row, i, test_config,
                                            gen_valid, gen_test,
                                            n_bins=N_BINS))

    conditional_estimate = pd.concat(per_config_estimates)
    conditional_estimate['i_cv'] = i_cv
    fname = os.path.join(model.results_path, "conditional_estimations.csv")
    conditional_estimate.to_csv(fname)
    logger.info('DONE')
    return conditional_estimate
def run(args, i_cv):
    """One cross-validation run: build, train/load and measure a classifier."""
    logger = logging.getLogger()
    print_line()
    logger.info(f'Running iter n°{i_cv}')
    print_line()

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    generators = get_generators_torch(seed, cuda=args.cuda)
    # Wrap every torch generator so data comes back on the CPU.
    train_generator, valid_generator, test_generator = (
        GeneratorCPU(g) for g in generators)

    # SET MODEL
    logger.info('Set up classifier')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_classifier(model, train_generator, config.CALIBRATED,
                             config.N_TRAINING_SAMPLES, retrain=args.retrain)

    # MEASUREMENT
    results = measurement(model, i_cv, config, valid_generator, test_generator)
    print(results)
    return results
def run(args, i_cv):
    """Likelihood fit for one CV fold using histogram summaries of the data."""
    logger = logging.getLogger()
    print_line()
    logger.info(f'Running iter n°{i_cv}')
    print_line()
    directory = os.path.join(DIRECTORY, f'cv_{i_cv}')
    os.makedirs(directory, exist_ok=True)

    config = Config()
    seed = SEED + i_cv * 5
    train_generator = Generator(seed)
    valid_generator = Generator(seed + 1)
    test_generator = Generator(seed + 2)

    # Fit the summary computer on a fresh training sample.
    N_BINS = 10
    X_train, y_train, w_train = train_generator.generate(
        *config.CALIBRATED, n_samples=config.N_TRAINING_SAMPLES)
    compute_summaries = HistogramSummaryComputer(n_bins=N_BINS).fit(X_train)

    rows = []
    for i, test_config in enumerate(config.iter_test_config()):
        rows.append(run_iter(compute_summaries, i_cv, i, test_config,
                             valid_generator, test_generator, directory))
    result_table = pd.DataFrame(rows)
    result_table.to_csv(os.path.join(directory, 'results.csv'))

    logger.info('Plot params')
    for name in config.PARAM_NAMES:
        plot_params(name, result_table, title='Likelihood fit',
                    directory=directory)
    return result_table
def run(args, i_cv):
    """Likelihood fit for one CV fold on the S3D2 benchmark."""
    logger = logging.getLogger()
    print_line()
    logger.info(f'Running iter n°{i_cv}')
    print_line()
    directory = os.path.join(DIRECTORY, f'cv_{i_cv}')
    os.makedirs(directory, exist_ok=True)

    config = S3D2Config()
    seed = SEED + i_cv * 5
    test_seed = seed + 2

    rows = []
    for i, test_config in enumerate(config.iter_test_config()):
        rows.append(run_iter(i_cv, i, test_config, test_seed, directory))
    result_table = pd.DataFrame(rows)
    result_table.to_csv(os.path.join(directory, 'estimations.csv'))

    logger.info('Plot params')
    for name in config.PARAM_NAMES:
        plot_params(name, result_table, title='Likelihood fit',
                    directory=directory)
    return result_table
def run(args, i_cv):
    """Threshold scan for one CV fold.

    Trains (or loads) a classifier, then for each test configuration and 500
    decision thresholds in [0, 1] records selection counts on validation data
    (beta = selected background, gamma = selected signal) and on test data
    (n, b, s) together with naive significances s/sqrt(n) and s/sqrt(b).

    Returns a DataFrame with one row per (test_config, threshold).
    """
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator = GeneratorTorch(seed, cuda=args.cuda)
    valid_generator = Generator(seed+1)
    test_generator = Generator(seed+2)

    # SET MODEL
    logger.info('Set up classifier')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_neural_net(model, train_generator, retrain=args.retrain)

    # MEASUREMENT
    results = []
    for test_config in config.iter_test_config():
        logger.info(f"Running test set : {test_config.TRUE}, {test_config.N_TESTING_SAMPLES} samples")
        for threshold in np.linspace(0, 1, 500):
            result_row = {'i_cv': i_cv}
            result_row['threshold'] = threshold
            result_row.update(test_config.TRUE.to_dict(prefix='true_'))
            result_row['n_test_samples'] = test_config.N_TESTING_SAMPLES

            # Selection counts on validation data.
            X, y, w = valid_generator.generate(*config.TRUE, n_samples=config.N_VALIDATION_SAMPLES)
            proba = model.predict_proba(X)
            decision = proba[:, 1]
            selected = decision > threshold
            result_row['beta'] = np.sum(y[selected] == 0)
            result_row['gamma'] = np.sum(y[selected] == 1)

            # Selection counts on test data.
            X, y, w = test_generator.generate(*config.TRUE, n_samples=config.N_VALIDATION_SAMPLES)
            proba = model.predict_proba(X)
            decision = proba[:, 1]
            selected = decision > threshold
            n_selected = np.sum(selected)
            n_selected_bkg = np.sum(y[selected] == 0)
            n_selected_sig = np.sum(y[selected] == 1)
            result_row['n'] = n_selected
            result_row['b'] = n_selected_bkg
            result_row['s'] = n_selected_sig
            # NOTE: at extreme thresholds the denominators can be 0, which
            # yields inf/nan (numpy warns but does not raise).
            result_row['s_sqrt_n'] = n_selected_sig / np.sqrt(n_selected)
            # BUG FIX: s/sqrt(b) previously divided by sqrt(n_selected)
            # (copy-paste of the line above) instead of sqrt(n_selected_bkg).
            result_row['s_sqrt_b'] = n_selected_sig / np.sqrt(n_selected_bkg)
            results.append(result_row.copy())
    results = pd.DataFrame(results)
    print(results)
    return results
def run(args, i_cv):
    """Measure classifier quality as a function of training-set size
    for one CV fold; returns one row per size in N_TRAIN_RANGE."""
    logger = logging.getLogger()
    print_line()
    logger.info(f'Running iter n°{i_cv}')
    print_line()
    result_row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator = Generator(seed)
    valid_generator = Generator(seed + 1)
    # test_generator = Generator(seed+2)

    rows = []
    for n_train_samples in N_TRAIN_RANGE:
        result_row['n_train_samples'] = n_train_samples

        # SET MODEL
        logger.info('Set up classifier')
        model = build_model(args, i_cv)
        os.makedirs(model.results_path, exist_ok=True)
        flush(logger)

        # TRAINING / LOADING
        X_train, y_train, w_train = train_generator.generate(
            *config.CALIBRATED, n_samples=n_train_samples)
        model.fit(X_train, y_train, w_train)

        # CHECK TRAINING
        logger.info('Generate validation data')
        X_valid, y_valid, w_valid = valid_generator.generate(
            *config.CALIBRATED, n_samples=config.N_VALIDATION_SAMPLES)
        some_eval = evaluate_classifier(model, X_valid, y_valid, w_valid,
                                        prefix='valid',
                                        suffix=f'-{n_train_samples}')
        result_row['valid_auc'] = some_eval[f'valid_auc-{n_train_samples}']
        result_row['valid_accuracy'] = some_eval[
            f'valid_accuracy-{n_train_samples}']

        N_BINS = 10
        evaluate_summary_computer(model, X_valid, y_valid, w_valid,
                                  n_bins=N_BINS, prefix='valid_',
                                  suffix=f'{n_train_samples}')
        rows.append(result_row.copy())
    return pd.DataFrame(rows)
def run(args, i_cv):
    """Regressor run for one CV fold: train/load, validate, then estimate
    per test configuration. Returns (estimations, conditional estimations)."""
    logger = logging.getLogger()
    print_line()
    logger.info(f'Running iter n°{i_cv}')
    print_line()
    result_row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator = Generator(seed)
    valid_generator = Generator(seed + 1)
    test_generator = Generator(seed + 2)
    train_generator = TrainGenerator(param_generator, train_generator)

    # SET MODEL
    logger.info('Set up regressor')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_neural_net(model, train_generator, retrain=args.retrain)

    # CHECK TRAINING
    logger.info('Generate validation data')
    X_valid, y_valid, w_valid = valid_generator.generate(
        *config.CALIBRATED, n_samples=config.N_VALIDATION_SAMPLES)
    result_row.update(evaluate_neural_net(model, prefix='valid'))
    evaluate_regressor(model, prefix='valid')

    # MEASUREMENT
    result_row['nfcn'] = NCALL
    estimations = []
    conditionals = []
    for i, test_config in enumerate(config.iter_test_config()):
        estimation, conditional = run_iter(model, result_row, i, test_config,
                                           valid_generator, test_generator)
        estimations.append(estimation)
        conditionals.append(conditional)

    result_table = pd.DataFrame(estimations)
    result_table.to_csv(os.path.join(model.results_path, 'estimations.csv'))
    logger.info('Plot params')
    for name in config.PARAM_NAMES:
        plot_params(name, result_table, title=model.full_name,
                    directory=model.results_path)

    conditional_estimate = pd.concat(conditionals)
    conditional_estimate['i_cv'] = i_cv
    fname = os.path.join(model.results_path, "conditional_estimations.csv")
    conditional_estimate.to_csv(fname)
    logger.info('DONE')
    return result_table, conditional_estimate
def run(args, i_cv):
    """Likelihood run for one CV fold.

    Training and validation-check steps are intentionally skipped here (they
    were commented out in earlier revisions); only the test generator is used.
    """
    logger = logging.getLogger()
    print_line()
    logger.info(f'Running iter n°{i_cv}')
    print_line()
    result_row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    # Only the test generator is needed; train/valid generation, classifier
    # training and the validation checks are deliberately not performed.
    test_generator = Generator(seed + 2)

    # SET MODEL
    model = build_model(args, i_cv)

    logger.info('Generate validation data')

    # MEASUREMENT
    N_BINS = 10
    rows = []
    for i, test_config in enumerate(config.iter_test_config()):
        rows.append(run_iter(model, result_row, i, i_cv, args, test_config,
                             test_generator, n_bins=N_BINS))
    result_table = pd.DataFrame(rows)
    result_table.to_csv(os.path.join(model.results_path, 'results.csv'))

    logger.info('Plot params')
    for name in config.PARAM_NAMES:
        # NOTE(review): plots go to model.path while the CSV goes to
        # model.results_path — kept as-is; confirm this split is intended.
        plot_params(name, result_table, title=model.full_name,
                    directory=model.path)
    logger.info('DONE')
    return result_table
def main():
    """Single-run launcher (i_cv = 0) for the regressor on the S3D2 benchmark."""
    # BASIC SETUP
    logger = set_logger()
    args = REG_parse_args(
        main_description="Training launcher for Regressor on S3D2 benchmark")
    logger.info(args)
    flush(logger)

    # Setup model
    logger.info("Setup model")
    model = build_model(args, 0)
    os.makedirs(model.results_directory, exist_ok=True)

    # Setup data
    logger.info("Setup data")
    config = Config()
    config_table = evaluate_config(config)
    config_table.to_csv(
        os.path.join(model.results_directory, 'config_table.csv'))
    seed = SEED + 99999
    train_generator, valid_generator, test_generator = get_generators_torch(
        seed, cuda=args.cuda, GeneratorClass=GeneratorClass)
    train_generator = TrainGenerator(param_generator,
                                     GeneratorCPU(train_generator))
    valid_generator = GeneratorCPU(valid_generator)
    test_generator = GeneratorCPU(test_generator)

    i_cv = 0
    result_row = {'i_cv': i_cv}

    # TRAINING / LOADING
    train_or_load_neural_net(model, train_generator, retrain=args.retrain)

    # CHECK TRAINING
    result_row.update(evaluate_neural_net(model, prefix='valid'))
    evaluate_regressor(model, prefix='valid')
    print_line()

    rows = []
    for i, test_config in enumerate(config.iter_test_config()):
        rows.append(run_iter(model, result_row, i, test_config,
                             valid_generator, test_generator))
    result_table = pd.DataFrame(rows)
    result_table.to_csv(os.path.join(model.results_directory, 'results.csv'))

    logger.info('Plot params')
    for name in [CALIB_PARAM_NAME]:
        plot_params(name, result_table, title=model.full_name,
                    directory=model.results_directory)
    logger.info('DONE')
def main():
    """Launcher for the neural-net classifier on the HIGGS benchmark.

    Runs the estimation and/or conditional-estimation pipelines depending on
    the --estimate-only / --conditional-only flags, then writes and prints
    the combined evaluation table.
    """
    # BASIC SETUP
    logger = set_logger()
    args = NET_parse_args(
        main_description=
        "Training launcher for Neural net classifier on HIGGS benchmark")
    logger.info(args)
    flush(logger)
    # INFO
    model = build_model(args, -1)
    os.makedirs(model.results_directory, exist_ok=True)
    config = Config()
    config_table = evaluate_config(config)
    config_table.to_csv(
        os.path.join(model.results_directory, 'config_table.csv'))
    # RUN
    eval_table = None
    if not args.conditional_only:
        eval_table = get_eval_table(args, model.results_directory)
    if not args.estimate_only:
        eval_conditional = get_eval_conditional(args, model.results_directory)
        if args.conditional_only:
            # BUG FIX: with --conditional-only, eval_table was never assigned
            # and the final print/to_csv raised NameError. Report the
            # conditional evaluation instead.
            eval_table = eval_conditional
    if not args.estimate_only and not args.conditional_only:
        eval_table = pd.concat([eval_table, eval_conditional], axis=1)
    # EVALUATION
    if eval_table is None:
        # Both flags set: nothing was computed, so nothing to report.
        logger.info('No evaluation produced (both estimate_only and conditional_only set)')
        return
    print_line()
    print_line()
    print(eval_table)
    print_line()
    print_line()
    eval_table.to_csv(
        os.path.join(model.results_directory, 'evaluation.csv'))
    gather_images(model.results_directory)
def get_eval_conditional(args, results_directory):
    """Load or (re)compute conditional estimations over the requested CV
    folds, evaluate them, save and return the evaluation table."""
    logger = logging.getLogger()
    if args.load_run:
        logger.info(f'Loading previous runs [{args.start_cv},{args.end_cv}[')
        conditional_estimations = load_conditional_estimations(
            results_directory, start_cv=args.start_cv, end_cv=args.end_cv)
    else:
        logger.info(f'Running runs [{args.start_cv},{args.end_cv}[')
        per_cv = []
        for i_cv in range(args.start_cv, args.end_cv):
            per_cv.append(run_conditional_estimation(args, i_cv))
        conditional_estimations = pd.concat(per_cv, ignore_index=True)
        conditional_estimations.to_csv(
            os.path.join(results_directory, 'conditional_estimations.csv'))
    # EVALUATION
    eval_conditional = evaluate_conditional_estimation(
        conditional_estimations,
        interest_param_name=Config.INTEREST_PARAM_NAME)
    print_line()
    print_line()
    print(eval_conditional)
    print_line()
    print_line()
    eval_conditional.to_csv(
        os.path.join(results_directory, 'conditional_evaluation.csv'))
    return eval_conditional
def main():
    """Launcher: run every CV fold of the gradient-boosting estimator on the
    S3D2 benchmark, then evaluate and gather the results."""
    # BASIC SETUP
    logger = set_logger()
    args = REG_parse_args(
        main_description=
        "Training launcher for Gradient boosting on S3D2 benchmark")
    logger.info(args)
    flush(logger)
    # INFO
    model = build_model(args, -1)
    os.makedirs(model.results_directory, exist_ok=True)
    config = Config()
    config_table = evaluate_config(config)
    config_table.to_csv(
        os.path.join(model.results_directory, 'config_table.csv'))
    # RUN
    per_cv = [run(args, i_cv) for i_cv in range(N_ITER)]
    results = pd.concat(per_cv, ignore_index=True)
    results.to_csv(os.path.join(model.results_directory, 'estimations.csv'))
    # EVALUATION
    eval_table = evaluate_estimator(config.INTEREST_PARAM_NAME, results)
    print_line()
    print_line()
    print(eval_table)
    print_line()
    print_line()
    eval_table.to_csv(os.path.join(model.results_directory, 'evaluation.csv'))
    gather_images(model.results_directory)
def main():
    """Launcher: run every CV fold of the AR5R5E regressor on the S3D2
    benchmark, then evaluate and gather the results."""
    # BASIC SETUP
    logger = set_logger()
    args = REG_parse_args(
        main_description="Training launcher for Regressor on S3D2 benchmark")
    logger.info(args)
    flush(logger)
    # INFO
    args.net = AR5R5E(n_in=3, n_out=2, n_extra=2)
    args.optimizer = get_optimizer(args)
    model = get_model(args, Regressor)
    model.set_info(DATA_NAME, BENCHMARK_NAME, -1)
    pb_config = S3D2Config()
    # RUN
    per_cv = [run(args, i_cv) for i_cv in range(N_ITER)]
    results = pd.concat(per_cv, ignore_index=True)
    results.to_csv(os.path.join(model.results_directory, 'results.csv'))
    # EVALUATION
    eval_table = evaluate_estimator(pb_config.INTEREST_PARAM_NAME, results)
    print_line()
    print_line()
    print(eval_table)
    print_line()
    print_line()
    eval_table.to_csv(os.path.join(model.results_directory, 'evaluation.csv'))
    gather_images(model.results_directory)
def run_estimation(args, i_cv):
    """Train/load the classifier for one CV fold, check it, then run the
    estimation loop over every test configuration."""
    logger = logging.getLogger()
    print_line()
    logger.info(f'Running iter n°{i_cv}')
    print_line()
    result_row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator, valid_generator, test_generator = get_generators_torch(
        seed, cuda=args.cuda, GeneratorClass=GeneratorClass)
    train_generator = TrainGenerator(train_generator, cuda=args.cuda)
    valid_generator = GeneratorCPU(valid_generator)
    test_generator = GeneratorCPU(test_generator)

    # SET MODEL
    logger.info('Set up classifier')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_neural_net(model, train_generator, retrain=args.retrain)

    # CHECK TRAINING
    logger.info('Generate validation data')
    X_valid, y_valid, w_valid = valid_generator.generate(
        *config.CALIBRATED,
        n_samples=config.N_VALIDATION_SAMPLES,
        no_grad=True)

    result_row.update(evaluate_neural_net(model, prefix='valid'))
    result_row.update(evaluate_classifier(model, X_valid, y_valid, w_valid,
                                          prefix='valid'))

    # MEASUREMENT
    calibs = {
        'tes': load_calib_tes(DATA_NAME, BENCHMARK_NAME),
        'jes': load_calib_jes(DATA_NAME, BENCHMARK_NAME),
        'les': load_calib_les(DATA_NAME, BENCHMARK_NAME),
    }
    evaluate_summary_computer(model, X_valid, y_valid, w_valid,
                              n_bins=N_BINS, prefix='valid_', suffix='')
    rows = []
    for i, test_config in enumerate(config.iter_test_config()):
        rows.append(run_estimation_iter(model, result_row, i, test_config,
                                        valid_generator, test_generator,
                                        calibs, n_bins=N_BINS,
                                        tolerance=args.tolerance))
    result_table = pd.DataFrame(rows)
    result_table.to_csv(os.path.join(model.results_path, 'estimations.csv'))
    logger.info('Plot params')
    for name in config.PARAM_NAMES:
        plot_params(name, result_table, title=model.full_name,
                    directory=model.results_path)
    logger.info('DONE')
    return result_table
def get_eval_table(args, results_directory):
    """Load or (re)compute estimations over the requested CV folds,
    evaluate the estimator, save and return the evaluation table."""
    logger = logging.getLogger()
    if args.load_run:
        logger.info(f'Loading previous runs [{args.start_cv},{args.end_cv}[')
        estimations = load_estimations(results_directory,
                                       start_cv=args.start_cv,
                                       end_cv=args.end_cv)
    else:
        logger.info(f'Running runs [{args.start_cv},{args.end_cv}[')
        per_cv = [run_estimation(args, i_cv)
                  for i_cv in range(args.start_cv, args.end_cv)]
        estimations = pd.concat(per_cv, ignore_index=True)
        estimations.to_csv(os.path.join(results_directory, 'estimations.csv'))
    # EVALUATION
    eval_table = evaluate_estimator(Config.INTEREST_PARAM_NAME, estimations)
    print_line()
    print_line()
    print(eval_table)
    print_line()
    print_line()
    eval_table.to_csv(
        os.path.join(results_directory, 'estimation_evaluation.csv'))
    return eval_table
def main():
    """Launcher for the marginal regressor on the HIGGS benchmark:
    run the estimation pipeline, then save and gather the evaluation."""
    # BASIC SETUP
    logger = set_logger()
    args = REG_parse_args(
        main_description=
        "Training launcher for Marginal Regressor on HIGGS benchmark")
    logger.info(args)
    flush(logger)
    # INFO
    model = build_model(args, -1)
    os.makedirs(model.results_directory, exist_ok=True)
    config = Config()
    config_table = evaluate_config(config)
    config_table.to_csv(
        os.path.join(model.results_directory, 'config_table.csv'))
    # RUN
    eval_table = get_eval_table(args, model.results_directory)
    # EVALUATION
    print_line()
    print_line()
    print(eval_table)
    print_line()
    print_line()
    eval_table.to_csv(os.path.join(model.results_directory, 'evaluation.csv'))
    gather_images(model.results_directory)
def run(args, i_cv):
    """Pivot-model run for one CV fold: train/load, sanity-check that the
    Fisher information is deterministic, then scan the number of bins."""
    logger = logging.getLogger()
    print_line()
    logger.info(f'Running iter n°{i_cv}')
    print_line()

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator = TrainGenerator(param_generator, Generator(seed))
    valid_generator = Generator(seed+1)
    test_generator = Generator(seed+2)

    # SET MODEL
    logger.info('Set up classifier')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_pivot(model, train_generator,
                        config.N_TRAINING_SAMPLES*N_AUGMENT,
                        retrain=args.retrain)

    # Computing the Fisher info twice must give the same value.
    fisher_a = compute_fisher(
        *compute_bins(model, valid_generator, config, n_bins=3),
        config.TRUE.mu)
    fisher_b = compute_fisher(
        *compute_bins(model, valid_generator, config, n_bins=3),
        config.TRUE.mu)
    assert fisher_a == fisher_b, f"Fisher info should be deterministic but found : {fisher_a} =/= {fisher_b}"

    # MEASUREMENT
    rows = []
    for test_config in config.iter_test_config():
        logger.info(f"Running test set : {test_config.TRUE}, {test_config.N_TESTING_SAMPLES} samples")
        for n_bins in range(1, 30):
            row = {'i_cv': i_cv}
            gamma_array, beta_array = compute_bins(model, valid_generator,
                                                   test_config, n_bins=n_bins)
            fisher = compute_fisher(gamma_array, beta_array,
                                    test_config.TRUE.mu)
            row.update({f'gamma_{i}': gamma
                        for i, gamma in enumerate(gamma_array, 1)})
            row.update({f'beta_{i}': beta
                        for i, beta in enumerate(beta_array, 1)})
            row.update(test_config.TRUE.to_dict(prefix='true_'))
            row['n_test_samples'] = test_config.N_TESTING_SAMPLES
            row['fisher'] = fisher
            row['n_bins'] = n_bins
            rows.append(row.copy())
    results = pd.DataFrame(rows)
    print(results)
    return results
def run_conditional_estimation(args, i_cv):
    """Conditional-estimation run for one CV fold of the regressor:
    train/load, then concatenate the per-test-config conditional estimates."""
    logger = logging.getLogger()
    print_line()
    logger.info(f'Running iter n°{i_cv}')
    print_line()
    result_row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator, valid_generator, test_generator = get_generators_torch(
        seed, cuda=args.cuda, GeneratorClass=GeneratorClass)
    train_generator = TrainGenerator(param_generator,
                                     GeneratorCPU(train_generator))
    valid_generator = GeneratorCPU(valid_generator)
    test_generator = GeneratorCPU(test_generator)

    # SET MODEL
    logger.info('Set up classifier')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_neural_net(model, train_generator, retrain=args.retrain)

    # CHECK TRAINING
    logger.info('Generate validation data')
    # NOTE(review): the generated sample is not used below, but the call may
    # advance the generator state, so it is kept to preserve behavior.
    X_valid, y_valid, w_valid = valid_generator.generate(
        *config.CALIBRATED, n_samples=config.N_VALIDATION_SAMPLES,
        no_grad=True)

    # MEASUREMENT
    result_row['nfcn'] = NCALL
    per_config = []
    for i, test_config in enumerate(config.iter_test_config()):
        per_config.append(
            run_conditional_estimation_iter(model, result_row, i, test_config,
                                            valid_generator, test_generator))
    conditional_estimate = pd.concat(per_config)
    conditional_estimate['i_cv'] = i_cv
    fname = os.path.join(model.results_path, "conditional_estimations.csv")
    conditional_estimate.to_csv(fname)
    logger.info('DONE')
    return conditional_estimate
def main():
    """Launcher: run or load INFERNO estimations over the requested CV folds,
    then evaluate both the estimations and the conditional estimations."""
    # BASIC SETUP
    logger = set_logger()
    args = INFERNO_parse_args(
        main_description=
        "Training launcher for Gradient boosting on S3D2 benchmark")
    logger.info(args)
    flush(logger)
    # INFO
    model = build_model(args, -1)
    os.makedirs(model.results_directory, exist_ok=True)
    config = Config()
    config_table = evaluate_config(config)
    config_table.to_csv(
        os.path.join(model.results_directory, 'config_table.csv'))
    # RUN
    if args.load_run:
        logger.info(f'Loading previous runs [{args.start_cv},{args.end_cv}[')
        directory = model.results_directory
        estimations = load_estimations(directory, start_cv=args.start_cv,
                                       end_cv=args.end_cv)
        conditional_estimations = load_conditional_estimations(
            directory, start_cv=args.start_cv, end_cv=args.end_cv)
    else:
        logger.info(f'Running runs [{args.start_cv},{args.end_cv}[')
        estimations = []
        conditional_estimations = []
        for i_cv in range(args.start_cv, args.end_cv):
            estimation, conditional = run(args, i_cv)
            estimations.append(estimation)
            conditional_estimations.append(conditional)
        estimations = pd.concat(estimations, ignore_index=True)
        conditional_estimations = pd.concat(conditional_estimations)
        estimations.to_csv(
            os.path.join(model.results_directory, 'estimations.csv'))
        conditional_estimations.to_csv(
            os.path.join(model.results_directory,
                         'conditional_estimations.csv'))
    # EVALUATION
    eval_table = evaluate_estimator(config.INTEREST_PARAM_NAME, estimations)
    eval_conditional = evaluate_conditional_estimation(
        conditional_estimations,
        interest_param_name=config.INTEREST_PARAM_NAME)
    eval_table = pd.concat([eval_table, eval_conditional], axis=1)
    print_line()
    print_line()
    print(eval_table)
    print_line()
    print_line()
    eval_table.to_csv(os.path.join(model.results_directory, 'evaluation.csv'))
    gather_images(model.results_directory)
def main():
    """Entry point: run every CV iteration, save the pooled results and
    the estimator evaluation, then gather the produced images."""
    logger = set_logger()
    logger.info("Hello world !")
    os.makedirs(DIRECTORY, exist_ok=True)
    set_plot_config()
    args = None

    config = Config()
    per_cv = [run(args, i_cv) for i_cv in range(N_ITER)]
    results = pd.concat(per_cv, ignore_index=True)
    results.to_csv(os.path.join(DIRECTORY, 'results.csv'))

    # EVALUATION
    eval_table = evaluate_estimator(config.TRUE.interest_parameters_names,
                                    results)
    print_line()
    print_line()
    print(eval_table)
    print_line()
    print_line()
    eval_table.to_csv(os.path.join(DIRECTORY, 'evaluation.csv'))
    gather_images(DIRECTORY)
def main():
    """Launcher: run every CV fold of the INFERNO model on the S3D2
    benchmark, then evaluate and gather the results."""
    # BASIC SETUP
    logger = set_logger()
    args = INFERNO_parse_args(
        main_description="Training launcher for Regressor on S3D2 benchmark")
    logger.info(args)
    flush(logger)
    # INFO
    model = build_model(args, -1)
    pb_config = Config()
    # RUN
    per_cv = [run(args, i_cv) for i_cv in range(N_ITER)]
    results = pd.concat(per_cv, ignore_index=True)
    results.to_csv(os.path.join(model.directory, 'results.csv'))
    # EVALUATION
    eval_table = evaluate_estimator(pb_config.INTEREST_PARAM_NAME, results)
    print_line()
    print_line()
    print(eval_table)
    print_line()
    print_line()
    eval_table.to_csv(os.path.join(model.directory, 'evaluation.csv'))
    gather_images(model.directory)
def main():
    """Launcher: run every CV fold of the gradient-boosting model on the
    AP1 benchmark, then evaluate and gather the results."""
    # BASIC SETUP
    logger = set_logger()
    args = GB_parse_args(
        main_description="Training launcher for Gradient boosting on AP1 benchmark")
    logger.info(args)
    flush(logger)
    # INFO
    model = get_model(args, GradientBoostingModel)
    model.set_info(BENCHMARK_NAME, -1)
    pb_config = AP1Config()
    # RUN
    per_cv = [run(args, i_cv) for i_cv in range(N_ITER)]
    results = pd.concat(per_cv, ignore_index=True)
    results.to_csv(os.path.join(model.directory, 'results.csv'))
    # EVALUATION
    eval_table = evaluate_estimator(pb_config.INTEREST_PARAM_NAME, results)
    print_line()
    print_line()
    print(eval_table)
    print_line()
    print_line()
    eval_table.to_csv(os.path.join(model.directory, 'evaluation.csv'))
    gather_images(model.directory)
def run_iter(i_cv, i_iter, config, seed, directory):
    """Bayesian grid inference for one iteration of one CV fold.

    Builds a 3D grid over (r, lam, mu), computes the posterior from the
    generator's log-likelihood and uniform priors, extracts marginal and
    conditional posteriors, and returns a dict of estimator values
    (expectations, variances, stat/syst uncertainties) for mu, r and lam.
    Plots of the marginals are written under `directory/iter_{i_iter}`.
    """
    # Init
    logger = logging.getLogger()
    print_line()
    logger.info('running iter n°{}'.format(i_iter))
    directory = os.path.join(directory, f'iter_{i_iter}')
    os.makedirs(directory, exist_ok=True)
    results = dict(i_cv=i_cv, i=i_iter)

    # Config
    # DATA_N_SAMPLES = config.N_TESTING_SAMPLES
    DATA_N_SAMPLES = 9000
    R_MIN = config.TRUE.r - 0.3
    R_MAX = config.TRUE.r + 0.3
    LAM_MIN = config.TRUE.lam - 1
    LAM_MAX = config.TRUE.lam + 1
    MU_MIN = max(0, config.TRUE.mu - 0.1)
    MU_MAX = min(1.0, config.TRUE.mu + 0.1)

    R_N_SAMPLES = 21
    LAM_N_SAMPLES = 22
    MU_N_SAMPLES = 23

    # Prior: uniform over each grid range
    prior_r = stats.uniform(loc=R_MIN, scale=R_MAX - R_MIN)
    prior_lam = stats.uniform(loc=LAM_MIN, scale=LAM_MAX - LAM_MIN)
    prior_mu = stats.uniform(loc=MU_MIN, scale=MU_MAX - MU_MIN)

    # Param grid
    r_grid = np.linspace(R_MIN, R_MAX, R_N_SAMPLES)
    lam_grid = np.linspace(LAM_MIN, LAM_MAX, LAM_N_SAMPLES)
    mu_grid = np.linspace(MU_MIN, MU_MAX, MU_N_SAMPLES)

    # Data Generator
    generator = Generator(seed)
    data, label = generator.sample_event(*config.TRUE, size=DATA_N_SAMPLES)
    debug_label(label)

    # Compute likelihood over the full (r, lam, mu) grid
    shape = (R_N_SAMPLES, LAM_N_SAMPLES, MU_N_SAMPLES)
    n_elements = np.prod(shape)
    logger.info(f"3D grid has {n_elements} elements")
    log_likelihood = np.zeros(shape)
    log_prior_proba = np.zeros(shape)
    for i, j, k in get_iter_prod(R_N_SAMPLES, LAM_N_SAMPLES, MU_N_SAMPLES,
                                 progress_bar=True):
        log_likelihood[i, j, k] = generator.log_proba_density(
            data, r_grid[i], lam_grid[j], mu_grid[k]).sum()
        log_prior_proba[i, j, k] = prior_r.logpdf(r_grid[i]) \
            + prior_lam.logpdf(lam_grid[j]) \
            + prior_mu.logpdf(mu_grid[k])
    debug_log_proba(log_likelihood, log_prior_proba)

    # Normalization
    posterior_r_lam_mu = softmax(log_likelihood + log_prior_proba)
    debug_posterior(posterior_r_lam_mu)

    # Marginal posterior param proba (axes: 0=r, 1=lam, 2=mu)
    marginal_r = posterior_r_lam_mu.sum(axis=2).sum(axis=1)
    marginal_lam = posterior_r_lam_mu.sum(axis=2).sum(axis=0)
    marginal_mu = posterior_r_lam_mu.sum(axis=1).sum(axis=0)
    marginal_r_lam = posterior_r_lam_mu.sum(axis=2)
    assert marginal_r.shape == r_grid.shape, "sum along the wrong axis for marginal r"
    assert marginal_lam.shape == lam_grid.shape, "sum along the wrong axis for marginal lam"
    assert marginal_mu.shape == mu_grid.shape, "sum along the wrong axis for marginal mu"
    assert marginal_r_lam.shape == (
        R_N_SAMPLES,
        LAM_N_SAMPLES), "sum along the wrong axis for marginal (r, lam)"
    debug_marginal(marginal_r, "r")
    debug_marginal(marginal_lam, "lam")
    debug_marginal(marginal_mu, "mu")
    debug_marginal(marginal_r_lam, "r_lam")

    # Conditional posterior p(mu | x, r, lam)
    # NOTE(review): the `where` mask guards on the numerator rather than the
    # denominator; zero denominators with nonzero numerators would still
    # divide — confirm this is intended.
    posterior_mu = np.divide(posterior_r_lam_mu,
                             marginal_r_lam.reshape(R_N_SAMPLES,
                                                    LAM_N_SAMPLES, 1),
                             out=np.zeros_like(posterior_r_lam_mu),
                             where=(posterior_r_lam_mu != 0))

    # Minor check
    logger.debug("probability densities should sum to one")
    debug_proba_sum_one(posterior_mu *
                        marginal_r_lam.reshape(R_N_SAMPLES, LAM_N_SAMPLES, 1))
    debug_proba_sum_one(posterior_r_lam_mu)
    debug_proba_sum_one(marginal_r)
    debug_proba_sum_one(marginal_mu)

    # Compute estimator values
    sig_ratio = np.sum(label == 1) / DATA_N_SAMPLES
    expect_mu = expectancy(mu_grid, marginal_mu)
    var_mu = variance(mu_grid, marginal_mu)
    std_mu = np.sqrt(var_mu)
    expect_r = expectancy(r_grid, marginal_r)
    var_r = variance(r_grid, marginal_r)
    std_r = np.sqrt(var_r)
    expect_lam = expectancy(lam_grid, marginal_lam)
    var_lam = variance(lam_grid, marginal_lam)
    std_lam = np.sqrt(var_lam)
    stat_err = stat_uncertainty(mu_grid, posterior_mu, marginal_r_lam,
                                reshape=(1, 1, -1))
    syst_err = syst_uncertainty(mu_grid, posterior_mu, marginal_r_lam,
                                reshape=(1, 1, -1))
    i_max, j_max, k_max = np.unravel_index(np.argmax(log_likelihood),
                                           log_likelihood.shape)
    assert np.max(log_likelihood) == log_likelihood[
        i_max, j_max, k_max], "max and argmax should point to the same value"

    # Save estimator values
    results['mu'] = expect_mu
    results['mu' + _TRUTH] = config.TRUE.mu
    results['mu_std'] = std_mu
    results['mu' + _ERROR] = var_mu
    results['mu_stat'] = stat_err
    results['mu_syst'] = syst_err
    results['r'] = expect_r
    results['r' + _TRUTH] = config.TRUE.r
    results['r_std'] = std_r
    results['r' + _ERROR] = var_r
    results['lam'] = expect_lam
    results['lam' + _TRUTH] = config.TRUE.lam
    results['lam_std'] = std_lam
    results['lam' + _ERROR] = var_lam

    # Log estimator values
    logger.info(f"True mu value = {config.TRUE.mu}")
    logger.info(f"Sig ratio = {sig_ratio}")
    logger.info(f"E[mu|x] = {expect_mu}")
    logger.info(f"Var[mu|x] = {var_mu}")
    logger.info(f"sqrt(Var[mu|x]) = {std_mu}")
    logger.info(f"stat_uncertainty = {stat_err}")
    logger.info(f"syst_uncertainty = {syst_err}")
    logger.info(f"Var - stat = {var_mu - stat_err}")
    logger.info(f"argmax_mu p(mu|x) = {mu_grid[np.argmax(marginal_mu)]}")
    # BUG FIX: previously printed mu_grid[j_max]; j_max indexes the lam axis
    # of the (r, lam, mu)-shaped grid — the mu index is k_max.
    logger.info(
        f"argmax_r_mu logp(x|r, mu) = {r_grid[i_max]} {mu_grid[k_max]}")

    # Minor checks
    debug_min_max(marginal_mu, 'p(mu | x)')
    debug_min_max(marginal_lam, 'p(lam | x)')
    debug_min_max(marginal_r, 'p(r | x)')
    debug_min_max(posterior_mu, 'p(mu | x, r)')
    debug_min_max(posterior_r_lam_mu, 'p(mu, r | x)')

    # Plots
    plot_infer(mu_grid, marginal_mu, expected_value=expect_mu,
               true_value=config.TRUE.mu, std=std_mu, name='mu',
               directory=directory, fname='marginal_mu.png')
    plot_infer(r_grid, marginal_r, expected_value=expect_r,
               true_value=config.TRUE.r, std=std_r, name='r',
               directory=directory, fname='marginal_r.png')
    plot_infer(lam_grid, marginal_lam, expected_value=expect_lam,
               true_value=config.TRUE.lam, std=std_lam, name='lam',
               directory=directory, fname='marginal_lam.png')
    # plot_distrib(data, generator, config.TRUE, expect_r, expect_mu,
    #              title="data distribution", directory=directory,
    #              fname='data_distrib.png')
    return results
def run(args, i_cv):
    """Run one cross-validation iteration of the INFERNO benchmark on S3D2 data.

    Trains (or loads) an inferno model, then scans the configured range of
    true mu values: for each mu it generates test data, builds a
    summary-statistic NLL, minimizes it with minuit, and records the fit
    results.  One plot per parameter is produced at the end.

    Args:
        args: parsed command-line arguments (uses args.retrain, args.n_bins).
        i_cv: cross-validation index; offsets the RNG seed.

    Returns:
        pandas.DataFrame with one row per tested true mu value.
    """
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()
    result_row = {'i_cv': i_cv}  # shared fields; a copy is appended per mu
    result_table = []

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    pb_config = Config()
    # NOTE(review): `config` here is presumably the imported config module,
    # distinct from the local `pb_config` — confirm against the file imports.
    seed = config.SEED + i_cv * 5
    train_generator = Synthetic3DGeneratorTorch(seed)
    # Distinct seeds keep train/valid/test statistically independent.
    valid_generator = S3D2(seed + 1)
    test_generator = S3D2(seed + 2)

    # SET MODEL
    logger.info('Set up inferno')
    model = build_model(args, i_cv)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_inferno(model, train_generator, retrain=args.retrain)

    # CHECK TRAINING
    result_row.update(evaluate_neural_net(model))
    logger.info('Generate validation data')
    X_valid, y_valid, w_valid = valid_generator.generate(
        pb_config.CALIBRATED_R,
        pb_config.CALIBRATED_LAMBDA,
        pb_config.CALIBRATED_MU,
        n_samples=pb_config.N_VALIDATION_SAMPLES)

    # MEASUREMENT: one NLL fit per true mu value
    N_BINS = args.n_bins
    compute_summaries = model.compute_summaries
    for mu in pb_config.TRUE_MU_RANGE:
        # r and lambda stay at their true values; only mu is scanned.
        true_params = Parameter(pb_config.TRUE.r, pb_config.TRUE.lam, mu)
        suffix = f'-mu={true_params.mu:1.2f}_r={true_params.r}_lambda={true_params.lam}'
        logger.info('Generate testing data')
        X_test, y_test, w_test = test_generator.generate(
            *true_params, n_samples=pb_config.N_TESTING_SAMPLES)
        # PLOT SUMMARIES
        evaluate_summary_computer(model, X_valid, y_valid, w_valid,
                                  X_test, w_test,
                                  n_bins=N_BINS, prefix='', suffix=suffix)
        logger.info('Set up NLL computer')
        compute_nll = S3D2NLL(compute_summaries, valid_generator, X_test, w_test)
        # NLL PLOTS
        plot_nll_around_min(compute_nll, true_params, model.path, suffix)
        # MINIMIZE NLL
        logger.info('Prepare minuit minimizer')
        minimizer = get_minimizer(compute_nll, pb_config.CALIBRATED,
                                  pb_config.CALIBRATED_ERROR)
        fmin, params = estimate(minimizer)
        result_row.update(evaluate_minuit(minimizer, fmin, params, true_params))
        # Copy so later loop iterations do not mutate earlier rows.
        result_table.append(result_row.copy())
    result_table = pd.DataFrame(result_table)

    logger.info('Plot params')
    param_names = pb_config.PARAM_NAMES
    for name in param_names:
        plot_params(name, result_table,
                    title=model.full_name, directory=model.path)
    logger.info('DONE')
    return result_table
def run_estimation(args, i_cv):
    """Run one cross-validation iteration of the data-augmentation benchmark.

    Trains (or loads) a classifier on augmented data, evaluates it on a
    calibrated validation set, then runs one estimation per test
    configuration and saves the results table to CSV with per-parameter
    plots.

    Args:
        args: parsed command-line arguments (uses args.retrain).
        i_cv: cross-validation index; offsets the RNG seed.

    Returns:
        pandas.DataFrame with one row per test configuration.
    """
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()
    result_row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator = Generator(seed)
    # Wrap the raw generator so training also samples nuisance parameters.
    train_generator = TrainGenerator(param_generator, train_generator)
    # Distinct seeds keep train/valid/test statistically independent.
    valid_generator = Generator(seed + 1)
    test_generator = Generator(seed + 2)

    # SET MODEL
    logger.info('Set up classifier')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_data_augmentation(model, train_generator,
                                    config.N_TRAINING_SAMPLES * N_AUGMENT,
                                    retrain=args.retrain)

    # CHECK TRAINING
    logger.info('Generate validation data')
    X_valid, y_valid, w_valid = valid_generator.generate(
        *config.CALIBRATED, n_samples=config.N_VALIDATION_SAMPLES)
    result_row.update(evaluate_neural_net(model, prefix='valid'))
    result_row.update(
        evaluate_classifier(model, X_valid, y_valid, w_valid, prefix='valid'))

    # MEASUREMENT: one estimation per test configuration
    evaluate_summary_computer(model, X_valid, y_valid, w_valid,
                              n_bins=N_BINS, prefix='valid_', suffix='')
    iter_results = [
        run_estimation_iter(model, result_row, i, test_config,
                            valid_generator, test_generator, n_bins=N_BINS)
        for i, test_config in enumerate(config.iter_test_config())
    ]
    result_table = pd.DataFrame(iter_results)
    result_table.to_csv(os.path.join(model.results_path, 'estimations.csv'))

    logger.info('Plot params')
    param_names = config.PARAM_NAMES
    for name in param_names:
        plot_params(name, result_table,
                    title=model.full_name, directory=model.results_path)
    logger.info('DONE')
    return result_table
def run(args, i_cv):
    """Run one cross-validation iteration of the S3D2 regressor benchmark.

    Trains (or loads) a neural-network regressor, plots its losses, then
    predicts the parameter of interest on test sets generated for every
    true mu value in the configured range.

    Args:
        args: parsed command-line arguments; args.net and args.optimizer
            are set here, and args.retrain forces retraining.
        i_cv: cross-validation index; offsets the RNG seed.

    Returns:
        pandas.DataFrame with one row per tested true mu value.
    """
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()
    result_row = {'i_cv': i_cv}  # shared fields; a copy is appended per mu
    result_table = []

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    pb_config = S3D2Config()
    # NOTE(review): `config` is presumably the imported config module,
    # distinct from the local `pb_config` — confirm against the file imports.
    seed = config.SEED + i_cv * 5
    train_generator = S3D2(seed)
    # Distinct seeds keep train/valid/test statistically independent.
    valid_generator = S3D2(seed + 1)
    test_generator = S3D2(seed + 2)

    # SET MODEL
    # FIX: log message typo 'rergessor' -> 'regressor'.
    logger.info('Set up regressor')
    args.net = AR5R5E(n_in=3, n_out=2, n_extra=2)
    args.optimizer = get_optimizer(args)
    model = get_model(args, Regressor)
    model.set_info(BENCHMARK_NAME, i_cv)
    model.param_generator = param_generator
    flush(logger)

    # TRAINING / LOADING: try to load a saved model unless retraining was
    # requested; fall back to retraining when loading fails.
    if not args.retrain:
        try:
            logger.info('loading from {}'.format(model.model_path))
            model.load(model.model_path)
        except Exception as e:
            logger.warning(e)
            args.retrain = True
    if args.retrain:
        logger.info('Training {}'.format(model.get_name()))
        model.fit(train_generator)
        logger.info('Training DONE')
        # SAVE MODEL
        save_model(model)

    # CHECK TRAINING
    logger.info('Plot losses')
    plot_REG_losses(model)
    plot_REG_log_mse(model)
    result_row['loss'] = model.losses[-1]
    result_row['mse_loss'] = model.mse_losses[-1]

    # MEASUREMENT: one prediction per true mu value
    for mu in pb_config.TRUE_MU_RANGE:
        pb_config.TRUE_MU = mu
        logger.info('Generate testing data')
        test_generator.reset()
        # r and lambda are kept at their calibrated (not true) values.
        X_test, y_test, w_test = test_generator.generate(
            pb_config.CALIBRATED_R,
            pb_config.CALIBRATED_LAMBDA,
            pb_config.TRUE_MU,
            n_samples=pb_config.N_TESTING_SAMPLES)
        # Nuisance parameters fed to the regressor as extra inputs.
        p_test = np.array(
            (pb_config.CALIBRATED_R, pb_config.CALIBRATED_LAMBDA))
        pred, sigma = model.predict(X_test, w_test, p_test)
        name = pb_config.INTEREST_PARAM_NAME
        result_row[name] = pred
        result_row[name + _ERROR] = sigma
        result_row[name + _TRUTH] = pb_config.TRUE_MU
        logger.info('{} =vs= {} +/- {}'.format(pb_config.TRUE_MU, pred, sigma))
        # Copy so later loop iterations do not mutate earlier rows.
        result_table.append(result_row.copy())
    result_table = pd.DataFrame(result_table)

    logger.info('Plot params')
    name = pb_config.INTEREST_PARAM_NAME
    plot_params(name, result_table,
                title=model.full_name, directory=model.results_path)
    logger.info('DONE')
    return result_table
def run_iter(i_cv, i_iter, config, seed, directory):
    """Posterior-grid estimation of (rescale, mu) for one iteration.

    Builds uniform priors and a 2D (rescale, mu) grid centered on the true
    parameter values, evaluates the log-likelihood of DATA_N_SAMPLES
    generated events on every grid node, normalizes to a joint posterior,
    and extracts marginal/conditional expectations, variances and the
    stat/syst uncertainty decomposition.  Diagnostic plots are written to
    `directory`/iter_<i_iter>.

    Args:
        i_cv: cross-validation index (recorded in the results).
        i_iter: iteration index (recorded; also names the sub-directory).
        config: problem configuration; config.TRUE holds the true parameters.
        seed: RNG seed for the data generator.
        directory: parent output directory for plots.

    Returns:
        dict of estimator values and diagnostics for this iteration.
    """
    # Init
    logger = logging.getLogger()
    print_line()
    logger.info('running iter n°{}'.format(i_iter))
    directory = os.path.join(directory, f'iter_{i_iter}')
    os.makedirs(directory, exist_ok=True)
    results = dict(i_cv=i_cv, i=i_iter)

    # Config: scan box centered on the true values; mu clipped to [0, 1].
    RESCALE_MIN = config.TRUE.rescale - 0.2
    RESCALE_MAX = config.TRUE.rescale + 0.2
    MU_MIN = max(0, config.TRUE.mu - 0.1)
    MU_MAX = min(1.0, config.TRUE.mu + 0.1)
    MU_N_SAMPLES = 142
    RESCALE_N_SAMPLES = 145
    DATA_N_SAMPLES = 2000

    # Prior: uniform over the scanned box
    prior_rescale = stats.uniform(loc=RESCALE_MIN,
                                  scale=RESCALE_MAX - RESCALE_MIN)
    prior_mu = stats.uniform(loc=MU_MIN, scale=MU_MAX - MU_MIN)

    # Param grid
    rescale_grid = np.linspace(RESCALE_MIN, RESCALE_MAX, RESCALE_N_SAMPLES)
    mu_grid = np.linspace(MU_MIN, MU_MAX, MU_N_SAMPLES)

    # Data Generator
    generator = Generator(seed)
    data, label = generator.sample_event(*config.TRUE, size=DATA_N_SAMPLES)
    debug_label(label)

    # Compute likelihood on the full grid.
    shape = (RESCALE_N_SAMPLES, MU_N_SAMPLES)
    n_elements = np.prod(shape)
    # FIX: the grid is 2D (rescale x mu); the message previously said "3D".
    logger.info(f"2D grid has {n_elements} elements")
    log_likelihood = np.zeros(shape)
    log_prior_proba = np.zeros(shape)
    # Hoisted: each prior term depends on a single axis, so evaluate the
    # logpdf once per coordinate instead of once per (i, j) pair.
    log_prior_rescale = prior_rescale.logpdf(rescale_grid)
    log_prior_mu = prior_mu.logpdf(mu_grid)
    for i, j in get_iter_prod(RESCALE_N_SAMPLES, MU_N_SAMPLES,
                              progress_bar=True):
        log_likelihood[i, j] = generator.log_proba_density(
            data, rescale_grid[i], mu_grid[j]).sum()
        log_prior_proba[i, j] = log_prior_rescale[i] + log_prior_mu[j]
    debug_log_proba(log_likelihood, log_prior_proba)

    # Normalization: softmax over the whole grid -> joint posterior
    posterior_rescale_mu = softmax(log_likelihood + log_prior_proba)
    debug_posterior(posterior_rescale_mu)

    # Marginal posterior param proba
    marginal_rescale = posterior_rescale_mu.sum(axis=1)
    marginal_mu = posterior_rescale_mu.sum(axis=0)
    assert marginal_rescale.shape == rescale_grid.shape, "sum along the wrong axis for marginal rescale"
    assert marginal_mu.shape == mu_grid.shape, "sum along the wrong axis for marginal mu"
    debug_marginal(marginal_rescale, "rescale")
    debug_marginal(marginal_mu, "mu")

    # Conditional posterior p(mu | x, rescale); `where` guards against
    # 0/0 divisions (those cells stay 0 via `out`).
    posterior_mu = np.divide(posterior_rescale_mu,
                             marginal_rescale.reshape(RESCALE_N_SAMPLES, 1),
                             out=np.zeros_like(posterior_rescale_mu),
                             where=(posterior_rescale_mu != 0))

    # Minor check
    logger.debug("probability densities should sum to one")
    debug_proba_sum_one(posterior_mu * marginal_rescale.reshape(-1, 1))
    debug_proba_sum_one(posterior_rescale_mu)
    debug_proba_sum_one(marginal_rescale)
    debug_proba_sum_one(marginal_mu)

    # Compute estimator values
    sig_ratio = np.sum(label == 1) / DATA_N_SAMPLES
    expect_mu = expectancy(mu_grid, marginal_mu)
    var_mu = variance(mu_grid, marginal_mu)
    std_mu = np.sqrt(var_mu)
    expect_rescale = expectancy(rescale_grid, marginal_rescale)
    var_rescale = variance(rescale_grid, marginal_rescale)
    std_rescale = np.sqrt(var_rescale)
    stat_err = stat_uncertainty(mu_grid, posterior_mu, marginal_rescale)
    syst_err = syst_uncertainty(mu_grid, posterior_mu, marginal_rescale)
    i_max, j_max = np.unravel_index(np.argmax(log_likelihood),
                                    log_likelihood.shape)
    assert np.max(log_likelihood) == log_likelihood[i_max, j_max], \
        "max and argmax should point to the same value"

    # Save estimator values
    # NOTE(review): the `_ERROR` key stores the variance (not the std),
    # matching the convention used by the sibling estimation functions.
    results['mu'] = expect_mu
    results['mu' + _TRUTH] = config.TRUE.mu
    results['mu_std'] = std_mu
    results['mu' + _ERROR] = var_mu
    results['mu_stat'] = stat_err
    results['mu_syst'] = syst_err
    results['rescale'] = expect_rescale
    results['rescale' + _TRUTH] = config.TRUE.rescale
    results['rescale_std'] = std_rescale
    results['rescale' + _ERROR] = var_rescale

    # Log estimator values
    logger.info(f"True mu value = {config.TRUE.mu}")
    logger.info(f"Sig ratio = {sig_ratio}")
    logger.info(f"E[mu|x] = {expect_mu}")
    logger.info(f"Var[mu|x] = {var_mu}")
    logger.info(f"sqrt(Var[mu|x]) = {std_mu}")
    logger.info(f"stat_uncertainty = {stat_err}")
    logger.info(f"syst_uncertainty = {syst_err}")
    logger.info(f"Var - stat = {var_mu - stat_err}")
    logger.info(f"argmax_mu p(mu|x) = {mu_grid[np.argmax(marginal_mu)]}")
    logger.info(
        f"argmax_rescale_mu logp(x|rescale, mu) = {rescale_grid[i_max]} {mu_grid[j_max]}"
    )

    # Minor checks
    debug_min_max(marginal_mu, 'p(mu | x)')
    debug_min_max(marginal_rescale, 'p(rescale | x)')
    debug_min_max(posterior_mu, 'p(mu | x, rescale)')
    debug_min_max(posterior_rescale_mu, 'p(mu, rescale | x)')

    # Plots
    plot_infer(mu_grid, marginal_mu,
               expected_value=expect_mu,
               true_value=config.TRUE.mu,
               std=std_mu,
               name='mu',
               directory=directory,
               fname='marginal_mu.png')
    plot_infer(rescale_grid, marginal_rescale,
               expected_value=expect_rescale,
               true_value=config.TRUE.rescale,
               std=std_rescale,
               name='rescale',
               directory=directory,
               fname='marginal_rescale.png')
    plot_distrib(data, generator, config.TRUE, expect_rescale, expect_mu,
                 title="data distribution", directory=directory,
                 fname='data_distrib.png')
    return results
def run(args, i_cv):
    """Run one cross-validation iteration of the AP1 gradient-boosting benchmark.

    Trains (or loads) a gradient-boosting classifier on calibrated AP1
    data, then for every true apple-ratio value generates test data,
    builds a summary-statistic NLL, minimizes it with minuit and records
    the fitted parameters.

    Args:
        args: parsed command-line arguments (uses args.retrain).
        i_cv: cross-validation index; offsets the RNG seed.

    Returns:
        pandas.DataFrame with one row per tested apple-ratio value.
    """
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()
    result_row = {'i_cv': i_cv}  # shared fields; a copy is appended per mu
    result_table = []

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    pb_config = AP1Config()
    # NOTE(review): `config` is presumably the imported config module,
    # distinct from the local `pb_config` — confirm against the file imports.
    seed = config.SEED + i_cv * 5
    train_generator = AP1(seed)
    # Distinct seeds keep train/valid/test statistically independent.
    valid_generator = AP1(seed+1)
    test_generator = AP1(seed+2)

    # SET MODEL
    logger.info('Set up classifier')
    model = get_model(args, GradientBoostingModel)
    model.set_info(BENCHMARK_NAME, i_cv)
    flush(logger)

    # TRAINING / LOADING: try to load a saved model unless retraining was
    # requested; fall back to retraining when loading fails.
    if not args.retrain:
        try:
            logger.info('loading from {}'.format(model.path))
            model.load(model.path)
        except Exception as e:
            logger.warning(e)
            args.retrain = True
    if args.retrain:
        logger.info('Generate training data')
        X_train, y_train, w_train = train_generator.generate(
            apple_ratio=pb_config.CALIBRATED_APPLE_RATIO,
            n_samples=pb_config.N_TRAINING_SAMPLES)
        logger.info('Training {}'.format(model.get_name()))
        model.fit(X_train, y_train, w_train)
        logger.info('Training DONE')
        # SAVE MODEL
        save_model(model)

    # CHECK TRAINING
    logger.info('Generate validation data')
    X_valid, y_valid, w_valid = valid_generator.generate(
        apple_ratio=pb_config.CALIBRATED_APPLE_RATIO,
        n_samples=pb_config.N_VALIDATION_SAMPLES)
    logger.info('Plot distribution of the score')
    plot_valid_distrib(model, X_valid, y_valid, classes=("pears", "apples"))
    result_row['valid_accuracy'] = model.score(X_valid, y_valid)

    # MEASUREMENT: one NLL fit per true apple ratio
    n_bins = 10
    compute_summaries = ClassifierSummaryComputer(model, n_bins=n_bins)
    for mu in pb_config.TRUE_APPLE_RATIO_RANGE:
        pb_config.TRUE_APPLE_RATIO = mu
        logger.info('Generate testing data')
        X_test, y_test, w_test = test_generator.generate(
            apple_ratio=pb_config.TRUE_APPLE_RATIO,
            n_samples=pb_config.N_TESTING_SAMPLES)
        logger.info('Set up NLL computer')
        compute_nll = AP1NLL(compute_summaries, valid_generator, X_test, w_test)
        logger.info('Plot summaries')
        extension = '-mu={:1.1f}'.format(pb_config.TRUE_APPLE_RATIO)
        plot_summaries(
            model, n_bins, extension,
            X_valid, y_valid, w_valid,
            X_test, w_test,
            classes=('pears', 'apples', 'fruits')
        )
        # NLL PLOTS
        logger.info('Plot NLL around minimum')
        plot_apple_ratio_around_min(compute_nll,
                                    pb_config.TRUE_APPLE_RATIO,
                                    model, extension)
        # MINIMIZE NLL
        logger.info('Prepare minuit minimizer')
        minimizer = get_minimizer(compute_nll)
        fmin, params = estimate(minimizer)
        params_truth = [pb_config.TRUE_APPLE_RATIO]
        print_params(params, params_truth)
        register_params(params, params_truth, result_row)
        # NOTE(review): migrad_ok() is the pre-2.x iminuit API — confirm the
        # pinned iminuit version still provides it.
        result_row['is_mingrad_valid'] = minimizer.migrad_ok()
        result_row.update(fmin)
        # Copy so later loop iterations do not mutate earlier rows.
        result_table.append(result_row.copy())
    result_table = pd.DataFrame(result_table)

    logger.info('Plot params')
    param_names = pb_config.PARAM_NAMES
    for name in param_names:
        plot_params(name, result_table,
                    title=model.full_name, directory=model.path)
    logger.info('DONE')
    return result_table
def run(args, i_cv):
    """Run one cross-validation iteration of the AP1 regressor benchmark.

    Trains (or loads) a neural-network regressor on parameter-augmented
    AP1 data, plots its losses, then predicts the apple ratio on test
    sets generated for every true apple-ratio value in the configured
    range.

    Args:
        args: parsed command-line arguments; args.net and args.optimizer
            are set here, and args.retrain forces retraining.
        i_cv: cross-validation index; offsets the RNG seed.

    Returns:
        pandas.DataFrame with one row per tested apple-ratio value.
    """
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()
    result_row = {'i_cv': i_cv}  # shared fields; a copy is appended per mu
    result_table = []

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    pb_config = AP1Config()
    # NOTE(review): `config` is presumably the imported config module,
    # distinct from the local `pb_config` — confirm against the file imports.
    seed = config.SEED + i_cv * 5
    train_generator = Generator(param_generator, AP1(seed))
    # Distinct seeds keep train/valid/test statistically independent.
    valid_generator = AP1(seed + 1)
    test_generator = AP1(seed + 2)

    # SET MODEL
    # FIX: log message typo 'rergessor' -> 'regressor'.
    logger.info('Set up regressor')
    args.net = F3R3(n_in=1, n_out=2)
    args.optimizer = get_optimizer(args)
    model = get_model(args, Regressor)
    model.set_info(BENCHMARK_NAME, i_cv)
    flush(logger)

    # TRAINING / LOADING: try to load a saved model unless retraining was
    # requested; fall back to retraining when loading fails.
    if not args.retrain:
        try:
            logger.info('loading from {}'.format(model.path))
            model.load(model.path)
        except Exception as e:
            logger.warning(e)
            args.retrain = True
    if args.retrain:
        logger.info('Training {}'.format(model.get_name()))
        model.fit(train_generator)
        logger.info('Training DONE')
        # SAVE MODEL
        save_model(model)

    # CHECK TRAINING
    logger.info('Plot losses')
    plot_REG_losses(model)
    plot_REG_log_mse(model)
    result_row['loss'] = model.losses[-1]
    result_row['mse_loss'] = model.mse_losses[-1]

    # MEASUREMENT: one prediction per true apple ratio
    for mu in pb_config.TRUE_APPLE_RATIO_RANGE:
        pb_config.TRUE_APPLE_RATIO = mu
        logger.info('Generate testing data')
        X_test, y_test, w_test = test_generator.generate(
            apple_ratio=pb_config.TRUE_APPLE_RATIO,
            n_samples=pb_config.N_TESTING_SAMPLES)
        pred, sigma = model.predict(X_test, w_test)
        name = pb_config.INTEREST_PARAM_NAME
        result_row[name] = pred
        result_row[name + _ERROR] = sigma
        result_row[name + _TRUTH] = pb_config.TRUE_APPLE_RATIO
        logger.info('{} =vs= {} +/- {}'.format(pb_config.TRUE_APPLE_RATIO,
                                               pred, sigma))
        # Copy so later loop iterations do not mutate earlier rows.
        result_table.append(result_row.copy())
    result_table = pd.DataFrame(result_table)

    logger.info('Plot params')
    param_names = pb_config.PARAM_NAMES
    for name in param_names:
        # NOTE(review): this passes `model` positionally while sibling
        # scripts call plot_params(name, table, title=..., directory=...) —
        # confirm plot_params accepts both call shapes.
        plot_params(name, result_table, model)
    logger.info('DONE')
    return result_table