Ejemplo n.º 1
0
def run_conditional_estimation_iter(model, result_row, i_iter, config, valid_generator, test_generator, n_bins=10):
    """Run one conditional-estimation iteration and store its table as CSV.

    The function creates ``<model.results_path>/iter_<i_iter>``, resets
    ``test_generator`` and draws test data at ``config.TRUE``, wraps the
    model's summary computer in an ``NLLComputer`` and hands that to
    ``make_conditional_estimation``.  The outcome is turned into a
    ``pandas.DataFrame``, tagged with the iteration index in column ``'i'``
    and written to ``no_nuisance.csv`` inside the iteration directory.

    Args:
        model: object exposing ``results_path`` and
            ``summary_computer(n_bins=...)``.
        result_row: accepted for signature compatibility; not used here.
        i_iter: iteration index, used for the directory name and the
            ``'i'`` column.
        config: provides ``TRUE`` and ``N_TESTING_SAMPLES``; also forwarded
            to ``NLLComputer`` and ``make_conditional_estimation``.
        valid_generator: forwarded unchanged to ``NLLComputer``.
        test_generator: object with ``reset()`` and ``generate(...)``.
        n_bins: forwarded to ``model.summary_computer``.

    Returns:
        The DataFrame that was written to disk.
    """
    log = logging.getLogger()
    log.info('-'*45)
    log.info(f'iter : {i_iter}')
    flush(log)

    out_dir = os.path.join(model.results_path, f'iter_{i_iter}')
    os.makedirs(out_dir, exist_ok=True)

    log.info('Generate testing data')
    test_generator.reset()
    # The generated labels are not needed in this function.
    X_test, _, w_test = test_generator.generate(
        *config.TRUE,
        n_samples=config.N_TESTING_SAMPLES,
    )

    # SUMMARIES
    log.info('Set up NLL computer')
    summaries = model.summary_computer(n_bins=n_bins)
    nll = NLLComputer(summaries, valid_generator, X_test, w_test, config=config)

    # MEASURE STAT/SYST VARIANCE
    log.info('MEASURE STAT/SYST VARIANCE')
    estimate_table = pd.DataFrame(make_conditional_estimation(nll, config))
    estimate_table['i'] = i_iter
    estimate_table.to_csv(os.path.join(out_dir, "no_nuisance.csv"))

    return estimate_table
Ejemplo n.º 2
0
def run_estimation_iter(model, result_row, i_iter, config, valid_generator, test_generator, n_bins=10):
    """Run one estimation iteration: summaries, NLL plots and minimization.

    Steps, in order: create ``<model.results_path>/iter_<i_iter>``; record
    the iteration index and test sample count in ``result_row``; reset
    ``test_generator`` and draw test data at ``config.TRUE``; call
    ``evaluate_summary_computer``; build an ``NLLComputer``; call
    ``plot_nll_around_min``; build a minimizer with ``get_minimizer`` and
    merge the output of ``evaluate_minuit`` into ``result_row``.

    Args:
        model: object exposing ``results_path`` and
            ``summary_computer(n_bins=...)``.
        result_row: mapping updated **in place** with ``'i'``,
            ``'n_test_samples'`` and whatever ``evaluate_minuit`` returns.
        i_iter: iteration index.
        config: provides ``TRUE`` (with ``mu`` and ``rescale`` attributes),
            ``N_TESTING_SAMPLES``, ``CALIBRATED`` and ``CALIBRATED_ERROR``.
        valid_generator: forwarded unchanged to ``NLLComputer``.
        test_generator: object with ``reset()`` and ``generate(...)``.
        n_bins: number of bins passed to the summary helpers.

    Returns:
        A copy of ``result_row`` taken after all updates.
    """
    log = logging.getLogger()
    log.info('-'*45)
    log.info(f'iter : {i_iter}')
    flush(log)

    out_dir = os.path.join(model.results_path, f'iter_{i_iter}')
    os.makedirs(out_dir, exist_ok=True)
    result_row['i'] = i_iter
    result_row['n_test_samples'] = config.N_TESTING_SAMPLES
    # Suffix passed to the plotting/evaluation helpers; encodes the true parameters.
    suffix = f'-mu={config.TRUE.mu:1.2f}_rescale={config.TRUE.rescale}'

    log.info('Generate testing data')
    test_generator.reset()
    X_test, y_test, w_test = test_generator.generate(
        *config.TRUE,
        n_samples=config.N_TESTING_SAMPLES,
    )

    # PLOT SUMMARIES
    evaluate_summary_computer(
        model, X_test, y_test, w_test,
        n_bins=n_bins, prefix='', suffix=suffix, directory=out_dir,
    )

    log.info('Set up NLL computer')
    summaries = model.summary_computer(n_bins=n_bins)
    nll = NLLComputer(summaries, valid_generator, X_test, w_test, config=config)

    # NLL PLOTS
    plot_nll_around_min(nll, config.TRUE, out_dir, suffix)

    # MINIMIZE NLL
    log.info('Prepare minuit minimizer')
    minuit = get_minimizer(nll, config.CALIBRATED, config.CALIBRATED_ERROR)
    fit_outcome = evaluate_minuit(minuit, config.TRUE, out_dir, suffix=suffix)
    result_row.update(fit_outcome)
    return result_row.copy()
def _make_rescale_plot_clf(true_rescale, true_mu):
    """Draw the NLL as a function of ``rescale`` on the current pyplot figure.

    Test data is generated at ``(true_rescale, true_mu)`` from a
    ``Generator`` seeded with ``SEED``; the validation generator handed to
    ``NLLComputer`` is seeded with ``SEED - 1``.  The NLL is evaluated on 50
    evenly spaced ``rescale`` values in ``[0.5, 3]`` with ``mu`` held at
    ``true_mu``, and a dashed vertical line in the same colour marks
    ``true_rescale``.

    Nothing is returned and, within this function, the figure is neither
    saved nor shown.

    Args:
        true_rescale: first positional argument given to ``generate``; also
            the x-position of the vertical marker.
        true_mu: second positional argument given to ``generate``; also the
            fixed second argument of every NLL evaluation.
    """
    config = Config()
    data_source = Generator(seed=SEED)
    # Labels are generated but not used for this plot.
    X_test, _, w_test = data_source.generate(
        true_rescale,
        true_mu,
        n_samples=config.N_TESTING_SAMPLES,
    )

    model = load_some_NN(i_cv=0, cuda=False)
    summaries = model.summary_computer(n_bins=N_BINS)
    valid_generator = Generator(seed=SEED - 1)
    nll = NLLComputer(summaries, valid_generator, X_test, w_test, config=config)

    scan_points = np.linspace(0.5, 3, 50)
    scan_values = []
    for candidate in scan_points:
        scan_values.append(nll(candidate, true_mu))

    param_name = 'rescale'
    curve = plt.plot(scan_points, scan_values, label=f'NLL {param_name}={true_rescale}')
    # Reuse the curve colour so the marker visually belongs to it.
    plt.axvline(x=true_rescale, linestyle='--', color=curve[0].get_color(), label='true value')
Ejemplo n.º 4
0
def run_iter(compute_summaries, i_cv, i_iter, config, valid_generator,
             test_generator, directory):
    """Run one estimation iteration with a ready-made summary computer.

    Creates ``<directory>/iter_<i_iter>``, draws test data at
    ``config.TRUE``, builds an ``NLLComputer``, calls
    ``plot_nll_around_min`` and finally runs the minimizer obtained from
    ``get_minimizer`` through ``evaluate_minuit``.

    Args:
        compute_summaries: summary computer forwarded to ``NLLComputer``.
        i_cv: cross-validation index stored in the result under ``'i_cv'``.
        i_iter: iteration index stored under ``'i'`` and used in the
            directory name.
        config: provides ``TRUE`` (with ``mu`` and ``rescale`` attributes),
            ``N_TESTING_SAMPLES``, ``CALIBRATED`` and ``CALIBRATED_ERROR``.
        valid_generator: forwarded unchanged to ``NLLComputer``.
        test_generator: object with ``generate(...)``; it is not reset here.
        directory: parent directory for the per-iteration output folder.

    Returns:
        A new dict holding ``i_cv``, ``i`` and the entries returned by
        ``evaluate_minuit``.
    """
    log = logging.getLogger()
    row = {'i_cv': i_cv, 'i': i_iter}
    out_dir = os.path.join(directory, f'iter_{i_iter}')
    os.makedirs(out_dir, exist_ok=True)

    log.info(f"True Parameters   = {config.TRUE}")
    suffix = f'-mu={config.TRUE.mu:1.2f}_rescale={config.TRUE.rescale}'
    X_test, y_test, w_test = test_generator.generate(
        *config.TRUE,
        n_samples=config.N_TESTING_SAMPLES,
    )
    debug_label(y_test)

    nll = NLLComputer(compute_summaries, valid_generator, X_test, w_test,
                      config=config)
    plot_nll_around_min(nll, config.TRUE, out_dir, suffix)

    log.info('Prepare minuit minimizer')
    minuit = get_minimizer(nll, config.CALIBRATED, config.CALIBRATED_ERROR)
    fit_outcome = evaluate_minuit(minuit, config.TRUE)
    row.update(fit_outcome)
    return row
Ejemplo n.º 5
0
def get_nll_computer(model, config, valid_generator, test_generator):
    """Build an ``NLLComputer`` on freshly generated test data.

    Test data is drawn from ``test_generator`` at ``config.TRUE`` with
    ``config.N_TESTING_SAMPLES`` samples; the generated labels are dropped.
    The summary computer comes from ``model.summary_computer`` using the
    module-level ``N_BINS``.

    Args:
        model: object exposing ``summary_computer(n_bins=...)``.
        config: provides ``TRUE`` and ``N_TESTING_SAMPLES``; also forwarded
            to ``NLLComputer``.
        valid_generator: forwarded unchanged to ``NLLComputer``.
        test_generator: object with ``generate(...)``; it is not reset here.

    Returns:
        The constructed ``NLLComputer`` instance.
    """
    X_test, _, w_test = test_generator.generate(
        *config.TRUE,
        n_samples=config.N_TESTING_SAMPLES,
    )
    summaries = model.summary_computer(n_bins=N_BINS)
    return NLLComputer(summaries, valid_generator, X_test, w_test, config=config)
Ejemplo n.º 6
0
def run_iter(model, result_row, i_iter, config, valid_generator, test_generator, calib_rescale, n_bins=10):
    """Run one full iteration: calibration, conditional estimation and fit.

    Steps, in order: create ``<model.results_path>/iter_<i_iter>``; record
    bookkeeping values in ``result_row``; reset ``test_generator`` and draw
    test data at ``config.TRUE``; call ``evaluate_summary_computer``;
    predict ``rescale`` with ``calib_rescale`` and overwrite
    ``config.CALIBRATED`` / ``config.CALIBRATED_ERROR``; build an
    ``NLLComputer`` and plot it; write the conditional estimation to
    ``no_nuisance.csv``; minimize the NLL.

    Args:
        model: object exposing ``results_path``; wrapped in a
            ``ClassifierSummaryComputer``.
        result_row: mapping updated **in place** with ``'i'``,
            ``'n_test_samples'``, the ``*_calib`` / ``*_calib_error``
            entries and whatever ``evaluate_minuit`` returns.
        i_iter: iteration index.
        config: provides ``TRUE``, ``N_TESTING_SAMPLES``, ``CALIBRATED`` and
            ``CALIBRATED_ERROR``.  NOTE: the last two attributes are
            **reassigned** by this function.
        valid_generator: forwarded unchanged to ``NLLComputer``.
        test_generator: object with ``reset()`` and ``generate(...)``.
        calib_rescale: object whose ``predict(X, w)`` returns a
            ``(mean, sigma)`` pair for ``rescale``.
        n_bins: number of bins passed to the summary helpers.

    Returns:
        A tuple ``(row_copy, conditional_estimate)``: a copy of
        ``result_row`` after all updates and the DataFrame written to disk.
    """
    log = logging.getLogger()
    log.info('-'*45)
    log.info(f'iter : {i_iter}')
    flush(log)

    out_dir = os.path.join(model.results_path, f'iter_{i_iter}')
    os.makedirs(out_dir, exist_ok=True)
    result_row['i'] = i_iter
    result_row['n_test_samples'] = config.N_TESTING_SAMPLES
    suffix = f'-mu={config.TRUE.mu:1.2f}_rescale={config.TRUE.rescale}'

    log.info('Generate testing data')
    test_generator.reset()
    X_test, y_test, w_test = test_generator.generate(
        *config.TRUE,
        n_samples=config.N_TESTING_SAMPLES,
    )

    # PLOT SUMMARIES
    evaluate_summary_computer(
        model, X_test, y_test, w_test,
        n_bins=n_bins, prefix='', suffix=suffix, directory=out_dir,
    )

    # CALIBRATION
    rescale_mean, rescale_sigma = calib_rescale.predict(X_test, w_test)
    log.info('rescale  = {} =vs= {} +/- {}'.format(config.TRUE.rescale, rescale_mean, rescale_sigma) )
    # Replace the calibrated nuisance value/error; the interest parameters are carried over.
    config.CALIBRATED = Parameter(rescale_mean, config.CALIBRATED.interest_parameters)
    config.CALIBRATED_ERROR = Parameter(rescale_sigma, config.CALIBRATED_ERROR.interest_parameters)
    result_row.update(
        (name + "_calib", value) for name, value in config.CALIBRATED.items()
    )
    result_row.update(
        (name + "_calib_error", value) for name, value in config.CALIBRATED_ERROR.items()
    )

    log.info('Set up NLL computer')
    summaries = ClassifierSummaryComputer(model, n_bins=n_bins)
    nll = NLLComputer(summaries, valid_generator, X_test, w_test, config=config)

    # NLL PLOTS
    plot_nll_around_min(nll, config.TRUE, out_dir, suffix)

    # MEASURE STAT/SYST VARIANCE
    log.info('MEASURE STAT/SYST VARIANCE')
    estimate_table = pd.DataFrame(make_conditional_estimation(nll, config))
    estimate_table['i'] = i_iter
    estimate_table.to_csv(os.path.join(out_dir, "no_nuisance.csv"))

    # MINIMIZE NLL
    log.info('Prepare minuit minimizer')
    minuit = get_minimizer(nll, config.CALIBRATED, config.CALIBRATED_ERROR)
    fit_outcome = evaluate_minuit(minuit, config.TRUE)
    result_row.update(fit_outcome)
    return result_row.copy(), estimate_table
Ejemplo n.º 7
0
def main():
    """Estimate ``mu`` under sampled nuisance values and save variance figures.

    For each of 25 nuisance-parameter draws from ``param_generator`` and
    each of ``N_ITER`` model indices, a model is built and trained on data
    generated at those nuisance values (with the calibrated interest
    parameters), then ``mu`` is fitted on a fixed test set generated once at
    ``rescale=0.9, mu=0.1``.

    Outputs, written under the ``results_path`` of a freshly built model 0:

    * ``results.csv``: one row per fit.
    * ``info.json``: ``V_stat`` (mean over nuisance draws of the variance
      of the ``mu`` estimates), ``V_syst`` (variance over nuisance draws of
      the mean ``mu`` estimate) and ``V_total`` (their sum).
    """
    # BASIC SETUP
    logger = set_logger()
    args = GB_parse_args(
        main_description="Training launcher for Gradient boosting on S3D2 benchmark")
    logger.info(args)
    flush(logger)

    # Config
    config = Config()
    config.TRUE = Parameter(rescale=0.9, mu=0.1)
    train_generator = Generator(SEED)
    valid_generator = Generator(SEED + 1)
    test_generator = Generator(SEED + 2)
    # A single test set is shared by every fit below; its labels are unused.
    X_test, _, w_test = test_generator.generate(
        *config.TRUE,
        n_samples=config.N_TESTING_SAMPLES,
    )

    # for nuisance in p(nuisance | data)
    nuisance_draws = []
    for _ in range(25):
        nuisance_draws.append(param_generator().nuisance_parameters)

    mean_per_draw = []
    var_per_draw = []
    rows = []
    for nuisance_params in nuisance_draws:
        logger.info(f"nuisance_params = {nuisance_params}")
        mu_estimates = []
        for i_cv in range(N_ITER):
            clf = build_model(args, i_cv)
            parameters = Parameter(*nuisance_params,
                                   config.CALIBRATED.interest_parameters)
            print(parameters)
            n_samples = config.N_TRAINING_SAMPLES
            X_train, y_train, w_train = train_generator.generate(
                *parameters, n_samples=n_samples)
            logger.info(f"Training {clf.full_name}")
            clf.fit(X_train, y_train, w_train)

            summaries = ClassifierSummaryComputer(clf, n_bins=10)
            full_nll = NLLComputer(summaries, valid_generator, X_test, w_test,
                                   config=config)
            # Nuisance values are held at the current draw; only mu stays free.
            # The lambda is consumed within this iteration.
            compute_nll = lambda mu: full_nll(*nuisance_params, mu)
            minimizer = get_minimizer(compute_nll)
            fit = evaluate_minuit(minimizer,
                                  [config.TRUE.interest_parameters])

            mu_estimates.append(fit['mu'])
            fit['i_cv'] = i_cv
            fit.update(params_to_dict(parameters, suffix='true'))
            rows.append(fit.copy())
        mean_per_draw.append(np.mean(mu_estimates))
        var_per_draw.append(np.var(mu_estimates))

    # A model is built only to learn where results should be stored.
    model = build_model(args, 0)
    model.set_info(DATA_NAME, BENCHMARK_NAME, 0)
    save_directory = model.results_path
    os.makedirs(save_directory, exist_ok=True)

    pd.DataFrame(rows).to_csv(os.path.join(save_directory, 'results.csv'))
    logger.info(f"average_list {mean_per_draw}")
    logger.info(f"variance_list {var_per_draw}")

    v_stat = np.mean(var_per_draw)
    v_syst = np.var(mean_per_draw)
    v_total = v_stat + v_syst
    logger.info(f"V_stat = {v_stat}")
    logger.info(f"V_syst = {v_syst}")
    logger.info(f"V_total = {v_total}")

    summary = dict(V_stat=v_stat, V_syst=v_syst, V_total=v_total)
    with open(os.path.join(save_directory, 'info.json'), 'w') as f:
        json.dump(summary, f)