# --- Example 1 ---
def run_conditional_estimation(args, i_cv):
    """Perform one cross-validation round of conditional estimation.

    Trains or loads a classifier, sanity-checks it on validation data,
    then runs the conditional-estimation loop for every test configuration
    and writes the concatenated estimates to CSV.

    Args:
        args: parsed command-line arguments (uses ``args.retrain``).
        i_cv: index of this cross-validation iteration; offsets the RNG seed.

    Returns:
        pandas.DataFrame of concatenated conditional estimates, tagged with
        the ``i_cv`` column.
    """
    log = logging.getLogger()
    print_line()
    log.info('Running iter n°{}'.format(i_cv))
    print_line()

    row = {'i_cv': i_cv}

    # One generator per split, with iteration-dependent seeds (stride 5
    # keeps seeds from different i_cv rounds disjoint).
    log.info('Set up data generator')
    cfg = Config()
    base_seed = SEED + i_cv * 5
    gen_train = Generator(base_seed)
    gen_valid = Generator(base_seed + 1)
    gen_test = Generator(base_seed + 2)

    # Build the classifier and make sure its output directory exists.
    log.info('Set up classifier')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(log)

    # Train from scratch or reload a previous fit.
    train_or_load_classifier(model, gen_train, cfg.CALIBRATED,
                             cfg.N_TRAINING_SAMPLES, retrain=args.retrain)

    # Sanity-check training on a fresh validation sample.
    log.info('Generate validation data')
    X_valid, y_valid, w_valid = gen_valid.generate(
        *cfg.CALIBRATED, n_samples=cfg.N_VALIDATION_SAMPLES)

    row.update(
        evaluate_classifier(model, X_valid, y_valid, w_valid, prefix='valid'))

    # Measurement: summary plots, then one estimation run per test config.
    evaluate_summary_computer(model, X_valid, y_valid, w_valid,
                              n_bins=N_BINS, prefix='valid_', suffix='')
    per_config = []
    for idx, test_config in enumerate(cfg.iter_test_config()):
        per_config.append(
            run_conditional_estimation_iter(model, row, idx, test_config,
                                            gen_valid, gen_test,
                                            n_bins=N_BINS))

    conditional_estimate = pd.concat(per_config)
    conditional_estimate['i_cv'] = i_cv
    out_path = os.path.join(model.results_path, "conditional_estimations.csv")
    conditional_estimate.to_csv(out_path)
    log.info('DONE')
    return conditional_estimate
# --- Example 2 ---
def run(args, i_cv):
    """Run one cross-validation iteration of a threshold scan.

    Trains (or loads) a classifier, then for every test configuration scans
    500 decision thresholds in [0, 1], recording the selected background
    (beta) and signal (gamma) counts on validation data and the selection
    counts / significance proxies on test data.

    Args:
        args: parsed command-line arguments (uses ``args.retrain``).
        i_cv: index of this cross-validation iteration; offsets the RNG seed.

    Returns:
        pandas.DataFrame with one row per (test_config, threshold) pair.
    """
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator = Generator(seed)
    valid_generator = Generator(seed + 1)
    test_generator = Generator(seed + 2)

    # SET MODEL
    logger.info('Set up classifier')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_classifier(model,
                             train_generator,
                             config.CALIBRATED,
                             config.N_TRAINING_SAMPLES,
                             retrain=args.retrain)

    # MEASUREMENT
    result_row = {'i_cv': i_cv}
    results = []
    for test_config in config.iter_test_config():
        logger.info(
            f"Running test set : {test_config.TRUE}, {test_config.N_TESTING_SAMPLES} samples"
        )
        for threshold in np.linspace(0, 1, 500):
            result_row = {'i_cv': i_cv}
            result_row['threshold'] = threshold
            result_row.update(test_config.TRUE.to_dict(prefix='true_'))
            result_row['n_test_samples'] = test_config.N_TESTING_SAMPLES

            # Validation-set counts of selected background (beta) and
            # signal (gamma) events above the threshold.
            X, y, w = valid_generator.generate(
                *config.TRUE, n_samples=config.N_VALIDATION_SAMPLES)
            proba = model.predict_proba(X)
            decision = proba[:, 1]
            selected = decision > threshold
            beta = np.sum(y[selected] == 0)
            gamma = np.sum(y[selected] == 1)
            result_row['beta'] = beta
            result_row['gamma'] = gamma

            # Test-set selection counts above the same threshold.
            X, y, w = test_generator.generate(
                *config.TRUE, n_samples=config.N_VALIDATION_SAMPLES)
            proba = model.predict_proba(X)
            decision = proba[:, 1]
            selected = decision > threshold
            n_selected = np.sum(selected)
            n_selected_bkg = np.sum(y[selected] == 0)
            n_selected_sig = np.sum(y[selected] == 1)
            result_row['n'] = n_selected
            result_row['b'] = n_selected_bkg
            result_row['s'] = n_selected_sig
            result_row['s_sqrt_n'] = n_selected_sig / np.sqrt(n_selected)
            # BUG FIX: 's_sqrt_b' previously divided by sqrt(n_selected),
            # duplicating 's_sqrt_n'. The s/sqrt(b) significance proxy must
            # use the selected *background* count.
            result_row['s_sqrt_b'] = n_selected_sig / np.sqrt(n_selected_bkg)
            results.append(result_row.copy())
    results = pd.DataFrame(results)
    print(results)
    return results
# --- Example 3 ---
def run_estimation(args, i_cv):
    """Run one cross-validation iteration of the inferno estimation pipeline.

    Trains (or loads) an inferno model, evaluates it on validation data,
    loads the nuisance calibration models, then runs the estimation loop
    over every test configuration. Saves the resulting table to CSV and
    produces one plot per parameter.

    Args:
        args: parsed command-line arguments (uses ``args.cuda`` and
            ``args.retrain``).
        i_cv: index of this cross-validation iteration; offsets the RNG seed.

    Returns:
        pandas.DataFrame with one row per test configuration.
    """
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()

    result_row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    # Stride of 5 keeps the three per-split seeds from overlapping across
    # different i_cv iterations.
    seed = SEED + i_cv * 5
    # Training uses the torch-based generator; validation/testing use the
    # plain Generator.
    train_generator = GeneratorTorch(seed, cuda=args.cuda)
    valid_generator = Generator(seed + 1)
    test_generator = Generator(seed + 2)

    # SET MODEL
    logger.info('Set up inferno model')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_inferno(model, train_generator, retrain=args.retrain)

    # CHECK TRAINING
    logger.info('Generate validation data')
    X_valid, y_valid, w_valid = valid_generator.generate(
        *config.CALIBRATED, n_samples=config.N_VALIDATION_SAMPLES)

    result_row.update(evaluate_neural_net(model, prefix='valid'))
    evaluate_inferno(model, prefix='valid')

    # MEASUREMENT
    # Pre-trained calibration models for the r and lambda nuisances.
    calib_r = load_calib_r(DATA_NAME, BENCHMARK_NAME)
    calib_lam = load_calib_lam(DATA_NAME, BENCHMARK_NAME)
    evaluate_summary_computer(model,
                              X_valid,
                              y_valid,
                              w_valid,
                              n_bins=N_BINS,
                              prefix='valid_',
                              suffix='')
    # NOTE(review): result_row is shared across iterations and presumably
    # copied inside run_estimation_iter — confirm against its definition.
    iter_results = [
        run_estimation_iter(model,
                            result_row,
                            i,
                            test_config,
                            valid_generator,
                            test_generator,
                            calib_r,
                            calib_lam,
                            n_bins=N_BINS)
        for i, test_config in enumerate(config.iter_test_config())
    ]
    result_table = pd.DataFrame(iter_results)
    result_table.to_csv(os.path.join(model.results_path, 'estimations.csv'))
    logger.info('Plot params')
    param_names = config.PARAM_NAMES
    for name in param_names:
        plot_params(name,
                    result_table,
                    title=model.full_name,
                    directory=model.results_path)

    logger.info('DONE')
    return result_table
# --- Example 4 ---
def run_iter(model,
             result_row,
             i_iter,
             config,
             valid_generator,
             test_generator,
             calib_rescale,
             n_bins=10):
    """Run a single estimation iteration for one test configuration.

    Generates a test sample, calibrates the ``rescale`` nuisance parameter,
    builds the NLL computer, measures stat/syst variance without nuisances,
    and finally minimizes the NLL with Minuit.

    WARNING: mutates its arguments in place — ``result_row`` is updated with
    calibration and fit results, and ``config.CALIBRATED`` /
    ``config.CALIBRATED_ERROR`` are *replaced* with the calibrated values.
    Callers that need the original config must pass a copy.

    Args:
        model: trained classifier with a ``results_path`` attribute.
        result_row: dict of results accumulated so far; updated in place.
        i_iter: index of this iteration (used for directories/columns).
        config: test configuration; mutated as described above.
        valid_generator: generator used inside the NLL computation.
        test_generator: generator producing the pseudo-data sample.
        calib_rescale: calibration model with a ``predict(X, w)`` method
            returning (mean, sigma) for the rescale parameter.
        n_bins: number of histogram bins for the summary statistics.

    Returns:
        tuple: (copy of the updated ``result_row``, DataFrame of conditional
        estimates for this iteration).
    """
    logger = logging.getLogger()
    logger.info('-' * 45)
    logger.info(f'iter : {i_iter}')
    flush(logger)

    # Per-iteration output directory and file-name suffix.
    iter_directory = os.path.join(model.results_path, f'iter_{i_iter}')
    os.makedirs(iter_directory, exist_ok=True)
    result_row['i'] = i_iter
    result_row['n_test_samples'] = config.N_TESTING_SAMPLES
    suffix = f'-mu={config.TRUE.mu:1.2f}_rescale={config.TRUE.rescale}'

    logger.info('Generate testing data')
    # Reset so the pseudo-data sample is reproducible for this iteration.
    test_generator.reset()
    X_test, y_test, w_test = test_generator.generate(
        *config.TRUE, n_samples=config.N_TESTING_SAMPLES)
    # PLOT SUMMARIES
    evaluate_summary_computer(model,
                              X_test,
                              y_test,
                              w_test,
                              n_bins=n_bins,
                              prefix='',
                              suffix=suffix,
                              directory=iter_directory)

    # CALIBRATION
    # Predict the rescale nuisance from the test sample, then overwrite the
    # config's calibrated values with the prediction.
    rescale_mean, rescale_sigma = calib_rescale.predict(X_test, w_test)
    logger.info('rescale  = {} =vs= {} +/- {}'.format(config.TRUE.rescale,
                                                      rescale_mean,
                                                      rescale_sigma))
    config.CALIBRATED = Parameter(rescale_mean,
                                  config.CALIBRATED.interest_parameters)
    config.CALIBRATED_ERROR = Parameter(
        rescale_sigma, config.CALIBRATED_ERROR.interest_parameters)
    for name, value in config.CALIBRATED.items():
        result_row[name + "_calib"] = value
    for name, value in config.CALIBRATED_ERROR.items():
        result_row[name + "_calib_error"] = value

    logger.info('Set up NLL computer')
    compute_summaries = ClassifierSummaryComputer(model, n_bins=n_bins)
    compute_nll = NLLComputer(compute_summaries,
                              valid_generator,
                              X_test,
                              w_test,
                              config=config)
    # NLL PLOTS
    plot_nll_around_min(compute_nll, config.TRUE, iter_directory, suffix)

    # MEASURE STAT/SYST VARIANCE
    logger.info('MEASURE STAT/SYST VARIANCE')
    conditional_results = make_conditional_estimation(compute_nll, config)
    fname = os.path.join(iter_directory, "no_nuisance.csv")
    conditional_estimate = pd.DataFrame(conditional_results)
    conditional_estimate['i'] = i_iter
    conditional_estimate.to_csv(fname)

    # MINIMIZE NLL
    logger.info('Prepare minuit minimizer')
    minimizer = get_minimizer(compute_nll, config.CALIBRATED,
                              config.CALIBRATED_ERROR)
    result_row.update(evaluate_minuit(minimizer, config.TRUE))
    # Copy so later iterations reusing result_row don't alias this row.
    return result_row.copy(), conditional_estimate
# --- Example 5 ---
def run(args, i_cv):
    """Run one cross-validation iteration of the estimation pipeline.

    Trains or loads a classifier, evaluates it on validation data, then
    runs `run_iter` for every test configuration. Writes both the
    estimation table and the concatenated conditional estimates to CSV,
    and plots every parameter.

    Args:
        args: parsed command-line arguments (uses ``args.retrain``).
        i_cv: index of this cross-validation iteration; offsets the RNG seed.

    Returns:
        tuple: (estimation DataFrame, conditional-estimate DataFrame).
    """
    log = logging.getLogger()
    print_line()
    log.info('Running iter n°{}'.format(i_cv))
    print_line()

    row = {'i_cv': i_cv}

    # One generator per split, with iteration-dependent seeds.
    log.info('Set up data generator')
    cfg = Config()
    base_seed = SEED + i_cv * 5
    gen_train = Generator(base_seed)
    gen_valid = Generator(base_seed + 1)
    gen_test = Generator(base_seed + 2)

    # Build the classifier and its output directory.
    log.info('Set up classifier')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(log)

    # Train from scratch or reload a previous fit.
    train_or_load_classifier(model, gen_train, cfg.CALIBRATED,
                             cfg.N_TRAINING_SAMPLES, retrain=args.retrain)

    # Sanity-check training on a fresh validation sample.
    log.info('Generate validation data')
    X_valid, y_valid, w_valid = gen_valid.generate(
        *cfg.CALIBRATED, n_samples=cfg.N_VALIDATION_SAMPLES)

    row.update(
        evaluate_classifier(model, X_valid, y_valid, w_valid, prefix='valid'))

    # Measurement: load the rescale calibration, then run per-config fits.
    calib_rescale = load_calib_rescale()
    N_BINS = 10
    evaluate_summary_computer(model, X_valid, y_valid, w_valid,
                              n_bins=N_BINS, prefix='valid_', suffix='')
    pairs = []
    for idx, test_config in enumerate(cfg.iter_test_config()):
        pairs.append(
            run_iter(model, row, idx, test_config, gen_valid, gen_test,
                     calib_rescale, n_bins=N_BINS))

    # Each pair is (estimation row, conditional-estimate frame).
    result_table = pd.DataFrame([estimation for estimation, _ in pairs])
    result_table.to_csv(os.path.join(model.results_path, 'estimations.csv'))
    log.info('Plot params')
    for name in cfg.PARAM_NAMES:
        plot_params(name, result_table, title=model.full_name,
                    directory=model.results_path)

    conditional_estimate = pd.concat([cond for _, cond in pairs])
    conditional_estimate['i_cv'] = i_cv
    out_path = os.path.join(model.results_path, "conditional_estimations.csv")
    conditional_estimate.to_csv(out_path)
    log.info('DONE')
    return result_table, conditional_estimate
# --- Example 6 ---
def run(args, i_cv):
    """Run one cross-validation iteration of the S3D2 inferno measurement.

    Trains (or loads) an inferno model, then for each true mu value in the
    configured range generates test data, plots summaries, builds the NLL,
    and fits it with Minuit, accumulating one result row per mu.

    Args:
        args: parsed command-line arguments (uses ``args.retrain`` and
            ``args.n_bins``).
        i_cv: index of this cross-validation iteration; offsets the RNG seed.

    Returns:
        pandas.DataFrame with one row per true mu value.
    """
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()

    result_row = {'i_cv': i_cv}
    result_table = []

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    pb_config = Config()
    # NOTE(review): `config.SEED` reads a module-level `config` object that
    # is distinct from the local `pb_config` — confirm this is intentional
    # and that the module-level `config` is in scope.
    seed = config.SEED + i_cv * 5
    train_generator = Synthetic3DGeneratorTorch(seed)
    valid_generator = S3D2(seed + 1)
    test_generator = S3D2(seed + 2)

    # SET MODEL
    logger.info('Set up inferno')
    model = build_model(args, i_cv)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_inferno(model, train_generator, retrain=args.retrain)

    # CHECK TRAINING
    result_row.update(evaluate_neural_net(model))

    logger.info('Generate validation data')
    X_valid, y_valid, w_valid = valid_generator.generate(
        pb_config.CALIBRATED_R,
        pb_config.CALIBRATED_LAMBDA,
        pb_config.CALIBRATED_MU,
        n_samples=pb_config.N_VALIDATION_SAMPLES)

    # MEASUREMENT
    N_BINS = args.n_bins
    compute_summaries = model.compute_summaries
    # Scan over the configured range of true mu values; r and lambda are
    # held at their configured true values.
    for mu in pb_config.TRUE_MU_RANGE:
        true_params = Parameter(pb_config.TRUE.r, pb_config.TRUE.lam, mu)
        suffix = f'-mu={true_params.mu:1.2f}_r={true_params.r}_lambda={true_params.lam}'
        logger.info('Generate testing data')
        X_test, y_test, w_test = test_generator.generate(
            *true_params, n_samples=pb_config.N_TESTING_SAMPLES)
        # PLOT SUMMARIES
        evaluate_summary_computer(model,
                                  X_valid,
                                  y_valid,
                                  w_valid,
                                  X_test,
                                  w_test,
                                  n_bins=N_BINS,
                                  prefix='',
                                  suffix=suffix)

        logger.info('Set up NLL computer')
        compute_nll = S3D2NLL(compute_summaries, valid_generator, X_test,
                              w_test)
        # NLL PLOTS
        plot_nll_around_min(compute_nll, true_params, model.path, suffix)

        # MINIMIZE NLL
        logger.info('Prepare minuit minimizer')
        minimizer = get_minimizer(compute_nll, pb_config.CALIBRATED,
                                  pb_config.CALIBRATED_ERROR)
        fmin, params = estimate(minimizer)
        result_row.update(evaluate_minuit(minimizer, fmin, params,
                                          true_params))

        # Copy: result_row is reused (and mutated) on the next mu.
        result_table.append(result_row.copy())
    result_table = pd.DataFrame(result_table)

    logger.info('Plot params')
    param_names = pb_config.PARAM_NAMES
    for name in param_names:
        plot_params(name,
                    result_table,
                    title=model.full_name,
                    directory=model.path)

    logger.info('DONE')
    return result_table
# --- Example 7 ---
def run_estimation(args, i_cv):
    """Run one cross-validation iteration of the regressor estimation.

    Trains or loads a neural-net model, evaluates it on validation data,
    loads one calibration model per nuisance (tes/jes/les), then runs the
    estimation loop over every test configuration, saving the table and a
    plot per parameter.

    Args:
        args: parsed command-line arguments (uses ``args.cuda`` and
            ``args.retrain``).
        i_cv: index of this cross-validation iteration; offsets the RNG seed.

    Returns:
        pandas.DataFrame with one row per test configuration.
    """
    log = logging.getLogger()
    print_line()
    log.info('Running iter n°{}'.format(i_cv))
    print_line()

    row = {'i_cv': i_cv}

    # Torch generators, wrapped so downstream code sees CPU tensors; the
    # training generator additionally samples nuisance parameters.
    log.info('Set up data generator')
    cfg = Config()
    base_seed = SEED + i_cv * 5
    gen_train, gen_valid, gen_test = get_generators_torch(
        base_seed, cuda=args.cuda, GeneratorClass=GeneratorClass)
    gen_train = TrainGenerator(param_generator, GeneratorCPU(gen_train))
    gen_valid = GeneratorCPU(gen_valid)
    gen_test = GeneratorCPU(gen_test)

    # Build the model and its output directory.
    log.info('Set up classifier')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(log)

    # Train from scratch or reload a previous fit.
    train_or_load_neural_net(model, gen_train, retrain=args.retrain)

    # Sanity-check training on a fresh validation sample.
    log.info('Generate validation data')
    X_valid, y_valid, w_valid = gen_valid.generate(
        *cfg.CALIBRATED,
        n_samples=cfg.N_VALIDATION_SAMPLES,
        no_grad=True)

    row.update(evaluate_neural_net(model, prefix='valid'))
    evaluate_regressor(model, prefix='valid')

    # Measurement: one calibration model per nuisance parameter.
    calibs = {
        'tes': load_calib_tes(DATA_NAME, BENCHMARK_NAME),
        'jes': load_calib_jes(DATA_NAME, BENCHMARK_NAME),
        'les': load_calib_les(DATA_NAME, BENCHMARK_NAME),
    }
    row['nfcn'] = NCALL
    estimations = []
    for idx, test_config in enumerate(cfg.iter_test_config()):
        estimations.append(
            run_estimation_iter(model, row, idx, test_config, gen_valid,
                                gen_test, calibs))
    result_table = pd.DataFrame(estimations)
    result_table.to_csv(os.path.join(model.results_path, 'estimations.csv'))
    log.info('Plot params')
    for name in cfg.PARAM_NAMES:
        plot_params(name, result_table, title=model.full_name,
                    directory=model.results_path)

    log.info('DONE')
    return result_table
def main():
    """End-to-end script: train a classifier on Synthetic3D data and fit
    the (r, lambda, mu) parameters by minimizing the NLL with Minuit.
    """
    # BASIC SETUP
    logger = set_logger()
    args = parse_args()
    logger.info(args)
    flush(logger)

    # SET MODEL
    model = get_model(args)

    # LOAD/GENERATE DATA
    logger.info('Generating data ...')
    pb_config = Config()
    # NOTE(review): `config.SEED` / `config.SAVING_DIR` read a module-level
    # `config` object distinct from the local `pb_config` — confirm both
    # are in scope and intentional.
    generator = Synthetic3D(seed=config.SEED, n_expected_events=1050)
    generator.N_SIG = pb_config.N_SIG
    generator.N_BKG = pb_config.N_BKG
    D_train = generator.train_sample(pb_config.CALIBRATED_R,
                                     pb_config.CALIBRATED_LAMBDA,
                                     pb_config.CALIBRATED_MU,
                                     n_samples=pb_config.N_TRAINING_SAMPLES)
    D_test = generator.test_sample(pb_config.CALIBRATED_R,
                                   pb_config.CALIBRATED_LAMBDA,
                                   pb_config.CALIBRATED_MU)
    X_train, y_train, w_train = split_data_label_weights(D_train)
    X_test, y_test, w_test = split_data_label_weights(D_test)

    # TRAINING
    model.fit(X_train, y_train, w_train)
    # SAVE MODEL
    # Fixed pseudo-cv index used only to build the save directory name.
    i = 99
    model_name = '{}-{}'.format(model.get_name(), i)
    model_path = os.path.join(config.SAVING_DIR, model_name)
    logger.info("Saving in {}".format(model_path))
    os.makedirs(model_path, exist_ok=True)
    model.save(model_path)

    # CHECK TRAINING
    plot_test_distrib(model, model_name, model_path, X_test, y_test)
    plot_summaries(model, model_name, model_path, X_test, y_test, w_test)

    # NLL
    # Bind the model and bin count into a (X, w) -> summaries callable.
    summary_computer = lambda X, w: compute_summaries(model, X, w, n_bins=10)
    D_final = generator.final_sample(pb_config.TRUE_R, pb_config.TRUE_LAMBDA,
                                     pb_config.TRUE_MU)
    X_final, y_final, w_final = split_data_label_weights(D_final)
    compute_nll = Synthetic3DNLL(summary_computer, generator, X_final, w_final)

    # NLL PLOTS
    plot_R_around_min(compute_nll, model_path)
    plot_LAMBDA_around_min(compute_nll, model_path)
    plot_MU_around_min(compute_nll, model_path)

    # MINIMIZE NLL
    # NOTE(review): this uses the iminuit 1.x API (error_*/limit_* keyword
    # arguments, print_param, migrad returning (fmin, param)); it will not
    # run under iminuit >= 2.0 without changes — confirm the pinned version.
    minimizer = iminuit.Minuit(
        compute_nll,
        errordef=ERRORDEF_NLL,
        r=pb_config.CALIBRATED_R,
        error_r=pb_config.CALIBRATED_R_ERROR,
        #limit_r=(0, None),
        lam=pb_config.CALIBRATED_LAMBDA,
        error_lam=pb_config.CALIBRATED_LAMBDA_ERROR,
        limit_lam=(0, None),
        mu=pb_config.CALIBRATED_MU,
        error_mu=pb_config.CALIBRATED_MU_ERROR,
        limit_mu=(0, 1),
    )
    minimizer.print_param()
    fmin, param = minimizer.migrad()
    # hesse() recomputes parameter errors from the Hessian at the minimum.
    param = minimizer.hesse()
    for name, (value,
               err) in {p['name']: (p['value'], p['error'])
                        for p in param}.items():
        print('{name:3} = {value} ({err})'.format(**locals()))

    print('true_r', pb_config.TRUE_R)
    print('true_lam', pb_config.TRUE_LAMBDA)
    print('true_mu', pb_config.TRUE_MU)

    # NOTE(review): param[2] is assumed to be mu (third declared parameter),
    # and 1050 matches n_expected_events above — confirm ordering.
    print(param[2]['value'] * 1050, 'signal events estimated')
    print(param[2]['error'] * 1050, 'error on # estimated sig event')
    print('Done.')