Example #1
def main():
    # BASIC SETUP
    logger = set_logger()
    args = REG_parse_args(
        main_description="Training launcher for Gradient boosting on S3D2 benchmark")
    logger.info(args)
    flush(logger)
    # INFO
    model = build_model(args, -1)
    os.makedirs(model.results_directory, exist_ok=True)
    config = Config()
    config_table = evaluate_config(config)
    config_table.to_csv(
        os.path.join(model.results_directory, 'config_table.csv'))
    # RUN
    results = [run(args, i_cv) for i_cv in range(N_ITER)]
    results = pd.concat(results, ignore_index=True)
    results.to_csv(os.path.join(model.results_directory, 'estimations.csv'))
    # EVALUATION
    eval_table = evaluate_estimator(config.INTEREST_PARAM_NAME, results)
    print_line()
    print_line()
    print(eval_table)
    print_line()
    print_line()
    eval_table.to_csv(os.path.join(model.results_directory, 'evaluation.csv'))
    gather_images(model.results_directory)
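
Every example below builds a Config() and reads benchmark settings from it, and several unpack Parameter objects into the data generators. A minimal sketch of the assumed interfaces follows; the class bodies are hypothetical reconstructions (attribute names come from the calls in the examples, the values are placeholders, not the benchmark's real settings):

from collections import namedtuple

class Parameter(namedtuple('Parameter', ['r', 'lam', 'mu'])):
    @property
    def interest_parameters(self):
        return self.mu
    @property
    def nuisance_parameters(self):
        return (self.r, self.lam)
    @property
    def interest_parameters_names(self):
        return 'mu'
    @property
    def nuisance_parameters_names(self):
        return ('r', 'lam')

class Config:
    CALIBRATED = Parameter(r=0.0, lam=3.0, mu=1.0)  # placeholder values
    TRUE = Parameter(r=0.1, lam=2.7, mu=0.1)        # as set in Example #8
    N_TRAINING_SAMPLES = 2000
    N_VALIDATION_SAMPLES = 2000
    N_TESTING_SAMPLES = 2000
    PARAM_NAMES = ['r', 'lam', 'mu']
    INTEREST_PARAM_NAME = 'mu'

    def iter_test_config(self):
        # Yields one configuration per test point; each carries its own TRUE.
        ...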
Example #2
def run_conditional_estimation(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()

    result_row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator = GeneratorTorch(seed, cuda=args.cuda)
    train_generator = TrainGenerator(train_generator, cuda=args.cuda)
    valid_generator = Generator(seed + 1)
    test_generator = Generator(seed + 2)

    # SET MODEL
    logger.info('Set up classifier')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_neural_net(model, train_generator, retrain=args.retrain)

    # CHECK TRAINING
    logger.info('Generate validation data')
    X_valid, y_valid, w_valid = valid_generator.generate(
        *config.CALIBRATED, n_samples=config.N_VALIDATION_SAMPLES)

    result_row.update(evaluate_neural_net(model, prefix='valid'))
    result_row.update(
        evaluate_classifier(model, X_valid, y_valid, w_valid, prefix='valid'))

    # MEASUREMENT
    evaluate_summary_computer(model,
                              X_valid,
                              y_valid,
                              w_valid,
                              n_bins=N_BINS,
                              prefix='valid_',
                              suffix='')
    iter_results = [
        run_conditional_estimation_iter(model,
                                        result_row,
                                        i,
                                        test_config,
                                        valid_generator,
                                        test_generator,
                                        n_bins=N_BINS)
        for i, test_config in enumerate(config.iter_test_config())
    ]

    conditional_estimate = pd.concat(iter_results)
    conditional_estimate['i_cv'] = i_cv
    fname = os.path.join(model.results_path, "conditional_estimations.csv")
    conditional_estimate.to_csv(fname)
    logger.info('DONE')
    return conditional_estimate
Example #3
def run(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()
    directory = os.path.join(DIRECTORY, f'cv_{i_cv}')
    os.makedirs(directory, exist_ok=True)

    config = Config()
    seed = SEED + i_cv * 5
    train_generator = Generator(seed)
    valid_generator = Generator(seed+1)
    test_generator  = Generator(seed+2)

    N_BINS = 10
    X_train, y_train, w_train = train_generator.generate(*config.CALIBRATED, n_samples=config.N_TRAINING_SAMPLES)
    compute_summaries = HistogramSummaryComputer(n_bins=N_BINS).fit(X_train)

    result_table = [
        run_iter(compute_summaries, i_cv, i, test_config, valid_generator,
                 test_generator, directory)
        for i, test_config in enumerate(config.iter_test_config())
    ]
    result_table = pd.DataFrame(result_table)
    result_table.to_csv(os.path.join(directory, 'results.csv'))
    logger.info('Plot params')
    param_names = config.PARAM_NAMES
    for name in param_names:
        plot_params(name, result_table, title='Likelihood fit', directory=directory)

    return result_table
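
HistogramSummaryComputer is not shown in these examples. A plausible minimal sketch, assuming fit() learns per-feature bin edges on the training data and the fitted object maps a weighted sample to weighted bin counts (the real class may bin differently):

import numpy as np

class HistogramSummaryComputer:
    # Hypothetical reconstruction of the interface used above.
    def __init__(self, n_bins=10):
        self.n_bins = n_bins
        self.edges = None

    def fit(self, X):
        # Learn bin edges per feature from the training data.
        self.edges = [np.histogram_bin_edges(X[:, j], bins=self.n_bins)
                      for j in range(X.shape[1])]
        return self

    def __call__(self, X, w):
        # Summaries = concatenated weighted histograms, one per feature.
        counts = [np.histogram(X[:, j], bins=edges, weights=w)[0]
                  for j, edges in enumerate(self.edges)]
        return np.concatenate(counts)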
Example #4
def __init__(self, X_test, w_test, i_cv, args, config=None, n_bins=10):
    self.X_test = X_test
    self.w_test = w_test
    self.args = args
    self.i_cv = i_cv
    self.config = Config() if config is None else config
    self.n_bins = n_bins
Example #5
def main():
    # BASIC SETUP
    logger = set_logger()
    args = NET_parse_args(
        main_description="Training launcher for Neural net classifier on HIGGS benchmark")
    logger.info(args)
    flush(logger)
    # INFO
    model = build_model(args, -1)
    os.makedirs(model.results_directory, exist_ok=True)
    config = Config()
    config_table = evaluate_config(config)
    config_table.to_csv(
        os.path.join(model.results_directory, 'config_table.csv'))
    # RUN
    if not args.conditional_only:
        eval_table = get_eval_table(args, model.results_directory)
    if not args.estimate_only:
        eval_conditional = get_eval_conditional(args, model.results_directory)
    if not args.estimate_only and not args.conditional_only:
        eval_table = pd.concat([eval_table, eval_conditional], axis=1)
        # EVALUATION
        print_line()
        print_line()
        print(eval_table)
        print_line()
        print_line()
        eval_table.to_csv(
            os.path.join(model.results_directory, 'evaluation.csv'))
    gather_images(model.results_directory)
Example #6
def generate(self, n_samples):
    if n_samples is not None:
        # Sample fresh parameters; the parameter of interest is the target.
        params = self.param_generator()
        X, y, w = self.data_generator.generate(*params, n_samples)
        return X, params.interest_parameters, w, params.nuisance_parameters
    else:
        # Fall back to the calibrated setting and the default sample size.
        config = Config()
        X, y, w = self.data_generator.generate(*config.CALIBRATED, n_samples=config.N_TRAINING_SAMPLES)
        return X, y, w, 1
Example #7
def run(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()

    result_row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator = Generator(seed)
    valid_generator = Generator(seed + 1)
    test_generator = Generator(seed + 2)
    train_generator = TrainGenerator(param_generator, train_generator)

    # SET MODEL
    logger.info('Set up regressor')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_neural_net(model, train_generator, retrain=args.retrain)

    # CHECK TRAINING
    logger.info('Generate validation data')
    X_valid, y_valid, w_valid = valid_generator.generate(
        *config.CALIBRATED, n_samples=config.N_VALIDATION_SAMPLES)

    result_row.update(evaluate_neural_net(model, prefix='valid'))
    evaluate_regressor(model, prefix='valid')

    # MEASUREMENT
    result_row['nfcn'] = NCALL
    result_table = [
        run_iter(model, result_row, i, test_config, valid_generator,
                 test_generator)
        for i, test_config in enumerate(config.iter_test_config())
    ]
    result_table = pd.DataFrame(result_table)
    result_table.to_csv(os.path.join(model.results_path, 'results.csv'))
    logger.info('Plot params')
    param_names = config.PARAM_NAMES
    for name in param_names:
        plot_params(name,
                    result_table,
                    title=model.full_name,
                    directory=model.results_path)

    logger.info('DONE')
    return result_table
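
The TrainGenerator(param_generator, train_generator) wrapper used here resamples the simulation parameters on every call, so the regressor trains over the whole parameter prior rather than at a single point; its generate() appears verbatim in Example #6. A sketch of the pair, reusing the Parameter class sketched after Example #1, with a made-up prior standing in for the project's real param_generator:

import numpy as np

def param_generator():
    # Hypothetical prior over (r, lam, mu); the ranges are illustrative only.
    return Parameter(r=np.random.normal(0.0, 0.4),
                     lam=np.random.normal(3.0, 1.0),
                     mu=np.random.uniform(0.0, 1.0))

class TrainGenerator:
    def __init__(self, param_generator, data_generator):
        self.param_generator = param_generator
        self.data_generator = data_generator

    def generate(self, n_samples):
        # A fresh parameter point for every batch (cf. Example #6).
        params = self.param_generator()
        X, y, w = self.data_generator.generate(*params, n_samples)
        return X, params.interest_parameters, w, params.nuisance_parameters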
Example #8
def main():
    # BASIC SETUP
    logger = set_logger()
    args = GB_parse_args(main_description="Training launcher for Gradient boosting on S3D2 benchmark")
    logger.info(args)
    flush(logger)
    # Config
    config = Config()
    config.TRUE = Parameter(r=0.1, lam=2.7, mu=0.1)

    train_generator = Generator(SEED)
    valid_generator = Generator(SEED+1)
    test_generator  = Generator(SEED+2)
    X_test, y_test, w_test = test_generator.generate(*config.TRUE, n_samples=config.N_TESTING_SAMPLES)

    # for nuisance in p(nuisance | data)
    nuisance_param_sample = [param_generator().nuisance_parameters for _ in range(25)]
    average_list = []
    variance_list = []
    all_results = []
    for nuisance_params in nuisance_param_sample:
        logger.info(f"nuisance_params = {nuisance_params}")
        estimator_values = []
        results = {name : value for name, value in zip(config.TRUE.nuisance_parameters_names, nuisance_params)}
        for i_cv in range(N_ITER):
            clf = build_model(args, i_cv)
            parameters = Parameter(*nuisance_params, config.CALIBRATED.interest_parameters)
            print(parameters)
            n_samples = config.N_TRAINING_SAMPLES
            X_train, y_train, w_train = train_generator.generate(*parameters, n_samples=n_samples)
            logger.info(f"Training {clf.full_name}")
            # TODO : is it OK to provide w_train to the classifier or useless ?
            clf.fit(X_train, y_train, w_train)
            compute_summaries = ClassifierSummaryComputer(clf, n_bins=10)
            nll_computer = NLLComputer(compute_summaries, valid_generator, X_test, w_test, config=config)
            compute_nll = lambda mu : nll_computer(*nuisance_params, mu)
            minimizer = get_minimizer(compute_nll)
            results.update(evaluate_minuit(minimizer, [config.TRUE.interest_parameters]))
            all_results.append(results.copy())
            # TODO : Add results to some csv
            estimator_values.append(results['mu'])
        average_list.append(np.mean(estimator_values))
        variance_list.append(np.var(estimator_values))

    logger.info(f"average_list {average_list}")
    logger.info(f"variance_list {variance_list}")
    v_stat = np.mean(variance_list)
    v_syst = np.var(average_list)
    v_total = v_stat + v_syst
    logger.info(f"V_stat = {v_stat}")
    logger.info(f"V_syst = {v_syst}")
    logger.info(f"V_total = {v_total}")
Example #9
def main():
    # BASIC SETUP
    logger = set_logger()
    args = REG_parse_args(
        main_description="Training launcher for Regressor on S3D2 benchmark")
    logger.info(args)
    flush(logger)

    # Setup model
    logger.info("Setup model")
    model = build_model(args, 0)
    os.makedirs(model.results_directory, exist_ok=True)

    # Setup data
    logger.info("Setup data")
    config = Config()
    config_table = evaluate_config(config)
    config_table.to_csv(
        os.path.join(model.results_directory, 'config_table.csv'))
    seed = SEED + 99999
    train_generator = TrainGenerator(param_generator, Generator(seed))
    valid_generator = Generator(seed + 1)
    test_generator = Generator(seed + 2)

    i_cv = 0
    result_row = {'i_cv': i_cv}

    # TRAINING / LOADING
    train_or_load_neural_net(model, train_generator, retrain=args.retrain)

    # CHECK TRAINING
    result_row.update(evaluate_neural_net(model, prefix='valid'))
    evaluate_regressor(model, prefix='valid')
    print_line()

    result_table = [
        run_iter(model, result_row, i, test_config, valid_generator,
                 test_generator)
        for i, test_config in enumerate(config.iter_test_config())
    ]
    result_table = pd.DataFrame(result_table)
    result_table.to_csv(os.path.join(model.results_directory, 'results.csv'))

    logger.info('Plot params')
    param_names = [CALIB_PARAM_NAME]
    for name in param_names:
        plot_params(name,
                    result_table,
                    title=model.full_name,
                    directory=model.results_directory)

    logger.info('DONE')
Example #10
def run_estimation(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()

    result_row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator = Generator(seed)
    train_generator = TrainGenerator(param_generator, train_generator)
    valid_generator = Generator(seed+1)
    test_generator  = Generator(seed+2)

    # SET MODEL
    logger.info('Set up classifier')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_pivot(model, train_generator, config.N_TRAINING_SAMPLES*N_AUGMENT, retrain=args.retrain)

    # CHECK TRAINING
    logger.info('Generate validation data')
    X_valid, y_valid, w_valid = valid_generator.generate(*config.CALIBRATED, n_samples=config.N_VALIDATION_SAMPLES)

    result_row.update(evaluate_neural_net(model, prefix='valid'))
    result_row.update(evaluate_classifier(model, X_valid, y_valid, w_valid, prefix='valid'))

    # MEASUREMENT
    calib_r = load_calib_r(DATA_NAME, BENCHMARK_NAME)
    calib_lam = load_calib_lam(DATA_NAME, BENCHMARK_NAME)
    evaluate_summary_computer(model, X_valid, y_valid, w_valid, n_bins=N_BINS, prefix='valid_', suffix='')
    iter_results = [run_estimation_iter(model, result_row, i, test_config, valid_generator, test_generator, calib_r, calib_lam, n_bins=N_BINS)
                    for i, test_config in enumerate(config.iter_test_config())]
    result_table = pd.DataFrame(iter_results)
    result_table.to_csv(os.path.join(model.results_path, 'estimations.csv'))
    logger.info('Plot params')
    param_names = config.PARAM_NAMES
    for name in param_names:
        plot_params(name, result_table, title=model.full_name, directory=model.results_path)

    logger.info('DONE')
    return result_table
Example #11
def run(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()

    result_row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    # train_generator = Generator(seed)
    # valid_generator = Generator(seed+1)
    test_generator  = Generator(seed+2)

    # SET MODEL
    # logger.info('Set up classifier')
    model = build_model(args, i_cv)
    # flush(logger)

    # TRAINING / LOADING
    # train_or_load_classifier(model, train_generator, config.CALIBRATED, config.N_TRAINING_SAMPLES, retrain=args.retrain)

    # CHECK TRAINING
    logger.info('Generate validation data')
    # X_valid, y_valid, w_valid = valid_generator.generate(*config.CALIBRATED, n_samples=config.N_VALIDATION_SAMPLES)

    # result_row.update(evaluate_classifier(model, X_valid, y_valid, w_valid, prefix='valid'))

    # MEASUREMENT
    N_BINS = 10
    # evaluate_summary_computer(model, X_valid, y_valid, w_valid, n_bins=N_BINS, prefix='valid_', suffix='')
    result_table = [run_iter(model, result_row, i, i_cv, args, test_config, test_generator, n_bins=N_BINS)
                    for i, test_config in enumerate(config.iter_test_config())]
    result_table = pd.DataFrame(result_table)
    result_table.to_csv(os.path.join(model.results_path, 'results.csv'))
    logger.info('Plot params')
    param_names = config.PARAM_NAMES
    for name in param_names:
        plot_params(name, result_table, title=model.full_name, directory=model.path)

    logger.info('DONE')
    return result_table
Example #12
def explore_links():
    generator = Generator(SEED)

    config = Config()
    N_SAMPLES = 30_000
    feature_names = list(generator.feature_names) + ['Label', 'classifier', 'bin', 'log_p']
    mu_range = np.linspace(min(config.RANGE.mu), max(config.RANGE.mu), num=18)
    all_params = {"min": config.MIN, "true":config.TRUE, "max":config.MAX}
    # all_params = {"true":config.TRUE}

    clf = load_some_clf()
    all_average_df = {}
    for params_name, orig_params in all_params.items():
        print(f"computing link between X and mu using {params_name}...")
        average_list = []
        target_list = []
        for mu in mu_range:
            params = Parameter(*orig_params.nuisance_parameters, mu)
            data, label, weight = generator.generate(*params, n_samples=N_SAMPLES)
            sum_weight = np.sum(weight)
            average_array = np.sum(data*weight.reshape(-1, 1), axis=0) / sum_weight
            average_label = np.sum(label*weight, axis=0) / sum_weight
            proba = clf.predict_proba(data)
            decision = proba[:, 1]
            log_p = np.log(decision / (1 - decision))
            average_log_p = np.sum(log_p*weight, axis=0) / sum_weight
            average_clf = np.sum(decision*weight, axis=0) / sum_weight
            average_bin = np.sum((decision > 0.9)*weight, axis=0) / sum_weight
            average_array = np.hstack([average_array, average_label, average_clf, average_bin, average_log_p])
            average_list.append(average_array)
            target_list.append(mu)
        average_df = pd.DataFrame(np.array(average_list), columns=feature_names)
        all_average_df[params_name] = average_df

    for name in feature_names:
        for params_name, average_df in all_average_df.items(): 
            plt.scatter(average_df[name], target_list, label=params_name)
        plt.title(f'Link between weighted mean({name}) and mu')
        plt.ylabel('mu')
        plt.xlabel(f'weighted mean({name})')
        plt.legend()
        plt.savefig(os.path.join(DIRECTORY, f'link_{name}.png'))
        plt.clf()
Example #13
def plot_MU_around_min(compute_nll, model_path):
    logger = logging.getLogger()
    pb_config = Config()

    mu_list = np.linspace(0.0, 1.0, 50)
    arr = [
        compute_nll(pb_config.TRUE_R, pb_config.TRUE_LAMBDA, mu)
        for mu in mu_list
    ]
    try:
        plt.plot(mu_list, arr, label='mu nll')
        plt.xlabel('mu')
        plt.ylabel('nll')
        plt.title('NLL around min')
        plt.legend()
        plt.savefig(os.path.join(model_path, 'mu_nll.png'))
        plt.clf()
    except Exception as e:
        logger.warning('Plot nll around min failed')
        logger.warning(str(e))
Example #14
def plot_LAMBDA_around_min(compute_nll, model_path):
    logger = logging.getLogger()
    pb_config = Config()

    lam_list = np.linspace(0, 4, 50)
    arr = [
        compute_nll(pb_config.TRUE_R, lam, pb_config.TRUE_MU)
        for lam in lam_list
    ]
    try:
        plt.plot(lam_list, arr, label='lambda nll')
        plt.xlabel('lambda')
        plt.ylabel('nll')
        plt.title('NLL around min')
        plt.legend()
        plt.savefig(os.path.join(model_path, 'lambda_nll.png'))
        plt.clf()
    except Exception as e:
        logger.warning('Plot nll around min failed')
        logger.warning(str(e))
Example #15
def plot_R_around_min(compute_nll, model_path):
    logger = logging.getLogger()
    pb_config = Config()

    r_list = np.linspace(-1, 1, 50)
    arr = [
        compute_nll(r, pb_config.TRUE_LAMBDA, pb_config.TRUE_MU)
        for r in r_list
    ]
    try:
        plt.plot(r_list, arr, label='r nll')
        plt.xlabel('r')
        plt.ylabel('nll')
        plt.title('NLL around min')
        plt.legend()
        plt.savefig(os.path.join(model_path, 'r_nll.png'))
        plt.clf()
    except Exception as e:
        logger.warning('Plot nll around min failed')
        logger.warning(str(e))
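
Examples #13, #14 and #15 differ only in which argument of compute_nll is scanned. A possible consolidation (plot_param_around_min is a hypothetical helper, not part of the original code):

import os
import logging
import numpy as np
import matplotlib.pyplot as plt

def plot_param_around_min(compute_nll, model_path, name, scan_values, make_args):
    # make_args(v) must return the full (r, lam, mu) argument tuple with
    # the scanned parameter set to v.
    logger = logging.getLogger()
    arr = [compute_nll(*make_args(v)) for v in scan_values]
    try:
        plt.plot(scan_values, arr, label=f'{name} nll')
        plt.xlabel(name)
        plt.ylabel('nll')
        plt.title('NLL around min')
        plt.legend()
        plt.savefig(os.path.join(model_path, f'{name}_nll.png'))
        plt.clf()
    except Exception as e:
        logger.warning('Plot nll around min failed')
        logger.warning(str(e))

plot_MU_around_min then reduces to plot_param_around_min(compute_nll, model_path, 'mu', np.linspace(0.0, 1.0, 50), lambda mu: (pb_config.TRUE_R, pb_config.TRUE_LAMBDA, mu)), and likewise for lambda and r.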
Example #16
def main():
    logger = set_logger()
    logger.info("Hello world !")
    os.makedirs(DIRECTORY, exist_ok=True)
    set_plot_config()
    args = None

    config = Config()
    results = [run(args, i_cv) for i_cv in range(N_ITER)]
    results = pd.concat(results, ignore_index=True)
    results.to_csv(os.path.join(DIRECTORY, 'results.csv'))
    # EVALUATION
    eval_table = evaluate_estimator(config.TRUE.interest_parameters_names, results)
    print_line()
    print_line()
    print(eval_table)
    print_line()
    print_line()
    eval_table.to_csv(os.path.join(DIRECTORY, 'evaluation.csv'))
    gather_images(DIRECTORY)
Example #17
def main():
    # BASIC SETUP
    logger = set_logger()
    args = INFERNO_parse_args(
        main_description="Training launcher for Regressor on S3D2 benchmark")
    logger.info(args)
    flush(logger)
    # INFO
    model = build_model(args, -1)
    pb_config = Config()
    # RUN
    results = [run(args, i_cv) for i_cv in range(N_ITER)]
    results = pd.concat(results, ignore_index=True)
    results.to_csv(os.path.join(model.directory, 'results.csv'))
    # EVALUATION
    eval_table = evaluate_estimator(pb_config.INTEREST_PARAM_NAME, results)
    print_line()
    print_line()
    print(eval_table)
    print_line()
    print_line()
    eval_table.to_csv(os.path.join(model.directory, 'evaluation.csv'))
    gather_images(model.directory)
Example #18
def generate(self, n_samples):
    # Default to the configured training-set size.
    n_samples = Config().N_TRAINING_SAMPLES if n_samples is None else n_samples
    r, lam, mu = self.param_generator()
    X, y, w = self.data_generator.generate(r, lam, mu, n_samples)
    # The sampled lambda is returned as the regression target.
    return X, lam, w, None
Example #19
def features():
    config = Config()
    N_SAMPLES = 10_000
    R_MIN   = -0.3
    R_MAX   = 0.3 
    LAM_MIN = 2
    LAM_MAX = 4
    MU_MIN  = 0.0
    MU_MAX  = 1.0 

    generator = Generator(SEED)
    X, label = generator.sample_event(config.TRUE.r, config.TRUE.lam, config.TRUE.mu, size=N_SAMPLES)
    n_sig = np.sum(label==1)
    n_bkg = np.sum(label==0)
    print(f"nb of signal      = {n_sig}")
    print(f"nb of backgrounds = {n_bkg}")


    df = pd.DataFrame(X, columns=["x1","x2","x3"])
    df['label'] = label
    g = sns.PairGrid(df, vars=["x1","x2","x3"], hue='label')
    g = g.map_upper(sns.scatterplot)
    g = g.map_diag(sns.kdeplot)
    g = g.map_lower(sns.kdeplot, n_levels=6)
    g = g.add_legend()
    # g = g.map_offdiag(sns.kdeplot, n_levels=6)
    g.savefig(os.path.join(DIRECTORY, 'pairgrid.png'))
    plt.clf()


    nll = generator.nll(X, config.TRUE.r, config.TRUE.lam, config.TRUE.mu)
    print(f"NLL = {nll}")

    R_RANGE = np.linspace(R_MIN, R_MAX, 100)
    nll = [generator.nll(X, r, config.TRUE.lam, config.TRUE.mu) for r in R_RANGE]
    min_nll = R_RANGE[np.argmin(nll)]
    plt.plot(R_RANGE, nll, label="nll(r)")
    plt.axvline(config.TRUE.r, c="orange", label="true r")
    plt.axvline(min_nll, c="red", label="min nll")
    plt.xlabel("r")
    plt.ylabel("NLL")
    plt.title("NLL according to r param")
    plt.legend()
    plt.tight_layout()
    plt.savefig(os.path.join(DIRECTORY, 'NLL_r.png'))
    plt.clf()


    LAM_RANGE = np.linspace(LAM_MIN, LAM_MAX, 100)
    nll = [generator.nll(X, config.TRUE.r, lam, config.TRUE.mu) for lam in LAM_RANGE]
    min_nll = LAM_RANGE[np.argmin(nll)]
    plt.plot(LAM_RANGE, nll, label="nll(lam)")
    plt.axvline(config.TRUE.lam, c="orange", label="true lam")
    plt.axvline(min_nll, c="red", label="min nll")
    plt.xlabel("$\lambda$")
    plt.ylabel("NLL")
    plt.title("NLL according to $\lambda$ param")
    plt.legend()
    plt.tight_layout()
    plt.savefig(os.path.join(DIRECTORY, 'NLL_lambda.png'))
    plt.clf()

    MU_RANGE = np.linspace(MU_MIN, MU_MAX, 100)
    nll = [generator.nll(X, config.TRUE.r, config.TRUE.lam, mu) for mu in MU_RANGE]
    min_nll = MU_RANGE[np.argmin(nll)]
    plt.plot(MU_RANGE, nll, label="nll(mu)")
    plt.axvline(config.TRUE.mu, c="orange", label="true mu")
    plt.axvline(min_nll, c="red", label="min nll")
    plt.xlabel("$\mu$")
    plt.ylabel("NLL")
    plt.title("NLL according to $\mu$ param")
    plt.legend()
    plt.tight_layout()
    plt.savefig(os.path.join(DIRECTORY, 'NLL_mu.png'))
    plt.clf()
Example #20
def main():
    # BASIC SETUP
    logger = set_logger()
    args = parse_args()
    logger.info(args)
    flush(logger)

    # SET MODEL
    model = get_model(args)

    # LOAD/GENERATE DATA
    logger.info('Generating data ...')
    pb_config = Config()
    generator = Synthetic3D(seed=config.SEED, n_expected_events=1050)
    generator.N_SIG = pb_config.N_SIG
    generator.N_BKG = pb_config.N_BKG
    D_train = generator.train_sample(pb_config.CALIBRATED_R,
                                     pb_config.CALIBRATED_LAMBDA,
                                     pb_config.CALIBRATED_MU,
                                     n_samples=pb_config.N_TRAINING_SAMPLES)
    D_test = generator.test_sample(pb_config.CALIBRATED_R,
                                   pb_config.CALIBRATED_LAMBDA,
                                   pb_config.CALIBRATED_MU)
    X_train, y_train, w_train = split_data_label_weights(D_train)
    X_test, y_test, w_test = split_data_label_weights(D_test)

    # TRAINING
    model.fit(X_train, y_train, w_train)
    # SAVE MODEL
    i = 99
    model_name = '{}-{}'.format(model.get_name(), i)
    model_path = os.path.join(config.SAVING_DIR, model_name)
    logger.info("Saving in {}".format(model_path))
    os.makedirs(model_path, exist_ok=True)
    model.save(model_path)

    # CHECK TRAINING
    plot_test_distrib(model, model_name, model_path, X_test, y_test)
    plot_summaries(model, model_name, model_path, X_test, y_test, w_test)

    # NLL
    summary_computer = lambda X, w: compute_summaries(model, X, w, n_bins=10)
    D_final = generator.final_sample(pb_config.TRUE_R, pb_config.TRUE_LAMBDA,
                                     pb_config.TRUE_MU)
    X_final, y_final, w_final = split_data_label_weights(D_final)
    compute_nll = Synthetic3DNLL(summary_computer, generator, X_final, w_final)

    # NLL PLOTS
    plot_R_around_min(compute_nll, model_path)
    plot_LAMBDA_around_min(compute_nll, model_path)
    plot_MU_around_min(compute_nll, model_path)

    # MINIMIZE NLL
    minimizer = iminuit.Minuit(
        compute_nll,
        errordef=ERRORDEF_NLL,
        r=pb_config.CALIBRATED_R,
        error_r=pb_config.CALIBRATED_R_ERROR,
        #limit_r=(0, None),
        lam=pb_config.CALIBRATED_LAMBDA,
        error_lam=pb_config.CALIBRATED_LAMBDA_ERROR,
        limit_lam=(0, None),
        mu=pb_config.CALIBRATED_MU,
        error_mu=pb_config.CALIBRATED_MU_ERROR,
        limit_mu=(0, 1),
    )
    minimizer.print_param()
    fmin, param = minimizer.migrad()
    param = minimizer.hesse()
    for p in param:
        print('{:3} = {} ({})'.format(p['name'], p['value'], p['error']))

    print('true_r', pb_config.TRUE_R)
    print('true_lam', pb_config.TRUE_LAMBDA)
    print('true_mu', pb_config.TRUE_MU)

    print(param[2]['value'] * 1050, 'signal events estimated')
    print(param[2]['error'] * 1050, 'error on # estimated sig event')
    print('Done.')
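
Note that this example uses the iminuit 1.x constructor API (errordef, error_*, and limit_* as keyword arguments, and migrad() returning (fmin, param)). Under iminuit 2.x the same setup would look roughly like the sketch below; verify against the iminuit upgrade guide before porting:

import iminuit

minimizer = iminuit.Minuit(compute_nll,
                           r=pb_config.CALIBRATED_R,
                           lam=pb_config.CALIBRATED_LAMBDA,
                           mu=pb_config.CALIBRATED_MU)
minimizer.errordef = iminuit.Minuit.LIKELIHOOD  # 0.5 for a negative log-likelihood
minimizer.errors['r'] = pb_config.CALIBRATED_R_ERROR
minimizer.errors['lam'] = pb_config.CALIBRATED_LAMBDA_ERROR
minimizer.errors['mu'] = pb_config.CALIBRATED_MU_ERROR
minimizer.limits['lam'] = (0, None)
minimizer.limits['mu'] = (0, 1)
minimizer.migrad()
minimizer.hesse()
for p in minimizer.params:
    print(f'{p.name:3} = {p.value} ({p.error})')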
Example #21
def run(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()

    result_row = {'i_cv': i_cv}
    result_table = []

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    pb_config = Config()
    seed = config.SEED + i_cv * 5
    train_generator = Synthetic3DGeneratorTorch(seed)
    valid_generator = S3D2(seed + 1)
    test_generator = S3D2(seed + 2)

    # SET MODEL
    logger.info('Set up inferno')
    model = build_model(args, i_cv)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_inferno(model, train_generator, retrain=args.retrain)

    # CHECK TRAINING
    result_row.update(evaluate_neural_net(model))

    logger.info('Generate validation data')
    X_valid, y_valid, w_valid = valid_generator.generate(
        pb_config.CALIBRATED_R,
        pb_config.CALIBRATED_LAMBDA,
        pb_config.CALIBRATED_MU,
        n_samples=pb_config.N_VALIDATION_SAMPLES)

    # MEASUREMENT
    N_BINS = args.n_bins
    compute_summaries = model.compute_summaries
    for mu in pb_config.TRUE_MU_RANGE:
        true_params = Parameter(pb_config.TRUE.r, pb_config.TRUE.lam, mu)
        suffix = f'-mu={true_params.mu:1.2f}_r={true_params.r}_lambda={true_params.lam}'
        logger.info('Generate testing data')
        X_test, y_test, w_test = test_generator.generate(
            *true_params, n_samples=pb_config.N_TESTING_SAMPLES)
        # PLOT SUMMARIES
        evaluate_summary_computer(model,
                                  X_valid,
                                  y_valid,
                                  w_valid,
                                  X_test,
                                  w_test,
                                  n_bins=N_BINS,
                                  prefix='',
                                  suffix=suffix)

        logger.info('Set up NLL computer')
        compute_nll = S3D2NLL(compute_summaries, valid_generator, X_test,
                              w_test)
        # NLL PLOTS
        plot_nll_around_min(compute_nll, true_params, model.path, suffix)

        # MINIMIZE NLL
        logger.info('Prepare minuit minimizer')
        minimizer = get_minimizer(compute_nll, pb_config.CALIBRATED,
                                  pb_config.CALIBRATED_ERROR)
        fmin, params = estimate(minimizer)
        result_row.update(evaluate_minuit(minimizer, fmin, params,
                                          true_params))

        result_table.append(result_row.copy())
    result_table = pd.DataFrame(result_table)

    logger.info('Plot params')
    param_names = pb_config.PARAM_NAMES
    for name in param_names:
        plot_params(name,
                    result_table,
                    title=model.full_name,
                    directory=model.path)

    logger.info('DONE')
    return result_table
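
get_minimizer is not defined in these examples. Given the explicit construction in Example #20, a plausible sketch of the three-argument variant called here, assuming Parameter-like objects with r, lam and mu fields (the signature and wrapper body are guesses):

import iminuit

ERRORDEF_NLL = 0.5  # errordef value for a negative log-likelihood

def get_minimizer(compute_nll, calibrated, calibrated_error):
    # Hypothetical wrapper mirroring the iminuit 1.x call in Example #20.
    return iminuit.Minuit(
        compute_nll,
        errordef=ERRORDEF_NLL,
        r=calibrated.r,
        error_r=calibrated_error.r,
        lam=calibrated.lam,
        error_lam=calibrated_error.lam,
        limit_lam=(0, None),
        mu=calibrated.mu,
        error_mu=calibrated_error.mu,
        limit_mu=(0, 1),
    )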