コード例 #1
0
ファイル: TP.py プロジェクト: victor-estrade/SystGradDescent
def run(args, i_cv):
    """Scan classifier decision thresholds and record selection statistics.

    For every test configuration and 500 thresholds in [0, 1], counts the
    selected background (beta) / signal (gamma) events on validation data,
    then the selected totals n / b / s on test data plus the naive
    significance proxies s/sqrt(n) and s/sqrt(b).

    Args:
        args: parsed CLI namespace (uses ``args.cuda`` and ``args.retrain``).
        i_cv: cross-validation iteration index; offsets the generator seeds.

    Returns:
        pandas.DataFrame with one row per (test_config, threshold) pair.
    """
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()


    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator = GeneratorTorch(seed, cuda=args.cuda)
    valid_generator = Generator(seed+1)
    test_generator  = Generator(seed+2)

    # SET MODEL
    logger.info('Set up classifier')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_neural_net(model, train_generator, retrain=args.retrain)

    # MEASUREMENT
    results = []
    for test_config in config.iter_test_config():
        logger.info(f"Running test set : {test_config.TRUE}, {test_config.N_TESTING_SAMPLES} samples")
        for threshold in np.linspace(0, 1, 500):
            result_row = {'i_cv': i_cv}
            result_row['threshold'] = threshold
            result_row.update(test_config.TRUE.to_dict(prefix='true_'))
            result_row['n_test_samples'] = test_config.N_TESTING_SAMPLES

            # NOTE(review): samples are drawn with config.TRUE and
            # config.N_VALIDATION_SAMPLES rather than the test_config values
            # of the enclosing loop — confirm this is intended.
            X, y, w = valid_generator.generate(*config.TRUE, n_samples=config.N_VALIDATION_SAMPLES)
            proba = model.predict_proba(X)
            decision = proba[:, 1]
            selected = decision > threshold
            beta = np.sum(y[selected] == 0)
            gamma = np.sum(y[selected] == 1)
            result_row['beta'] = beta
            result_row['gamma'] = gamma

            X, y, w = test_generator.generate(*config.TRUE, n_samples=config.N_VALIDATION_SAMPLES)
            proba = model.predict_proba(X)
            decision = proba[:, 1]
            selected = decision > threshold
            n_selected = np.sum(selected)
            n_selected_bkg = np.sum(y[selected] == 0)
            n_selected_sig = np.sum(y[selected] == 1)
            result_row['n'] = n_selected
            result_row['b'] = n_selected_bkg
            result_row['s'] = n_selected_sig
            result_row['s_sqrt_n'] = n_selected_sig / np.sqrt(n_selected)
            # BUG FIX: previously divided by sqrt(n_selected); s/sqrt(b) must
            # use the selected background count.
            result_row['s_sqrt_b'] = n_selected_sig / np.sqrt(n_selected_bkg)
            results.append(result_row.copy())
    results = pd.DataFrame(results)
    print(results)
    return results
コード例 #2
0
 def __init__(self, X_test, w_test, i_cv, args, config=None, n_bins=10):
     """Store the evaluation context for later NLL / summary computations.

     Args:
         X_test: test-set features (project array; shape not visible here).
         w_test: per-event weights matching ``X_test``.
         i_cv: cross-validation iteration index.
         args: parsed CLI namespace carried along for downstream use.
         config: benchmark configuration; defaults to a fresh ``Config()``.
         n_bins: number of histogram bins used by the summary computer.
     """
     self.X_test = X_test
     self.w_test = w_test
     self.args = args
     self.i_cv = i_cv
     # Fall back to the default benchmark configuration when none is given.
     self.config = Config() if config is None else config
     self.n_bins = n_bins
コード例 #3
0
def main():
    """Probe the NLL surface of a trained NN: log reference NLL values,
    approximate the gradient at the true parameters, then run BFGS.

    Side effects: creates the ``nll_contour`` output directory, logs NLL
    values, prints the gradient and the BFGS minimizer result.
    """
    logger = set_logger()
    directory = os.path.join(DIRECTORY, "nll_contour")
    os.makedirs(directory, exist_ok=True)
    args = parse_args()

    train_generator, valid_generator, test_generator = get_generators()

    config = Config()
    model = load_some_NN(cuda=args.cuda)
    compute_nll = get_nll_computer(model, config, valid_generator,
                                   test_generator)

    # Reference NLL at the calibrated and the true parameter points.
    nll = compute_nll(*config.CALIBRATED)
    logger.info(f"calib nll = {nll}")
    nll = compute_nll(*config.TRUE)
    # BUG FIX: this message previously said "calib nll" (copy-paste of the
    # line above) although it reports the NLL at config.TRUE.
    logger.info(f"true nll = {nll}")

    # Plain function instead of a lambda assignment (PEP 8 / E731).
    def f(xk):
        return compute_nll(*xk)

    xk = np.array(list(config.TRUE))
    print(xk)
    # Finite-difference gradient of the NLL at the true parameters.
    EPSILON = 1e-8
    epsilon = np.array([EPSILON] * 2)
    grad = approx_fprime(xk, f, epsilon)
    print(grad, grad.dot(grad.T))

    logger.info(f"Running BFGS on the NLL")
    x_0 = np.array(list(config.CALIBRATED))
    print(fmin_bfgs(f, x_0))
コード例 #4
0
def _make_rescale_plot_clf(true_rescale, true_mu):
    """Plot the NLL profile along the rescale axis for one trained NN.

    Draws 50 NLL evaluations for rescale in [0.5, 3] at fixed ``true_mu``
    and marks the true rescale value with a dashed vertical line.
    """
    conf = Config()
    X_test, y_test, w_test = Generator(seed=SEED).generate(
        true_rescale, true_mu, n_samples=conf.N_TESTING_SAMPLES)

    # Load the cross-validation-0 model and build its NLL computer.
    model = load_some_NN(i_cv=0, cuda=False)
    nll_of = NLLComputer(model.summary_computer(n_bins=N_BINS),
                         Generator(seed=SEED - 1),
                         X_test,
                         w_test,
                         config=conf)

    xs = np.linspace(0.5, 3, 50)
    ys = [nll_of(rescale, true_mu) for rescale in xs]
    curve = plt.plot(xs, ys, label=f'NLL rescale={true_rescale}')
    plt.axvline(x=true_rescale,
                linestyle='--',
                color=curve[0].get_color(),
                label='true value')
コード例 #5
0
def main():
    """Evaluate a trained Gradient Boosting model on the S3D2 benchmark.

    Dumps the benchmark configuration, loads the evaluation table, prints
    it framed by separator lines, saves it as CSV and gathers plot images.
    """
    logger = set_logger()
    cli = REG_parse_args(
        main_description=
        "Training launcher for Gradient boosting on S3D2 benchmark")
    logger.info(cli)
    flush(logger)
    # INFO: write the benchmark configuration next to the results.
    gb_model = build_model(cli, -1)
    out_dir = gb_model.results_directory
    os.makedirs(out_dir, exist_ok=True)
    evaluate_config(Config()).to_csv(os.path.join(out_dir, 'config_table.csv'))
    # RUN
    eval_table = get_eval_table(cli, out_dir)
    # EVALUATION
    for _ in range(2):
        print_line()
    print(eval_table)
    for _ in range(2):
        print_line()
    eval_table.to_csv(os.path.join(out_dir, 'evaluation.csv'))
    gather_images(out_dir)
コード例 #6
0
def main():
    """Run N_ITER cross-validation estimations with Gradient Boosting on
    S3D2, save the pooled estimations and the estimator evaluation table.
    """
    logger = set_logger()
    cli = REG_parse_args(
        main_description=
        "Training launcher for Gradient boosting on S3D2 benchmark")
    logger.info(cli)
    flush(logger)
    # INFO: write the benchmark configuration next to the results.
    gb_model = build_model(cli, -1)
    out_dir = gb_model.results_directory
    os.makedirs(out_dir, exist_ok=True)
    benchmark = Config()
    evaluate_config(benchmark).to_csv(
        os.path.join(out_dir, 'config_table.csv'))
    # RUN: one estimation table per cross-validation split, pooled.
    estimations = pd.concat([run(cli, split) for split in range(N_ITER)],
                            ignore_index=True)
    estimations.to_csv(os.path.join(out_dir, 'estimations.csv'))
    # EVALUATION
    eval_table = evaluate_estimator(benchmark.INTEREST_PARAM_NAME,
                                    estimations)
    for _ in range(2):
        print_line()
    print(eval_table)
    for _ in range(2):
        print_line()
    eval_table.to_csv(os.path.join(out_dir, 'evaluation.csv'))
    gather_images(out_dir)
コード例 #7
0
def run(args, i_cv):
    """Run one likelihood-fit cross-validation iteration.

    Fits a histogram summary computer on calibrated training data, runs
    one fit per test configuration, saves the result table and parameter
    plots under ``DIRECTORY/cv_{i_cv}``, and returns the table.
    """
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()
    out_dir = os.path.join(DIRECTORY, f'cv_{i_cv}')
    os.makedirs(out_dir, exist_ok=True)

    conf = Config()
    base_seed = SEED + i_cv * 5
    gen_train = Generator(base_seed)
    gen_valid = Generator(base_seed + 1)
    gen_test = Generator(base_seed + 2)

    # Fit the binning on calibrated training data.
    n_bins = 10
    X_train, y_train, w_train = gen_train.generate(
        *conf.CALIBRATED, n_samples=conf.N_TRAINING_SAMPLES)
    summaries = HistogramSummaryComputer(n_bins=n_bins).fit(X_train)

    # One fit per test configuration.
    rows = []
    for idx, test_config in enumerate(conf.iter_test_config()):
        rows.append(run_iter(summaries, i_cv, idx, test_config, gen_valid,
                             gen_test, out_dir))
    table = pd.DataFrame(rows)
    table.to_csv(os.path.join(out_dir, 'results.csv'))
    logger.info('Plot params')
    for name in conf.PARAM_NAMES:
        plot_params(name,
                    table,
                    title='Likelihood fit',
                    directory=out_dir)

    return table
コード例 #8
0
def main():
    """Collect NLL-contour data over N_CV models and the test configurations.

    Loads one NN per cross-validation split, runs ``run_cv_iter`` for every
    (possibly sub-sampled) test configuration, and saves the pooled rows to
    ``nll_contour/data.csv``.
    """
    logger = set_logger()
    root_directory = os.path.join(DIRECTORY, "nll_contour")
    os.makedirs(root_directory, exist_ok=True)
    args = parse_args()

    N_CV = 3
    # FIXME : remove lili and STEP to use all iteration !
    STEP = 1
    lili = list(Config().iter_test_config())[::STEP]
    N_ITER = len(lili)
    # BUG FIX: log message previously read "iteractions".
    logger.info(f"{N_CV} cv and {N_ITER} iterations ({N_ITER*N_CV} loops)")
    data = []
    for i_cv in range(N_CV):
        model = load_some_NN(i_cv=i_cv, cuda=args.cuda)
        model.to_double()
        # model = load_some_GB(i_cv=i_cv)
        for i_iter, config in enumerate(lili):
            # Map the sub-sampled index back to the original iteration index.
            i_iter = i_iter * STEP
            values = run_cv_iter(args, i_cv, i_iter, config, model,
                                 root_directory)
            data.append(values)
    data = pd.DataFrame(data)
    fname = os.path.join(root_directory, "data.csv")
    data.to_csv(fname)
コード例 #9
0
def main():
    """Run estimation and/or conditional evaluation for the Feature Filter
    on the GG benchmark, joining both tables when both were computed.
    """
    logger = set_logger()
    cli = FF_parse_args(main_description="Training launcher for Feature Filter on GG benchmark")
    logger.info(cli)
    flush(logger)
    # INFO: write the benchmark configuration next to the results.
    ff_model = build_model(cli, -1)
    out_dir = ff_model.results_directory
    os.makedirs(out_dir, exist_ok=True)
    evaluate_config(Config()).to_csv(os.path.join(out_dir, 'config_table.csv'))
    # RUN: flags select which evaluations are performed.
    do_estimation = not cli.conditional_only
    do_conditional = not cli.estimate_only
    if do_estimation:
        eval_table = get_eval_table(cli, out_dir)
    if do_conditional:
        eval_conditional = get_eval_conditional(cli, out_dir)
    if do_estimation and do_conditional:
        eval_table = pd.concat([eval_table, eval_conditional], axis=1)
        # EVALUATION
        for _ in range(2):
            print_line()
        print(eval_table)
        for _ in range(2):
            print_line()
        eval_table.to_csv(os.path.join(out_dir, 'evaluation.csv'))
    gather_images(out_dir)
コード例 #10
0
def run(args, i_cv):
    """Train one classifier per training-set size and record validation scores.

    For each size in N_TRAIN_RANGE: builds a fresh model, fits it on
    calibrated training data, evaluates AUC/accuracy on validation data and
    runs the summary-computer evaluation.

    Args:
        args: parsed CLI namespace forwarded to ``build_model``.
        i_cv: cross-validation iteration index; offsets the generator seeds.

    Returns:
        pandas.DataFrame with one row per training-set size.
    """
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()

    # NOTE(review): result_row is mutated across loop iterations and only
    # copied at append time, so keys written once (e.g. 'i_cv') persist into
    # later rows by design.
    result_row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator = Generator(seed)
    valid_generator = Generator(seed + 1)
    # test_generator  = Generator(seed+2)

    results = []

    for n_train_samples in N_TRAIN_RANGE:
        result_row['n_train_samples'] = n_train_samples
        # SET MODEL
        logger.info('Set up classifier')
        model = build_model(args, i_cv)
        os.makedirs(model.results_path, exist_ok=True)
        flush(logger)

        # TRAINING / LOADING
        X_train, y_train, w_train = train_generator.generate(
            *config.CALIBRATED, n_samples=n_train_samples)
        model.fit(X_train, y_train, w_train)

        # CHECK TRAINING
        logger.info('Generate validation data')
        X_valid, y_valid, w_valid = valid_generator.generate(
            *config.CALIBRATED, n_samples=config.N_VALIDATION_SAMPLES)

        # Suffix keys with the training size so runs do not overwrite each
        # other in the evaluation output.
        some_eval = evaluate_classifier(model,
                                        X_valid,
                                        y_valid,
                                        w_valid,
                                        prefix='valid',
                                        suffix=f'-{n_train_samples}')
        result_row['valid_auc'] = some_eval[f'valid_auc-{n_train_samples}']
        result_row['valid_accuracy'] = some_eval[
            f'valid_accuracy-{n_train_samples}']

        N_BINS = 10
        evaluate_summary_computer(model,
                                  X_valid,
                                  y_valid,
                                  w_valid,
                                  n_bins=N_BINS,
                                  prefix='valid_',
                                  suffix=f'{n_train_samples}')

        results.append(result_row.copy())
    result_table = pd.DataFrame(results)

    return result_table
コード例 #11
0
def run(args, i_cv):
    """Run one regressor cross-validation iteration.

    Trains (or loads) the regressor on parameter-sampled training data, runs
    one estimation per test configuration, and writes both the estimation
    table and the conditional estimations to the model's results path.

    Args:
        args: parsed CLI namespace (uses ``args.retrain``).
        i_cv: cross-validation iteration index; offsets the generator seeds.

    Returns:
        Tuple of (result_table DataFrame, conditional_estimate DataFrame).
    """
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()

    result_row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator = Generator(seed)
    valid_generator = Generator(seed + 1)
    test_generator = Generator(seed + 2)
    # Wrap the raw generator so training samples come with sampled parameters.
    train_generator = TrainGenerator(param_generator, train_generator)

    # SET MODEL
    logger.info('Set up regressor')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_neural_net(model, train_generator, retrain=args.retrain)

    # CHECK TRAINING
    logger.info('Generate validation data')
    X_valid, y_valid, w_valid = valid_generator.generate(
        *config.CALIBRATED, n_samples=config.N_VALIDATION_SAMPLES)

    result_row.update(evaluate_neural_net(model, prefix='valid'))
    evaluate_regressor(model, prefix='valid')

    # MEASUREMENT
    result_row['nfcn'] = NCALL
    # Each run_iter returns a pair: (estimation row, conditional-estimate frame).
    iter_results = [
        run_iter(model, result_row, i, test_config, valid_generator,
                 test_generator)
        for i, test_config in enumerate(config.iter_test_config())
    ]
    result_table = [e0 for e0, e1 in iter_results]
    result_table = pd.DataFrame(result_table)
    result_table.to_csv(os.path.join(model.results_path, 'estimations.csv'))
    logger.info('Plot params')
    param_names = config.PARAM_NAMES
    for name in param_names:
        plot_params(name,
                    result_table,
                    title=model.full_name,
                    directory=model.results_path)

    # Pool the conditional estimates of every test configuration.
    conditional_estimate = pd.concat([e1 for e0, e1 in iter_results])
    conditional_estimate['i_cv'] = i_cv
    fname = os.path.join(model.results_path, "conditional_estimations.csv")
    conditional_estimate.to_csv(fname)
    logger.info('DONE')
    return result_table, conditional_estimate
コード例 #12
0
 def generate(self, n_samples):
     """Generate one batch, with sampled parameters when a size is given.

     When ``n_samples`` is not None, draws random parameters and returns
     ``(X, interest_parameters, w, nuisance_parameters)``; otherwise falls
     back to calibrated parameters and returns ``(X, y, w, 1)``.

     NOTE(review): the two branches return differently-shaped tuples
     (parameters vs labels in the second slot, 1 as a placeholder in the
     fourth) — confirm callers handle both cases.
     """
     if n_samples is not None:
         params = self.param_generator()
         X, y, w = self.data_generator.generate(*params, n_samples)
         return X, params.interest_parameters, w, params.nuisance_parameters
     else:
         config = Config()
         X, y, w = self.data_generator.generate(*config.CALIBRATED, n_samples=config.N_TRAINING_SAMPLES)
         return X, y, w, 1
コード例 #13
0
def run(args, i_cv):
    """Run one estimation-only cross-validation iteration.

    Training, validation and the related checks are commented out here:
    only the model is built and ``run_iter`` is executed per test
    configuration on freshly generated test data.

    Args:
        args: parsed CLI namespace forwarded to ``build_model`` / ``run_iter``.
        i_cv: cross-validation iteration index; offsets the generator seed.

    Returns:
        pandas.DataFrame with one row per test configuration.
    """
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()

    result_row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    # train_generator = Generator(seed)
    # valid_generator = Generator(seed+1)
    test_generator = Generator(seed + 2)

    # SET MODEL
    # logger.info('Set up classifier')
    model = build_model(args, i_cv)
    # flush(logger)

    # TRAINING / LOADING
    # train_or_load_classifier(model, train_generator, config.CALIBRATED, config.N_TRAINING_SAMPLES, retrain=args.retrain)

    # CHECK TRAINING
    logger.info('Generate validation data')
    # X_valid, y_valid, w_valid = valid_generator.generate(*config.CALIBRATED, n_samples=config.N_VALIDATION_SAMPLES)

    # result_row.update(evaluate_classifier(model, X_valid, y_valid, w_valid, prefix='valid'))

    # MEASUREMENT
    N_BINS = 10
    # evaluate_summary_computer(model, X_valid, y_valid, w_valid, n_bins=N_BINS, prefix='valid_', suffix='')
    result_table = [
        run_iter(model,
                 result_row,
                 i,
                 i_cv,
                 args,
                 test_config,
                 test_generator,
                 n_bins=N_BINS)
        for i, test_config in enumerate(config.iter_test_config())
    ]
    result_table = pd.DataFrame(result_table)
    result_table.to_csv(os.path.join(model.results_path, 'results.csv'))
    logger.info('Plot params')
    param_names = config.PARAM_NAMES
    for name in param_names:
        # NOTE(review): plots go to model.path while the CSV goes to
        # model.results_path — confirm this split is intended.
        plot_params(name,
                    result_table,
                    title=model.full_name,
                    directory=model.path)

    logger.info('DONE')
    return result_table
コード例 #14
0
def main():
    """Train/load a calibration regressor on S3D2 and run the estimations.

    Uses a single fixed split (``i_cv = 0``) with a dedicated seed offset,
    writes the configuration table, the per-test-config results and the
    calibration-parameter plots to the model's results directory.
    """
    # BASIC SETUP
    logger = set_logger()
    args = REG_parse_args(
        main_description="Training launcher for Regressor on S3D2 benchmark")
    logger.info(args)
    flush(logger)

    # Setup model
    logger.info("Setup model")
    model = build_model(args, 0)
    os.makedirs(model.results_directory, exist_ok=True)

    # Setup data
    logger.info("Setup data")
    config = Config()
    config_table = evaluate_config(config)
    config_table.to_csv(
        os.path.join(model.results_directory, 'config_table.csv'))
    # Large seed offset to keep this run's data disjoint from CV runs.
    seed = SEED + 99999
    train_generator = TrainGenerator(param_generator, Generator(seed))
    valid_generator = Generator(seed + 1)
    test_generator = Generator(seed + 2)

    i_cv = 0
    result_row = {'i_cv': i_cv}

    # TRAINING / LOADING
    train_or_load_neural_net(model, train_generator, retrain=args.retrain)

    # CHECK TRAINING
    result_row.update(evaluate_neural_net(model, prefix='valid'))
    evaluate_regressor(model, prefix='valid')
    print_line()

    # One estimation row per test configuration.
    result_table = [
        run_iter(model, result_row, i, test_config, valid_generator,
                 test_generator)
        for i, test_config in enumerate(config.iter_test_config())
    ]
    result_table = pd.DataFrame(result_table)
    result_table.to_csv(os.path.join(model.results_directory, 'results.csv'))

    logger.info('Plot params')
    # Only the calibration parameter is plotted here.
    param_names = [CALIB_PARAM_NAME]
    for name in param_names:
        plot_params(name,
                    result_table,
                    title=model.full_name,
                    directory=model.results_directory)

    logger.info('DONE')
コード例 #15
0
def explore_distribs():
    """Plot the generated data distribution and the priors for rescale and mu."""
    conf = Config()
    gen = Generator()
    # Draw a test-sized sample (plot helpers re-sample internally).
    data, label = gen.sample_event(*conf.TRUE,
                                   size=conf.N_TESTING_SAMPLES)

    # Gaussian prior around the calibrated rescale; flat prior on mu in [0, 1].
    rescale_prior = stats.norm(loc=conf.CALIBRATED.rescale,
                               scale=conf.CALIBRATED_ERROR.rescale)
    mu_prior = stats.uniform(loc=0, scale=1)

    plot_data_distrib(gen, conf)
    plot_prior(rescale_prior, "rescale")
    plot_prior(mu_prior, "mu")
コード例 #16
0
def main():
    """Run (or reload) INFERNO cross-validation estimations on S3D2 and
    evaluate both the plain and the conditional estimations.
    """
    # BASIC SETUP
    logger = set_logger()
    args = INFERNO_parse_args(
        main_description=
        "Training launcher for Gradient boosting on S3D2 benchmark")
    logger.info(args)
    flush(logger)
    # INFO
    model = build_model(args, -1)
    os.makedirs(model.results_directory, exist_ok=True)
    config = Config()
    config_table = evaluate_config(config)
    config_table.to_csv(
        os.path.join(model.results_directory, 'config_table.csv'))
    # RUN: either reload saved runs or execute them for the CV range.
    if args.load_run:
        logger.info(f'Loading previous runs [{args.start_cv},{args.end_cv}[')
        directory = model.results_directory
        estimations = load_estimations(directory,
                                       start_cv=args.start_cv,
                                       end_cv=args.end_cv)
        conditional_estimations = load_conditional_estimations(
            directory, start_cv=args.start_cv, end_cv=args.end_cv)
    else:
        logger.info(f'Running runs [{args.start_cv},{args.end_cv}[')
        # run() returns (estimation table, conditional-estimation table).
        results = [
            run(args, i_cv) for i_cv in range(args.start_cv, args.end_cv)
        ]
        estimations = [e0 for e0, e1 in results]
        estimations = pd.concat(estimations, ignore_index=True)
        conditional_estimations = [e1 for e0, e1 in results]
        conditional_estimations = pd.concat(conditional_estimations)
    estimations.to_csv(os.path.join(model.results_directory,
                                    'estimations.csv'))
    conditional_estimations.to_csv(
        os.path.join(model.results_directory, 'conditional_estimations.csv'))
    # EVALUATION: join the plain and conditional evaluations column-wise.
    eval_table = evaluate_estimator(config.INTEREST_PARAM_NAME, estimations)
    eval_conditional = evaluate_conditional_estimation(
        conditional_estimations,
        interest_param_name=config.INTEREST_PARAM_NAME)
    eval_table = pd.concat([eval_table, eval_conditional], axis=1)
    print_line()
    print_line()
    print(eval_table)
    print_line()
    print_line()
    eval_table.to_csv(os.path.join(model.results_directory, 'evaluation.csv'))
    gather_images(model.results_directory)
コード例 #17
0
def run(args, i_cv):
    """Scan the bin count of the Fisher-information computation.

    Trains (or loads) a pivot model, sanity-checks that the Fisher
    information is deterministic, then for each test configuration and
    n_bins in [1, 30) records the per-bin gamma/beta arrays and the Fisher
    information.

    Args:
        args: parsed CLI namespace (uses ``args.retrain``).
        i_cv: cross-validation iteration index; offsets the generator seeds.

    Returns:
        pandas.DataFrame with one row per (test_config, n_bins).
    """
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()


    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator = Generator(seed)
    train_generator = TrainGenerator(param_generator, train_generator)
    valid_generator = Generator(seed+1)
    test_generator  = Generator(seed+2)

    # SET MODEL
    logger.info('Set up classifier')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_pivot(model, train_generator, config.N_TRAINING_SAMPLES*N_AUGMENT, retrain=args.retrain)

    # Determinism sanity check: two identical computations must agree.
    # NOTE(review): exact float equality and `assert` (stripped under -O) —
    # acceptable for a research script, but fragile as a guarantee.
    some_fisher = compute_fisher(*compute_bins(model, valid_generator, config, n_bins=3), config.TRUE.mu)
    some_fisher_bis = compute_fisher(*compute_bins(model, valid_generator, config, n_bins=3), config.TRUE.mu)

    assert some_fisher == some_fisher_bis, f"Fisher info should be deterministic but found : {some_fisher} =/= {some_fisher_bis}"

    # MEASUREMENT
    result_row = {'i_cv': i_cv}
    results = []
    for test_config in config.iter_test_config():
        logger.info(f"Running test set : {test_config.TRUE}, {test_config.N_TESTING_SAMPLES} samples")
        for n_bins in range(1, 30):
            result_row = {'i_cv': i_cv}
            gamma_array, beta_array = compute_bins(model, valid_generator, test_config, n_bins=n_bins)
            fisher = compute_fisher(gamma_array, beta_array, test_config.TRUE.mu)
            # Flatten the per-bin arrays into numbered columns (1-based).
            result_row.update({f'gamma_{i}' : gamma for i, gamma in enumerate(gamma_array, 1)})
            result_row.update({f'beta_{i}' : beta for i, beta in enumerate(beta_array, 1)})
            result_row.update(test_config.TRUE.to_dict(prefix='true_'))
            result_row['n_test_samples'] = test_config.N_TESTING_SAMPLES
            result_row['fisher'] = fisher
            result_row['n_bins'] = n_bins
            results.append(result_row.copy())
    results = pd.DataFrame(results)
    print(results)
    return results
コード例 #18
0
def run_conditional_estimation(args, i_cv):
    """Run the conditional estimation for one classifier CV iteration.

    Trains (or loads) the classifier, evaluates it on validation data, then
    runs the conditional-estimation iteration per test configuration and
    saves the pooled frame as ``conditional_estimations.csv``.

    Args:
        args: parsed CLI namespace (uses ``args.retrain``).
        i_cv: cross-validation iteration index; offsets the generator seeds.

    Returns:
        pandas.DataFrame of pooled conditional estimations with an
        ``i_cv`` column added.
    """
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()

    result_row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator = Generator(seed)
    valid_generator = Generator(seed+1)
    test_generator  = Generator(seed+2)

    # SET MODEL
    logger.info('Set up classifier')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_classifier(model, train_generator, config.CALIBRATED, config.N_TRAINING_SAMPLES, retrain=args.retrain)

    # CHECK TRAINING
    logger.info('Generate validation data')
    X_valid, y_valid, w_valid = valid_generator.generate(*config.CALIBRATED, n_samples=config.N_VALIDATION_SAMPLES)

    result_row.update(evaluate_classifier(model, X_valid, y_valid, w_valid, prefix='valid'))

    # MEASUREMENT: one conditional-estimation frame per test configuration.
    evaluate_summary_computer(model, X_valid, y_valid, w_valid, n_bins=N_BINS, prefix='valid_', suffix='')
    iter_results = [run_conditional_estimation_iter(model, result_row, i, test_config, valid_generator, test_generator, n_bins=N_BINS)
                    for i, test_config in enumerate(config.iter_test_config())]

    conditional_estimate = pd.concat(iter_results)
    conditional_estimate['i_cv'] = i_cv
    fname = os.path.join(model.results_path, "conditional_estimations.csv")
    conditional_estimate.to_csv(fname)
    logger.info('DONE')
    return conditional_estimate
コード例 #19
0
def run_conditional_estimation(args, i_cv):
    """Run the conditional estimation for one neural-net CV iteration
    (torch-generator variant).

    Builds CPU-wrapped torch generators, trains (or loads) the network, and
    runs the conditional-estimation iteration per test configuration; the
    pooled frame is saved as ``conditional_estimations.csv``.

    Args:
        args: parsed CLI namespace (uses ``args.cuda`` and ``args.retrain``).
        i_cv: cross-validation iteration index; offsets the generator seed.

    Returns:
        pandas.DataFrame of pooled conditional estimations with an
        ``i_cv`` column added.
    """
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()

    result_row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    # Torch generators wrapped to expose CPU arrays; the train generator is
    # additionally wrapped to sample parameters per batch.
    train_generator, valid_generator, test_generator = get_generators_torch(seed, cuda=args.cuda, GeneratorClass=GeneratorClass)
    train_generator = GeneratorCPU(train_generator)
    train_generator = TrainGenerator(param_generator, train_generator)
    valid_generator = GeneratorCPU(valid_generator)
    test_generator = GeneratorCPU(test_generator)

    # SET MODEL
    logger.info('Set up classifier')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_neural_net(model, train_generator, retrain=args.retrain)

    # CHECK TRAINING
    logger.info('Generate validation data')
    # NOTE(review): X_valid/y_valid/w_valid are generated but not used below.
    X_valid, y_valid, w_valid = valid_generator.generate(*config.CALIBRATED, n_samples=config.N_VALIDATION_SAMPLES, no_grad=True)

    # MEASUREMENT
    result_row['nfcn'] = NCALL
    iter_results = [run_conditional_estimation_iter(model, result_row, i, test_config, valid_generator, test_generator)
                    for i, test_config in enumerate(config.iter_test_config())]

    conditional_estimate = pd.concat(iter_results)
    conditional_estimate['i_cv'] = i_cv
    fname = os.path.join(model.results_path, "conditional_estimations.csv")
    conditional_estimate.to_csv(fname)
    logger.info('DONE')
    return conditional_estimate
コード例 #20
0
def main():
    """Run N_ITER estimation iterations, pool the results, and write the
    estimator evaluation table and gathered images to DIRECTORY.
    """
    logger = set_logger()
    logger.info("Hello world !")
    os.makedirs(DIRECTORY, exist_ok=True)
    set_plot_config()
    cli_args = None

    conf = Config()
    pooled = pd.concat([run(cli_args, cv) for cv in range(N_ITER)],
                       ignore_index=True)
    pooled.to_csv(os.path.join(DIRECTORY, 'results.csv'))
    # EVALUATION
    eval_table = evaluate_estimator(conf.TRUE.interest_parameters_names,
                                    pooled)
    for _ in range(2):
        print_line()
    print(eval_table)
    for _ in range(2):
        print_line()
    eval_table.to_csv(os.path.join(DIRECTORY, 'evaluation.csv'))
    gather_images(DIRECTORY)
コード例 #21
0
def explore_links():
    """Scatter mean(x) against mu for several rescale values and save the
    figure, exposing the link between the data mean and mu."""
    conf = Config()
    gen = Generator()
    rescale_values = np.linspace(min(conf.RANGE.rescale),
                                 max(conf.RANGE.rescale),
                                 num=5)
    mu_values = np.linspace(min(conf.RANGE.mu), max(conf.RANGE.mu), num=15)
    for rescale in rescale_values:
        means = []
        targets = []
        for mu in mu_values:
            data, label = gen.sample_event(rescale,
                                           mu,
                                           size=conf.N_TESTING_SAMPLES)
            means.append(np.mean(data, axis=0))
            targets.append(mu)
        plt.scatter(means, targets, label=f'rescale={rescale}')

    plt.title('Link between mean(x) and mu')
    plt.ylabel('mu')
    plt.xlabel('mean(x)')
    plt.legend()
    plt.savefig(os.path.join(DIRECTORY, 'mean_link.png'))
    plt.clf()
コード例 #22
0
def main():
    """Decompose the estimator variance into statistical and systematic
    parts by nested sampling over nuisance parameters and CV iterations.

    For 25 nuisance-parameter draws, trains N_ITER classifiers, minimizes
    the NLL in mu with Minuit at fixed nuisances, and aggregates the
    estimator mean/variance per draw. V_stat is the mean within-draw
    variance, V_syst the variance of the within-draw means.
    """
    # BASIC SETUP
    logger = set_logger()
    args = GB_parse_args(
        main_description=
        "Training launcher for Gradient boosting on S3D2 benchmark")
    logger.info(args)
    flush(logger)
    # Config
    config = Config()
    # Override the true parameter point for this study.
    config.TRUE = Parameter(rescale=0.9, mu=0.1)
    train_generator = Generator(SEED)
    valid_generator = Generator(SEED + 1)
    test_generator = Generator(SEED + 2)
    X_test, y_test, w_test = test_generator.generate(
        *config.TRUE, n_samples=config.N_TESTING_SAMPLES)

    # for nuisance in p(nuisance | data)
    nuisance_param_sample = [
        param_generator().nuisance_parameters for _ in range(25)
    ]
    average_list = []
    variance_list = []
    result_table = []
    for nuisance_params in nuisance_param_sample:
        logger.info(f"nuisance_params = {nuisance_params}")
        estimator_values = []
        for i_cv in range(N_ITER):
            clf = build_model(args, i_cv)
            # Training point: drawn nuisances with the calibrated interest value.
            parameters = Parameter(*nuisance_params,
                                   config.CALIBRATED.interest_parameters)
            print(parameters)
            n_samples = config.N_TRAINING_SAMPLES
            X_train, y_train, w_train = train_generator.generate(
                *parameters, n_samples=n_samples)
            logger.info(f"Training {clf.full_name}")
            clf.fit(X_train, y_train, w_train)
            compute_summaries = ClassifierSummaryComputer(clf, n_bins=10)
            nll_computer = NLLComputer(compute_summaries,
                                       valid_generator,
                                       X_test,
                                       w_test,
                                       config=config)
            # NLL profiled in mu only; nuisances stay fixed at the draw.
            compute_nll = lambda mu: nll_computer(*nuisance_params, mu)
            minimizer = get_minimizer(compute_nll)
            results = evaluate_minuit(minimizer,
                                      [config.TRUE.interest_parameters])
            estimator_values.append(results['mu'])
            results['i_cv'] = i_cv
            results.update(params_to_dict(parameters, suffix='true'))
            result_table.append(results.copy())
        # Within-draw mean and variance of the mu estimates.
        average_list.append(np.mean(estimator_values))
        variance_list.append(np.var(estimator_values))

    model = build_model(args, 0)
    model.set_info(DATA_NAME, BENCHMARK_NAME, 0)
    save_directory = model.results_path
    os.makedirs(save_directory, exist_ok=True)
    result_table = pd.DataFrame(result_table)
    result_table.to_csv(os.path.join(save_directory, 'results.csv'))
    logger.info(f"average_list {average_list}")
    logger.info(f"variance_list {variance_list}")
    # Variance decomposition: statistical + systematic = total.
    v_stat = np.mean(variance_list)
    v_syst = np.var(average_list)
    v_total = v_stat + v_syst
    logger.info(f"V_stat = {v_stat}")
    logger.info(f"V_syst = {v_syst}")
    logger.info(f"V_total = {v_total}")
    eval_dict = {"V_stat": v_stat, "V_syst": v_syst, "V_total": v_total}
    eval_path = os.path.join(save_directory, 'info.json')
    with open(eval_path, 'w') as f:
        json.dump(eval_dict, f)
コード例 #23
0
def run_estimation(args, i_cv):
    """Run one INFERNO estimation cross-validation iteration.

    Trains (or loads) the INFERNO model, evaluates it on validation data,
    runs one estimation per test configuration, saves the estimation table
    and the per-parameter plots.

    Args:
        args: parsed CLI namespace (uses ``args.cuda`` and ``args.retrain``).
        i_cv: cross-validation iteration index; offsets the generator seeds.

    Returns:
        pandas.DataFrame with one estimation row per test configuration.
    """
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()

    result_row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator = GeneratorTorch(seed, cuda=args.cuda)
    valid_generator = Generator(seed + 1)
    test_generator = Generator(seed + 2)

    # SET MODEL
    logger.info('Set up regressor')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_inferno(model, train_generator, retrain=args.retrain)

    # CHECK TRAINING
    logger.info('Generate validation data')
    X_valid, y_valid, w_valid = valid_generator.generate(
        *config.CALIBRATED, n_samples=config.N_VALIDATION_SAMPLES)

    result_row.update(evaluate_neural_net(model, prefix='valid'))
    evaluate_inferno(model, prefix='valid')

    # MEASUREMENT
    evaluate_summary_computer(model,
                              X_valid,
                              y_valid,
                              w_valid,
                              n_bins=N_BINS,
                              prefix='valid_',
                              suffix='')
    # One estimation row per test configuration.
    iter_results = [
        run_estimation_iter(model,
                            result_row,
                            i,
                            test_config,
                            valid_generator,
                            test_generator,
                            n_bins=N_BINS)
        for i, test_config in enumerate(config.iter_test_config())
    ]
    result_table = pd.DataFrame(iter_results)
    result_table.to_csv(os.path.join(model.results_path, 'estimations.csv'))
    logger.info('Plot params')
    param_names = config.PARAM_NAMES
    for name in param_names:
        plot_params(name,
                    result_table,
                    title=model.full_name,
                    directory=model.results_path)

    logger.info('DONE')
    return result_table