Example #1
0
def run_conditional_estimation(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()

    result_row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator = GeneratorTorch(seed, cuda=args.cuda)
    train_generator = TrainGenerator(train_generator, cuda=args.cuda)
    valid_generator = Generator(seed + 1)
    test_generator = Generator(seed + 2)

    # SET MODEL
    logger.info('Set up classifier')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_neural_net(model, train_generator, retrain=args.retrain)

    # CHECK TRAINING
    logger.info('Generate validation data')
    X_valid, y_valid, w_valid = valid_generator.generate(
        *config.CALIBRATED, n_samples=config.N_VALIDATION_SAMPLES)

    result_row.update(evaluate_neural_net(model, prefix='valid'))
    result_row.update(
        evaluate_classifier(model, X_valid, y_valid, w_valid, prefix='valid'))

    # MEASUREMENT
    evaluate_summary_computer(model,
                              X_valid,
                              y_valid,
                              w_valid,
                              n_bins=N_BINS,
                              prefix='valid_',
                              suffix='')
    iter_results = [
        run_conditional_estimation_iter(model,
                                        result_row,
                                        i,
                                        test_config,
                                        valid_generator,
                                        test_generator,
                                        n_bins=N_BINS)
        for i, test_config in enumerate(config.iter_test_config())
    ]

    conditional_estimate = pd.concat(iter_results)
    conditional_estimate['i_cv'] = i_cv
    fname = os.path.join(model.results_path, "conditional_estimations.csv")
    conditional_estimate.to_csv(fname)
    logger.info('DONE')
    return conditional_estimate
def run(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()

    result_row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator = Generator(seed)
    valid_generator = Generator(seed + 1)
    test_generator = Generator(seed + 2)
    train_generator = TrainGenerator(param_generator, train_generator)

    # SET MODEL
    logger.info('Set up regressor')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_neural_net(model, train_generator, retrain=args.retrain)

    # CHECK TRAINING
    logger.info('Generate validation data')
    X_valid, y_valid, w_valid = valid_generator.generate(
        *config.CALIBRATED, n_samples=config.N_VALIDATION_SAMPLES)

    result_row.update(evaluate_neural_net(model, prefix='valid'))
    evaluate_regressor(model, prefix='valid')

    # MEASUREMENT
    result_row['nfcn'] = NCALL
    result_table = [
        run_iter(model, result_row, i, test_config, valid_generator,
                 test_generator)
        for i, test_config in enumerate(config.iter_test_config())
    ]
    result_table = pd.DataFrame(result_table)
    result_table.to_csv(os.path.join(model.results_path, 'results.csv'))
    logger.info('Plot params')
    param_names = config.PARAM_NAMES
    for name in param_names:
        plot_params(name,
                    result_table,
                    title=model.full_name,
                    directory=model.results_path)

    logger.info('DONE')
    return result_table
def main():
    # BASIC SETUP
    logger = set_logger()
    args = GB_parse_args(main_description="Training launcher for Gradient boosting on S3D2 benchmark")
    logger.info(args)
    flush(logger)
    # Config
    config = Config()
    config.TRUE = Parameter(r=0.1, lam=2.7, mu=0.1)

    train_generator = Generator(SEED)
    valid_generator = Generator(SEED+1)
    test_generator  = Generator(SEED+2)
    X_test, y_test, w_test = test_generator.generate(*config.TRUE, n_samples=config.N_TESTING_SAMPLES)

    # for nuisance in p(nuisance | data)
    nuisance_param_sample = [param_generator().nuisance_parameters for _ in range(25)]
    average_list = []
    variance_list = []
    all_results = []
    for nuisance_params in nuisance_param_sample:
        logger.info(f"nuisance_params = {nuisance_params}")
        estimator_values = []
        results = {name : value for name, value in zip(config.TRUE.nuisance_parameters_names, nuisance_params)}
        for i_cv in range(N_ITER):
            clf = build_model(args, i_cv)
            parameters = Parameter(*nuisance_params, config.CALIBRATED.interest_parameters)
            print(parameters)
            n_samples = config.N_TRAINING_SAMPLES
            X_train, y_train, w_train = train_generator.generate(*parameters, n_samples=n_samples)
            logger.info(f"Training {clf.full_name}")
            # TODO : is it OK to provide w_train to the classifier or useless ?
            clf.fit(X_train, y_train, w_train)
            compute_summaries = ClassifierSummaryComputer(clf, n_bins=10)
            nll_computer = NLLComputer(compute_summaries, valid_generator, X_test, w_test, config=config)
            compute_nll = lambda mu : nll_computer(*nuisance_params, mu)
            minimizer = get_minimizer(compute_nll)
            results.update(evaluate_minuit(minimizer, [config.TRUE.interest_parameters]))
            all_results.append(results.copy())
            # TODO : Add results to some csv
            estimator_values.append(results['mu'])
        average_list.append(np.mean(estimator_values))
        variance_list.append(np.var(estimator_values))

    logger.info(f"average_list {average_list}")
    logger.info(f"variance_list {variance_list}")
    v_stat = np.mean(variance_list)
    v_syst = np.var(average_list)
    v_total = v_stat + v_syst
    logger.info(f"V_stat = {v_stat}")
    logger.info(f"V_syst = {v_syst}")
    logger.info(f"V_total = {v_total}")
Example #4
0
def main():
    # BASIC SETUP
    logger = set_logger()
    args = REG_parse_args(
        main_description="Training launcher for Regressor on S3D2 benchmark")
    logger.info(args)
    flush(logger)

    # Setup model
    logger.info("Setup model")
    model = build_model(args, 0)
    os.makedirs(model.results_directory, exist_ok=True)

    # Setup data
    logger.info("Setup data")
    config = Config()
    config_table = evaluate_config(config)
    config_table.to_csv(
        os.path.join(model.results_directory, 'config_table.csv'))
    seed = SEED + 99999
    train_generator = TrainGenerator(param_generator, Generator(seed))
    valid_generator = Generator(seed + 1)
    test_generator = Generator(seed + 2)

    i_cv = 0
    result_row = {'i_cv': i_cv}

    # TRAINING / LOADING
    train_or_load_neural_net(model, train_generator, retrain=args.retrain)

    # CHECK TRAINING
    result_row.update(evaluate_neural_net(model, prefix='valid'))
    evaluate_regressor(model, prefix='valid')
    print_line()

    result_table = [
        run_iter(model, result_row, i, test_config, valid_generator,
                 test_generator)
        for i, test_config in enumerate(config.iter_test_config())
    ]
    result_table = pd.DataFrame(result_table)
    result_table.to_csv(os.path.join(model.results_directory, 'results.csv'))

    logger.info('Plot params')
    param_names = [CALIB_PARAM_NAME]
    for name in param_names:
        plot_params(name,
                    result_table,
                    title=model.full_name,
                    directory=model.results_directory)

    logger.info('DONE')
Example #5
0
def run_estimation(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()

    result_row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    train_generator = Generator(seed)
    train_generator = TrainGenerator(param_generator, train_generator)
    valid_generator = Generator(seed+1)
    test_generator  = Generator(seed+2)

    # SET MODEL
    logger.info('Set up classifier')
    model = build_model(args, i_cv)
    os.makedirs(model.results_path, exist_ok=True)
    flush(logger)

    # TRAINING / LOADING
    train_or_load_pivot(model, train_generator, config.N_TRAINING_SAMPLES*N_AUGMENT, retrain=args.retrain)

    # CHECK TRAINING
    logger.info('Generate validation data')
    X_valid, y_valid, w_valid = valid_generator.generate(*config.CALIBRATED, n_samples=config.N_VALIDATION_SAMPLES)

    result_row.update(evaluate_neural_net(model, prefix='valid'))
    result_row.update(evaluate_classifier(model, X_valid, y_valid, w_valid, prefix='valid'))

    # MEASUREMENT
    calib_r = load_calib_r(DATA_NAME, BENCHMARK_NAME)
    calib_lam = load_calib_lam(DATA_NAME, BENCHMARK_NAME)
    evaluate_summary_computer(model, X_valid, y_valid, w_valid, n_bins=N_BINS, prefix='valid_', suffix='')
    iter_results = [run_estimation_iter(model, result_row, i, test_config, valid_generator, test_generator, calib_r, calib_lam, n_bins=N_BINS)
                    for i, test_config in enumerate(config.iter_test_config())]
    result_table = pd.DataFrame(iter_results)
    result_table.to_csv(os.path.join(model.results_path, 'estimations.csv'))
    logger.info('Plot params')
    param_names = config.PARAM_NAMES
    for name in param_names:
        plot_params(name, result_table, title=model.full_name, directory=model.results_path)

    logger.info('DONE')
    return result_table
def main():
    # BASIC SETUP
    logger = set_logger()
    args = REG_parse_args(
        main_description="Training launcher for Regressor on S3D2 benchmark")
    logger.info(args)
    flush(logger)
    # INFO
    args.net = AR5R5E(n_in=3, n_out=2, n_extra=2)
    args.optimizer = get_optimizer(args)
    model = get_model(args, Regressor)
    model.set_info(DATA_NAME, BENCHMARK_NAME, -1)
    pb_config = S3D2Config()

    # RUN
    results = [run(args, i_cv) for i_cv in range(N_ITER)]
    results = pd.concat(results, ignore_index=True)
    results.to_csv(os.path.join(model.results_directory, 'results.csv'))
    # EVALUATION
    eval_table = evaluate_estimator(pb_config.INTEREST_PARAM_NAME, results)
    print_line()
    print_line()
    print(eval_table)
    print_line()
    print_line()
    eval_table.to_csv(os.path.join(model.results_directory, 'evaluation.csv'))
    gather_images(model.results_directory)
Example #7
0
def run(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()
    directory = os.path.join(DIRECTORY, f'cv_{i_cv}')
    os.makedirs(directory, exist_ok=True)

    config = S3D2Config()
    seed = SEED + i_cv * 5
    test_seed = seed + 2

    result_table = [
        run_iter(i_cv, i, test_config, test_seed, directory)
        for i, test_config in enumerate(config.iter_test_config())
    ]
    result_table = pd.DataFrame(result_table)
    result_table.to_csv(os.path.join(directory, 'estimations.csv'))
    logger.info('Plot params')
    param_names = config.PARAM_NAMES
    for name in param_names:
        plot_params(name,
                    result_table,
                    title='Likelihood fit',
                    directory=directory)

    return result_table
Example #8
0
def run(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()
    directory = os.path.join(DIRECTORY, f'cv_{i_cv}')
    os.makedirs(directory, exist_ok=True)

    config = Config()
    seed = SEED + i_cv * 5
    train_generator = Generator(seed)
    valid_generator = Generator(seed+1)
    test_generator  = Generator(seed+2)

    N_BINS = 10
    X_train, y_train, w_train = train_generator.generate(*config.CALIBRATED, n_samples=config.N_TRAINING_SAMPLES)
    compute_summaries = HistogramSummaryComputer(n_bins=N_BINS).fit(X_train)

    result_table = [run_iter(compute_summaries, i_cv, i, test_config, valid_generator, test_generator, directory) for i, test_config in enumerate(config.iter_test_config())]
    result_table = pd.DataFrame(result_table)
    result_table.to_csv(os.path.join(directory, 'results.csv'))
    logger.info('Plot params')
    param_names = config.PARAM_NAMES
    for name in param_names:
        plot_params(name, result_table, title='Likelihood fit', directory=directory)

    return result_table
def run(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()

    result_row = {'i_cv': i_cv}

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    config = Config()
    seed = SEED + i_cv * 5
    # train_generator = Generator(seed)
    # valid_generator = Generator(seed+1)
    test_generator  = Generator(seed+2)

    # SET MODEL
    # logger.info('Set up classifier')
    model = build_model(args, i_cv)
    # flush(logger)

    # TRAINING / LOADING
    # train_or_load_classifier(model, train_generator, config.CALIBRATED, config.N_TRAINING_SAMPLES, retrain=args.retrain)

    # CHECK TRAINING
    logger.info('Generate validation data')
    # X_valid, y_valid, w_valid = valid_generator.generate(*config.CALIBRATED, n_samples=config.N_VALIDATION_SAMPLES)

    # result_row.update(evaluate_classifier(model, X_valid, y_valid, w_valid, prefix='valid'))

    # MEASUREMENT
    N_BINS = 10
    # evaluate_summary_computer(model, X_valid, y_valid, w_valid, n_bins=N_BINS, prefix='valid_', suffix='')
    result_table = [run_iter(model, result_row, i, i_cv, args, test_config, test_generator, n_bins=N_BINS)
                    for i, test_config in enumerate(config.iter_test_config())]
    result_table = pd.DataFrame(result_table)
    result_table.to_csv(os.path.join(model.results_path, 'results.csv'))
    logger.info('Plot params')
    param_names = config.PARAM_NAMES
    for name in param_names:
        plot_params(name, result_table, title=model.full_name, directory=model.path)

    logger.info('DONE')
    return result_table
Example #10
0
def main():
    logger = set_logger()
    logger.info("Hello world !")
    os.makedirs(DIRECTORY, exist_ok=True)
    set_plot_config()
    args = None

    config = S3D2Config()
    results = [run(args, i_cv) for i_cv in range(N_ITER)]
    results = pd.concat(results, ignore_index=True)
    results.to_csv(os.path.join(DIRECTORY, 'results.csv'))
    # EVALUATION
    eval_table = evaluate_estimator(config.TRUE.interest_parameters_names,
                                    results)
    print_line()
    print_line()
    print(eval_table)
    print_line()
    print_line()
    eval_table.to_csv(os.path.join(DIRECTORY, 'evaluation.csv'))
    gather_images(DIRECTORY)
def param_generator():
    pb_config = S3D2Config()

    # r = np.random.normal(pb_config.CALIBRATED_R, pb_config.CALIBRATED_R_ERROR)
    # lam = -1
    # while lam <= 0:
    #     lam = np.random.normal(pb_config.CALIBRATED_LAMBDA, pb_config.CALIBRATED_LAMBDA_ERROR)

    r = pb_config.CALIBRATED_R
    lam = pb_config.CALIBRATED_LAMBDA
    mu_min = min(pb_config.TRUE_MU_RANGE)
    mu_max = max(pb_config.TRUE_MU_RANGE)
    mu_range = mu_max - mu_min
    mu_min = max(0.0, mu_min - mu_range / 10)
    mu_max = min(1.0, mu_max + mu_range / 10)

    mu = np.random.uniform(0, 1)
    return Parameter(
        r,
        lam,
        mu,
    )
Example #12
0
def likelihood_fit():
    print("Hello world !")
    set_plot_config()
    config = S3D2Config()
    DATA_N_SAMPLES = 80_000

    result_table = []

    for mu in config.TRUE_MU_RANGE[1:]:
        result_row = {}
        config.TRUE_MU = mu
        generator = S3D2(SEED)
        data, label = generator.sample_event(config.TRUE.r, config.TRUE.lam, config.TRUE_MU, size=DATA_N_SAMPLES)
        n_sig = np.sum(label==1)
        n_bkg = np.sum(label==0)
        print(f"nb of signal      = {n_sig}")
        print(f"nb of backgrounds = {n_bkg}")


        compute_nll = lambda r, lam, mu : generator.nll(data, r, lam, mu)

        print('Prepare minuit minimizer')
        minimizer = get_minimizer(compute_nll, config)
        fmin, params = estimate(minimizer)
        params_truth = [config.TRUE_R, config.TRUE_LAMBDA, config.TRUE_MU]
        my_print_params(params, params_truth)
        register_params(params, params_truth, result_row)
        result_row['is_mingrad_valid'] = minimizer.migrad_ok()
        result_row.update(fmin)
        result_table.append(result_row.copy())
    result_table = pd.DataFrame(result_table)
    result_table.to_csv(os.path.join(DIRECTORY, 'results.csv'))
    print('Plot params')
    param_names = config.PARAM_NAMES
    for name in param_names:
        my_plot_params(name, result_table)
def run(args, i_cv):
    logger = logging.getLogger()
    print_line()
    logger.info('Running iter n°{}'.format(i_cv))
    print_line()

    result_row = {'i_cv': i_cv}
    result_table = []

    # LOAD/GENERATE DATA
    logger.info('Set up data generator')
    pb_config = S3D2Config()
    seed = config.SEED + i_cv * 5
    train_generator = S3D2(seed)
    valid_generator = S3D2(seed + 1)
    test_generator = S3D2(seed + 2)

    # SET MODEL
    logger.info('Set up rergessor')
    args.net = AR5R5E(n_in=3, n_out=2, n_extra=2)
    args.optimizer = get_optimizer(args)
    model = get_model(args, Regressor)
    model.set_info(BENCHMARK_NAME, i_cv)
    model.param_generator = param_generator
    flush(logger)

    # TRAINING / LOADING
    if not args.retrain:
        try:
            logger.info('loading from {}'.format(model.model_path))
            model.load(model.model_path)
        except Exception as e:
            logger.warning(e)
            args.retrain = True
    if args.retrain:
        logger.info('Training {}'.format(model.get_name()))
        model.fit(train_generator)
        logger.info('Training DONE')

        # SAVE MODEL
        save_model(model)

    # CHECK TRAINING
    logger.info('Plot losses')
    plot_REG_losses(model)
    plot_REG_log_mse(model)
    result_row['loss'] = model.losses[-1]
    result_row['mse_loss'] = model.mse_losses[-1]

    # MEASUREMENT
    for mu in pb_config.TRUE_MU_RANGE:
        pb_config.TRUE_MU = mu
        logger.info('Generate testing data')
        test_generator.reset()
        X_test, y_test, w_test = test_generator.generate(
            # pb_config.TRUE_R,
            # pb_config.TRUE_LAMBDA,
            pb_config.CALIBRATED_R,
            pb_config.CALIBRATED_LAMBDA,
            pb_config.TRUE_MU,
            n_samples=pb_config.N_TESTING_SAMPLES)

        p_test = np.array(
            (pb_config.CALIBRATED_R, pb_config.CALIBRATED_LAMBDA))

        pred, sigma = model.predict(X_test, w_test, p_test)
        name = pb_config.INTEREST_PARAM_NAME
        result_row[name] = pred
        result_row[name + _ERROR] = sigma
        result_row[name + _TRUTH] = pb_config.TRUE_MU
        logger.info('{} =vs= {} +/- {}'.format(pb_config.TRUE_MU, pred, sigma))
        result_table.append(result_row.copy())
    result_table = pd.DataFrame(result_table)

    logger.info('Plot params')
    name = pb_config.INTEREST_PARAM_NAME
    plot_params(name,
                result_table,
                title=model.full_name,
                directory=model.results_path)

    logger.info('DONE')
    return result_table
Example #14
0
def explore():
    print("Hello master !")
    set_plot_config()
    config = S3D2Config()
    N_SAMPLES = 10_000
    R_MIN   = -0.3
    R_MAX   = 0.3 
    LAM_MIN = 2
    LAM_MAX = 4
    MU_MIN  = 0.0
    MU_MAX  = 1.0 

    generator = S3D2(SEED)
    X, label = generator.sample_event(config.TRUE.r, config.TRUE.lam, config.TRUE.mu, size=N_SAMPLES)
    n_sig = np.sum(label==1)
    n_bkg = np.sum(label==0)
    print(f"nb of signal      = {n_sig}")
    print(f"nb of backgrounds = {n_bkg}")


    df = pd.DataFrame(X, columns=["x1","x2","x3"])
    df['label'] = label
    g = sns.PairGrid(df, vars=["x1","x2","x3"], hue='label')
    g = g.map_upper(sns.scatterplot)
    g = g.map_diag(sns.kdeplot)
    g = g.map_lower(sns.kdeplot, n_levels=6)
    g = g.add_legend()
    # g = g.map_offdiag(sns.kdeplot, n_levels=6)
    g.savefig(os.path.join(DIRECTORY, 'pairgrid.png'))
    plt.clf()


    nll = generator.nll(X, config.TRUE.r, config.TRUE.lam, config.TRUE.mu)
    print(f"NLL = {nll}")

    R_RANGE = np.linspace(R_MIN, R_MAX, 30)
    nll = [generator.nll(X, r, config.TRUE.lam, config.TRUE.mu) for r in R_RANGE]
    min_nll = R_RANGE[np.argmin(nll)]
    plt.plot(R_RANGE, nll, label="nll(r)")
    plt.axvline(config.TRUE.r, c="orange", label="true r")
    plt.axvline(min_nll, c="red", label="min nll")
    plt.xlabel("r")
    plt.ylabel("NLL")
    plt.title("NLL according to r param")
    plt.legend()
    plt.savefig(os.path.join(DIRECTORY, 'NLL_r.png'))
    plt.clf()


    LAM_RANGE = np.linspace(LAM_MIN, LAM_MAX, 30)
    nll = [generator.nll(X, config.TRUE.r, lam, config.TRUE.mu) for lam in LAM_RANGE]
    min_nll = LAM_RANGE[np.argmin(nll)]
    plt.plot(LAM_RANGE, nll, label="nll(lam)")
    plt.axvline(config.TRUE.lam, c="orange", label="true lam")
    plt.axvline(min_nll, c="red", label="min nll")
    plt.xlabel("$\lambda$")
    plt.ylabel("NLL")
    plt.title("NLL according to $\lambda$ param")
    plt.legend()
    plt.savefig(os.path.join(DIRECTORY, 'NLL_lambda.png'))
    plt.clf()

    MU_RANGE = np.linspace(MU_MIN, MU_MAX, 30)
    nll = [generator.nll(X, config.TRUE.r, config.TRUE.lam, mu) for mu in MU_RANGE]
    min_nll = MU_RANGE[np.argmin(nll)]
    plt.plot(MU_RANGE, nll, label="nll(mu)")
    plt.axvline(config.TRUE.mu, c="orange", label="true mu")
    plt.axvline(min_nll, c="red", label="min nll")
    plt.xlabel("$\mu$")
    plt.ylabel("NLL")
    plt.title("NLL according to $\mu$ param")
    plt.legend()
    plt.savefig(os.path.join(DIRECTORY, 'NLL_mu.png'))
    plt.clf()
Example #15
0
def main():
    print("Hello world !")
    set_plot_config()
    config = S3D2Config()
    DATA_N_SAMPLES = 8_000
    R_MIN   = -0.3
    R_MAX   = 0.3 
    LAM_MIN = 2
    LAM_MAX = 4
    MU_MIN  = 0.1
    MU_MAX  = 0.3 

    R_N_SAMPLES = 101
    LAM_N_SAMPLES = 102
    MU_N_SAMPLES = 103

    prior_r   = stats.uniform(loc=R_MIN, scale=R_MAX-R_MIN)
    prior_lam = stats.uniform(loc=LAM_MIN, scale=LAM_MAX-LAM_MIN)
    prior_mu  = stats.uniform(loc=MU_MIN, scale=MU_MAX-MU_MIN)

    r_grid   = np.linspace(R_MIN, R_MAX, R_N_SAMPLES)
    lam_grid = np.linspace(LAM_MIN, LAM_MAX, LAM_N_SAMPLES)
    mu_grid  = np.linspace(MU_MIN, MU_MAX, MU_N_SAMPLES)

    data_generator = S3D2(SEED)
    data, label = data_generator.sample_event(config.TRUE.r, config.TRUE.lam, config.TRUE.mu, size=DATA_N_SAMPLES)
    n_sig = np.sum(label==1)
    n_bkg = np.sum(label==0)
    print(f"nb of signal      = {n_sig}")
    print(f"nb of backgrounds = {n_bkg}")


    shape = (R_N_SAMPLES, LAM_N_SAMPLES, MU_N_SAMPLES)
    n_elements = np.prod(shape)
    print(f"3D grid has {n_elements} elements")
    log_likelihood = np.zeros(shape)
    log_prior_proba = np.zeros(shape)
    for i, j, k in tqdm(itertools.product(range(R_N_SAMPLES), range(LAM_N_SAMPLES), range(MU_N_SAMPLES)), total=n_elements):
        log_likelihood[i, j, k] = data_generator.log_proba_density(data, r_grid[i], lam_grid[j], mu_grid[k]).sum()
        log_prior_proba[i, j, k] = prior_r.logpdf(r_grid[i]) \
                                    + prior_lam.logpdf(lam_grid[j]) \
                                    + prior_mu.logpdf(mu_grid[k])

    element_min = (log_likelihood + log_prior_proba).min()
    print("min element = ", element_min)
    posterior_r_lam_mu = softmax(log_likelihood + log_prior_proba)
    n_zeros = (posterior_r_lam_mu == 0).sum()
    n_elements = np.prod(posterior_r_lam_mu.shape)
    print()
    print(f"number of zeros in posterior = {n_zeros}/{n_elements} ({n_zeros/n_elements*100:2.3f} %)")

    marginal_r = posterior_r_lam_mu.sum(axis=2).sum(axis=1)
    marginal_lam = posterior_r_lam_mu.sum(axis=2).sum(axis=0)
    marginal_mu = posterior_r_lam_mu.sum(axis=1).sum(axis=0)
    marginal_r_lam = posterior_r_lam_mu.sum(axis=2)
    assert marginal_r.shape == r_grid.shape, "sum along the wrong axis for marginal r"
    assert marginal_lam.shape == lam_grid.shape, "sum along the wrong axis for marginal lam"
    assert marginal_mu.shape == mu_grid.shape, "sum along the wrong axis for marginal mu"
    assert marginal_r_lam.shape == (R_N_SAMPLES, LAM_N_SAMPLES), "sum along the wrong axis for marginal (r, lam)"

    n_zeros = (marginal_r == 0).sum()
    n_elements = np.prod(marginal_r.shape)
    print(f"number of zeros in marginal r = {n_zeros}/{n_elements} ({n_zeros/n_elements*100:2.3f} %)")
    n_zeros = (marginal_lam == 0).sum()
    n_elements = np.prod(marginal_lam.shape)
    print(f"number of zeros in marginal lam = {n_zeros}/{n_elements} ({n_zeros/n_elements*100:2.3f} %)")
    n_zeros = (marginal_mu == 0).sum()
    n_elements = np.prod(marginal_mu.shape)
    print(f"number of zeros in marginal mu = {n_zeros}/{n_elements} ({n_zeros/n_elements*100:2.3f} %)")
    n_zeros = (marginal_r_lam == 0).sum()
    n_elements = np.prod(marginal_r_lam.shape)
    print(f"number of zeros in marginal r_lam = {n_zeros}/{n_elements} ({n_zeros/n_elements*100:2.3f} %)")

    posterior_mu = np.divide(posterior_r_lam_mu, marginal_r_lam.reshape(R_N_SAMPLES, LAM_N_SAMPLES, 1),
        out=np.zeros_like(posterior_r_lam_mu), where=(posterior_r_lam_mu!=0))

    print("probability densities should sum to one")
    # TODO : posterior_mu sum to SOME_N_SAMPLES. is it ok ?
    # TODO : with new division policy posterior_mu/ALPHA_N sums to 1-zero_ration in marginal_y
    #        ... It does not look good
    print(np.sum(posterior_mu)/n_elements, np.sum(posterior_r_lam_mu), np.sum(marginal_r), np.sum(marginal_lam))
    print(np.sum(marginal_r_lam))

    print()
    print("True mu value    =", config.TRUE.mu)
    sig_ratio = n_sig/DATA_N_SAMPLES
    print("Sig ratio       =", sig_ratio)
    expect_mu = expectancy(mu_grid, marginal_mu)
    print("E[mu|x]          =", expect_mu)
    full_var = variance(mu_grid, marginal_mu)
    print("Var[mu|x]        =", full_var)
    std_mu = np.sqrt(full_var)
    print("sqrt(Var[mu|x])  =", std_mu)
    print("argmax_mu p(mu|x) =", mu_grid[np.argmax(marginal_mu)])

    i_max, j_max, k_max = np.unravel_index(np.argmax(log_likelihood), log_likelihood.shape)
    assert np.max(log_likelihood) == log_likelihood[i_max, j_max, k_max], "max and argmax should point to the same value"
    print("argmax_r_lam_mu logp(x|r, lam, mu) =", r_grid[i_max], lam_grid[j_max], mu_grid[k_max])
    stat_err = stat_uncertainty(mu_grid, posterior_mu, marginal_r_lam)
    print("stat_uncertainty=", stat_err)
    stat_err = stat_uncertainty2(mu_grid, posterior_mu, marginal_r_lam)
    print("stat_uncertainty=", stat_err)
    stat_err = stat_uncertainty3(mu_grid, posterior_mu, marginal_r_lam)
    print("stat_uncertainty=", stat_err)
    print("syst_uncertainty=", full_var - stat_err)
    syst_err = syst_uncertainty(mu_grid, posterior_mu, marginal_r_lam, marginal_mu)
    print("syst_uncertainty=", syst_err)
    syst_err = syst_uncertainty2(mu_grid, posterior_mu, marginal_r_lam)
    print("syst_uncertainty=", syst_err)
    syst_err = syst_uncertainty3(mu_grid, posterior_mu, marginal_r_lam)
    print("syst_uncertainty=", syst_err)

    print()
    print("check marginals")
    print("mu  ", marginal_mu.min(), marginal_mu.max())
    print("lam ", marginal_lam.min(), marginal_lam.max())
    print("r   ", marginal_r.min(), marginal_r.max())
    print("check posterior")
    print("p(y|x)  ", posterior_mu.min(), posterior_mu.max())
    print("p(y|x,a)", posterior_r_lam_mu.min(), posterior_r_lam_mu.max())


    # return None

    plt.axvline(config.TRUE.mu, c="orange", label="true mu")
    plt.axvline(config.TRUE.mu-std_mu, c="orange", label="true mu - std(mu)")
    plt.axvline(config.TRUE.mu+std_mu, c="orange", label="true mu + std(mu)")
    plt.axvline(sig_ratio, c="red", label="signal ratio")
    plt.axvline(expect_mu, c="green", label="E[mu|x]")
    plt.plot(mu_grid, marginal_mu, label="posterior")
    plt.xlabel("mu")
    plt.ylabel("proba density")
    plt.title("posterior marginal proba of mu vs mu values")
    plt.legend()
    plt.savefig(os.path.join(DIRECTORY, 'marginal_mu.png'))
    plt.clf()


    plt.plot(lam_grid, marginal_lam, label="posterior")
    plt.axvline(config.TRUE.lam, c="orange", label="true lambda")
    plt.xlabel("lambda")
    plt.ylabel("proba density")
    plt.title("posterior marginal proba of lam vs lam values")
    plt.legend()
    plt.savefig(os.path.join(DIRECTORY, 'marginal_lam.png'))
    plt.clf()

    plt.plot(r_grid, marginal_r, label="posterior")
    plt.axvline(config.TRUE.r, c="orange", label="true r")
    plt.xlabel("r")
    plt.ylabel("proba density")
    plt.title("posterior marginal proba of r vs r values")
    plt.legend()
    plt.savefig(os.path.join(DIRECTORY, 'marginal_r.png'))
    plt.clf()