Beispiel #1
0
def get_FOM_vs_mass(mod, dataset, FOM_callable):
    """Evaluate a figure of merit for one model at every resonant signal mass.

    For each mass in the module-level ``resonant_signal_masses``, the signal
    rows whose last column equals that mass and the complete background
    sample (with its last column overwritten by that mass) are run through
    the model. Both sets of predictions are binned with
    ``plotTools.binDataset`` and the two binned results are passed to
    ``FOM_callable``.

    The background array held by ``dataset`` is never modified: the mass
    column is rewritten on a private copy.

    Besides its arguments, this function reads the module-level names
    ``resonant_signal_masses``, ``expected_limits``, ``LUMI``, ``n_bins``
    and ``plotTools``.

    Args:
        mod: Mapping describing the model. ``mod['file']`` is printed,
            ``mod['model']`` must provide ``predict``, and the optional
            ``mod['no_mass_column']`` flag (treated as false when absent)
            says the model input must not contain the last column.
        dataset: Object exposing ``signal_dataset``, ``signal_weights``,
            ``background_dataset`` and ``background_weights``.
        FOM_callable: Called as ``FOM_callable(sig_binned, bkg_binned)``.

    Returns:
        list: One ``FOM_callable`` result per mass, in iteration order.
    """
    print("Doing model {}".format(mod['file']))

    drop_mass_column = mod.get('no_mass_column', False)

    sig = dataset.signal_dataset
    sig_weights = dataset.signal_weights
    bkg_weights = dataset.background_weights

    # Private copy: the mass column is rewritten for every mass below, and
    # doing that on dataset.background_dataset itself would silently corrupt
    # the caller's data. One copy is enough since each iteration overwrites
    # the whole column.
    bkg_m = dataset.background_dataset.copy()

    FOMs = []

    for m in resonant_signal_masses:
        print("Doing mass {}...".format(m))

        # Only keep events with that mass for the signal
        sig_mask = sig[:, -1] == m
        sig_m = sig[sig_mask]
        if drop_mass_column:
            sig_m = sig_m[:, :-1]
        sig_pred_m = mod['model'].predict(sig_m, batch_size=20000)[:, 0]
        sig_weights_m = expected_limits[m] * sig_weights[sig_mask]

        # For the background, set the mass input to that of the signal.
        # We need ALL the background events to ensure proper normalisation
        # to cross section.
        bkg_m[:, -1] = m
        bkg_input = bkg_m[:, :-1] if drop_mass_column else bkg_m
        bkg_pred_m = mod['model'].predict(bkg_input, batch_size=20000)[:, 0]

        # Bin the predictions with LUMI-scaled weights.
        # NOTE(review): an earlier comment here mentioned unweighted
        # histograms for the MC statistical uncertainty; presumably
        # binDataset returns those alongside the weighted ones -- confirm.
        sig_binned = plotTools.binDataset(sig_pred_m,
                                          LUMI * sig_weights_m,
                                          bins=n_bins,
                                          range=[0, 1])
        bkg_binned = plotTools.binDataset(bkg_pred_m,
                                          LUMI * bkg_weights,
                                          bins=n_bins,
                                          range=[0, 1])

        FOMs.append(FOM_callable(sig_binned, bkg_binned))

    return FOMs
def get_test_limit(dataset, model, idx=None):
    """Return the median expected limit for ``model`` on a chosen sample.

    Args:
        dataset: Object exposing the ``test_*`` signal/background arrays and
            weights, plus ``training_dataset``, ``training_weights`` and
            ``training_targets``.
        model: Object with a ``predict(data, batch_size=...)`` method; column
            0 of its output is used as the score.
        idx: ``None`` to use the test sample. Otherwise an index applied to
            the training arrays; rows with target 1 are treated as signal and
            rows with target 0 as background.

    Returns:
        Whatever ``get_median_expected_limit`` returns for the two binned
        score distributions (called with ``guess=100``).
    """
    LUMI = 35900

    def _bin_scores(scores, event_weights):
        # 50 bins over [0, 1], weights scaled by LUMI.
        return plotTools.binDataset(scores,
                                    LUMI * event_weights,
                                    bins=50,
                                    range=[0, 1])

    if idx is not None:
        data = dataset.training_dataset[idx]
        weights = dataset.training_weights[idx]
        targets = dataset.training_targets[idx]

        is_signal = (targets == 1)
        is_background = (targets == 0)

        sig, bkg = data[is_signal], data[is_background]

        sig_w = weights[is_signal]
        # The training signals are not correctly weighted...
        # Normalise the weights to have a "reasonable" total signal yield,
        # so that the limit-finder converges.
        sig_w /= np.sum(sig_w) * LUMI
        bkg_w = weights[is_background]
    else:
        sig = dataset.test_signal_dataset
        bkg = dataset.test_background_dataset
        sig_w = dataset.test_signal_weights
        bkg_w = dataset.test_background_weights

    sig_pred = model.predict(sig, batch_size=20000)[:, 0]
    bkg_pred = model.predict(bkg, batch_size=20000)[:, 0]

    sig_binned = _bin_scores(sig_pred, sig_w)
    bkg_binned = _bin_scores(bkg_pred, bkg_w)

    return get_median_expected_limit(sig_binned, bkg_binned, guess=100)
    # NOTE(review): these statements follow the `return` above at the same
    # indentation, so as the file stands they can never execute. They read
    # like the body of a per-`parameters` loop taken from another function
    # whose header is not in view: the `testing_*` / `training_*` arrays,
    # `roc_ax`, `style`, `parameters`, `auc` and `xlabels` are all defined
    # elsewhere.

    # Merge the testing and training samples (testing first) so the ROC curve
    # below is built from both.
    signal_predictions = np.concatenate(
        (testing_signal_predictions, training_signal_predictions), axis=0)
    background_predictions = np.concatenate(
        (testing_background_predictions, training_background_predictions),
        axis=0)

    signal_weights = np.concatenate(
        (testing_signal_weights, training_signal_weights), axis=0)
    background_weights = np.concatenate(
        (testing_background_weights, training_background_weights), axis=0)

    # signal_predictions = np.concatenate([training_signal_predictions], axis=0)
    # background_predictions = np.concatenate([training_background_predictions], axis=0)

    # Bin the signal in 40 bins over [0, 1], then pass the third value
    # returned for the signal (`binning`) as `bins` for the background.
    n_signal, _, binning = plotTools.binDataset(signal_predictions,
                                                signal_weights,
                                                bins=40,
                                                range=[0, 1])
    n_background, _, _ = plotTools.binDataset(background_predictions,
                                              background_weights,
                                              bins=binning)

    # Draw this parameter point's ROC curve with its configured colour/legend.
    x, y = plotTools.get_roc(n_signal, n_background)
    roc_ax.plot(x,
                y,
                '-',
                color=style[parameters]['color'],
                lw=2,
                label=style[parameters]['legend'])

    # NOTE(review): assuming `metrics` is sklearn.metrics, the `reorder`
    # keyword was deprecated in scikit-learn 0.20 and removed in 0.22 --
    # confirm the pinned version before upgrading.
    auc.append(metrics.auc(x, y, reorder=True))
    xlabels.append(str(parameters))
Beispiel #4
0
# Load the model stored at `output_model_filename` and export it for lwtnn.
model = keras.models.load_model(output_model_filename)
export_for_lwtnn(model, output_model_filename)

# Diagnostic plots for the loaded model.
draw_non_resonant_training_plots(
    model,
    dataset,
    output_folder,
    split_by_parameters=add_parameters_columns,
)
draw_nn_vs_independent(model, dataset, np.linspace(0, 400, 20), output_folder)

# Compute limit on test set: score both test samples (column 0 of the model
# output), bin the scores in 50 bins over [0, 1] with LUMI-scaled weights,
# and feed the binned results to the limit finder.
LUMI = 35900

sig_pred = model.predict(dataset.test_signal_dataset, batch_size=20000)[:, 0]
bkg_pred = model.predict(
    dataset.test_background_dataset, batch_size=20000)[:, 0]

sig_binned = plotTools.binDataset(
    sig_pred, LUMI * dataset.test_signal_weights, bins=50, range=[0, 1])
bkg_binned = plotTools.binDataset(
    bkg_pred, LUMI * dataset.test_background_weights, bins=50, range=[0, 1])

limit = get_median_expected_limit(sig_binned, bkg_binned, guess=10)

print("Expected limit from test set: {} fb".format(limit))

print("All done. Training time: %s" % str(training_time))
Beispiel #5
0
def draw_resonant_training_plots(model, dataset, output_folder, split_by_mass=False):
    """Draw the input, correlation, NN-output and ROC plots for a training.

    Args:
        model: Passed to the dataset's prediction helpers.
        dataset: Object providing the drawing, weight, prediction and dataset
            accessors used below, plus ``resonant_masses``.
        output_folder: Directory receiving the plots. An ``inputs``
            sub-directory is created for the input plots.
        split_by_mass: When true, the NN-output and ROC plots are repeated
            for every mass in ``dataset.resonant_masses`` inside a
            ``splitted_by_mass`` sub-directory, keeping only the rows whose
            last dataset column equals that mass.
    """

    def _plot_output_and_roc(arrays, folder, nn_name, roc_name):
        # `arrays` holds, in order: training/testing background predictions,
        # training/testing signal predictions, then the matching weights.
        (bkg_train, bkg_test, sig_train, sig_test,
         bkg_train_w, bkg_test_w, sig_train_w, sig_test_w) = arrays

        # NN output
        plotTools.drawNNOutput(bkg_train, bkg_test,
                               sig_train, sig_test,
                               bkg_train_w, bkg_test_w,
                               sig_train_w, sig_test_w,
                               output_dir=folder, output_name=nn_name,
                               form=".pdf", bins=50)

        # ROC curve, built from the training predictions only. The third
        # value returned for the background is reused as `bins` for the signal.
        bkg_hist, _, edges = plotTools.binDataset(
            bkg_train, bkg_train_w, bins=50, range=[0, 1])
        sig_hist, _, _ = plotTools.binDataset(sig_train, sig_train_w, bins=edges)
        plotTools.draw_roc(sig_hist, bkg_hist, output_dir=folder,
                           output_name=roc_name, form=".pdf")

    # Draw inputs
    inputs_dir = os.path.join(output_folder, 'inputs')
    if not os.path.exists(inputs_dir):
        os.makedirs(inputs_dir)

    dataset.draw_inputs(inputs_dir)
    dataset.draw_correlations(output_folder)

    # The results of these four calls are not used below; the calls are kept
    # as in the original code.
    _train_data, _train_targets = dataset.get_training_combined_dataset_and_targets()
    _train_weights = dataset.get_training_combined_weights()
    _test_data, _test_targets = dataset.get_testing_combined_dataset_and_targets()
    _test_weights = dataset.get_testing_combined_weights()

    print("Evaluating model performances...")

    sig_train_w, bkg_train_w = dataset.get_training_weights()
    sig_test_w, bkg_test_w = dataset.get_testing_weights()

    sig_train_pred, sig_test_pred = dataset.get_training_testing_signal_predictions(model)
    bkg_train_pred, bkg_test_pred = dataset.get_training_testing_background_predictions(model)

    print("Done.")

    print("Plotting time...")

    # Inclusive plots, all masses together.
    _plot_output_and_roc(
        (bkg_train_pred, bkg_test_pred, sig_train_pred, sig_test_pred,
         bkg_train_w, bkg_test_w, sig_train_w, sig_test_w),
        output_folder, "nn_output", "roc_curve")

    if split_by_mass:
        per_mass_dir = os.path.join(output_folder, 'splitted_by_mass')
        if not os.path.exists(per_mass_dir):
            os.makedirs(per_mass_dir)

        sig_train_data, bkg_train_data = dataset.get_training_datasets()
        sig_test_data, bkg_test_data = dataset.get_testing_datasets()

        for m in dataset.resonant_masses:
            print("  Plotting NN output and ROC curve for M=%d" % m)

            # Row selections: last column equal to the current mass.
            keep_sig_train = sig_train_data[:, -1] == m
            keep_bkg_train = bkg_train_data[:, -1] == m
            keep_sig_test = sig_test_data[:, -1] == m
            keep_bkg_test = bkg_test_data[:, -1] == m

            selected = (
                bkg_train_pred[keep_bkg_train],
                bkg_test_pred[keep_bkg_test],
                sig_train_pred[keep_sig_train],
                sig_test_pred[keep_sig_test],
                bkg_train_w[keep_bkg_train],
                bkg_test_w[keep_bkg_test],
                sig_train_w[keep_sig_train],
                sig_test_w[keep_sig_test],
            )

            _plot_output_and_roc(selected, per_mass_dir,
                                 "nn_output_fixed_M%d" % m,
                                 "roc_curve_fixed_M_%d" % m)
    print("Done")
Beispiel #6
0
def draw_non_resonant_training_plots(model, dataset, output_folder, split_by_parameters=False):
    """Draw the input, NN-output and ROC plots for a non-resonant training.

    Args:
        model: Passed to the dataset's prediction helpers.
        dataset: Object providing the drawing, weight, prediction and dataset
            accessors used below, plus ``get_nonresonant_parameters_list``
            and ``positive_to_user_parameters``.
        output_folder: Directory receiving the plots. An ``inputs``
            sub-directory is created for the input plots.
        split_by_parameters: When true, the NN-output and ROC plots are
            repeated for every entry of
            ``dataset.get_nonresonant_parameters_list()`` inside a
            ``splitted_by_parameters`` sub-directory, keeping only the rows
            whose last two dataset columns equal that entry.
    """

    # plot(model, to_file=os.path.join(output_folder, "model.pdf"))

    def _matches(data, point):
        # Last column must equal point[1], second-to-last must equal point[0].
        return (data[:, -1] == point[1]) & (data[:, -2] == point[0])

    def _plot_output_and_roc(arrays, folder, nn_name, roc_name):
        # `arrays` holds, in order: training/testing background predictions,
        # training/testing signal predictions, then the matching weights.
        (bkg_train, bkg_test, sig_train, sig_test,
         bkg_train_w, bkg_test_w, sig_train_w, sig_test_w) = arrays

        # NN output
        plotTools.drawNNOutput(bkg_train, bkg_test,
                               sig_train, sig_test,
                               bkg_train_w, bkg_test_w,
                               sig_train_w, sig_test_w,
                               output_dir=folder, output_name=nn_name,
                               form=".pdf", bins=50)

        # ROC curve, built from the training predictions only. The third
        # value returned for the background is reused as `bins` for the signal.
        bkg_hist, _, edges = plotTools.binDataset(
            bkg_train, bkg_train_w, bins=50, range=[0, 1])
        sig_hist, _, _ = plotTools.binDataset(sig_train, sig_train_w, bins=edges)
        plotTools.draw_roc(sig_hist, bkg_hist, output_dir=folder,
                           output_name=roc_name, form=".pdf")

    # Draw inputs
    inputs_dir = os.path.join(output_folder, 'inputs')
    if not os.path.exists(inputs_dir):
        os.makedirs(inputs_dir)

    dataset.draw_inputs(inputs_dir)

    # The results of these four calls are not used below; the calls are kept
    # as in the original code.
    _train_data, _train_targets = dataset.get_training_combined_dataset_and_targets()
    _train_weights = dataset.get_training_combined_weights()
    _test_data, _test_targets = dataset.get_testing_combined_dataset_and_targets()
    _test_weights = dataset.get_testing_combined_weights()

    print("Evaluating model performances...")

    sig_train_w, bkg_train_w = dataset.get_training_weights()
    sig_test_w, bkg_test_w = dataset.get_testing_weights()

    sig_train_pred, sig_test_pred = dataset.get_training_testing_signal_predictions(model)
    bkg_train_pred, bkg_test_pred = dataset.get_training_testing_background_predictions(model)

    print("Done.")

    print("Plotting time...")

    # Inclusive plots, all parameter points together.
    _plot_output_and_roc(
        (bkg_train_pred, bkg_test_pred, sig_train_pred, sig_test_pred,
         bkg_train_w, bkg_test_w, sig_train_w, sig_test_w),
        output_folder, "nn_output", "roc_curve")

    if split_by_parameters:
        per_point_dir = os.path.join(output_folder, 'splitted_by_parameters')
        if not os.path.exists(per_point_dir):
            os.makedirs(per_point_dir)

        sig_train_data, bkg_train_data = dataset.get_training_datasets()
        sig_test_data, bkg_test_data = dataset.get_testing_datasets()

        for parameters in dataset.get_nonresonant_parameters_list():
            user_parameters = [
                '{:.2f}'.format(value)
                for value in dataset.positive_to_user_parameters(parameters)
            ]

            print("  Plotting NN output and ROC curve for %s" % str(user_parameters))

            keep_sig_train = _matches(sig_train_data, parameters)
            keep_bkg_train = _matches(bkg_train_data, parameters)
            keep_sig_test = _matches(sig_test_data, parameters)
            keep_bkg_test = _matches(bkg_test_data, parameters)

            selected = (
                bkg_train_pred[keep_bkg_train],
                bkg_test_pred[keep_bkg_test],
                sig_train_pred[keep_sig_train],
                sig_test_pred[keep_sig_test],
                bkg_train_w[keep_bkg_train],
                bkg_test_w[keep_bkg_test],
                sig_train_w[keep_sig_train],
                sig_test_w[keep_sig_test],
            )

            suffix = format_nonresonant_parameters(user_parameters)
            _plot_output_and_roc(selected, per_point_dir,
                                 "nn_output_fixed_parameters_%s" % (suffix),
                                 "roc_curve_fixed_parameters_%s" % (suffix))
    print("Done")
    all_background_predictions = dataset.get_background_predictions(all_model)

    ignore_last_columns = 0
    if 'no_mass_column' in models['dedicated'][m] and models['dedicated'][m][
            'no_mass_column']:
        ignore_last_columns = 1
    dedicated_signal_predictions = dataset.get_signal_predictions(
        dedicated_model, ignore_last_columns=ignore_last_columns)
    dedicated_background_predictions = dataset.get_background_predictions(
        dedicated_model, ignore_last_columns=ignore_last_columns)

    print("Done.")

    all_n_signal, _, binning = plotTools.binDataset(
        all_signal_predictions,
        dataset.get_signal_weights(),
        bins=50,
        range=[0, 1])

    all_n_background, _, _ = plotTools.binDataset(
        all_background_predictions,
        dataset.get_background_weights(),
        bins=binning)

    dedicated_n_signal, _, _ = plotTools.binDataset(
        dedicated_signal_predictions,
        dataset.get_signal_weights(),
        bins=binning)
    dedicated_n_background, _, _ = plotTools.binDataset(
        dedicated_background_predictions,
        dataset.get_background_weights(),
Beispiel #8
0
    # NOTE(review): the function or block enclosing these statements is not
    # in view; `model`, `dataset`, `output_folder` and
    # `add_parameters_columns` are defined elsewhere.
    draw_non_resonant_training_plots(
        model,
        dataset,
        output_folder,
        split_by_parameters=add_parameters_columns)

    # Do ROC of SR vs. BR ONLY for background.
    # The first column of the extra dataset, cast to bool, flags the rows
    # that belong to the "SR" selection; everything else is "BR".
    bkg_SR_cut = dataset.test_background_extra_dataset[:, 0].astype(bool)
    #bkg_SR_cut = (dataset.test_background_extra_dataset >= 75) & (dataset.test_background_extra_dataset < 140)
    #bkg_SR_cut = bkg_SR_cut[:,0]
    scores_bkg = model.predict(dataset.test_background_dataset,
                               batch_size=20000)[:, 0]

    # Bin the SR scores in 1000 bins over [0, 1], then pass the third value
    # returned for the SR sample as `bins` for the BR sample.
    score_bkg_SR, _, bins = plotTools.binDataset(
        scores_bkg[bkg_SR_cut],
        dataset.test_background_weights[bkg_SR_cut],
        bins=1000,
        range=[0, 1])
    score_bkg_BR, _, _ = plotTools.binDataset(
        scores_bkg[~bkg_SR_cut],
        dataset.test_background_weights[~bkg_SR_cut],
        bins=bins)
    # NOTE(review): other draw_roc calls in this file pass an extension-less
    # output_name together with form=".pdf"; here the extension is part of
    # the name -- confirm the resulting file name is the intended one.
    plotTools.draw_roc(score_bkg_SR,
                       score_bkg_BR,
                       output_dir=output_folder,
                       output_name="roc_SR_vs_BR_bkg.pdf")

    # K-S test for background SR vs. BR (unweighted scores).
    KS = stats.ks_2samp(scores_bkg[bkg_SR_cut], scores_bkg[~bkg_SR_cut])
    # Single-argument call form: prints the same text under the Python 2
    # print statement and the Python 3 print function, and matches every
    # other print in this file.
    print("K-S test for background, SR vs. BR: TS = {}, p-val = {}".format(*KS))