def get_FOM_vs_mass(mod, dataset, FOM_callable):
    """Compute a figure of merit (FOM) for one model at every resonant mass.

    For each mass in the module-level ``resonant_signal_masses``, the signal
    events carrying that mass and ALL background events are scored by the
    model, binned with ``plotTools.binDataset`` (``n_bins`` bins over
    [0, 1]) and handed to ``FOM_callable``.

    Args:
        mod: model description. Reads ``mod['file']`` (only printed),
            ``mod['model']`` (object with a ``predict(x, batch_size=...)``
            method) and the flag ``mod['no_mass_column']``. When the flag is
            true, the last input column (the mass) is stripped before
            prediction. A missing flag is treated as false.
        dataset: object exposing ``signal_dataset``, ``signal_weights``,
            ``background_dataset`` and ``background_weights``. The last
            column of both datasets is used as the mass. The dataset is
            never modified by this function.
        FOM_callable: called as ``FOM_callable(sig_binned, bkg_binned)`` with
            the two ``plotTools.binDataset`` results.

    Returns:
        A list with one FOM per mass, in ``resonant_signal_masses`` order.
    """
    print("Doing model {}".format(mod['file']))

    sig = dataset.signal_dataset
    sig_weights = dataset.signal_weights
    bkg = dataset.background_dataset
    bkg_weights = dataset.background_weights

    drop_mass_column = mod.get('no_mass_column', False)
    if drop_mass_column:
        # The model never sees the mass: a slice of the original array is
        # enough, and nothing is ever written to it.
        bkg_inputs = bkg[:, :-1]
    else:
        # The mass column is overwritten for every mass below, so work on a
        # private copy. Writing into `bkg` directly would silently clobber
        # dataset.background_dataset for the caller.
        bkg_inputs = bkg.copy()

    FOMs = []
    for m in resonant_signal_masses:
        print("Doing mass {}...".format(m))

        # Only keep events with that mass for the signal
        sig_mask = sig[:, -1] == m
        sig_m = sig[sig_mask]
        if drop_mass_column:
            sig_m = sig_m[:, :-1]
        sig_pred_m = mod['model'].predict(sig_m, batch_size=20000)[:, 0]
        # Scale the signal weights by the module-level expected_limits[m]
        sig_weights_m = expected_limits[m] * sig_weights[sig_mask]

        # For the background, set the mass input to that of the signal.
        # We need ALL the background events to ensure proper normalisation
        # to cross section.
        if not drop_mass_column:
            bkg_inputs[:, -1] = m
        bkg_pred_m = mod['model'].predict(bkg_inputs, batch_size=20000)[:, 0]

        # Weighted histograms of the NN output, scaled by LUMI. The complete
        # binDataset result is forwarded to the FOM.
        # NOTE(review): the original comment mentioned unweighted histograms
        # for the MC statistical uncertainty; presumably that is what
        # binDataset's second return value carries -- confirm in plotTools.
        sig_binned = plotTools.binDataset(sig_pred_m, LUMI * sig_weights_m, bins=n_bins, range=[0, 1])
        bkg_binned = plotTools.binDataset(bkg_pred_m, LUMI * bkg_weights, bins=n_bins, range=[0, 1])

        FOMs.append(FOM_callable(sig_binned, bkg_binned))

    return FOMs
def get_test_limit(dataset, model, idx=None):
    """Return the median expected limit obtained from the model's NN output.

    Args:
        dataset: object exposing either the ``test_*`` signal/background
            datasets and weights (used when ``idx`` is None) or
            ``training_dataset`` / ``training_weights`` /
            ``training_targets`` (used otherwise).
        model: object with a ``predict(x, batch_size=...)`` method whose
            first output column is the score.
        idx: optional index into the training arrays. When given, the limit
            is computed on that training subset instead of the test set,
            with target 1 meaning signal and target 0 meaning background.

    Returns:
        Whatever ``get_median_expected_limit`` returns for the two binned
        (50 bins over [0, 1]) score distributions, with ``guess=100``.
    """
    LUMI = 35900

    if idx is not None:
        events = dataset.training_dataset[idx]
        event_weights = dataset.training_weights[idx]
        labels = dataset.training_targets[idx]

        is_signal = (labels == 1)
        is_background = (labels == 0)

        sig, bkg = events[is_signal], events[is_background]

        sig_w = event_weights[is_signal]
        # The training signals are not correctly weighted...
        # Rescale in place so that the total signal yield after the LUMI
        # scaling below is 1, a "reasonable" value for which the
        # limit-finder converges.
        sig_w /= np.sum(sig_w) * LUMI
        bkg_w = event_weights[is_background]
    else:
        sig = dataset.test_signal_dataset
        bkg = dataset.test_background_dataset
        sig_w = dataset.test_signal_weights
        bkg_w = dataset.test_background_weights

    # Score both samples first, then histogram them (same order as before).
    sig_pred, bkg_pred = [
        model.predict(sample, batch_size=20000)[:, 0] for sample in (sig, bkg)
    ]
    sig_binned, bkg_binned = [
        plotTools.binDataset(scores, LUMI * w, bins=50, range=[0, 1])
        for scores, w in ((sig_pred, sig_w), (bkg_pred, bkg_w))
    ]

    return get_median_expected_limit(sig_binned, bkg_binned, guess=100)
# Pool the testing and training samples (testing first) so the ROC curve is
# built from all available events.
signal_predictions = np.concatenate(
    [testing_signal_predictions, training_signal_predictions])
background_predictions = np.concatenate(
    [testing_background_predictions, training_background_predictions])
signal_weights = np.concatenate(
    [testing_signal_weights, training_signal_weights])
background_weights = np.concatenate(
    [testing_background_weights, training_background_weights])

# Training-only alternative, currently disabled:
# signal_predictions = np.concatenate([training_signal_predictions], axis=0)
# background_predictions = np.concatenate([training_background_predictions], axis=0)

# Histogram the signal over 40 bins in [0, 1] and reuse the very same bin
# edges for the background so that both histograms are aligned.
n_signal, _, binning = plotTools.binDataset(
    signal_predictions, signal_weights, bins=40, range=[0, 1])
n_background, _, _ = plotTools.binDataset(
    background_predictions, background_weights, bins=binning)

x, y = plotTools.get_roc(n_signal, n_background)

# One ROC curve per parameter point, drawn with its configured colour/legend.
curve_style = style[parameters]
roc_ax.plot(x, y, '-', color=curve_style['color'], lw=2,
            label=curve_style['legend'])

# NOTE(review): assuming `metrics` is sklearn.metrics, the `reorder` keyword
# only exists in old scikit-learn releases -- confirm the pinned version.
auc.append(metrics.auc(x, y, reorder=True))
xlabels.append(str(parameters))
# Reload the saved model from disk and export it for lwtnn.
model = keras.models.load_model(output_model_filename)
export_for_lwtnn(model, output_model_filename)

# Diagnostic plots.
draw_non_resonant_training_plots(
    model,
    dataset,
    output_folder,
    split_by_parameters=add_parameters_columns,
)
draw_nn_vs_independent(
    model,
    dataset,
    np.linspace(0, 400, 20),
    output_folder,
)

# Compute limit on test set: score both test samples, histogram the scores
# (50 bins over [0, 1]) with LUMI-scaled weights and run the limit finder.
sig_pred = model.predict(
    dataset.test_signal_dataset, batch_size=20000)[:, 0]
bkg_pred = model.predict(
    dataset.test_background_dataset, batch_size=20000)[:, 0]

LUMI = 35900
sig_binned = plotTools.binDataset(
    sig_pred, LUMI * dataset.test_signal_weights, bins=50, range=[0, 1])
bkg_binned = plotTools.binDataset(
    bkg_pred, LUMI * dataset.test_background_weights, bins=50, range=[0, 1])

limit = get_median_expected_limit(sig_binned, bkg_binned, guess=10)
print("Expected limit from test set: {} fb".format(limit))

print("All done. Training time: %s" % str(training_time))
def draw_resonant_training_plots(model, dataset, output_folder, split_by_mass=False):
    """Draw the diagnostic plots of a resonant training.

    Produces, in ``output_folder``: the input-variable plots (``inputs``
    sub-folder), the correlation plots, the NN output distributions
    (training/testing, signal/background) and a ROC curve built from the
    training samples only.

    Args:
        model: trained model, forwarded to the dataset's prediction getters.
        dataset: dataset object providing the drawing helpers, weights,
            predictions and, for the per-mass plots, the raw datasets whose
            last column is compared to each mass.
        output_folder: directory receiving the plots; created sub-folders are
            ``inputs`` and, if requested, ``splitted_by_mass``.
        split_by_mass: when true, the NN output and ROC plots are repeated
            for every mass in ``dataset.resonant_masses``.
    """

    def plot_output_and_roc(bkg_train, bkg_test, sig_train, sig_test,
                            w_bkg_train, w_bkg_test, w_sig_train, w_sig_test,
                            folder, nn_output_name, roc_name):
        # NN output distributions for the four samples
        plotTools.drawNNOutput(bkg_train, bkg_test, sig_train, sig_test,
                               w_bkg_train, w_bkg_test, w_sig_train, w_sig_test,
                               output_dir=folder, output_name=nn_output_name,
                               form=".pdf", bins=50)

        # ROC curve from the training samples; the signal histogram reuses
        # the bin edges returned for the background.
        bkg_hist, _, edges = plotTools.binDataset(bkg_train, w_bkg_train, bins=50, range=[0, 1])
        sig_hist, _, _ = plotTools.binDataset(sig_train, w_sig_train, bins=edges)
        plotTools.draw_roc(sig_hist, bkg_hist, output_dir=folder,
                           output_name=roc_name, form=".pdf")

    # Draw inputs
    inputs_dir = os.path.join(output_folder, 'inputs')
    if not os.path.exists(inputs_dir):
        os.makedirs(inputs_dir)
    dataset.draw_inputs(inputs_dir)
    dataset.draw_correlations(output_folder)

    # NOTE(review): the combined datasets/targets/weights are not used below;
    # the calls are kept in case the getters have side effects.
    _combined_train, _train_targets = dataset.get_training_combined_dataset_and_targets()
    _combined_train_w = dataset.get_training_combined_weights()
    _combined_test, _test_targets = dataset.get_testing_combined_dataset_and_targets()
    _combined_test_w = dataset.get_testing_combined_weights()

    print("Evaluating model performances...")
    train_sig_w, train_bkg_w = dataset.get_training_weights()
    test_sig_w, test_bkg_w = dataset.get_testing_weights()
    train_sig_pred, test_sig_pred = dataset.get_training_testing_signal_predictions(model)
    train_bkg_pred, test_bkg_pred = dataset.get_training_testing_background_predictions(model)
    print("Done.")

    print("Plotting time...")

    # Inclusive plots (all masses together)
    plot_output_and_roc(train_bkg_pred, test_bkg_pred, train_sig_pred, test_sig_pred,
                        train_bkg_w, test_bkg_w, train_sig_w, test_sig_w,
                        output_folder, "nn_output", "roc_curve")

    if split_by_mass:
        output_folder = os.path.join(output_folder, 'splitted_by_mass')
        if not os.path.exists(output_folder):
            os.makedirs(output_folder)

        train_sig_events, train_bkg_events = dataset.get_training_datasets()
        test_sig_events, test_bkg_events = dataset.get_testing_datasets()

        for m in dataset.resonant_masses:
            print(" Plotting NN output and ROC curve for M=%d" % m)

            # Select, in each sample, the events whose last column equals m
            train_sig_sel = train_sig_events[:, -1] == m
            train_bkg_sel = train_bkg_events[:, -1] == m
            test_sig_sel = test_sig_events[:, -1] == m
            test_bkg_sel = test_bkg_events[:, -1] == m

            plot_output_and_roc(
                train_bkg_pred[train_bkg_sel], test_bkg_pred[test_bkg_sel],
                train_sig_pred[train_sig_sel], test_sig_pred[test_sig_sel],
                train_bkg_w[train_bkg_sel], test_bkg_w[test_bkg_sel],
                train_sig_w[train_sig_sel], test_sig_w[test_sig_sel],
                output_folder,
                "nn_output_fixed_M%d" % (m),
                "roc_curve_fixed_M_%d" % (m))

    print("Done")
def draw_non_resonant_training_plots(model, dataset, output_folder, split_by_parameters=False):
    """Draw the diagnostic plots of a non-resonant training.

    Produces, in ``output_folder``: the input-variable plots (``inputs``
    sub-folder), the NN output distributions (training/testing,
    signal/background) and a ROC curve built from the training samples only.

    Args:
        model: trained model, forwarded to the dataset's prediction getters.
        dataset: dataset object providing the drawing helpers, weights,
            predictions and, for the per-parameter plots, the raw datasets
            whose last two columns are compared to each parameter pair.
        output_folder: directory receiving the plots; created sub-folders are
            ``inputs`` and, if requested, ``splitted_by_parameters``.
        split_by_parameters: when true, the NN output and ROC plots are
            repeated for every entry of
            ``dataset.get_nonresonant_parameters_list()``.
    """
    # plot(model, to_file=os.path.join(output_folder, "model.pdf"))

    def plot_output_and_roc(bkg_train, bkg_test, sig_train, sig_test,
                            w_bkg_train, w_bkg_test, w_sig_train, w_sig_test,
                            folder, nn_output_name, roc_name):
        # NN output distributions for the four samples
        plotTools.drawNNOutput(bkg_train, bkg_test, sig_train, sig_test,
                               w_bkg_train, w_bkg_test, w_sig_train, w_sig_test,
                               output_dir=folder, output_name=nn_output_name,
                               form=".pdf", bins=50)

        # ROC curve from the training samples; the signal histogram reuses
        # the bin edges returned for the background.
        bkg_hist, _, edges = plotTools.binDataset(bkg_train, w_bkg_train, bins=50, range=[0, 1])
        sig_hist, _, _ = plotTools.binDataset(sig_train, w_sig_train, bins=edges)
        plotTools.draw_roc(sig_hist, bkg_hist, output_dir=folder,
                           output_name=roc_name, form=".pdf")

    # Draw inputs
    inputs_dir = os.path.join(output_folder, 'inputs')
    if not os.path.exists(inputs_dir):
        os.makedirs(inputs_dir)
    dataset.draw_inputs(inputs_dir)

    # NOTE(review): the combined datasets/targets/weights are not used below;
    # the calls are kept in case the getters have side effects.
    _combined_train, _train_targets = dataset.get_training_combined_dataset_and_targets()
    _combined_train_w = dataset.get_training_combined_weights()
    _combined_test, _test_targets = dataset.get_testing_combined_dataset_and_targets()
    _combined_test_w = dataset.get_testing_combined_weights()

    print("Evaluating model performances...")
    train_sig_w, train_bkg_w = dataset.get_training_weights()
    test_sig_w, test_bkg_w = dataset.get_testing_weights()
    train_sig_pred, test_sig_pred = dataset.get_training_testing_signal_predictions(model)
    train_bkg_pred, test_bkg_pred = dataset.get_training_testing_background_predictions(model)
    print("Done.")

    print("Plotting time...")

    # Inclusive plots (all parameter points together)
    plot_output_and_roc(train_bkg_pred, test_bkg_pred, train_sig_pred, test_sig_pred,
                        train_bkg_w, test_bkg_w, train_sig_w, test_sig_w,
                        output_folder, "nn_output", "roc_curve")

    if split_by_parameters:
        output_folder = os.path.join(output_folder, 'splitted_by_parameters')
        if not os.path.exists(output_folder):
            os.makedirs(output_folder)

        train_sig_events, train_bkg_events = dataset.get_training_datasets()
        test_sig_events, test_bkg_events = dataset.get_testing_datasets()

        for parameters in dataset.get_nonresonant_parameters_list():
            user_parameters = ['{:.2f}'.format(value)
                               for value in dataset.positive_to_user_parameters(parameters)]
            print(" Plotting NN output and ROC curve for %s" % str(user_parameters))

            def at_this_point(events):
                # Last column must equal parameters[1], second-to-last parameters[0]
                return (events[:, -1] == parameters[1]) & (events[:, -2] == parameters[0])

            train_sig_sel = at_this_point(train_sig_events)
            train_bkg_sel = at_this_point(train_bkg_events)
            test_sig_sel = at_this_point(test_sig_events)
            test_bkg_sel = at_this_point(test_bkg_events)

            sel_train_bkg_pred = train_bkg_pred[train_bkg_sel]
            sel_test_bkg_pred = test_bkg_pred[test_bkg_sel]
            sel_train_sig_pred = train_sig_pred[train_sig_sel]
            sel_test_sig_pred = test_sig_pred[test_sig_sel]

            sel_train_bkg_w = train_bkg_w[train_bkg_sel]
            sel_test_bkg_w = test_bkg_w[test_bkg_sel]
            sel_train_sig_w = train_sig_w[train_sig_sel]
            sel_test_sig_w = test_sig_w[test_sig_sel]

            suffix = format_nonresonant_parameters(user_parameters)

            plot_output_and_roc(
                sel_train_bkg_pred, sel_test_bkg_pred,
                sel_train_sig_pred, sel_test_sig_pred,
                sel_train_bkg_w, sel_test_bkg_w,
                sel_train_sig_w, sel_test_sig_w,
                output_folder,
                "nn_output_fixed_parameters_%s" % (suffix),
                "roc_curve_fixed_parameters_%s" % (suffix))

    print("Done")
all_background_predictions = dataset.get_background_predictions(all_model) ignore_last_columns = 0 if 'no_mass_column' in models['dedicated'][m] and models['dedicated'][m][ 'no_mass_column']: ignore_last_columns = 1 dedicated_signal_predictions = dataset.get_signal_predictions( dedicated_model, ignore_last_columns=ignore_last_columns) dedicated_background_predictions = dataset.get_background_predictions( dedicated_model, ignore_last_columns=ignore_last_columns) print("Done.") all_n_signal, _, binning = plotTools.binDataset( all_signal_predictions, dataset.get_signal_weights(), bins=50, range=[0, 1]) all_n_background, _, _ = plotTools.binDataset( all_background_predictions, dataset.get_background_weights(), bins=binning) dedicated_n_signal, _, _ = plotTools.binDataset( dedicated_signal_predictions, dataset.get_signal_weights(), bins=binning) dedicated_n_background, _, _ = plotTools.binDataset( dedicated_background_predictions, dataset.get_background_weights(),
draw_non_resonant_training_plots(
    model, dataset, output_folder, split_by_parameters=add_parameters_columns)

# Do ROC of SR vs. BR ONLY for background.
# The first column of the extra test-background dataset is used as the
# per-event SR flag; everything else is treated as BR.
bkg_SR_cut = dataset.test_background_extra_dataset[:, 0].astype(bool)
#bkg_SR_cut = (dataset.test_background_extra_dataset >= 75) & (dataset.test_background_extra_dataset < 140)
#bkg_SR_cut = bkg_SR_cut[:,0]
bkg_BR_cut = ~bkg_SR_cut  # computed once instead of at every use

scores_bkg = model.predict(dataset.test_background_dataset, batch_size=20000)[:, 0]

# Fine binning (1000 bins over [0, 1]) for the SR histogram; the BR histogram
# reuses the same bin edges so that both are aligned for the ROC curve.
score_bkg_SR, _, bins = plotTools.binDataset(
    scores_bkg[bkg_SR_cut], dataset.test_background_weights[bkg_SR_cut],
    bins=1000, range=[0, 1])
score_bkg_BR, _, _ = plotTools.binDataset(
    scores_bkg[bkg_BR_cut], dataset.test_background_weights[bkg_BR_cut],
    bins=bins)

# NOTE(review): other draw_roc calls in this file pass the name without
# extension plus form=".pdf"; here the extension is part of the name --
# confirm against draw_roc's default `form` that the file name is as intended.
plotTools.draw_roc(score_bkg_SR, score_bkg_BR, output_dir=output_folder,
                   output_name="roc_SR_vs_BR_bkg.pdf")

# K-S test for background SR vs. BR (computed on the raw scores; the event
# weights are not passed to ks_2samp).
KS = stats.ks_2samp(scores_bkg[bkg_SR_cut], scores_bkg[bkg_BR_cut])
# Call form of print with a single argument: same output on Python 2, valid
# syntax on Python 3, and consistent with the other prints in this file.
print("K-S test for background, SR vs. BR: TS = {}, p-val = {}".format(*KS))