training_signal_weights, training_background_weights = dataset.get_training_weights() testing_signal_weights, testing_background_weights = dataset.get_testing_weights() training_signal_predictions, testing_signal_predictions = dataset.get_training_testing_signal_predictions(model) training_background_predictions, testing_background_predictions = dataset.get_training_testing_background_predictions(model) print("Done") print("Doing some plots...") n_background, n_signal = plotTools.drawNNOutput(training_background_predictions, testing_background_predictions, training_signal_predictions, testing_signal_predictions, training_background_weights, testing_background_weights, training_signal_weights, testing_signal_weights, output_dir=output_folder, output_name="nn_output_all_%s.pdf" % suffix) plotTools.draw_roc(n_signal, n_background, output_dir=output_folder, output_name="roc_curve_all_%s.pdf" % suffix) # Split by training mass for m in signal_masses: training_signal_dataset, training_background_dataset = dataset.get_training_datasets() testing_signal_dataset, testing_background_dataset = dataset.get_testing_datasets() training_signal_mask = training_signal_dataset[:,-1] == m training_background_mask = training_background_dataset[:,-1] == m testing_signal_mask = testing_signal_dataset[:,-1] == m testing_background_mask = testing_background_dataset[:,-1] == m mass_training_background_predictions = training_background_predictions[training_background_mask] mass_training_signal_predictions = training_signal_predictions[training_signal_mask] mass_training_background_weights = training_background_weights[training_background_mask] mass_training_signal_weights = training_signal_weights[training_signal_mask]
def draw_non_resonant_training_plots(model, dataset, output_folder, split_by_parameters=False):
    """Draw the input, NN-output and ROC-curve plots of a non-resonant training.

    The input plots are written to ``<output_folder>/inputs``; the overall NN
    output ("nn_output") and ROC curve ("roc_curve") go to ``output_folder``.

    When ``split_by_parameters`` is true, the same two plots are produced once
    per entry of ``dataset.get_nonresonant_parameters_list()`` inside
    ``<output_folder>/splitted_by_parameters``. Events are selected by
    comparing the last two columns of the training/testing datasets with the
    parameter pair.

    model -- forwarded to the dataset prediction getters.
    dataset -- object providing the drawing, weight, prediction and dataset
        getters called below.
    output_folder -- base directory for every plot.
    split_by_parameters -- also draw per-parameter plots when true.
    """

    def _ensure_directory(path):
        # Create the directory (with its parents) only if it is missing.
        if not os.path.exists(path):
            os.makedirs(path)

    def _parameter_mask(sample, point):
        # Column -2 holds point[0] and column -1 holds point[1].
        # NOTE(review): presumably `sample` is a 2-D numpy array -- confirm.
        return (sample[:, -1] == point[1]) & (sample[:, -2] == point[0])

    def _plot_output_and_roc(scores, weights, folder, nn_name, roc_name):
        # `scores` and `weights` are ordered:
        # (training background, testing background,
        #  training signal, testing signal).
        train_bkg, test_bkg, train_sig, test_sig = scores
        w_train_bkg, w_test_bkg, w_train_sig, w_test_sig = weights

        # NN output
        plotTools.drawNNOutput(
            train_bkg, test_bkg,
            train_sig, test_sig,
            w_train_bkg, w_test_bkg,
            w_train_sig, w_test_sig,
            output_dir=folder, output_name=nn_name, form=".pdf", bins=50)

        # ROC curve, from the training samples only. The bin edges returned
        # for the background are reused for the signal so both share one
        # binning.
        binned_bkg, _, edges = plotTools.binDataset(train_bkg, w_train_bkg, bins=50, range=[0, 1])
        binned_sig, _, _ = plotTools.binDataset(train_sig, w_train_sig, bins=edges)
        plotTools.draw_roc(binned_sig, binned_bkg, output_dir=folder, output_name=roc_name, form=".pdf")

    # Draw inputs
    inputs_folder = os.path.join(output_folder, 'inputs')
    _ensure_directory(inputs_folder)
    dataset.draw_inputs(inputs_folder)

    # The combined datasets, targets and weights are not used further down;
    # the calls are retained as they were.
    _combined_training, _training_targets = dataset.get_training_combined_dataset_and_targets()
    _combined_training_weights = dataset.get_training_combined_weights()
    _combined_testing, _testing_targets = dataset.get_testing_combined_dataset_and_targets()
    _combined_testing_weights = dataset.get_testing_combined_weights()

    print("Evaluating model performances...")

    w_train_sig, w_train_bkg = dataset.get_training_weights()
    w_test_sig, w_test_bkg = dataset.get_testing_weights()
    train_sig, test_sig = dataset.get_training_testing_signal_predictions(model)
    train_bkg, test_bkg = dataset.get_training_testing_background_predictions(model)

    print("Done.")

    print("Plotting time...")

    _plot_output_and_roc(
        (train_bkg, test_bkg, train_sig, test_sig),
        (w_train_bkg, w_test_bkg, w_train_sig, w_test_sig),
        output_folder, "nn_output", "roc_curve")

    if split_by_parameters:
        split_folder = os.path.join(output_folder, 'splitted_by_parameters')
        _ensure_directory(split_folder)

        train_sig_data, train_bkg_data = dataset.get_training_datasets()
        test_sig_data, test_bkg_data = dataset.get_testing_datasets()

        for parameters in dataset.get_nonresonant_parameters_list():
            user_parameters = ['{:.2f}'.format(x) for x in dataset.positive_to_user_parameters(parameters)]

            print(" Plotting NN output and ROC curve for %s" % str(user_parameters))

            # Keep only the events generated for this parameter point.
            sig_train_mask = _parameter_mask(train_sig_data, parameters)
            bkg_train_mask = _parameter_mask(train_bkg_data, parameters)
            sig_test_mask = _parameter_mask(test_sig_data, parameters)
            bkg_test_mask = _parameter_mask(test_bkg_data, parameters)

            selected_scores = (
                train_bkg[bkg_train_mask],
                test_bkg[bkg_test_mask],
                train_sig[sig_train_mask],
                test_sig[sig_test_mask],
            )
            selected_weights = (
                w_train_bkg[bkg_train_mask],
                w_test_bkg[bkg_test_mask],
                w_train_sig[sig_train_mask],
                w_test_sig[sig_test_mask],
            )

            suffix = format_nonresonant_parameters(user_parameters)
            _plot_output_and_roc(
                selected_scores, selected_weights, split_folder,
                "nn_output_fixed_parameters_%s"%(suffix),
                "roc_curve_fixed_parameters_%s" % (suffix))

    print("Done")
def draw_resonant_training_plots(model, dataset, output_folder, split_by_mass=False):
    """Draw the input, correlation, NN-output and ROC-curve plots of a resonant training.

    The input plots are written to ``<output_folder>/inputs``; the
    correlations, the overall NN output ("nn_output") and the ROC curve
    ("roc_curve") go to ``output_folder``.

    When ``split_by_mass`` is true, the NN output and ROC curve are drawn
    again for every mass in ``dataset.resonant_masses`` inside
    ``<output_folder>/splitted_by_mass``. Events are selected by comparing
    the last column of the training/testing datasets with the mass.

    model -- forwarded to the dataset prediction getters.
    dataset -- object providing the drawing, weight, prediction and dataset
        getters called below.
    output_folder -- base directory for every plot.
    split_by_mass -- also draw per-mass plots when true.
    """

    def _ensure_directory(path):
        # Create the directory (with its parents) only if it is missing.
        if not os.path.exists(path):
            os.makedirs(path)

    def _plot_output_and_roc(scores, weights, folder, nn_name, roc_name):
        # `scores` and `weights` are ordered:
        # (training background, testing background,
        #  training signal, testing signal).
        train_bkg, test_bkg, train_sig, test_sig = scores
        w_train_bkg, w_test_bkg, w_train_sig, w_test_sig = weights

        # NN output
        plotTools.drawNNOutput(
            train_bkg, test_bkg,
            train_sig, test_sig,
            w_train_bkg, w_test_bkg,
            w_train_sig, w_test_sig,
            output_dir=folder, output_name=nn_name, form=".pdf", bins=50)

        # ROC curve, from the training samples only. The bin edges returned
        # for the background are reused for the signal so both share one
        # binning.
        binned_bkg, _, edges = plotTools.binDataset(train_bkg, w_train_bkg, bins=50, range=[0, 1])
        binned_sig, _, _ = plotTools.binDataset(train_sig, w_train_sig, bins=edges)
        plotTools.draw_roc(binned_sig, binned_bkg, output_dir=folder, output_name=roc_name, form=".pdf")

    # Draw inputs
    inputs_folder = os.path.join(output_folder, 'inputs')
    _ensure_directory(inputs_folder)
    dataset.draw_inputs(inputs_folder)
    dataset.draw_correlations(output_folder)

    # The combined datasets, targets and weights are not used further down;
    # the calls are retained as they were.
    _combined_training, _training_targets = dataset.get_training_combined_dataset_and_targets()
    _combined_training_weights = dataset.get_training_combined_weights()
    _combined_testing, _testing_targets = dataset.get_testing_combined_dataset_and_targets()
    _combined_testing_weights = dataset.get_testing_combined_weights()

    print("Evaluating model performances...")

    w_train_sig, w_train_bkg = dataset.get_training_weights()
    w_test_sig, w_test_bkg = dataset.get_testing_weights()
    train_sig, test_sig = dataset.get_training_testing_signal_predictions(model)
    train_bkg, test_bkg = dataset.get_training_testing_background_predictions(model)

    print("Done.")

    print("Plotting time...")

    _plot_output_and_roc(
        (train_bkg, test_bkg, train_sig, test_sig),
        (w_train_bkg, w_test_bkg, w_train_sig, w_test_sig),
        output_folder, "nn_output", "roc_curve")

    if split_by_mass:
        split_folder = os.path.join(output_folder, 'splitted_by_mass')
        _ensure_directory(split_folder)

        train_sig_data, train_bkg_data = dataset.get_training_datasets()
        test_sig_data, test_bkg_data = dataset.get_testing_datasets()

        for m in dataset.resonant_masses:
            print(" Plotting NN output and ROC curve for M=%d" % m)

            # Keep only the events whose last column equals this mass.
            sig_train_mask = train_sig_data[:, -1] == m
            bkg_train_mask = train_bkg_data[:, -1] == m
            sig_test_mask = test_sig_data[:, -1] == m
            bkg_test_mask = test_bkg_data[:, -1] == m

            selected_scores = (
                train_bkg[bkg_train_mask],
                test_bkg[bkg_test_mask],
                train_sig[sig_train_mask],
                test_sig[sig_test_mask],
            )
            selected_weights = (
                w_train_bkg[bkg_train_mask],
                w_test_bkg[bkg_test_mask],
                w_train_sig[sig_train_mask],
                w_test_sig[sig_test_mask],
            )

            _plot_output_and_roc(
                selected_scores, selected_weights, split_folder,
                "nn_output_fixed_M%d"% (m),
                "roc_curve_fixed_M_%d" % (m))

    print("Done")
bkg_SR_cut = dataset.test_background_extra_dataset[:, 0].astype(bool) #bkg_SR_cut = (dataset.test_background_extra_dataset >= 75) & (dataset.test_background_extra_dataset < 140) #bkg_SR_cut = bkg_SR_cut[:,0] scores_bkg = model.predict(dataset.test_background_dataset, batch_size=20000)[:, 0] score_bkg_SR, _, bins = plotTools.binDataset( scores_bkg[bkg_SR_cut], dataset.test_background_weights[bkg_SR_cut], bins=1000, range=[0, 1]) score_bkg_BR, _, _ = plotTools.binDataset( scores_bkg[~bkg_SR_cut], dataset.test_background_weights[~bkg_SR_cut], bins=bins) plotTools.draw_roc(score_bkg_SR, score_bkg_BR, output_dir=output_folder, output_name="roc_SR_vs_BR_bkg.pdf") # K-S test for background SR vs. BR KS = stats.ks_2samp(scores_bkg[bkg_SR_cut], scores_bkg[~bkg_SR_cut]) print "K-S test for background, SR vs. BR: TS = {}, p-val = {}".format(*KS) #print roc_auc_score(bkg_SR_cut, scores_bkg, sample_weight=dataset.test_background_weights) # Do ROC ONLY in SR region sig_SR_cut = dataset.test_signal_extra_dataset[:, 0].astype(bool) #sig_SR_cut = (dataset.test_signal_extra_dataset >= 75) & (dataset.test_signal_extra_dataset < 140) #sig_SR_cut = sig_SR_cut[:,0] scores_sig = model.predict(dataset.test_signal_dataset[sig_SR_cut], batch_size=20000)[:, 0] score_sig_SR, _, _ = plotTools.binDataset(