def main():
    """Load the truth-level LHE histograms for the nominal coupling file.

    Configures numpy print formatting, defines the m_HH binning, then reads
    the coupling -> file map and extracts per-variation weight/error
    histograms with selection emulation enabled and no statistics limit.
    """
    numpy.set_printoptions(precision=2, linewidth=400, threshold=100, sign=' ')

    # 30 uniform m_HH bins spanning 200-1200 GeV (31 edges).
    bin_edges = numpy.linspace(200, 1200, 31)
    kv_val = 1.0  # kappa_V held fixed at the SM value

    coupling_file_map = fileio_utils.read_coupling_file(
        coupling_file='basis_files/truth_LHE_couplings.dat')
    lhe_weights, lhe_errors = fileio_utils.extract_lhe_truth_data(
        coupling_file_map.values(), bin_edges,
        stat_limit=None, emulateSelection=True)
def single_negative_weight_map(basis_parameters, name_suffix='_base', truth_level=False):
    """Draw the negative-weight map for one basis of coupling variations.

    Loads the basis histograms (either truth-level LHE or reco-level NNT,
    per ``truth_level``), integrates the negative-weight measure over the
    (kappa_2V, kappa_lambda) plane, and renders the 2D error map with the
    integral quoted in the plot title.
    """
    bin_edges = numpy.linspace(200, 1200, 31)  # m_HH binning
    kv_val = 1.0  # kappa_V fixed at SM
    num_kappa_bins = 100
    # Scan grids over the kappa plane (inclusive edges).
    k2v_scan = numpy.linspace(-2, 4, num_kappa_bins + 1)
    kl_scan = numpy.linspace(-14, 16, num_kappa_bins + 1)

    if truth_level:
        # Truth: look up each basis coupling's LHE file and histogram it.
        truth_map = fileio_utils.read_coupling_file(
            coupling_file='basis_files/truth_LHE_couplings.dat')
        selected_files = [truth_map[coupling] for coupling in basis_parameters]
        basis_weights, basis_errors = fileio_utils.extract_lhe_truth_data(
            selected_files, bin_edges)
    else:
        # Reco: pull events per basis point and histogram the reco m_HH.
        file_map = fileio_utils.read_coupling_file()
        events_per_basis = fileio_utils.get_events(basis_parameters, file_map)
        histograms = [
            fileio_utils.retrieve_reco_weights(bin_edges, events)
            for events in events_per_basis
        ]
        basis_weights, basis_errors = numpy.array(list(zip(*histograms)))

    # Scalar integral for the title, full grid for the heat map.
    integral = get_Nweight_sum(basis_parameters, basis_weights, kv_val,
                               k2v_scan, kl_scan, grid=False)
    weight_grid = get_Nweight_sum(basis_parameters, basis_weights, kv_val,
                                  k2v_scan, kl_scan, grid=True)
    draw_error_map(basis_parameters, bin_edges, kv_val, k2v_scan, kl_scan,
                   weight_grid, name_suffix=name_suffix,
                   title_suffix=f'Integral={int(integral)}')
def generate_metric_values():
    """Compute basis-quality metrics for every valid 6-point combination.

    Loads truth-level LHE histograms for the extended coupling file, then
    iterates over all 6-variation combinations, keeping those passing
    ``is_valid_combination``. For each kept basis it records the
    negative-weight integral, theory effective-stats integral, and theory
    solidarity integral over the (kappa_2V, kappa_lambda) scan plane.

    Returns:
        dict mapping metric name -> list of metric values (one entry per
        valid basis, in traversal order). Empty dict if no basis is valid.
    """
    var_edges = numpy.linspace(200, 1200, 31)  # m_HH binning
    kv_val = 1.0  # kappa_V fixed at SM
    num_bins = 100
    k2v_val_range = numpy.linspace(-2, 4, num_bins + 1)
    kl_val_range = numpy.linspace(-14, 16, num_bins + 1)

    # NOTE: python dicts preserve insertion order, so keys() and the
    # extracted weight/error lists stay aligned.
    truth_data_files = fileio_utils.read_coupling_file(
        coupling_file='basis_files/truth_LHE_couplings_extended.dat')
    truth_weights, truth_errors = fileio_utils.extract_lhe_truth_data(
        truth_data_files.values(), var_edges)
    all_variations = list(
        zip(truth_data_files.keys(), truth_weights, truth_errors))
    print('All variations loaded, proceeding to retrieve metrics...')

    total = 0
    basis_metrics = {}
    for basis_set in itertools.combinations(all_variations, 6):
        # Unwrap each combination. Loop locals are named basis_* so they do
        # not shadow the full truth_weights/truth_errors lists above.
        couplings, basis_weights, basis_errors = list(zip(*basis_set))
        if not is_valid_combination(couplings):
            continue
        basis_metrics[couplings] = {
            'Nweight_truth_integral': get_Nweight_sum(
                couplings, basis_weights, kv_val, k2v_val_range, kl_val_range),
            'theory_effective_stats_integral': get_theory_effective_stats_map(
                couplings, kv_val, k2v_val_range, kl_val_range),
            'theory_solidarity_integral': get_theoretical_solidarity_map(
                couplings, kv_val, k2v_val_range, kl_val_range),
        }
        total += 1
        # Coarse progress indicator for the long combinatorial scan.
        if total % 10 == 0:
            print(total)
    print('Variations traversed, plotting...')

    # Guard: no valid combination found -> nothing to transpose.
    if not basis_metrics:
        return {}

    # Transpose {basis: {metric: value}} into {metric: [values...]}.
    metric_lists = {key: [] for key in next(iter(basis_metrics.values())).keys()}
    for metrics in basis_metrics.values():
        for key, val in metrics.items():
            metric_lists[key].append(val)
    return metric_lists
def optimize_reco(mode='reco', extra_files=None, extra_name=''):
    """Rank all valid 6-point bases by their negative-weight integral.

    Args:
        mode: 'reco' to use NNT reco-level histograms, 'truth' to use
            truth-level LHE histograms (optionally extended by
            ``extra_files``). Any other value aborts the program.
        extra_files: optional mapping of coupling -> file merged into the
            truth coupling map (truth mode only). Defaults to no extras.
            (Was a mutable ``{}`` default — replaced with the None sentinel.)
        extra_name: prefix for the output plot names.

    Prints the ranked bases and draws ranking plots for the top two.
    """
    if extra_files is None:
        extra_files = {}
    var_edges = numpy.linspace(200, 1200, 31)  # m_HH binning
    kv_val = 1.0  # kappa_V fixed at SM
    num_kappa_bins = 100
    k2v_val_range = numpy.linspace(-2, 4, num_kappa_bins + 1)
    kl_val_range = numpy.linspace(-14, 16, num_kappa_bins + 1)

    truth_data_files = None
    if mode == 'reco':
        data_files = fileio_utils.read_coupling_file()
        all_events = fileio_utils.get_events(data_files.keys(), data_files)
        all_histograms = [
            fileio_utils.retrieve_reco_weights(var_edges, events)
            for events in all_events
        ]
        all_weights, all_errors = numpy.array(list(zip(*all_histograms)))
        # Wrap all variations up together with their histograms so I can
        # find combinations.
        all_variations = list(zip(data_files.keys(), all_weights))
    elif mode == 'truth':
        truth_data_files = fileio_utils.read_coupling_file(
            coupling_file='basis_files/truth_LHE_couplings.dat')
        truth_data_files.update(extra_files)
        truth_weights, truth_errors = fileio_utils.extract_lhe_truth_data(
            truth_data_files.values(), var_edges)
        all_variations = list(zip(truth_data_files.keys(), truth_weights))
    else:
        print('What are you doing?!')
        print(mode)
        exit(1)

    print('Histograms loaded, proceeding to integrate Nweight grids...')
    valid_bases = []
    total = 0
    for basis_set in itertools.combinations(all_variations, 6):
        # Unwrap each combination
        couplings, weights = list(zip(*basis_set))
        # Require the SM point and a solvable basis.
        if (1.0, 1.0, 1.0) not in couplings:
            continue
        if not combination_utils.is_valid_combination(
                couplings, base_equations=combination_utils.full_scan_terms):
            continue
        nWeight_integral = get_Nweight_sum(couplings, weights, kv_val,
                                           k2v_val_range, kl_val_range)
        valid_bases.append((nWeight_integral, couplings, weights))
        total += 1
        if total % 10 == 0:
            print(total)

    print('Integrals computed, sorting and printing...')
    # Sort on the integral only: a bare tuple sort would fall through to
    # comparing the numpy weight arrays on ties and raise
    # "truth value of an array is ambiguous".
    valid_bases.sort(key=lambda entry: entry[0])
    for rank, (integral, couplings, weight) in enumerate(valid_bases):
        print(rank, int(integral), couplings)
    draw_rankings([0, 1], valid_bases, var_edges, kv_val, k2v_val_range,
                  kl_val_range, extra_name + '_truth_top',
                  only_heatmap=False, truth_level=True,
                  truth_data_files=truth_data_files, skip_preview=True)
def compare_bases_reco_method(basis_parameters_list, verification_parameters,
                              base_equations=combination_utils.full_scan_terms,
                              name_suffix='', title_suffix='', labels=('', ''),
                              is_verification=True, truth_level=False,
                              truth_data_files=None):
    """Overlay linear-combination predictions from two bases at test points.

    For each basis in ``basis_parameters_list`` an amplitude reweighting
    vector and basis histograms are prepared (truth LHE or reco NNT per
    ``truth_level``). Each entry of ``verification_parameters`` is then
    reweighted with every basis and plotted; when ``is_verification`` is
    set, directly generated histograms are overlaid for comparison.
    Exactly two bases are assumed (the plot shows primary + alternate).
    """
    var_edges = numpy.linspace(200, 2000, 55)  # extended m_HH range

    # One (weights, errors, reweight_vector) tuple per basis.
    basis_tuple_list = []
    for basis_params in basis_parameters_list:
        amp_vector = get_amplitude_function(
            basis_params, as_scalar=False, base_equations=base_equations)
        if truth_level:
            # NOTE: data_files (extended truth map) is reused below for the
            # verification points; the basis files come from the caller's
            # truth_data_files map.
            data_files = fileio_utils.read_coupling_file(
                coupling_file='basis_files/truth_LHE_couplings_extended.dat')
            chosen = [truth_data_files[coupling] for coupling in basis_params]
            weights, errors = fileio_utils.extract_lhe_truth_data(
                chosen, var_edges)
            basis_tuple_list.append((weights, errors, amp_vector))
        else:
            data_files = fileio_utils.read_coupling_file()
            event_groups = fileio_utils.get_events(basis_params, data_files)
            histograms = [
                fileio_utils.retrieve_reco_weights(var_edges, evts)
                for evts in event_groups
            ]
            weights, errors = numpy.array(list(zip(*histograms)))
            basis_tuple_list.append((weights, errors, amp_vector))

    # Without verification, iterate the bare coupling parameters; with it,
    # pair each coupling with its directly generated histogram.
    testpoint_list = verification_parameters
    if is_verification:
        if truth_level:
            files = [data_files[key] for key in verification_parameters]
            v_weights, v_errors = fileio_utils.extract_lhe_truth_data(
                files, var_edges)
            testpoint_list = zip(verification_parameters, v_weights, v_errors)
        else:
            events_per_point = fileio_utils.get_events(
                verification_parameters, data_files)
            testpoint_list = []
            for events, param in zip(events_per_point, verification_parameters):
                w, e = fileio_utils.retrieve_reco_weights(var_edges, events)
                testpoint_list.append((param, w, e))

    for testpoint in testpoint_list:
        verification_weights, verification_errors = None, None
        if is_verification:
            coupling_parameters, verification_weights, verification_errors = testpoint
        else:
            coupling_parameters = testpoint

        # Reweight this coupling point with every basis.
        combined_tuples = [
            reco_reweight(amp_vector, coupling_parameters, weights, errors)
            for weights, errors, amp_vector in basis_tuple_list
        ]

        if truth_level:
            name = 'truth_mHH_compare' + name_suffix
            title = 'Truth LHE-Based Linear Combination:\nTruth $m_{HH}$' + title_suffix
            xlabel = 'Truth $m_{HH}$ (GeV)'
        else:
            name = 'reco_mHH_compare' + name_suffix
            title = 'NNT-Based Linear Combination:\n$m_{HH}$' + title_suffix
            xlabel = 'Reconstructed $m_{HH}$ (GeV)'

        plot_histogram(
            name, title, var_edges, coupling_parameters,
            combined_tuples[0][0], combined_tuples[0][1],
            verification_weights, verification_errors,
            alt_linearly_combined_weights=combined_tuples[1][0],
            alt_linearly_combined_errors=combined_tuples[1][1],
            generated_label=labels[0],
            alt_label=labels[1],
            xlabel=xlabel,
        )