コード例 #1
0
def main():
    # Entry point: load truth-level LHE samples and histogram them over the
    # m_HH range.  NOTE(review): the loaded weights/errors are not used in
    # this visible body -- presumably consumed by code not shown here
    # (this snippet may be truncated); confirm against the full file.
    numpy.set_printoptions(precision=2, linewidth=400, threshold=100, sign=' ')
    
    # 31 edges -> 30 bins spanning 200-1200 (GeV, presumably truth m_HH).
    var_edges = numpy.linspace(200, 1200, 31)
    kv_val = 1.0
    #num_bins = 21
    #k2v_val_range = numpy.linspace(-2,4,num_bins)
    #kl_val_range = numpy.linspace(-14,16,num_bins)

    # Map coupling tuples -> LHE truth files, then histogram every sample.
    # NOTE(review): stat_limit=None and emulateSelection=True look like
    # "no statistics cap" and "apply reco-like selection at truth level"
    # respectively -- confirm against fileio_utils.extract_lhe_truth_data.
    truth_data_files = fileio_utils.read_coupling_file(coupling_file='basis_files/truth_LHE_couplings.dat')
    truth_weights, truth_errors = fileio_utils.extract_lhe_truth_data(truth_data_files.values(), var_edges, stat_limit=None, emulateSelection=True)
コード例 #2
0
def single_negative_weight_map(basis_parameters, name_suffix='_base', truth_level=False):
    """Draw the negative-weight map for one basis of coupling points.

    Loads the basis histograms (truth-level LHE or reco-level NNT depending
    on ``truth_level``), evaluates the negative-weight sum over a
    (k2v, kl) grid at fixed kv, and hands the grid to ``draw_error_map``
    with the scalar integral in the plot title.
    """
    bin_edges = numpy.linspace(200, 1200, 31)
    kappa_v = 1.0
    grid_bins = 100
    k2v_scan = numpy.linspace(-2, 4, grid_bins + 1)
    kl_scan = numpy.linspace(-14, 16, grid_bins + 1)

    if truth_level:
        # Truth path: look up the LHE file for each basis coupling.
        coupling_to_file = fileio_utils.read_coupling_file(coupling_file='basis_files/truth_LHE_couplings.dat')
        lhe_files = [coupling_to_file[coupling] for coupling in basis_parameters]
        weights, errors = fileio_utils.extract_lhe_truth_data(lhe_files, bin_edges)
    else:
        # Reco path: pull events per basis point and histogram them.
        coupling_to_file = fileio_utils.read_coupling_file()
        events_per_point = fileio_utils.get_events(basis_parameters, coupling_to_file)
        histograms = [
            fileio_utils.retrieve_reco_weights(bin_edges, events)
            for events in events_per_point
        ]
        weights, errors = numpy.array(list(zip(*histograms)))

    # Scalar integral for the title; full grid for the heat map.
    integral = get_Nweight_sum(basis_parameters, weights, kappa_v, k2v_scan, kl_scan, grid=False)
    nweight_grid = get_Nweight_sum(basis_parameters, weights, kappa_v, k2v_scan, kl_scan, grid=True)
    draw_error_map(basis_parameters, bin_edges, kappa_v, k2v_scan, kl_scan,
                   nweight_grid, name_suffix=name_suffix,
                   title_suffix=f'Integral={int(integral)}')
コード例 #3
0
def generate_metric_values():
    """Compute basis-quality metrics for every valid 6-point combination.

    Loads truth-level LHE histograms for all available coupling variations,
    iterates over all 6-element combinations, keeps the valid ones (per
    ``is_valid_combination``), and evaluates each metric on a (k2v, kl)
    grid at fixed kv.

    Returns:
        dict mapping metric name -> list of values, one entry per valid
        combination (in traversal order).  Empty dict if no combination
        is valid.
    """
    var_edges = numpy.linspace(200, 1200, 31)
    kv_val = 1.0
    num_bins = 100
    k2v_val_range = numpy.linspace(-2, 4, num_bins + 1)
    kl_val_range = numpy.linspace(-14, 16, num_bins + 1)

    # Truth-level inputs only; the reco-level variants of this scan were
    # removed as dead code (see version control history if needed).
    truth_data_files = fileio_utils.read_coupling_file(
        coupling_file='basis_files/truth_LHE_couplings_extended.dat')
    truth_weights, truth_errors = fileio_utils.extract_lhe_truth_data(
        truth_data_files.values(), var_edges)
    # Wrap all variations up together with their histograms so combinations
    # can be formed below.  (Relies on dict preserving insertion order so
    # keys/weights/errors stay aligned.)
    all_variations = list(
        zip(truth_data_files.keys(), truth_weights, truth_errors))

    print('All variations loaded, proceeding to retrieve metrics...')

    total = 0
    basis_metrics = {}
    for basis_set in itertools.combinations(all_variations, 6):
        # Unwrap each combination.  Distinct names avoid shadowing the
        # module-wide truth_weights/truth_errors loaded above.
        couplings, basis_weights, basis_errors = list(zip(*basis_set))
        if not is_valid_combination(couplings): continue

        basis_metrics[couplings] = {
            'Nweight_truth_integral':
            get_Nweight_sum(couplings, basis_weights, kv_val, k2v_val_range,
                            kl_val_range),
            'theory_effective_stats_integral':
            get_theory_effective_stats_map(couplings, kv_val, k2v_val_range,
                                           kl_val_range),
            'theory_solidarity_integral':
            get_theoretical_solidarity_map(couplings, kv_val, k2v_val_range,
                                           kl_val_range),
        }

        total += 1
        if total % 10 == 0: print(total)
    print('Variations traversed, plotting...')

    # Guard against the no-valid-combination case: the key lookup below
    # would otherwise raise on an empty dict.
    if not basis_metrics:
        return {}

    # Pivot {couplings: {metric: value}} into {metric: [values...]}.
    metric_lists = {key: [] for key in next(iter(basis_metrics.values()))}
    for metrics in basis_metrics.values():
        for key, val in metrics.items():
            metric_lists[key].append(val)
    return metric_lists
コード例 #4
0
def optimize_reco(mode='reco', extra_files=None, extra_name=''):
    """Rank all valid 6-point bases by their negative-weight integral.

    Args:
        mode: 'reco' for NNT reco-level histograms, 'truth' for LHE
            truth-level histograms; anything else aborts the process.
        extra_files: optional {coupling: file} mapping merged into the
            truth file list (used in 'truth' mode only).
        extra_name: prefix for the output plot names.

    Side effects: prints the ranked list and calls ``draw_rankings`` for
    the top two bases.
    """
    # Avoid the mutable-default-argument pitfall; {} stays per-call.
    if extra_files is None:
        extra_files = {}

    var_edges = numpy.linspace(200, 1200, 31)
    kv_val = 1.0
    num_kappa_bins = 100
    k2v_val_range = numpy.linspace(-2, 4, num_kappa_bins + 1)
    kl_val_range = numpy.linspace(-14, 16, num_kappa_bins + 1)

    truth_data_files = None
    if mode == 'reco':
        data_files = fileio_utils.read_coupling_file()
        all_events = fileio_utils.get_events(data_files.keys(), data_files)
        all_histograms = [
            fileio_utils.retrieve_reco_weights(var_edges, events)
            for events in all_events
        ]
        all_weights, all_errors = numpy.array(list(zip(*all_histograms)))
        # Wrap all variations up together with their histograms so I can find combinations
        all_variations = list(zip(data_files.keys(), all_weights))
    elif mode == 'truth':
        truth_data_files = fileio_utils.read_coupling_file(
            coupling_file='basis_files/truth_LHE_couplings.dat')
        truth_data_files.update(extra_files)
        truth_weights, truth_errors = fileio_utils.extract_lhe_truth_data(
            truth_data_files.values(), var_edges)
        all_variations = list(zip(truth_data_files.keys(), truth_weights))
    else:
        print('What are you doing?!')
        print(mode)
        exit(1)
    print('Histograms loaded, proceeding to integrate Nweight grids...')

    valid_bases = []
    total = 0
    for basis_set in itertools.combinations(all_variations, 6):
        # Unwrap each combination; require the SM point and a valid basis.
        couplings, weights = list(zip(*basis_set))
        if (1.0, 1.0, 1.0) not in couplings: continue
        if not combination_utils.is_valid_combination(
                couplings, base_equations=combination_utils.full_scan_terms):
            continue

        nWeight_integral = get_Nweight_sum(couplings, weights, kv_val,
                                           k2v_val_range, kl_val_range)
        valid_bases.append((nWeight_integral, couplings, weights))
        total += 1
        if total % 10 == 0: print(total)
    print('Integrals computed, sorting and printing...')
    # Sort by the integral alone: plain tuple sort would fall back to
    # comparing the numpy weight arrays on ties and raise ValueError.
    valid_bases.sort(key=lambda entry: entry[0])
    for rank, (integral, couplings, weight) in enumerate(valid_bases):
        print(rank, int(integral), couplings)

    draw_rankings([0, 1],
                  valid_bases,
                  var_edges,
                  kv_val,
                  k2v_val_range,
                  kl_val_range,
                  extra_name + '_truth_top',
                  only_heatmap=False,
                  truth_level=True,
                  truth_data_files=truth_data_files,
                  skip_preview=True)
コード例 #5
0
def compare_bases_reco_method(basis_parameters_list,
                              verification_parameters,
                              base_equations=combination_utils.full_scan_terms,
                              name_suffix='',
                              title_suffix='',
                              labels=('', ''),
                              is_verification=True,
                              truth_level=False,
                              truth_data_files=None):
    """Overlay linear-combination m_HH predictions from multiple bases.

    For each basis in ``basis_parameters_list``, builds a reweighting
    vector and loads the basis histograms (truth-level LHE or reco-level
    NNT per ``truth_level``).  Then, for every coupling point in
    ``verification_parameters``, combines each basis's histograms at that
    point and plots the first two combinations side by side (optionally
    against directly-generated verification histograms).

    NOTE(review): the plotting call indexes combined_tuples[1], so at
    least two bases are required -- confirm callers always pass >= 2.
    NOTE(review): ``data_files`` is assigned inside the basis loop but
    read after it (verification section); an empty basis list would leave
    it undefined.
    """

    #var_edges = numpy.linspace(200, 1200, 31)
    #var_edges = numpy.arange(0, 2050, 50)
    var_edges = numpy.linspace(200, 2000, 55)

    # One (weights, errors, reweight_vector) tuple per basis.
    basis_tuple_list = []
    for basis_parameters in basis_parameters_list:
        reweight_vector = get_amplitude_function(basis_parameters,
                                                 as_scalar=False,
                                                 base_equations=base_equations)
        if truth_level:
            # Truth path: resolve files via the caller-provided mapping.
            data_files = fileio_utils.read_coupling_file(
                coupling_file='basis_files/truth_LHE_couplings_extended.dat')
            basis_files = [
                truth_data_files[coupling] for coupling in basis_parameters
            ]
            truth_weights, truth_errors = fileio_utils.extract_lhe_truth_data(
                basis_files, var_edges)
            basis_tuple_list.append(
                (truth_weights, truth_errors, reweight_vector))
        else:
            # Reco path: histogram NNT events for each basis point.
            data_files = fileio_utils.read_coupling_file()
            base_events_list = fileio_utils.get_events(basis_parameters,
                                                       data_files)
            base_histograms = [
                fileio_utils.retrieve_reco_weights(var_edges, base_events)
                for base_events in base_events_list
            ]
            base_weights, base_errors = numpy.array(list(
                zip(*base_histograms)))
            basis_tuple_list.append(
                (base_weights, base_errors, reweight_vector))

    # With verification enabled, each test point carries its directly
    # generated histogram; otherwise test points are bare coupling tuples.
    testpoint_list = verification_parameters
    if is_verification:
        if truth_level:
            verification_files = [
                data_files[key] for key in verification_parameters
            ]
            truth_verification_weights, truth_verification_errors = fileio_utils.extract_lhe_truth_data(
                verification_files, var_edges)
            testpoint_list = zip(verification_parameters,
                                 truth_verification_weights,
                                 truth_verification_errors)
        else:
            testpoint_list = []
            verification_events_list = fileio_utils.get_events(
                verification_parameters, data_files)
            for events, param in zip(verification_events_list,
                                     verification_parameters):
                verification_weights, verification_errors = fileio_utils.retrieve_reco_weights(
                    var_edges, events)
                testpoint_list.append(
                    (param, verification_weights, verification_errors))

    for testpoint in testpoint_list:
        verification_weights, verification_errors = None, None
        if is_verification:
            coupling_parameters, verification_weights, verification_errors = testpoint
        else:
            coupling_parameters = testpoint

        # Combine each basis's histograms at this coupling point.
        combined_tuples = []
        for base_weights, base_errors, reweight_vector in basis_tuple_list:
            combined_tuples.append(
                reco_reweight(reweight_vector, coupling_parameters,
                              base_weights, base_errors))

        if truth_level:
            name = 'truth_mHH_compare' + name_suffix
            title = 'Truth LHE-Based Linear Combination:\nTruth $m_{HH}$' + title_suffix
            xlabel = 'Truth $m_{HH}$ (GeV)'
        else:
            name = 'reco_mHH_compare' + name_suffix
            title = 'NNT-Based Linear Combination:\n$m_{HH}$' + title_suffix
            xlabel = 'Reconstructed $m_{HH}$ (GeV)'

        # Primary basis plotted as the main curve, second basis as the
        # alternate overlay.
        plot_histogram(
            name,
            title,
            var_edges,
            coupling_parameters,
            combined_tuples[0][0],
            combined_tuples[0][1],
            verification_weights,
            verification_errors,
            alt_linearly_combined_weights=combined_tuples[1][0],
            alt_linearly_combined_errors=combined_tuples[1][1],
            generated_label=labels[0],
            alt_label=labels[1],
            xlabel=xlabel,
        )