Example #1
def process(config_file, samples):

    # Setup logging.
    log = logging.getLogger(__name__)

    start_time = time.time()

    # Load config from file.
    config = utils.load_config(config_file)

    # Load the entire dataset. This should only
    # be done once because it is about 1.5 GB
    # at load time.
    data = utils.load_dataset(config)

    # Apply nominal cuts to get the subset of
    # events considered good when using the
    # "best" cut values.
    nominal_filter = utils.build_filter(data)
    nominal_data = utils.build_dataframe(data, nominal_filter)

    # Randomize the sector assignments to check
    # that each sector at least reproduces the
    # same answer.
    utils.randomize_sector(data)

    varfile = os.path.join(os.path.dirname(__file__), '..', '..', 'variations.json')
    variations = load_variations(varfile)

    # Use quantile binning to get integrated bins
    # for the axes listed in the configuration.
    bins = setup_binning(config, nominal_data)

    # Calculate the results for the nominal subset of data.
    results = {}
    results['nominal'] = utils.get_results(nominal_data, bins, config)

    # Calculate the results for each sector.
    for sector in range(1, 7):

        sector_data = data[data['sector'] == sector]

        for imc in range(samples):

            var_time = time.time()
            log.info('Doing sector {}, sample {}'.format(sector, imc))
            random_filter = utils.get_random_config(sector_data, variations)
            random_data = utils.build_dataframe(sector_data, random_filter)
            sect_result = utils.get_results(random_data, bins, config)
            elapsed_time = time.time() - var_time
            log.info('Elapsed time %.3f' % elapsed_time)
            output_filename = '{}phi/random/sector_{}_{}.csv'.format(
                config['database_path'], sector, imc)
            sect_result.to_csv(output_filename, index=False)

    exe_time = time.time() - start_time
    log.info('Finished execution in %.3f seconds.' % exe_time)
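
These excerpts assume the surrounding module imports logging, os, time, pickle, and numpy, and provides the project-local helpers utils, load_variations, setup_binning, assign_systematics, and build_bayesian_optimized_config. A minimal sketch of how process(config_file, samples) from Example #1 might be driven from the command line is shown below; the flag names and logging setup are assumptions, not part of the original module.

# Hypothetical command-line entry point for Example #1 (flag names are illustrative).
import argparse
import logging


def main():
    parser = argparse.ArgumentParser(description='Run the phi analysis with random cut variations.')
    parser.add_argument('--config', required=True, help='Path to the analysis config file.')
    parser.add_argument('--samples', type=int, default=10,
                        help='Number of random cut variations per sector.')
    cli_args = parser.parse_args()

    logging.basicConfig(level=logging.INFO)
    process(cli_args.config, cli_args.samples)


if __name__ == '__main__':
    main()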
Example #2
def process(config_file):

    # Setup logging.
    log = logging.getLogger(__name__)

    start_time = time.time()

    # Load config from file.
    config = utils.load_config(config_file)

    nominal_conf = {}
    #nominal_conf['alpha'] = [0.55, 1.0]
    #nominal_conf['missing_mass'] = [0.0, 5.0]
    nominal_conf['p_mes'] = [0.35, 1.8]

    # Load the entire dataset. This should only
    # be done once because it is about 1.5 GB
    # at load time.
    data = utils.load_dataset(config)
    #data = data.dropna(how='any')
    data.info()  # DataFrame.info() prints its own summary; wrapping it in print() just prints None.

    # Apply nominal cuts to get the subset of
    # events considered good when using the
    # "best" cut values.
    #nominal_filter = utils.build_filter(data, nominal_conf)
    #nominal_data   = utils.build_dataframe(data, nominal_filter)

    # Use quantile binning to get integrated bins
    # for the axes listed in the configuration.
    #bins = setup_binning(config, nominal_data)
    bins = setup_binning(config, data)
    with open('binning_mc.pkl', 'wb') as binf:
        pickle.dump(bins, binf)

    #kin_limits = find_kinematic_limits_in_bins(data, bins)
    #kin_limits.to_csv('kinematic_limits_mc.csv', index = False)

    # Calculate the results for the nominal subset of data.
    results = utils.get_results(data, bins, config)
    results.to_csv(config['output_filename'], index=False)
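
Example #2 pickles the quantile binning so that later jobs can reuse exactly the same bin edges. A minimal sketch of reading it back, assuming the same working directory:

# Reload the binning written above so a later pass uses identical bin edges.
import pickle

with open('binning_mc.pkl', 'rb') as binf:
    bins = pickle.load(binf)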
Example #3
def process_par_set(data,
                    config,
                    alpha_min=0.55,
                    alpha_max=1.0,
                    dist_cc_min=-1.0,
                    dist_cc_max=1.0,
                    dist_cc_theta_min=-1.0,
                    dist_cc_theta_max=1.0,
                    dist_dcr1_min=-1.0,
                    dist_dcr1_max=1.0,
                    dist_dcr3_min=-1.0,
                    dist_dcr3_max=1.0,
                    dist_ecsf_min=-1.0,
                    dist_ecsf_max=1.0,
                    dist_ecu_min=-1.0,
                    dist_ecu_max=1.0,
                    dist_ecv_min=-1.0,
                    dist_ecv_max=1.0,
                    dist_ecw_min=-1.0,
                    dist_ecw_max=1.0,
                    dist_ec_edep_min=-1.0,
                    dist_ec_edep_max=1.0,
                    dist_vz_min=-1.0,
                    dist_vz_max=1.0,
                    missing_mass_min=0.0,
                    missing_mass_max=5.0,
                    p_mes_min=0.35,
                    p_mes_max=2.0):
    """ Process one dataset. """

    conf = {}
    conf['alpha'] = [alpha_min, alpha_max]
    conf['dist_cc'] = [dist_cc_min, dist_cc_max]
    conf['dist_cc_theta'] = [dist_cc_theta_min, dist_cc_theta_max]
    conf['dist_dcr1'] = [dist_dcr1_min, dist_dcr1_max]
    conf['dist_dcr3'] = [dist_dcr3_min, dist_dcr3_max]
    conf['dist_ecsf'] = [dist_ecsf_min, dist_ecsf_max]
    conf['dist_ecu'] = [dist_ecu_min, dist_ecu_max]
    conf['dist_ecv'] = [dist_ecv_min, dist_ecv_max]
    conf['dist_ecw'] = [dist_ecw_min, dist_ecw_max]
    conf['dist_ec_edep'] = [dist_ec_edep_min, dist_ec_edep_max]
    conf['dist_vz'] = [dist_vz_min, dist_vz_max]
    conf['missing_mass'] = [missing_mass_min, missing_mass_max]
    conf['p_mes'] = [p_mes_min, p_mes_max]

    data_filter = utils.build_filter(data, conf)
    df = utils.build_dataframe(data, data_filter)

    # Number of result rows per sector: one per axis, per bin, times 12
    # (presumably the number of phi points per kinematic bin).
    npoints = len(config['axes']) * config['n_bins'] * 12

    # `bins` is not defined in this excerpt; assume the same quantile
    # binning used by the other examples, built from the filtered dataframe.
    bins = setup_binning(config, df)

    sector_values = np.zeros(shape=(npoints, 6))
    sector_errors = np.zeros(shape=(npoints, 6))
    for i in range(1, 7):
        sector_data = df[df['sector'] == i]
        sector_result = utils.get_results(sector_data, bins, config)
        sector_values[:, i - 1] = sector_result['value'].values
        sector_errors[:, i - 1] = sector_result['stat'].values

    var = np.var(sector_values, axis=1)
    metric = np.exp(-0.5 * np.sum(var))
    return metric
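
The keyword signature of process_par_set (every cut as a bounded min/max pair) and the metric it returns, exp(-0.5 * sum of the sector-to-sector variance), make it suitable as an objective to maximize when tuning cuts. Example #4 consumes a pickled result with a 'params' dictionary; a sketch of producing such a result with the bayes_opt package is below. The package choice, the bounds, and the output file name are assumptions, not part of the original code.

# Sketch: tune two of the cuts by maximizing the sector-agreement metric.
# `data` and `config` are assumed to be already loaded as in the other examples.
import pickle
from functools import partial

from bayes_opt import BayesianOptimization

objective = partial(process_par_set, data, config)  # remaining cuts keep their defaults

optimizer = BayesianOptimization(
    f=objective,
    pbounds={'alpha_min': (0.4, 0.7), 'p_mes_min': (0.2, 0.5)},  # illustrative bounds
    random_state=7,
)
optimizer.maximize(init_points=5, n_iter=25)

# optimizer.max is a dict like {'target': ..., 'params': {...}}, the shape
# that Example #4 reads back from the args.bayes_opt_pars pickle.
with open('bayes_opt_pars.pkl', 'wb') as f:
    pickle.dump(optimizer.max, f)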
Example #4
def process(config_file):

    # Setup logging.
    log = logging.getLogger(__name__)

    start_time = time.time()

    # Load config from file.
    config = utils.load_config(config_file)

    # Load the entire dataset. This should only
    # be done once because it is about 1.5 GB
    # at load time.
    data = utils.load_dataset(config)
    utils.randomize_sector(data)

    # Apply nominal cuts to get the subset of
    # events considered good when using the
    # "best" cut values.  Note that `args` is not a parameter of this
    # function; it is assumed to be a module-level argparse namespace.
    if args.bayes_opt_pars is not None:
        log.info("Using Bayesian Optimized parameters for nominal.")
        with open(args.bayes_opt_pars, 'rb') as f:
            bayes_pars = pickle.load(f)

        params = {str(k): float(v) for k, v in bayes_pars['params'].items()}
        bayes_conf = build_bayesian_optimized_config(**params)
        nominal_filter = utils.build_filter(data, bayes_conf)

    else:
        nominal_filter = utils.build_filter(data)

    nominal_data = utils.build_dataframe(data, nominal_filter)

    # Use quantile binning to get integrated bins
    # for the axes listed in the configuration.
    bins = setup_binning(config, nominal_data)

    # Calculate the results for the nominal subset of data.
    results = {}
    results['nominal'] = utils.get_results(nominal_data, bins, config)

    # Calculate the results for each sector.
    for sector in range(1, 7):
        var_time = time.time()
        log.info('Doing sector {}'.format(sector))

        sector_data = nominal_data[nominal_data['sector'] == sector]
        sect_result = utils.get_results(sector_data, bins, config)

        elapsed_time = time.time() - var_time
        log.info('Elapsed time %.3f' % elapsed_time)

        output_filename = '{}phi/sector_{}.csv'.format(
            config['database_path'], sector)
        sect_result.to_csv(output_filename, index=False)

    del nominal_data

    # Define variations to consider.  These
    # are the systematics that are applied.
    variations = load_variations(config['variation_file'])
    for par in variations.keys():
        results[par] = {}

        for index in variations[par].keys():

            var_time = time.time()
            log.info(
                'Doing %.3f < %s < %.3f' %
                (variations[par][index][0], par, variations[par][index][1]))

            # get these cut values
            temp_dict = {}
            temp_dict[par] = variations[par][index]

            # get data
            temp_filter = utils.build_filter(data, temp_dict)
            temp_data = utils.build_dataframe(data, temp_filter)
            results[par][index] = utils.get_results(temp_data, bins, config)
            del temp_data

            end_var_time = time.time() - var_time
            log.info('Elapsed time %.3f' % end_var_time)

    # Using all variations, systematic
    # uncertainties are added to the dataframe.
    systematic_sources = assign_systematics(results)
    with open(config['systematics_file'], 'wb') as outputfile:
        pickle.dump(systematic_sources, outputfile)
    #pickle.dump(systematic_sources, config['systematics_file'])

    # Write results to file.
    results['nominal'].to_csv(config['output_filename'], index=False)

    # Write other results too.
    dont_write = ['sector_{}'.format(s) for s in range(1, 7)]
    dont_write.append('nominal')
    for key in results.keys():
        if key not in dont_write:
            for conf in results[key]:
                output_filename = '{}phi/variation_{}_{}.csv'.format(
                    config['database_path'], key, conf)
                results[key][conf].to_csv(output_filename, index=False)

    exe_time = time.time() - start_time
    log.info('Finished execution in %.3f seconds.' % exe_time)
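
Example #4 indexes the variations as variations[par][index] -> [low, high], so the file read by load_variations must be a two-level mapping from cut parameter to variation index to a lower/upper pair. A minimal sketch of writing such a file is below; the parameter names match cuts used in the other examples, but the indices and values are illustrative.

# Illustrative structure for the variations file consumed by load_variations().
import json

variations = {
    'missing_mass': {'0': [0.0, 4.5], '1': [0.0, 5.5]},
    'p_mes': {'0': [0.30, 1.8], '1': [0.40, 2.0]},
}

with open('variations.json', 'w') as varfile:
    json.dump(variations, varfile, indent=2)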