def process(config_file, binfile):
    # Setup logging.
    log = logging.getLogger(__name__)
    start_time = time.time()

    # Load config from file.
    config = utils.load_config(config_file)

    # Nominal cut ranges.
    nominal_conf = {}
    nominal_conf['alpha'] = [0.55, 1.0]
    nominal_conf['missing_mass'] = [0.0, 5.0]
    nominal_conf['p_mes'] = [0.35, 2.0]

    # Load the entire dataset; this should only be
    # done once because it's 1.5 GB at load time.
    data = utils.load_dataset(config)

    # Load the bin definitions from file if provided,
    # otherwise build them from the data.
    if binfile is not None:
        with open(binfile, 'rb') as bfile:
            bins = pickle.load(bfile)
    else:
        bins = setup_binning(config, data)

    kin_limits = find_kinematic_limits_in_bins(data, bins)
    kin_limits.to_csv('kinematic_limits_mc.csv', index=False)
def process(config_file, samples):
    # Setup logging.
    log = logging.getLogger(__name__)
    start_time = time.time()

    # Load config from file.
    config = utils.load_config(config_file)

    # Load the entire dataset; this should only be
    # done once because it's 1.5 GB at load time.
    data = utils.load_dataset(config)

    # Apply nominal cuts to get the subset of events
    # that I consider good when using the "best" cut values.
    nominal_filter = utils.build_filter(data)
    nominal_data = utils.build_dataframe(data, nominal_filter)

    # Randomize the sectors to test if we can
    # at least get the same answer.
    utils.randomize_sector(data)

    varfile = os.path.dirname(__file__) + '/../../variations.json'
    variations = load_variations(varfile)

    # Use quantile binning to get integrated bins
    # for the axes listed in the configuration.
    bins = setup_binning(config, nominal_data)

    # Calculate the results for the nominal subset of data.
    results = {}
    results['nominal'] = utils.get_results(nominal_data, bins, config)

    # Calculate the results for each sector, drawing
    # `samples` random cut configurations per sector.
    for sector in range(1, 7):
        sector_data = data[data['sector'] == sector]

        for imc in range(samples):
            var_time = time.time()
            log.info('Doing sector {}'.format(sector))

            random_filter = utils.get_random_config(sector_data, variations)
            random_data = utils.build_dataframe(sector_data, random_filter)
            sect_result = utils.get_results(random_data, bins, config)

            elapsed_time = time.time() - var_time
            log.info('Elapsed time %.3f' % elapsed_time)

            output_filename = str(config['database_path'] + 'phi/random/sector_' +
                                  str(sector) + '_{}.csv'.format(imc))
            sect_result.to_csv(output_filename, index=False)

    exe_time = time.time() - start_time
    log.info('Finished execution in %.3f seconds.' % exe_time)
def process(config_file):
    # Setup logging.
    log = logging.getLogger(__name__)
    start_time = time.time()

    # Load config from file.
    config = utils.load_config(config_file)

    # Nominal cut ranges (only p_mes is active here).
    nominal_conf = {}
    #nominal_conf['alpha'] = [0.55, 1.0]
    #nominal_conf['missing_mass'] = [0.0, 5.0]
    nominal_conf['p_mes'] = [0.35, 1.8]

    # Load the entire dataset; this should only be
    # done once because it's 1.5 GB at load time.
    data = utils.load_dataset(config)
    #data = data.dropna(how='any')

    # Print a summary of the loaded columns.
    data.info()

    # Applying nominal cuts to get the subset
    # of events that I consider good when
    # using the "best" cut values.
    #nominal_filter = utils.build_filter(data, nominal_conf)
    #nominal_data = utils.build_dataframe(data, nominal_filter)

    # Use quantile binning to get integrated bins
    # for the axes listed in the configuration.
    #bins = setup_binning(config, nominal_data)
    bins = setup_binning(config, data)

    with open('binning_mc.pkl', 'wb') as binf:
        pickle.dump(bins, binf)

    #kin_limits = find_kinematic_limits_in_bins(data, bins)
    #kin_limits.to_csv('kinematic_limits_mc.csv', index=False)

    # Calculate the results for the nominal subset of data.
    results = utils.get_results(data, bins, config)
    results.to_csv(config['output_filename'], index=False)
    'dist_ecv_min': (-1.1, -0.9),
    'dist_ecv_max': (0.9, 1.1),
    'dist_ecw_min': (-1.1, -0.9),
    'dist_ecw_max': (0.9, 1.1),
    'dist_ec_edep_min': (-1.1, -0.9),
    'dist_ec_edep_max': (0.9, 1.1),
    'dist_vz_min': (-1.1, -0.9),
    'dist_vz_max': (0.9, 1.1),
    'missing_mass_min': (0.0, 1.75),
    'p_mes_min': (0.3, 0.4),
    'p_mes_max': (1.6, 1.8)
}

# Load the configuration file and the entire dataset (once).
config = utils.load_config(args.config)
data = utils.load_dataset(config)

# Nominal data to get binning.
nominal_filter = utils.build_filter(data)
nominal_data = utils.build_dataframe(data, nominal_filter)
bins = setup_binning(config, nominal_data)

# Optimize the cut parameters over the bounds above, maximizing
# the objective returned by process_par_set.
objective_fn = partial(process_par_set, data=data, config=config)
opt = BayesianOptimization(f=objective_fn, pbounds=parameter_bounds,
                           random_state=1)
opt.maximize(init_points=args.init_points, n_iter=args.n_iter)
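# Sketch, not part of the original script: the process() variant below reads a
# pickled dict via args.bayes_opt_pars and unpacks bayes_pars['params'], so the
# best point found here could be saved in that shape. bayes_opt exposes the best
# result as opt.max, a dict of the form {'target': ..., 'params': {...}}. The
# output path is hypothetical, and pickle is assumed to be imported.
with open('bayes_opt_pars.pkl', 'wb') as parfile:
    pickle.dump(opt.max, parfile)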
def process(config_file):
    # Setup logging.
    log = logging.getLogger(__name__)
    start_time = time.time()

    # Load config from file.
    config = utils.load_config(config_file)

    # Load the entire dataset; this should only be
    # done once because it's 1.5 GB at load time.
    data = utils.load_dataset(config)
    utils.randomize_sector(data)

    # Apply nominal cuts to get the subset of events
    # that I consider good when using the "best" cut values.
    if args.bayes_opt_pars is not None:
        log.info("Using Bayesian Optimized parameters for nominal.")
        with open(args.bayes_opt_pars, 'rb') as f:
            bayes_pars = pickle.load(f)

        params = {str(k): float(v) for k, v in bayes_pars['params'].items()}
        bayes_conf = build_bayesian_optimized_config(**params)
        nominal_filter = utils.build_filter(data, bayes_conf)
    else:
        nominal_filter = utils.build_filter(data)

    nominal_data = utils.build_dataframe(data, nominal_filter)

    # Use quantile binning to get integrated bins
    # for the axes listed in the configuration.
    bins = setup_binning(config, nominal_data)

    # Calculate the results for the nominal subset of data.
    results = {}
    results['nominal'] = utils.get_results(nominal_data, bins, config)

    # Calculate the results for each sector.
    for sector in range(1, 7):
        var_time = time.time()
        log.info('Doing sector {}'.format(sector))

        sector_data = nominal_data[nominal_data['sector'] == sector]
        sect_result = utils.get_results(sector_data, bins, config)

        elapsed_time = time.time() - var_time
        log.info('Elapsed time %.3f' % elapsed_time)

        output_filename = str(config['database_path'] + 'phi/sector_' +
                              str(sector) + '.csv')
        sect_result.to_csv(output_filename, index=False)

    del nominal_data

    # Define variations to consider. These
    # are the systematics that are applied.
    variations = load_variations(config['variation_file'])

    for par in variations.keys():
        results[par] = {}

        for index in variations[par].keys():
            var_time = time.time()
            log.info('Doing %.3f < %s < %.3f' %
                     (variations[par][index][0], par, variations[par][index][1]))

            # Get these cut values.
            temp_dict = {}
            temp_dict[par] = variations[par][index]

            # Get data.
            temp_filter = utils.build_filter(data, temp_dict)
            temp_data = utils.build_dataframe(data, temp_filter)

            results[par][index] = utils.get_results(temp_data, bins, config)
            del temp_data

            end_var_time = time.time() - var_time
            log.info('Elapsed time %.3f' % end_var_time)

    # Using all variations, systematic
    # uncertainties are added to the dataframe.
    systematic_sources = assign_systematics(results)

    with open(config['systematics_file'], 'wb') as outputfile:
        pickle.dump(systematic_sources, outputfile)

    # Write results to file.
    results['nominal'].to_csv(config['output_filename'], index=False)

    # Write the variation results too, skipping the nominal
    # and per-sector entries already written above.
    dont_write = ['sector_{}'.format(s) for s in range(1, 7)]
    dont_write.append('nominal')

    for key in results.keys():
        if key not in dont_write:
            for conf in results[key]:
                output_filename = str(config['database_path'] + 'phi/variation_' +
                                      key + '_' + str(conf) + '.csv')
                results[key][conf].to_csv(output_filename, index=False)

    exe_time = time.time() - start_time
    log.info('Finished execution in %.3f seconds.' % exe_time)