# NOTE: these excerpts are taken from several standalone scripts and assume
# module-level imports (sys, os, math, re, shutil, numpy as np, pandas as pd,
# subprocess.check_output) as well as the project-local helpers they use
# (ConfigFileHandler, ConfigFileUtils, ModelCollectionConfigFileHandler,
# BDTscorer, Generator, BayesianOptimization and the sklearn GP kernels,
# among others).

def main():
    if len(sys.argv) != 4:
        print "Error: exactly 3 arguments are required"
        return

    config_file_in = sys.argv[1]
    config_file_out = sys.argv[2]
    masspoint = float(sys.argv[3])

    confhandler = ConfigFileHandler()
    confhandler.load_configuration(config_file_in)
    confhandler.set_field('global', 'mass_point', str(masspoint))
    confhandler.save_configuration(config_file_out)
def save_params(out_path, params, evalcnt):
    confhandler = ConfigFileHandler()
    if os.path.exists(out_path):
        confhandler.load_configuration(out_path)

    section_name = 'evaluation_' + str(evalcnt)
    confhandler.new_section(section_name)
    for key, value in params.iteritems():
        confhandler.set_field(section_name, key, str(value))

    confhandler.save_configuration(out_path)
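# Hypothetical usage sketch for save_params (not part of the original
# scripts; the file name and parameter values are invented): a call like this
# appends a section "evaluation_3" holding the two fields to params.txt,
# creating the file first if it does not yet exist.
def _example_save_params():
    save_params("params.txt", {"WP_VBF2j": 0.42, "WP_WHh": 0.17}, 3)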
def punzi_target(WP_VBF2j, WP_VBF1j, WP_WHh, WP_ZHh):
    global evalcnt

    bin_dir = "/home/llr/cms/wind/cmssw/CMSSW_9_4_2/bin/slc6_amd64_gcc630/"
    cost_function_evaluator = "run_WP_evaluator"

    output = check_output([
        bin_dir + cost_function_evaluator, ref_dir, out_dir, str(lumi),
        str(WP_VBF2j), str(WP_VBF1j), str(WP_WHh), str(WP_ZHh)
    ])

    costval = 0.0
    for line in output.split('\n'):
        if "cost = " in line:
            costval = float(line.replace("cost = ", ""))
            break

    if math.isnan(costval):
        costval = -8.75

    # save the sampled point so that it can later be used as an exploration
    # point (should the need arise)
    confhandler = ConfigFileHandler()
    evaluations_path = out_dir + 'evaluations.txt'
    if os.path.exists(evaluations_path):
        confhandler.load_configuration(evaluations_path)

    print "saving evaluation for iteration " + str(evalcnt)
    section_name = 'evaluation_' + str(evalcnt)
    confhandler.new_section(section_name)
    confhandler.set_field(section_name, 'cost', str(costval))
    confhandler.set_field(section_name, 'WP_VBF2j', str(WP_VBF2j))
    confhandler.set_field(section_name, 'WP_VBF1j', str(WP_VBF1j))
    confhandler.set_field(section_name, 'WP_WHh', str(WP_WHh))
    confhandler.set_field(section_name, 'WP_ZHh', str(WP_ZHh))
    confhandler.save_configuration(evaluations_path)

    evalcnt += 1
    return costval
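# Illustration of the stdout parsing used in punzi_target above, run on a
# hypothetical evaluator output (the log lines are invented): the scan keeps
# the value from the first "cost = " line and stops there.
def _example_parse_cost():
    output = "reading workspace...\ncost = 3.14\ndone\n"
    costval = 0.0
    for line in output.split('\n'):
        if "cost = " in line:
            costval = float(line.replace("cost = ", ""))
            break
    assert costval == 3.14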
def save_priors(out_path, priors):
    # combine all the results into the final prior and save it again
    confhandler = ConfigFileHandler()
    confhandler.config.optionxform = str  # preserve the case of the keys
    confhandler.new_section('Priors')
    confhandler.set_field('Priors', 'VBF_prior', str(1.0))
    confhandler.set_field('Priors', 'ggH_prior', str(priors["ggh_prior"]))
    confhandler.set_field('Priors', 'ttHlept_prior', str(priors["tthlept_prior"]))
    confhandler.set_field('Priors', 'ttHhadr_prior', str(priors["tthhadr_prior"]))
    confhandler.set_field('Priors', 'ZHlept_prior', str(priors["zhlept_prior"]))
    confhandler.set_field('Priors', 'WHlept_prior', str(priors["whlept_prior"]))
    confhandler.set_field('Priors', 'ZHhadr_prior', str(priors["zhhadr_prior"]))
    confhandler.set_field('Priors', 'WHhadr_prior', str(priors["whhadr_prior"]))
    confhandler.set_field('Priors', 'ZHMET_prior', str(priors["zhmet_prior"]))
    confhandler.set_field('Priors', "ZX_prior", str(priors["bkg_prior"]))
    confhandler.set_field('Priors', "qq_prior", str(priors["qq_prior"]))
    confhandler.save_configuration(out_path)
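# Hypothetical input for save_priors, spelling out the dictionary keys it
# expects (read off the calls above); all values are placeholders and the
# output path is invented.
def _example_save_priors():
    priors = {"ggh_prior": 1.0, "tthlept_prior": 1.0, "tthhadr_prior": 1.0,
              "zhlept_prior": 1.0, "whlept_prior": 1.0, "zhhadr_prior": 1.0,
              "whhadr_prior": 1.0, "zhmet_prior": 1.0, "bkg_prior": 1.0,
              "qq_prior": 1.0}
    save_priors("priors.txt", priors)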
def main():

    def append_variables_raw(confhandler, impdict):
        section_name = impdict["discriminant"]
        confhandler.new_section(section_name)

        periodic_inputs = []
        nonperiodic_inputs = []
        for key, val in impdict.iteritems():
            if key != "discriminant":  # 'is not' would compare identity, not equality
                if "phi" in key or "Phi" in key or "xi" in key or "xistar" in key:
                    periodic_inputs.append(key)
                else:
                    nonperiodic_inputs.append(key)

        confhandler.set_field(section_name, "nonperiodic_columns",
                              ConfigFileUtils.serialize_list(nonperiodic_inputs, lambda x: x))
        confhandler.set_field(section_name, "periodic_columns",
                              ConfigFileUtils.serialize_list(periodic_inputs, lambda x: x))

    def convert_varname(raw):
        raw = raw.replace('(', '[')
        raw = raw.replace(')', ']')
        return raw

    def select_input_features_cumulative(H1_stream, H0_stream, discriminant_name,
                                         scalar_branches, list_branches, list_pt_limits,
                                         confhandler, df_scores, cumulative_threshold=0.99):
        print "using cumulative threshold = " + str(cumulative_threshold)

        # temporary fix: for any discriminant that involves Z+X, do not use
        # PF-MET (the data / MC comparison would otherwise be potentially biased)
        if "ZX" in model.name:
            print "blocking PFMET for model " + model.name
            scalar_branches.remove("PFMET")

        print "will select input features from: " + str(scalar_branches + list_branches)

        implist = scorer.get_sorted_feature_importance_list(H1_stream, H0_stream,
                                                            scalar_branches,
                                                            list_branches, list_pt_limits)
        print "implist: " + str(implist)

        # iterate through the sorted list that has been returned and keep only
        # the highest-ranked variables, up to the cumulative threshold
        running_sum = 0
        impdict = {}
        for key, val in implist:
            if running_sum < cumulative_threshold:
                impdict[convert_varname(key)] = [val]
                running_sum += val

        impdict["discriminant"] = discriminant_name
        print "impdict: " + str(impdict)

        append_variables_raw(confhandler, impdict)
        df = df_scores.append(pd.DataFrame.from_dict(impdict))
        print str(implist)
        return df

    if len(sys.argv) != 6:
        print "Error: exactly 5 arguments are required"
        return

    out_dir = sys.argv[1]
    campaign_name = sys.argv[2]
    MC_path = sys.argv[3]
    usemela = sys.argv[4]
    threshold = float(sys.argv[5])

    # input variables that are stored as lists
    list_branches = ["Jet", "ExtraLep"]

    # pt thresholds for these lists: jets are only used if their pt > 30 GeV,
    # no restriction is placed on leptons
    list_pt_limits = [30, 0]

    # scalar (i.e. non-list) input variables
    production_branches = ["PFMET", "nCleanedJetsPt30", "nCleanedJetsPt30BTagged_bTagSF",
                           "nExtraLep", "ZZMass_masked", "nExtraZ", "Z1Mass", "Z2Mass",
                           "Z1Pt", "Z2Pt", "ZZMassErr", "ZZPt", "ZZEta", "ZZPhi",
                           "Z1Flav", "Z2Flav"]
    decay_branches = ["costhetastar", "helphi", "helcosthetaZ1", "helcosthetaZ2",
                      "phistarZ1", "xi", "xistar"]

    print "using cumulative_threshold = " + str(threshold)

    if "y" in usemela:
        MELA_branches = ["D_bkg_ME", "D_VBF2j_ggH_ME", "D_VBF1j_ggH_ME",
                         "D_WHh_ggH_ME", "D_ZHh_ggH_ME", "D_WHh_ZHh_ME",
                         "D_VBF2j_WHh_ME", "D_VBF2j_ZHh_ME"]
    else:
        MELA_branches = []

    scorer = BDTscorer(MC_path)
    confhandler = ConfigFileHandler()
    df_fscores = pd.DataFrame()

    # create a model collection with default input variables and default
    # hyperparameters, just to get a list of all the models and their training
    # data files
    mcolls = SimpleModelFactoryDynamic.GenerateSimpleModelCollections(
        MC_path, input_config_file=None, hyperparam_config_file=None)

    # iterate over all models that are contained in the list of model collections
    for mcoll in mcolls:
        for model_name in mcoll.model_dict.keys():
            model = mcoll.model_dict[model_name]
            pre = mcoll.preprocessor_dict[model_name]

            H1_stream = {}
            H0_stream = {}

            print "=========================================================================="
            print "selecting input variables for model " + model_name

            for key, val in mcoll.H1_stream.iteritems():
                H1_stream[key] = lambda row, val=val, pre=pre: val(row) and pre.cuts(row)
                print "adding preprocessor cuts on top for " + key + ": " + pre.cuts_s

            for key, val in mcoll.H0_stream.iteritems():
                H0_stream[key] = lambda row, val=val, pre=pre: val(row) and pre.cuts(row)
                print "adding preprocessor cuts on top for " + key + ": " + pre.cuts_s

            df_fscores = select_input_features_cumulative(
                H1_stream, H0_stream, model_name,
                production_branches + decay_branches + MELA_branches,
                list_branches, list_pt_limits, confhandler, df_fscores, threshold)

            print "=========================================================================="

    confhandler.save_configuration(os.path.join(out_dir, campaign_name + "_inputs.conf"))
    df_fscores.to_csv(os.path.join(out_dir, campaign_name + "_fscore_table_bkg.csv"))
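# Worked illustration of the cumulative-importance cut performed in
# select_input_features_cumulative above (feature names and importance values
# are invented): features are taken in descending order of importance until
# the running sum reaches the threshold.
def _example_cumulative_cut():
    implist = [("ZZPt", 0.5), ("Z1Mass", 0.3), ("xi", 0.15),
               ("PFMET", 0.04), ("nExtraLep", 0.01)]
    cumulative_threshold = 0.9
    running_sum = 0.0
    selected = []
    for key, val in implist:
        if running_sum < cumulative_threshold:
            selected.append(key)
            running_sum += val
    # "xi" is still admitted (0.8 < 0.9), but "PFMET" is not (0.95 >= 0.9)
    assert selected == ["ZZPt", "Z1Mass", "xi"]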
def run_bayesian_optimization(name, eval_file, target, var_ranges, init_points,
                              max_iterations, patience, alpha):
    global evalcnt
    evalcnt = 0

    print "now optimizing the following variables: " + str(var_ranges)
    print "alpha = " + str(alpha)

    # change the kernel to have a length scale more appropriate to this
    # function; alpha corresponds to the value added to the diagonal elements
    # of the covariance matrix, i.e. the approximate noise level in the
    # observations
    gp_params = {'kernel': ConstantKernel(1.0, (1e-8, 1e2)) *
                 Matern(length_scale=0.01, length_scale_bounds=(1e-5, 1e5), nu=1.5),
                 'alpha': alpha}

    bo = BayesianOptimization(target, var_ranges)

    # check if a file with previous evaluations of this utility function
    # already exists; if so, use it for initialization
    evaluations_path = os.path.join(out_dir, eval_file)
    if os.path.exists(evaluations_path):
        confhandler = ConfigFileHandler()
        confhandler.load_configuration(evaluations_path)

        init_dict = {}
        for section_name in confhandler.get_sections():
            cur_section = confhandler.get_section(section_name)
            for key, value in cur_section.iteritems():
                # only take those variables that are actually relevant
                if key in var_ranges or key == "target":
                    if key not in init_dict:
                        init_dict[key] = []
                    init_dict[key].append(float(value))

        evalcnt = int(re.sub('evaluation_', '', confhandler.get_sections()[-1])) + 1
        print "resuming " + name + " at evaluation " + str(evalcnt)

        init_points_loaded = len(init_dict["target"])
        print "found " + str(init_points_loaded) + " initialization points: " + str(init_dict)

        bo.initialize(init_dict)
        bo.maximize(init_points=max(0, init_points - init_points_loaded), n_iter=0,
                    acq='poi', kappa=3, xi=xi_scheduler(0.0, max_iterations), **gp_params)
        print "initialization done"
    else:
        bo.maximize(init_points=init_points, n_iter=0, acq='poi', kappa=3,
                    xi=xi_scheduler(0.0, max_iterations), **gp_params)

    cur_iteration = 1
    patience_cnt = 0
    best_cost = -7.0

    for it in range(max_iterations):
        cur_xi = xi_scheduler(cur_iteration, max_iterations)
        print "cur_iteration = " + str(cur_iteration) + ", using xi = " + str(cur_xi)
        cur_iteration += 1

        bo.maximize(init_points=0, n_iter=1, acq='poi', kappa=3, xi=cur_xi, **gp_params)

        # evaluate the current maximum
        curval = bo.res['max']
        cost = curval['max_val']
        curparams = curval['max_params']

        confhandler = ConfigFileHandler()
        confhandler.config.optionxform = str
        confhandler.new_section(name)
        confhandler.set_field(name, 'target', str(cost))
        for key, val in curparams.iteritems():
            confhandler.set_field(name, key, str(val))
        confhandler.save_configuration(os.path.join(out_dir, name + '.txt'))

        # check if it is time to stop this optimization
        if cost > best_cost:
            best_cost = cost
            patience_cnt = 0

        patience_cnt += 1
        if patience_cnt > patience:
            break

    return curparams
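# xi_scheduler is called above (and, in a one-argument form, in the scripts
# further below) but is not defined anywhere in this section. The following is
# a minimal sketch consistent with the two-argument call, assuming the intent
# is to decay the exploration parameter xi from exploratory towards greedy
# over the run; the actual schedule and its constants are not shown here.
def xi_scheduler(iteration, max_iterations):
    frac = min(float(iteration) / float(max_iterations), 1.0)
    return 0.2 * (1.0 - frac) + 0.01 * frac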
def main():

    def _compute_class_weights_lengths(gen, preprocessor, MC_weighting=False):
        # determine the actual size of the available dataset and adjust the
        # sample weights correspondingly
        H1_data = gen.H1_collection.get_data(Config.branches, 0.0, 1.0)
        H0_data = gen.H0_collection.get_data(Config.branches, 0.0, 1.0)

        H1_length = len(preprocessor.process(H1_data).values()[0])
        H1_indices = preprocessor.get_last_indices()
        H0_length = len(preprocessor.process(H0_data).values()[0])
        H0_indices = preprocessor.get_last_indices()

        print "H1_length = " + str(H1_length)
        print "H0_length = " + str(H0_length)

        # if per-sample weighting is enabled, also set up the normalization of
        # the event weights
        if MC_weighting:
            H1_weight_sum = np.sum(np.maximum(np.array(H1_data["training_weight"][H1_indices]), 0.0))
            H0_weight_sum = np.sum(np.maximum(np.array(H0_data["training_weight"][H0_indices]), 0.0))
            H1_class_weight = float(H0_length) / H1_weight_sum
            H0_class_weight = float(H1_length) / H0_weight_sum
        else:
            # H1_class_weight = 1.0
            # H0_class_weight = float(H1_length) / float(H0_length)
            H1_class_weight = 1.0 + float(H0_length) / float(H1_length)
            H0_class_weight = 1.0 + float(H1_length) / float(H0_length)

        return H1_class_weight, H0_class_weight, H1_length, H0_length

    # this computes low-level performance metrics for a model collection, i.e.
    # the mean-squared error computed on the validation dataset for each
    # discriminant. Since the validation datasets are held constant, this is
    # an easy way to directly compare different models.
    setting_dir = sys.argv[1]
    training_dir = sys.argv[2]
    out_dir = sys.argv[3]

    # first, read in the trained ModelCollection
    mconfhandler = ModelCollectionConfigFileHandler()
    mconfhandler.load_configuration(setting_dir + "settings.conf")
    mcolls = mconfhandler.GetModelCollection(weightpath=training_dir)

    confhandler = ConfigFileHandler()
    out_path = out_dir + "model_benchmark.txt"

    # for the evaluation, proceed in the same way as for training, but
    # evaluate the models on the validation data instead of training them on
    # the training data
    for mcoll in mcolls:
        models, preprocessors, settings = (mcoll.get_models(),
                                           mcoll.get_preprocessors(),
                                           mcoll.get_settings())

        for cur_model, cur_preprocessor, cur_settings in zip(models, preprocessors, settings):
            val_gen = Generator(mcoll.H1_stream, mcoll.H0_stream, Config.branches,
                                preprocessor=cur_preprocessor, chunks=1,
                                MC_weighting=False)
            val_gen.setup_validation_data()

            val_H1_classweight, val_H0_classweight, H1_length, H0_length = \
                _compute_class_weights_lengths(val_gen, cur_preprocessor, False)

            print val_H1_classweight
            print val_H0_classweight
            print H1_length
            print H0_length

            val_gen.set_H1_weight(val_H1_classweight)
            val_gen.set_H0_weight(val_H0_classweight)
            val_gen.set_minimum_length(0)

            cur_model.get_keras_model().compile(optimizer=optimizers.Adam(),
                                                loss="mean_squared_error",
                                                metrics=["binary_accuracy"])
            res = cur_model.get_keras_model().evaluate_generator(
                val_gen.preprocessed_generator(), steps=1)

            print "statistics for model " + cur_model.name
            print res
            print cur_model.get_keras_model().metrics_names

            confhandler.new_section(cur_model.name)
            confhandler.set_field(cur_model.name, 'H0_val_length', str(H0_length))
            confhandler.set_field(cur_model.name, 'H1_val_length', str(H1_length))
            confhandler.set_field(cur_model.name, 'val_loss', str(res[0]))

    confhandler.save_configuration(out_path)
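# Worked example of the class-weight formula used above in the branch without
# per-sample MC weighting (the dataset sizes are invented): the rarer
# hypothesis is upweighted relative to the more abundant one.
def _example_class_weights():
    H1_length, H0_length = 1000.0, 3000.0
    H1_class_weight = 1.0 + H0_length / H1_length  # = 4.0
    H0_class_weight = 1.0 + H1_length / H0_length  # ~ 1.33
    return H1_class_weight, H0_class_weight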
def main():
    # runs to check for (good) models. The first run passed is taken as the
    # reference run from which the available models are read; all other runs
    # are expected to follow the same structure.
    input_runs = []

    print "==================================================================="
    print "looking for models in the following runs:"
    for campaign_dir in sys.argv[1:-2]:
        for run_dir in next(os.walk(campaign_dir))[1]:
            if not "bin" in run_dir:
                run_path = os.path.join(campaign_dir, run_dir)
                print run_path
                input_runs.append(run_path)
    print "==================================================================="

    # output training campaign: a combination of the models found in the
    # campaigns listed above, chosen such that the overall performance is
    # optimized
    output_run = os.path.join(sys.argv[-1], "optimized")

    # where the configuration file for the hyperparameter settings should be stored
    hyperparam_output = os.path.join(output_run, "../hyperparameters.conf")

    os.makedirs(output_run)

    # load the available model names
    reference_run = input_runs[0]
    available_mcolls = os.walk(os.path.join(reference_run, "training")).next()[1]

    mcolls_winning = []

    for mcoll in available_mcolls:
        models = os.walk(os.path.join(reference_run, "training", mcoll)).next()[1]

        # load a representative version of the current model collection...
        mconfhandler = ModelCollectionConfigFileHandler()
        mconfhandler.load_configuration(
            os.path.join(reference_run, "settings_training", mcoll, "settings.conf"))
        mcoll_template = mconfhandler.GetModelCollection()[0]

        # ... but strip away all the actual model components
        mcoll_template.model_dict = {}
        mcoll_template.preprocessor_dict = {}
        mcoll_template.settings_dict = {}

        for model in models:
            # compare this model across the different runs
            losses = [get_loss(run, mcoll, model) for run in input_runs]
            winner = np.argmin(losses)
            winning_run = input_runs[winner]

            # copy the winning model into the output run
            shutil.copytree(os.path.join(winning_run, "training", mcoll, model),
                            os.path.join(output_run, "training", mcoll, model))

            print "--------------------------------------------"
            print " take " + model + " from " + winning_run
            print "--------------------------------------------"

            # load the winning model to keep track of its settings
            mconfhandler = ModelCollectionConfigFileHandler()
            mconfhandler.load_configuration(
                os.path.join(winning_run, "settings_training", mcoll, "settings.conf"))
            mcoll_winning = mconfhandler.GetModelCollection()[0]

            # then pull the winning model over into the template
            winning_model = mcoll_winning.model_dict[model]
            winning_preprocessor = mcoll_winning.preprocessor_dict[model]
            winning_settings = mcoll_winning.settings_dict[model]

            mcoll_template.add_model(winning_preprocessor, winning_model, winning_settings)

        mcolls_winning.append(mcoll_template)

    # now save the put-together config file into the output run as well
    mconfhandler = ModelCollectionConfigFileHandler()
    mconfhandler.ToConfiguration(mcolls_winning)
    mconfhandler.save_configuration(os.path.join(output_run, "settings.conf"))

    # now distribute the training settings again, as usual
    distribute_training_settings(output_run + '/')

    # now create the hyperparameter config file for each model, taken from the winners
    hp_confhandler = ConfigFileHandler()
    for mcoll in mcolls_winning:
        for model_name, model in mcoll.model_dict.iteritems():
            hp_confhandler.new_section(model_name)
            hp_confhandler.set_field(model_name, "hyperparameters",
                                     ConfigFileUtils.serialize_dict(model.hyperparameters,
                                                                    lambda x: str(x)))
    hp_confhandler.save_configuration(hyperparam_output)

    print "==================================================================="
    print "hyperparameter configuration file written to " + hyperparam_output
    print "==================================================================="
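# Sketch of the per-model selection performed above (the loss values are
# invented): each model is compared across all input runs, and the run with
# the smallest validation loss provides the copy that goes into the output
# campaign.
def _example_pick_winner():
    import numpy as np
    losses = [0.0123, 0.0098, 0.0141]  # one entry per input run
    winner = np.argmin(losses)         # -> 1, i.e. the second run wins
    return winner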
def punzi_target(ggH_prior, WHhadr_prior, ZHhadr_prior, WHlept_prior,
                 ZHlept_prior, ZHMET_prior, ttHhadr_prior, ttHlept_prior):
    global evalcnt

    bin_dir = "/home/llr/cms/wind/cmssw/CMSSW_9_4_2/bin/slc6_amd64_gcc630/"
    cost_function_evaluator = "run_prior_evaluator"

    output = check_output([
        bin_dir + cost_function_evaluator, run_dir, out_dir, engine,
        str(ggH_prior), str(WHhadr_prior), str(ZHhadr_prior), str(WHlept_prior),
        str(ZHlept_prior), str(ZHMET_prior), str(ttHhadr_prior), str(ttHlept_prior)
    ])

    costval = 0.0
    for line in output.split('\n'):
        if "cost = " in line:
            costval = float(line.replace("cost = ", ""))
            break

    if math.isnan(costval):
        costval = -8.75

    # add a regularization term that prefers default priors (i.e. close to 1.0)
    reg_term = 1.0 / 8.0 * (
        (ggH_prior - 1.0)**2.0 + (WHhadr_prior - 1.0)**2.0 +
        (ZHhadr_prior - 1.0)**2.0 + (WHlept_prior - 1.0)**2.0 +
        (ZHlept_prior - 1.0)**2.0 + (ZHMET_prior - 1.0)**2.0 +
        (ttHhadr_prior - 1.0)**2.0 + (ttHlept_prior - 1.0)**2.0)
    costval -= reg_term * lambda_reg

    # save the sampled point so that it can later be used as an exploration
    # point (should the need arise)
    confhandler = ConfigFileHandler()
    evaluations_path = out_dir + 'evaluations.txt'
    if os.path.exists(evaluations_path):
        confhandler.load_configuration(evaluations_path)

    print "saving evaluation for iteration " + str(evalcnt)
    section_name = 'evaluation_' + str(evalcnt)
    confhandler.new_section(section_name)
    confhandler.set_field(section_name, 'cost', str(costval))
    confhandler.set_field(section_name, 'ggH_prior', str(ggH_prior))
    confhandler.set_field(section_name, 'WHhadr_prior', str(WHhadr_prior))
    confhandler.set_field(section_name, 'ZHhadr_prior', str(ZHhadr_prior))
    confhandler.set_field(section_name, 'WHlept_prior', str(WHlept_prior))
    confhandler.set_field(section_name, 'ZHlept_prior', str(ZHlept_prior))
    confhandler.set_field(section_name, 'ZHMET_prior', str(ZHMET_prior))
    confhandler.set_field(section_name, 'ttHhadr_prior', str(ttHhadr_prior))
    confhandler.set_field(section_name, 'ttHlept_prior', str(ttHlept_prior))
    confhandler.save_configuration(evaluations_path)

    evalcnt += 1
    return costval
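# Sanity check of the regularization term above: at the default priors (all
# equal to 1.0) the penalty vanishes, so the raw cost is returned unchanged.
def _example_reg_term():
    default_priors = [1.0] * 8
    reg_term = 1.0 / 8.0 * sum((p - 1.0)**2.0 for p in default_priors)
    assert reg_term == 0.0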
def main():
    global evalcnt

    if len(sys.argv) != 4:
        print "Error: exactly 3 arguments are required"
        return

    run_dir = sys.argv[1]
    out_dir = sys.argv[2]
    engine = sys.argv[3]

    print run_dir
    print out_dir
    print engine

    # punzi_target_2d = lambda WHlept_prior, ZHlept_prior: punzi_target(
    #     ggH_prior_default, WHhadr_prior_default, ZHhadr_prior_default,
    #     WHlept_prior, ZHlept_prior, ZHMET_prior_default,
    #     ttHhadr_prior_default, ttHlept_prior_default)

    def punzi_target(ggH_prior, WHhadr_prior, ZHhadr_prior, WHlept_prior,
                     ZHlept_prior, ZHMET_prior, ttHhadr_prior, ttHlept_prior):
        global evalcnt

        bin_dir = "/home/llr/cms/wind/cmssw/CMSSW_9_4_2/bin/slc6_amd64_gcc630/"
        cost_function_evaluator = "run_prior_evaluator"

        output = check_output([
            bin_dir + cost_function_evaluator, run_dir, out_dir, engine,
            str(ggH_prior), str(WHhadr_prior), str(ZHhadr_prior), str(WHlept_prior),
            str(ZHlept_prior), str(ZHMET_prior), str(ttHhadr_prior), str(ttHlept_prior)
        ])

        costval = 0.0
        for line in output.split('\n'):
            if "cost = " in line:
                costval = float(line.replace("cost = ", ""))
                break

        if math.isnan(costval):
            costval = -8.75

        # add a regularization term that prefers default priors (i.e. close to 1.0)
        reg_term = 1.0 / 8.0 * (
            (ggH_prior - 1.0)**2.0 + (WHhadr_prior - 1.0)**2.0 +
            (ZHhadr_prior - 1.0)**2.0 + (WHlept_prior - 1.0)**2.0 +
            (ZHlept_prior - 1.0)**2.0 + (ZHMET_prior - 1.0)**2.0 +
            (ttHhadr_prior - 1.0)**2.0 + (ttHlept_prior - 1.0)**2.0)
        costval -= reg_term * lambda_reg

        # save the sampled point so that it can later be used as an
        # exploration point (should the need arise)
        confhandler = ConfigFileHandler()
        evaluations_path = out_dir + 'evaluations.txt'
        if os.path.exists(evaluations_path):
            confhandler.load_configuration(evaluations_path)

        print "saving evaluation for iteration " + str(evalcnt)
        section_name = 'evaluation_' + str(evalcnt)
        confhandler.new_section(section_name)
        confhandler.set_field(section_name, 'cost', str(costval))
        confhandler.set_field(section_name, 'ggH_prior', str(ggH_prior))
        confhandler.set_field(section_name, 'WHhadr_prior', str(WHhadr_prior))
        confhandler.set_field(section_name, 'ZHhadr_prior', str(ZHhadr_prior))
        confhandler.set_field(section_name, 'WHlept_prior', str(WHlept_prior))
        confhandler.set_field(section_name, 'ZHlept_prior', str(ZHlept_prior))
        confhandler.set_field(section_name, 'ZHMET_prior', str(ZHMET_prior))
        confhandler.set_field(section_name, 'ttHhadr_prior', str(ttHhadr_prior))
        confhandler.set_field(section_name, 'ttHlept_prior', str(ttHlept_prior))
        confhandler.save_configuration(evaluations_path)

        evalcnt += 1
        return costval

    eps = 1e-1
    delta = 0.2
    bo = BayesianOptimization(
        punzi_target, {
            'ggH_prior': (1.0 - delta, 1.0 + delta),
            'WHhadr_prior': (eps, 1.0),
            'ZHhadr_prior': (eps, 1.0),
            'WHlept_prior': (eps, 1.0),
            'ZHlept_prior': (eps, 1.0),
            'ZHMET_prior': (eps, 1.0),
            'ttHhadr_prior': (eps, 1.0),
            'ttHlept_prior': (eps, 1.0)
        })

    # bo = BayesianOptimization(punzi_target_2d,
    #                           {'WHlept_prior': (eps, WHlept_prior_default + delta),
    #                            'ZHlept_prior': (eps, ZHlept_prior_default + delta)})

    # check if a file with previously evaluated points exists; if so, use them
    # for initialization
    confhandler = ConfigFileHandler()
    evaluations_path = out_dir + 'evaluations.txt'
    if os.path.exists(evaluations_path):
        confhandler.load_configuration(evaluations_path)

        ggH_priors_init = []
        WHhadr_priors_init = []
        ZHhadr_priors_init = []
        WHlept_priors_init = []
        ZHlept_priors_init = []
        ZHMET_priors_init = []
        ttHhadr_priors_init = []
        ttHlept_priors_init = []
        targets_init = []

        for section_name in confhandler.get_sections():
            cur_section = confhandler.get_section(section_name)

            targets_init.append(float(cur_section['cost']))
            ggH_priors_init.append(float(cur_section['ggH_prior']))
            WHhadr_priors_init.append(float(cur_section['WHhadr_prior']))
            ZHhadr_priors_init.append(float(cur_section['ZHhadr_prior']))
            WHlept_priors_init.append(float(cur_section['WHlept_prior']))
            ZHlept_priors_init.append(float(cur_section['ZHlept_prior']))
            ZHMET_priors_init.append(float(cur_section['ZHMET_prior']))
            ttHhadr_priors_init.append(float(cur_section['ttHhadr_prior']))
            ttHlept_priors_init.append(float(cur_section['ttHlept_prior']))

        init_dict = {
            'target': targets_init,
            'ggH_prior': ggH_priors_init,
            'WHhadr_prior': WHhadr_priors_init,
            'ZHhadr_prior': ZHhadr_priors_init,
            'WHlept_prior': WHlept_priors_init,
            'ZHlept_prior': ZHlept_priors_init,
            'ZHMET_prior': ZHMET_priors_init,
            'ttHhadr_prior': ttHhadr_priors_init,
            'ttHlept_prior': ttHlept_priors_init
        }

        evalcnt = int(re.sub('evaluation_', '', confhandler.get_sections()[-1])) + 1
        print "resuming at evaluation " + str(evalcnt)

        bo.initialize(init_dict)
        initialized = True
    else:
        initialized = False

    # change the kernel to have a length scale more appropriate to this
    # function; alpha corresponds to the value added to the diagonal elements
    # of the covariance matrix, i.e. the approximate noise level in the
    # observations
    gp_params = {
        'kernel': 1.0 * Matern(length_scale=0.05, length_scale_bounds=(1e-5, 1e5), nu=1.5),
        'alpha': 1e-1
    }

    # perform the standard initialization and setup
    if initialized:
        bo.maximize(init_points=0, n_iter=0, acq='poi', kappa=3,
                    xi=xi_scheduler(0.0), **gp_params)
    else:
        bo.maximize(init_points=6, n_iter=0, acq='poi', kappa=3,
                    xi=xi_scheduler(0.0), **gp_params)

    cur_iteration = 1
    for it in range(1000):
        cur_iteration += 1
        cur_xi = xi_scheduler(cur_iteration)
        print "using xi = " + str(cur_xi)

        bo.maximize(init_points=6, n_iter=1, acq='poi', kappa=3, xi=cur_xi, **gp_params)

        # evaluate the current maximum
        curval = bo.res['max']
        cost = curval['max_val']
        priors = curval['max_params']

        confhandler = ConfigFileHandler()
        confhandler.config.optionxform = str
        confhandler.new_section('Priors')
        confhandler.set_field('Priors', 'cost', str(cost))
        confhandler.set_field('Priors', 'VBF_prior', str(1.0))
        for key, val in priors.iteritems():
            confhandler.set_field('Priors', key, str(val))

        confhandler.save_configuration(out_dir + 'priors.txt')
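# Sketch of the resume logic used above (the section name is invented): the
# counter of the next evaluation is recovered from the name of the last
# section in evaluations.txt.
def _example_resume_counter():
    import re
    last_section = "evaluation_41"
    evalcnt = int(re.sub('evaluation_', '', last_section)) + 1
    assert evalcnt == 42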
def main():
    global evalcnt

    if len(sys.argv) != 4:
        print "Error: exactly 3 arguments are required"
        return

    ref_dir = sys.argv[1]
    out_dir = sys.argv[2]
    lumi = float(sys.argv[3])

    print ref_dir
    print out_dir
    print lumi

    def punzi_target(WP_VBF2j, WP_VBF1j, WP_WHh, WP_ZHh):
        global evalcnt

        bin_dir = "/home/llr/cms/wind/cmssw/CMSSW_9_4_2/bin/slc6_amd64_gcc630/"
        cost_function_evaluator = "run_WP_evaluator"

        output = check_output([
            bin_dir + cost_function_evaluator, ref_dir, out_dir, str(lumi),
            str(WP_VBF2j), str(WP_VBF1j), str(WP_WHh), str(WP_ZHh)
        ])

        costval = 0.0
        for line in output.split('\n'):
            if "cost = " in line:
                costval = float(line.replace("cost = ", ""))
                break

        if math.isnan(costval):
            costval = -8.75

        # save the sampled point so that it can later be used as an
        # exploration point (should the need arise)
        confhandler = ConfigFileHandler()
        evaluations_path = out_dir + 'evaluations.txt'
        if os.path.exists(evaluations_path):
            confhandler.load_configuration(evaluations_path)

        print "saving evaluation for iteration " + str(evalcnt)
        section_name = 'evaluation_' + str(evalcnt)
        confhandler.new_section(section_name)
        confhandler.set_field(section_name, 'cost', str(costval))
        confhandler.set_field(section_name, 'WP_VBF2j', str(WP_VBF2j))
        confhandler.set_field(section_name, 'WP_VBF1j', str(WP_VBF1j))
        confhandler.set_field(section_name, 'WP_WHh', str(WP_WHh))
        confhandler.set_field(section_name, 'WP_ZHh', str(WP_ZHh))
        confhandler.save_configuration(evaluations_path)

        evalcnt += 1
        return costval

    eps = 1e-3
    delta = 0.2
    bo = BayesianOptimization(
        punzi_target, {
            'WP_VBF2j': (eps, 1.0 - eps),
            'WP_VBF1j': (eps, 1.0 - eps),
            'WP_WHh': (eps, 1.0 - eps),
            'WP_ZHh': (eps, 1.0 - eps)
        })

    # check if a file with previously evaluated points exists; if so, use them
    # for initialization
    confhandler = ConfigFileHandler()
    evaluations_path = out_dir + 'evaluations.txt'
    if os.path.exists(evaluations_path):
        confhandler.load_configuration(evaluations_path)

        targets_init = []
        WP_VBF2j_init = []
        WP_VBF1j_init = []
        WP_WHh_init = []
        WP_ZHh_init = []

        for section_name in confhandler.get_sections():
            cur_section = confhandler.get_section(section_name)

            targets_init.append(float(cur_section['cost']))
            WP_VBF2j_init.append(float(cur_section['WP_VBF2j']))
            WP_VBF1j_init.append(float(cur_section['WP_VBF1j']))
            WP_WHh_init.append(float(cur_section['WP_WHh']))
            WP_ZHh_init.append(float(cur_section['WP_ZHh']))

        init_dict = {
            'target': targets_init,
            'WP_VBF2j': WP_VBF2j_init,
            'WP_VBF1j': WP_VBF1j_init,
            'WP_WHh': WP_WHh_init,
            'WP_ZHh': WP_ZHh_init
        }

        evalcnt = int(re.sub('evaluation_', '', confhandler.get_sections()[-1])) + 1
        print "resuming at evaluation " + str(evalcnt)

        bo.initialize(init_dict)
        initialized = True
    else:
        initialized = False

    # change the kernel to have a length scale more appropriate to this function
    gp_params = {
        'kernel': 1.0 * Matern(length_scale=0.05, length_scale_bounds=(1e-5, 1e5), nu=1.5),
        'alpha': 1e-5
    }

    # perform the standard initialization and setup
    if initialized:
        bo.maximize(init_points=0, n_iter=0, acq='poi', kappa=3,
                    xi=xi_scheduler(0.0), **gp_params)
    else:
        bo.maximize(init_points=6, n_iter=0, acq='poi', kappa=3,
                    xi=xi_scheduler(0.0), **gp_params)

    cur_iteration = 1
    for it in range(1000):
        cur_xi = xi_scheduler(cur_iteration)
        cur_iteration += 1
        print "using xi = " + str(cur_xi)

        bo.maximize(init_points=6, n_iter=1, acq='poi', kappa=3, xi=cur_xi, **gp_params)

        # evaluate the current maximum
        curval = bo.res['max']
        cost = curval['max_val']
        WPs = curval['max_params']

        confhandler = ConfigFileHandler()
        confhandler.config.optionxform = str
        confhandler.new_section('WPs')
        confhandler.set_field('WPs', 'cost', str(cost))
        for key, val in WPs.iteritems():
            confhandler.set_field('WPs', key, str(val))

        confhandler.save_configuration(out_dir + 'WPs.txt')
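# For orientation: the running optimum read back via bo.res['max'] in the
# scripts above has the structure sketched here (as implied by the accesses
# to 'max_val' and 'max_params'; all numbers are invented).
_example_curval = {
    'max_val': -1.87,
    'max_params': {'WP_VBF2j': 0.51, 'WP_VBF1j': 0.33,
                   'WP_WHh': 0.62, 'WP_ZHh': 0.58}
}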