def find_masses():
    """ Picks the masses configured for the requested mass range

    Takes no arguments: the channel directory and the global settings are
    looked up with ut.find_settings() and the parameters are read with
    hpr.HHParameterReader for the scenario named in the global settings.

    NOTE(review): 'masses_type' is neither a parameter nor a local of this
    function. It is resolved from the enclosing (module) scope at call
    time, so it has to be defined there before calling -- please confirm.

    Returns:
    --------
    masses
        preferences['masses'] when masses_type equals 'all', otherwise
        preferences['masses_' + masses_type] (presumably a list of masses)
    """
    channel_dir, _info_dir, global_settings = ut.find_settings()
    parameter_reader = hpr.HHParameterReader(
        channel_dir, global_settings['scenario'])
    preferences = parameter_reader.parameters
    # 'all' maps to the plain key, every other type to a suffixed key
    masses_key = 'masses' if masses_type == 'all' else 'masses_' + masses_type
    return preferences[masses_key]
def prepare_data(analysis):
    """ Loads the data, the parameters and the hyperparameters of an analysis

    Parameters:
    -----------
    analysis : str
        Which analysis to load the data for. Either 'HHmultilepton' or
        'HHbbWW'.

    Returns:
    --------
    data
        The 'data' attribute of the loader that was built for the analysis
    preferences : dict
        Parameters read by hpr.HHParameterReader, extended with the keys
        'trainvars' and 'hyperparameters'
    global_settings : dict
        Global settings as returned by ut.find_settings()

    Raises:
    -------
    ValueError
        If 'analysis' is not one of the supported analyses. Previously this
        surfaced later as an UnboundLocalError on 'loader'.
    """
    supported_analyses = ('HHmultilepton', 'HHbbWW')
    if analysis not in supported_analyses:
        # Fail before any settings are read or data loading is announced
        raise ValueError(
            'Unsupported analysis: ' + str(analysis) + '. Choose one of: '
            + ', '.join(supported_analyses))
    channel_dir, info_dir, global_settings = ut.find_settings()
    scenario = global_settings['scenario']
    reader = hpr.HHParameterReader(channel_dir, scenario)
    preferences = reader.parameters
    preferences['trainvars'] = preferences['all_trainvar_info'].keys()
    startTime = datetime.now()
    print('data loading is started: ' + str(startTime))
    if analysis == 'HHmultilepton':
        normalizer = hht.HHDataNormalizer
        loader = hht.HHDataLoader(normalizer, preferences, global_settings)
    else:  # 'HHbbWW', guaranteed by the check at the top
        normalizer = bbwwt.bbWWDataNormalizer
        loader = bbwwt.bbWWLoader(normalizer, preferences, global_settings)
    data = loader.data
    print('data loading is finished')
    print(datetime.now() - startTime)
    # Resonant scenarios keep their info files under a 'res/' sub-directory
    scenario = scenario if 'nonres' in scenario else 'res/' + scenario
    hyperparameters_file = os.path.join(
        os.path.expandvars('$CMSSW_BASE'),
        'src/machineLearning/machineLearning/info/',
        global_settings['process'],
        global_settings['channel'],
        scenario,
        'hyperparameters.json')
    with open(hyperparameters_file, 'rt') as in_file:
        preferences['hyperparameters'] = json.load(in_file)
    return data, preferences, global_settings
def main(output_dir, save_model, channel, mode, era, BM):
    """ Trains the even and the odd LBN model and evaluates their performance

    Parameters:
    -----------
    output_dir : str
        Directory for the output. The string 'None' means that the path is
        assembled from channel, ml_method, res_nonres, mode and era.
    save_model
        Forwarded unchanged to create_model
    channel
        Not used in the body; the channel is set to 'bb1l' unconditionally
    mode : str
        Used only as a component of the default output path
    era : str
        Unless equal to '0', restricts preferences['included_eras'] to this
        era with every '20' removed from the string
    BM : str
        Unless equal to 'None', becomes the only entry of
        preferences['nonResScenarios']

    NOTE(review): 'res_nonres' is not a parameter; it is looked up in the
    enclosing (module) scope, and only when output_dir is 'None'.

    Returns:
    --------
    Nothing
    """
    settings_dir = os.path.join(
        os.path.expandvars('$CMSSW_BASE'),
        'src/machineLearning/machineLearning/settings')
    global_settings = ut.read_settings(settings_dir, 'global')
    global_settings['ml_method'] = 'lbn'
    global_settings['channel'] = 'bb1l'
    if output_dir == 'None':
        output_dir = '/'.join([
            global_settings['channel'],
            global_settings['ml_method'],
            res_nonres,
            mode,
            era,
        ])
    global_settings['output_dir'] = os.path.expandvars(output_dir)
    resolved_output_dir = global_settings['output_dir']
    if not os.path.exists(resolved_output_dir):
        os.makedirs(resolved_output_dir)
    channel_dir, info_dir, _ = ut.find_settings()
    parameter_reader = hpr.HHParameterReader(
        channel_dir, global_settings['scenario'])
    preferences = parameter_reader.parameters
    if BM != 'None':
        preferences["nonResScenarios"] = [BM]
    print('BM point to be considered: ' + str(preferences["nonResScenarios"]))
    if era != '0':
        preferences['included_eras'] = [era.replace('20', '')]
    print('era: ' + str(preferences['included_eras']))
    preferences = define_trainvars(global_settings, preferences, info_dir)
    particles = PARTICLE_INFO[global_settings['channel']]
    data_dict = create_data_dict(preferences, global_settings)
    # Print one multitarget value for every process of the even sample
    even_data = data_dict["even_data"]
    for class_ in set(even_data["process"]):
        class_targets = even_data.loc[
            even_data["process"] == class_, "multitarget"]
        multitarget = list(set(class_targets))[0]
        print(str(class_) + '\t' + str(multitarget))
    even_model = create_model(
        preferences, global_settings, data_dict, "even_data", save_model)
    if global_settings['feature_importance'] == 1:
        # Importances of the even model are computed on the odd sample
        importance_calculator = nt.LBNFeatureImportances(
            even_model,
            data_dict['odd_data'],
            preferences['trainvars'],
            global_settings['channel'])
        score_dict = importance_calculator.custom_permutation_importance()
        hhvt.plot_feature_importances_from_dict(
            score_dict, global_settings['output_dir'])
    odd_model = create_model(
        preferences, global_settings, data_dict, "odd_data", save_model)
    print(odd_model.summary())
    nodewise_performance(
        data_dict['odd_data'], data_dict['even_data'],
        odd_model, even_model, data_dict['trainvars'], particles,
        global_settings, preferences)
    even_train_info, even_test_info = evaluate_model(
        even_model, data_dict['even_data'], data_dict['odd_data'],
        data_dict['trainvars'], global_settings, "even_data", particles)
    odd_train_info, odd_test_info = evaluate_model(
        odd_model, data_dict['odd_data'], data_dict['even_data'],
        data_dict['trainvars'], global_settings, "odd_data", particles)
    hhvt.plotROC(
        [odd_train_info, odd_test_info],
        [even_train_info, even_test_info],
        global_settings)
def main():
    """ Steers the creation of the histo_dict, the fitting and the TProfiles

    Takes no arguments. The switches below are read as free variables from
    the enclosing (module) scope -- NOTE(review): confirm they are defined
    there before main() is called.

    fit
        Truthy to run do_fit and create_all_fitFunc_file
    create_info
        Truthy to call create_histo_dict
    weight_dir : str
        Directory that is created if missing; also used for the path of the
        'all_fitFunc.root' file reported in the log
    create_profile
        Truthy to create the 'raw' and the 'reweighed' TProfiles

    Returns:
    --------
    Nothing

    Raises:
    -------
    TypeError
        If the scenario in the global settings contains 'nonres'
    """
    channel_dir, info_dir, global_settings = ut.find_settings()
    if 'nonres' in global_settings['scenario']:
        raise TypeError("gen_mHH profiling is done only for resonant cases")
    scenario = global_settings['scenario']
    parameter_reader = hpr.HHParameterReader(channel_dir, scenario)
    normalizer = hht.HHDataNormalizer
    preferences = parameter_reader.parameters
    preferences['trainvars'] = preferences['all_trainvar_info'].keys()
    if create_info:
        create_histo_dict(info_dir, preferences)
    if create_profile or fit:
        loader = hht.HHDataLoader(
            normalizer, preferences, global_settings, normalize=False)
        data = loader.data
        if not os.path.exists(weight_dir):
            os.makedirs(weight_dir)
        if fit:
            do_fit(info_dir, data, preferences)
            resulting_hadd_file = os.path.join(weight_dir, 'all_fitFunc.root')
            print(
                'Creating a single fit file with "hadd" to: '
                + str(resulting_hadd_file))
            create_all_fitFunc_file(global_settings)
        if create_profile:
            create_TProfiles(info_dir, data, preferences, label='raw')
            try:
                data = loader.prepare_data(data)
                create_TProfiles(
                    info_dir, data, preferences, label='reweighed')
            except ReferenceError:
                print('No fit for variables found')
                print('Please fit the variables for plots after reweighing')
def main():
    """ Loads every included era and writes one LaTeX table for all of them

    For each era in preferences['included_eras'] the era specific input
    path and keys are copied to 'era_inputPath' and 'era_keys', the era is
    loaded with load_one_era and the resulting frames are concatenated
    before being handed to create_latex_table.

    Returns:
    --------
    Nothing

    Raises:
    -------
    ValueError
        If preferences['included_eras'] is empty, as there is nothing to
        tabulate (previously a NameError on 'labels').
    """
    channel_dir, info_dir, global_settings = ut.find_settings()
    preferences = hhat.get_hh_parameters(
        channel_dir, global_settings['tauID_training'], info_dir)
    era_frames = []
    labels = None
    for era in preferences['included_eras']:
        input_path_key = 'inputPath' + str(era)
        preferences['era_inputPath'] = preferences[input_path_key]
        preferences['era_keys'] = preferences['keys' + str(era)]
        era_df, labels = load_one_era(preferences, global_settings)
        # DataFrame.append returns a new frame instead of growing the
        # caller in place, so the frames are collected and joined once.
        era_frames.append(era_df)
    if not era_frames:
        raise ValueError(
            "preferences['included_eras'] is empty: no data to tabulate")
    total_df = pandas.concat(era_frames, ignore_index=True)
    # 'labels' are the ones returned for the last loaded era
    create_latex_table(total_df, labels, global_settings['output_dir'])
def main(output_dir, settings_dir, hyperparameter_file, debug):
    """ Collects the settings and the hyperparameters and runs the evaluation

    Parameters:
    -----------
    output_dir : str
        Output directory. The string 'None' keeps the 'output_dir' of the
        global settings.
    settings_dir : str
        Directory of the settings. The string 'None' selects
        $CMSSW_BASE/src/machineLearning/machineLearning/settings
    hyperparameter_file : str
        Path of the hyperparameter file. The string 'None' selects
        'hyperparameters.json' in the info directory.
    debug
        Forwarded unchanged to evaluation_main

    Returns:
    --------
    Nothing
    """
    if settings_dir == 'None':
        cmssw_base = os.path.expandvars('$CMSSW_BASE')
        settings_dir = os.path.join(
            cmssw_base, 'src/machineLearning/machineLearning/settings')
    global_settings = ut.read_settings(settings_dir, 'global')
    if output_dir != 'None':
        global_settings['output_dir'] = output_dir
    # Environment variables are expanded in either case
    expanded_output_dir = os.path.expandvars(global_settings['output_dir'])
    global_settings['output_dir'] = expanded_output_dir
    if not os.path.exists(global_settings['output_dir']):
        os.makedirs(global_settings['output_dir'])
    channel_dir, info_dir, _ = ut.find_settings()
    parameter_reader = hpr.HHParameterReader(
        channel_dir, global_settings['scenario'])
    preferences = parameter_reader.parameters
    if hyperparameter_file == 'None':
        hyperparameter_file = os.path.join(info_dir, 'hyperparameters.json')
    hyperparameters = ut.read_json_cfg(hyperparameter_file)
    evaluation_main(global_settings, preferences, hyperparameters, debug)
def main(output_dir, settings_dir, hyperparameter_file, debug):
    """ Syncs the specific global settings into place and runs the evaluation

    Parameters:
    -----------
    output_dir : str
        Output directory. The string 'None' means that the path is
        assembled from channel, ml_method, res_nonres, mode and era.
    settings_dir : str
        Directory of the settings. The string 'None' selects
        $CMSSW_BASE/src/machineLearning/machineLearning/settings
    hyperparameter_file : str
        Path of the hyperparameter file. The string 'None' selects
        'hyperparameters.json' in the info directory.
    debug
        Forwarded unchanged to evaluation_main

    NOTE(review): 'channel', 'mode', 'res_nonres', 'era' and 'BM' are not
    parameters; they are looked up in the enclosing (module) scope.

    Returns:
    --------
    Nothing
    """
    if settings_dir == 'None':
        settings_dir = os.path.join(
            os.path.expandvars('$CMSSW_BASE'),
            'src/machineLearning/machineLearning/settings')
    specific_settings_file = (
        settings_dir + '/' + 'global_%s_%s_%s_settings.json' % (
            channel, mode, res_nonres))
    command = 'rsync %s ~/machineLearning/CMSSW_11_2_0_pre1/src/machineLearning/machineLearning/settings/global_settings.json' % specific_settings_file
    p = subprocess.Popen(
        command, shell=True,
        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # Wait for the copy to finish: the settings are read right below and
    # must not be picked up while rsync is still running.
    _, rsync_stderr = p.communicate()
    if p.returncode != 0:
        # The copy stays best-effort, but a failure is no longer silent
        print(
            'WARNING: "' + command + '" exited with code '
            + str(p.returncode) + ': ' + str(rsync_stderr))
    global_settings = ut.read_settings(settings_dir, 'global')
    if output_dir == 'None':
        output_dir = global_settings['channel']+'/'+global_settings['ml_method']+'/'+\
            res_nonres + '/' + mode +'/' + era
    global_settings['output_dir'] = os.path.expandvars(output_dir)
    if not os.path.exists(global_settings['output_dir']):
        os.makedirs(global_settings['output_dir'])
    channel_dir, info_dir, _ = ut.find_settings()
    scenario = global_settings['scenario']
    reader = hpr.HHParameterReader(channel_dir, scenario)
    preferences = reader.parameters
    if BM != 'None':
        preferences["nonResScenarios"] = [BM]
    print('BM point to be considered: ' + str(preferences["nonResScenarios"]))
    if era != '0':
        preferences['included_eras'] = [era.replace('20', '')]
    print('era: ' + str(preferences['included_eras']))
    preferences = define_trainvars(global_settings, preferences, info_dir)
    if hyperparameter_file == 'None':
        hyperparameter_file = os.path.join(info_dir, 'hyperparameters.json')
    hyperparameters = ut.read_json_cfg(hyperparameter_file)
    print('hyperparametrs ' + str(hyperparameters))
    evaluation_main(global_settings, preferences, hyperparameters, debug)