Example No. 1
def find_masses(masses_type):
    """ Finds the masses to be used in the fit

    Parameters:
    -----------
    masses_type : str
        Which masses type to use. 'low', 'high' or 'all'

    Returns:
    --------
    masses : list
        List of masses to be used.
    """
    channel_dir, info_dir, global_settings = ut.find_settings()
    scenario = global_settings['scenario']
    reader = hpr.HHParameterReader(channel_dir, scenario)
    preferences = reader.parameters
    if masses_type == 'all':
        masses = preferences['masses']
    else:
        masses = preferences['masses_' + masses_type]
    return masses
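A minimal usage sketch, assuming the channel resolved by ut.find_settings() defines the keys 'masses', 'masses_low' and 'masses_high' in its parameter files:

# Hypothetical calls: the parameter-file keys 'masses', 'masses_low'
# and 'masses_high' are assumed to exist for the current channel.
all_masses = find_masses('all')    # reads preferences['masses']
low_masses = find_masses('low')    # reads preferences['masses_low']
print('High-only mass points: ' + str(sorted(set(all_masses) - set(low_masses))))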
Example No. 2
def prepare_data(analysis):
    channel_dir, info_dir, global_settings = ut.find_settings()
    scenario = global_settings['scenario']
    reader = hpr.HHParameterReader(channel_dir, scenario)
    preferences = reader.parameters
    preferences['trainvars'] = preferences['all_trainvar_info'].keys()
    startTime = datetime.now()
    print('Data loading started at: ' + str(startTime))
    if analysis == 'HHmultilepton':
        normalizer = hht.HHDataNormalizer
        loader = hht.HHDataLoader(normalizer, preferences, global_settings)
    elif analysis == 'HHbbWW':
        normalizer = bbwwt.bbWWDataNormalizer
        loader = bbwwt.bbWWLoader(normalizer, preferences, global_settings)
    else:
        # Guard against an unsupported analysis name.
        raise ValueError('Unknown analysis: ' + str(analysis))
    data = loader.data
    print('Data loading finished, elapsed time:')
    print(datetime.now() - startTime)
    scenario = scenario if 'nonres' in scenario else 'res/' + scenario
    hyperparameters_file = os.path.join(
        os.path.expandvars('$CMSSW_BASE'),
        'src/machineLearning/machineLearning/info/',
        global_settings['process'], global_settings['channel'], scenario,
        'hyperparameters.json')
    with open(hyperparameters_file, 'rt') as in_file:
        preferences['hyperparameters'] = json.load(in_file)
    return data, preferences, global_settings
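A sketch of how the returned triple might be consumed, assuming the 'HHmultilepton' branch and that the hyperparameters.json file exists at the path constructed above:

# Hypothetical caller: unpack the prepared data together with the merged
# preferences and global settings.
data, preferences, global_settings = prepare_data('HHmultilepton')
print('Loaded %d events for channel %s' % (
    len(data), global_settings['channel']))
print('Training variables: ' + str(list(preferences['trainvars'])))
print('Hyperparameters: ' + str(preferences['hyperparameters']))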
Example No. 3
def main(output_dir, save_model, channel, mode, era, BM):
    settings_dir = os.path.join(
        os.path.expandvars('$CMSSW_BASE'),
        'src/machineLearning/machineLearning/settings')
    global_settings = ut.read_settings(settings_dir, 'global')
    global_settings['ml_method'] = 'lbn'
    # The channel is hard-coded to 'bb1l' here, overriding the 'channel' argument.
    global_settings['channel'] = 'bb1l'
    if output_dir == 'None':
        # Assumption: derive the 'res'/'nonres' sub-directory from the scenario.
        res_nonres = 'nonres' if 'nonres' in global_settings['scenario'] else 'res'
        output_dir = os.path.join(
            global_settings['channel'], global_settings['ml_method'],
            res_nonres, mode, era)
        global_settings['output_dir'] = output_dir
    else:
        global_settings['output_dir'] = output_dir
    global_settings['output_dir'] = os.path.expandvars(
        global_settings['output_dir'])
    if not os.path.exists(global_settings['output_dir']):
        os.makedirs(global_settings['output_dir'])
    channel_dir, info_dir, _ = ut.find_settings()
    scenario = global_settings['scenario']
    reader = hpr.HHParameterReader(channel_dir, scenario)
    preferences = reader.parameters
    if not BM == 'None':
        preferences["nonResScenarios"] = [BM]
    print('BM point to be considered: ' + str(preferences["nonResScenarios"]))
    if not era == '0':
        preferences['included_eras'] = [era.replace('20', '')]
    print('era: ' + str(preferences['included_eras']))
    preferences = define_trainvars(global_settings, preferences, info_dir)
    particles = PARTICLE_INFO[global_settings['channel']]
    data_dict = create_data_dict(preferences, global_settings)
    classes = set(data_dict["even_data"]["process"])
    for class_ in classes:
        multitarget = list(
            set(data_dict["even_data"].loc[data_dict["even_data"]["process"] ==
                                           class_, "multitarget"]))[0]
        print(str(class_) + '\t' + str(multitarget))
    even_model = create_model(preferences, global_settings, data_dict,
                              "even_data", save_model)
    if global_settings['feature_importance'] == 1:
        trainvars = preferences['trainvars']
        data = data_dict['odd_data']
        lbn_feature_importance = nt.LBNFeatureImportances(
            even_model, data, trainvars, global_settings['channel'])
        score_dict = lbn_feature_importance.custom_permutation_importance()
        hhvt.plot_feature_importances_from_dict(score_dict,
                                                global_settings['output_dir'])
    odd_model = create_model(preferences, global_settings, data_dict,
                             "odd_data", save_model)
    print(odd_model.summary())
    nodewise_performance(
        data_dict['odd_data'], data_dict['even_data'], odd_model, even_model,
        data_dict['trainvars'], particles, global_settings, preferences)
    even_train_info, even_test_info = evaluate_model(
        even_model, data_dict['even_data'], data_dict['odd_data'],
        data_dict['trainvars'], global_settings, "even_data", particles)
    odd_train_info, odd_test_info = evaluate_model(
        odd_model, data_dict['odd_data'], data_dict['even_data'],
        data_dict['trainvars'], global_settings, "odd_data", particles)
    hhvt.plotROC([odd_train_info, odd_test_info],
                 [even_train_info, even_test_info], global_settings)
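The even/odd pairing above is a cross-training scheme: each model is trained on one parity of the event numbers and tested on the other, so no event is scored by a model that saw it during training. A minimal sketch of that idea, with hypothetical build_model and evaluate stand-ins rather than the repository's create_model/evaluate_model:

def cross_evaluate(data_dict, build_model, evaluate):
    """Hypothetical helper mirroring the even/odd scheme above."""
    even_model = build_model(data_dict['even_data'])
    odd_model = build_model(data_dict['odd_data'])
    # Each model is scored on the half it never saw during training.
    even_scores = evaluate(even_model, data_dict['odd_data'])
    odd_scores = evaluate(odd_model, data_dict['even_data'])
    return even_scores, odd_scores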
Example No. 4
def main(fit, create_info, weight_dir, masses_type, create_profile):
    """ Main function for operating the fitting, plotting and creation of
    histo_dict

    Parameters:
    -----------
    fit : bool
        Whether to do a fit
    create_info : bool
        Whether to create histo_dict from scratch
    weight_dir : str
        Path to the directory where the TProfile files will be saved
    masses_type : str
        Type of the masses to be used. 'low', 'high' or 'all'
    create_profile : bool
        Whether to create the TProfiles.

    Returns:
    --------
    Nothing
    """
    channel_dir, info_dir, global_settings = ut.find_settings()
    if 'nonres' in global_settings['scenario']:
        raise TypeError("gen_mHH profiling is done only for resonant cases")
    else:
        scenario = global_settings['scenario']
    reader = hpr.HHParameterReader(channel_dir, scenario)
    normalizer = hht.HHDataNormalizer
    preferences = reader.parameters
    preferences['trainvars'] = preferences['all_trainvar_info'].keys()
    if create_info:
        create_histo_dict(info_dir, preferences)
    if create_profile or fit:
        loader = hht.HHDataLoader(normalizer,
                                  preferences,
                                  global_settings,
                                  normalize=False)
        data = loader.data
        if not os.path.exists(weight_dir):
            os.makedirs(weight_dir)
        if fit:
            do_fit(info_dir, data, preferences)
            resulting_hadd_file = os.path.join(weight_dir, 'all_fitFunc.root')
            print('Creating a single fit file with "hadd" to: ' +
                  str(resulting_hadd_file))
            create_all_fitFunc_file(global_settings)
        if create_profile:
            create_TProfiles(info_dir, data, preferences, label='raw')
            try:
                data = loader.prepare_data(data)
                create_TProfiles(info_dir,
                                 data,
                                 preferences,
                                 label='reweighed')
            except ReferenceError:
                print('No fit for variables found')
                print('Please fit the variables for plots after reweighing')
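A hypothetical invocation with the flags from the docstring (in the original script they are presumably supplied by a command-line parser):

# Hypothetical call: fit the gen_mHH profiles and redraw the TProfiles,
# reusing an existing histo_dict and writing to a scratch directory.
main(fit=True,
     create_info=False,
     weight_dir='/tmp/gen_mHH_weights',
     masses_type='low',
     create_profile=True)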
Example No. 5
def main():
    channel_dir, info_dir, global_settings = ut.find_settings()
    preferences = hhat.get_hh_parameters(channel_dir,
                                         global_settings['tauID_training'],
                                         info_dir)
    total_df = pandas.DataFrame()
    for era in preferences['included_eras']:
        input_path_key = 'inputPath' + str(era)
        preferences['era_inputPath'] = preferences[input_path_key]
        preferences['era_keys'] = preferences['keys' + str(era)]
        era_df, labels = load_one_era(preferences, global_settings)
        total_df = total_df.append(era_df, ignore_index=True)
    create_latex_table(total_df, labels, global_settings['output_dir'])
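DataFrame.append was deprecated and later removed in pandas 2.0; on newer pandas the per-era frames can be collected and concatenated once. A sketch of the equivalent accumulation, assuming the same load_one_era helper and preferences dictionary as above:

import pandas

# Equivalent accumulation with pandas.concat instead of the
# deprecated DataFrame.append (removed in pandas >= 2.0).
era_frames = []
for era in preferences['included_eras']:
    preferences['era_inputPath'] = preferences['inputPath' + str(era)]
    preferences['era_keys'] = preferences['keys' + str(era)]
    era_df, labels = load_one_era(preferences, global_settings)
    era_frames.append(era_df)
total_df = pandas.concat(era_frames, ignore_index=True)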
Example No. 6
def main(output_dir, settings_dir, hyperparameter_file, debug):
    if settings_dir == 'None':
        settings_dir = os.path.join(
            os.path.expandvars('$CMSSW_BASE'),
            'src/machineLearning/machineLearning/settings')
    global_settings = ut.read_settings(settings_dir, 'global')
    if output_dir == 'None':
        output_dir = global_settings['output_dir']
    else:
        global_settings['output_dir'] = output_dir
    global_settings['output_dir'] = os.path.expandvars(
        global_settings['output_dir'])
    if not os.path.exists(global_settings['output_dir']):
        os.makedirs(global_settings['output_dir'])
    channel_dir, info_dir, _ = ut.find_settings()
    scenario = global_settings['scenario']
    reader = hpr.HHParameterReader(channel_dir, scenario)
    preferences = reader.parameters
    if hyperparameter_file == 'None':
        hyperparameter_file = os.path.join(info_dir, 'hyperparameters.json')
    hyperparameters = ut.read_json_cfg(hyperparameter_file)
    evaluation_main(global_settings, preferences, hyperparameters, debug)
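A hypothetical invocation that relies on the string 'None' defaults the function checks for, so the settings directory and hyperparameter file are resolved from the repository layout under $CMSSW_BASE:

# Hypothetical call: all paths are picked up from the default locations.
main(output_dir='None',
     settings_dir='None',
     hyperparameter_file='None',
     debug=False)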
Example No. 7
def main(output_dir, settings_dir, hyperparameter_file, debug,
         channel, mode, res_nonres, era, BM):
    # channel, mode, res_nonres, era and BM are presumably supplied by the
    # command-line parser of the original script.
    if settings_dir == 'None':
        settings_dir = os.path.join(
            os.path.expandvars('$CMSSW_BASE'),
            'src/machineLearning/machineLearning/settings')
    global_settings_file = os.path.join(
        settings_dir, 'global_%s_%s_%s_settings.json' % (channel, mode, res_nonres))
    command = 'rsync %s ~/machineLearning/CMSSW_11_2_0_pre1/src/machineLearning/machineLearning/settings/global_settings.json' % global_settings_file
    p = subprocess.Popen(command,
                         shell=True,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
    # Wait for the copy to finish before the settings file is read back.
    p.communicate()
    global_settings = ut.read_settings(settings_dir, 'global')
    if output_dir == 'None':
        output_dir = os.path.join(
            global_settings['channel'], global_settings['ml_method'],
            res_nonres, mode, era)
        global_settings['output_dir'] = output_dir
    else:
        global_settings['output_dir'] = output_dir
    global_settings['output_dir'] = os.path.expandvars(
        global_settings['output_dir'])
    if not os.path.exists(global_settings['output_dir']):
        os.makedirs(global_settings['output_dir'])
    channel_dir, info_dir, _ = ut.find_settings()
    scenario = global_settings['scenario']
    reader = hpr.HHParameterReader(channel_dir, scenario)
    preferences = reader.parameters
    if not BM == 'None':
        preferences["nonResScenarios"] = [BM]
    print('BM point to be considered: ' + str(preferences["nonResScenarios"]))
    if not era == '0':
        preferences['included_eras'] = [era.replace('20', '')]
    print('era: ' + str(preferences['included_eras']))
    preferences = define_trainvars(global_settings, preferences, info_dir)
    if hyperparameter_file == 'None':
        hyperparameter_file = os.path.join(info_dir, 'hyperparameters.json')
    hyperparameters = ut.read_json_cfg(hyperparameter_file)
    print('hyperparameters: ' + str(hyperparameters))
    evaluation_main(global_settings, preferences, hyperparameters, debug)
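The settings file copied by rsync is presumably read back by ut.read_settings, so the copy has to complete before execution continues. subprocess.run (Python 3.5+) is a more compact way to run the command and wait for it than Popen plus communicate(); a minimal sketch, assuming the same command string:

import subprocess

# Blocking alternative to the Popen call above; check=True raises if
# the rsync command exits with a non-zero status.
subprocess.run(command, shell=True, check=True,
               stdout=subprocess.PIPE, stderr=subprocess.PIPE)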