def main():
    """Build LaTeX event-yield tables for every HH scenario.

    Reads the package's global settings, loads the data for each of the
    scenarios 'nonres/default', 'spin0' and 'spin2', builds one yield
    table per era, and writes them all into a single EventYield.tex file
    under the configured output directory.
    """
    base = os.path.expandvars('$CMSSW_BASE')
    package_dir = os.path.join(base, 'src/machineLearning/machineLearning')
    global_settings = ut.read_settings(
        os.path.join(package_dir, 'settings'), 'global')
    output_file = os.path.expandvars(
        os.path.join(global_settings['output_dir'], 'EventYield.tex'))
    # Channel does not depend on the scenario, so look it up once.
    channel = global_settings['channel']
    tables = []
    for scenario in ['nonres/default', 'spin0', 'spin2']:
        global_settings['scenario'] = scenario
        channel_dir = os.path.join(package_dir, 'info', 'HH', channel)
        preferences = hpr.HHParameterReader(channel_dir, scenario).parameters
        loader = hht.HHDataLoader(
            hht.HHDataNormalizer, preferences, global_settings)
        scenario_data = loader.data
        # One table per data-taking era present in this scenario's data.
        for era in set(scenario_data['era']):
            era_rows = scenario_data.loc[scenario_data['era'] == era]
            creator = eyc.EventYieldTable(era_rows, channel, era, scenario)
            tables.append(creator.create_table())
    eyc.EventYieldsFile(tables, output_file).fill_document_file()
    print('File saved to %s' % output_file)
# Example #2 (0)
def prepare_data(analysis):
    """Load the event data and parameters for the requested analysis.

    Parameters:
    -----------
    analysis : str
        Which analysis to load: 'HHmultilepton' or 'HHbbWW'.

    Returns:
    --------
    data : pandas.DataFrame
        The loaded event data.
    preferences : dict
        Channel/scenario parameters, augmented with 'trainvars' and the
        'hyperparameters' read from hyperparameters.json.
    global_settings : dict
        The global run settings.

    Raises:
    -------
    ValueError
        If *analysis* is not one of the supported analyses.
    """
    channel_dir, info_dir, global_settings = ut.find_settings()
    scenario = global_settings['scenario']
    reader = hpr.HHParameterReader(channel_dir, scenario)
    preferences = reader.parameters
    # NOTE(review): this is a dict view, not a list -- downstream code is
    # assumed to only iterate over it.
    preferences['trainvars'] = preferences['all_trainvar_info'].keys()
    startTime = datetime.now()
    print('data loading is started: ' + str(startTime))
    if analysis == 'HHmultilepton':
        normalizer = hht.HHDataNormalizer
        loader = hht.HHDataLoader(normalizer, preferences, global_settings)
    elif analysis == 'HHbbWW':
        normalizer = bbwwt.bbWWDataNormalizer
        loader = bbwwt.bbWWLoader(normalizer, preferences, global_settings)
    else:
        # Fail early with a clear message instead of a NameError on
        # `loader` below.
        raise ValueError('Unknown analysis: %s' % analysis)
    data = loader.data
    print('data loading is finished')
    print(datetime.now() - startTime)
    # Resonant scenarios live under a 'res/' subdirectory of the info tree.
    scenario = global_settings['scenario']
    scenario = scenario if 'nonres' in scenario else 'res/' + scenario
    hyperparameters_file = os.path.join(
        os.path.expandvars('$CMSSW_BASE'),
        'src/machineLearning/machineLearning/info/',
        global_settings['process'], global_settings['channel'], scenario,
        'hyperparameters.json')
    with open(hyperparameters_file, 'rt') as in_file:
        preferences['hyperparameters'] = json.load(in_file)
    return data, preferences, global_settings
def main(to_continue, opt_dir, bbww):
    """Run (or resume) a particle-swarm hyperparameter optimization.

    Parameters:
    -----------
    to_continue : bool
        If True, resume a previous optimization from *opt_dir* (reusing
        its saved run_settings); if False, start a fresh run from the
        package settings directory.
    opt_dir : str
        Output directory of a previous run; only read when resuming.
    bbww : bool
        If True use the bbWW normalizer/loader, otherwise the HH
        multilepton ones.

    Returns:
    --------
    Nothing; the best hyperparameters are written to
    best_hyperparameters.json inside the output directory.
    """
    if not to_continue:
        settings_dir = os.path.join(
            os.path.expandvars('$CMSSW_BASE'),
            'src/machineLearning/machineLearning/settings')
        global_settings = ut.read_settings(settings_dir, 'global')
        output_dir = os.path.expandvars(global_settings['output_dir'])
    else:
        # Resuming: reuse the settings snapshot saved by the earlier run.
        settings_dir = os.path.join(opt_dir, 'run_settings')
        global_settings = ut.read_settings(settings_dir, 'global')
        output_dir = opt_dir
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    if not to_continue:
        # Snapshot settings/info so this run can itself be resumed later.
        if not os.path.exists(os.path.join(output_dir, 'run_settings')):
            ut.save_run_settings(output_dir)
        if not os.path.exists(os.path.join(output_dir, 'run_info')):
            ut.save_info_dir(output_dir)
    # use_scratch_for_data(global_settings)
    print("::::::: Reading parameters :::::::")
    # The search space definition depends on the chosen ML method.
    if global_settings['ml_method'] == 'xgb':
        param_file = os.path.join(settings_dir, 'xgb_parameters.json')
    else:
        param_file = os.path.join(settings_dir, 'nn_parameters.json')
    hyperparameter_info = ut.read_json_cfg(param_file)
    pso_settings = ut.read_settings(settings_dir, 'pso')
    pso_settings.update(global_settings)
    addition = ut.create_infoPath_addition(global_settings)
    channel_dir = os.path.join(output_dir, 'run_info')
    # NOTE(review): `info_dir` is computed but never used below -- confirm
    # whether it (and `addition`) can be dropped or is needed elsewhere.
    info_dir = os.path.join(channel_dir, addition)
    scenario = global_settings['scenario']
    reader = hpr.HHParameterReader(channel_dir, scenario)
    preferences = reader.parameters
    normalizer = hht.HHDataNormalizer if not bbww else bbwwt.bbWWDataNormalizer
    if os.path.exists(preferences['data_csv']):
        # Fast path: reuse the cached CSV instead of re-running the loader.
        print(':::::::: Loading data from .csv file ::::::::')
        data = pandas.read_csv(preferences['data_csv'])
    else:
        print('::::::: Loading data to be saved to pandas.DataFrame :::::::')
        if not bbww:
            loader = hht.HHDataLoader(normalizer, preferences, global_settings)
        else:
            loader = bbwwt.bbWWLoader(normalizer, preferences, global_settings)
        data = loader.data
        if global_settings['ml_method'] in ['lbn', 'nn']:
            # bb2l channels do not use the W+jets class in the encoding.
            use_Wjets = True
            if 'bb2l' in global_settings['channel']:
                use_Wjets = False
            data = mt.multiclass_encoding(data, use_Wjets)
        # Cache the freshly loaded data for subsequent runs.
        loader.save_to_csv()
    print("\n============ Starting hyperparameter optimization ==========\n")
    swarm = pt.ParticleSwarm(pso_settings, st.get_fitness_score,
                             hyperparameter_info, to_continue, output_dir)
    optimal_hyperparameters = swarm.particleSwarmOptimization()[0]
    print("\n============ Saving results ================\n")
    best_parameters_path = os.path.join(output_dir,
                                        'best_hyperparameters.json')
    ut.save_dict_to_json(optimal_hyperparameters, best_parameters_path)
    print("Results saved to " + str(output_dir))
def main(fit=False, create_info=False, weight_dir=None, masses_type='all',
         create_profile=False):
    """ Main function for operating the fitting, plotting and creation of
    histo_dict

    Parameters:
    -----------
    fit : bool
        Whether to do a fit
    create_info : bool
        Whether to create histo_dict from scratch
    weight_dir : str
        Path to the directory where the TProfile files will be saved
    masses_type : str
        Type of the masses to be used. 'low', 'high' or 'all'
    create_profile : bool
        Whether to create the TProfiles.

    Returns:
    --------
    Nothing
    """
    # NOTE(review): the options above were previously read as free
    # (undefined) globals, which raised NameError at runtime; they are now
    # explicit keyword arguments with conservative defaults. `masses_type`
    # is accepted for interface completeness but not used in this body.
    channel_dir, info_dir, global_settings = ut.find_settings()
    # gen_mHH profiling only makes sense for resonant scenarios.
    if 'nonres' in global_settings['scenario']:
        raise TypeError("gen_mHH profiling is done only for resonant cases")
    scenario = global_settings['scenario']
    reader = hpr.HHParameterReader(channel_dir, scenario)
    normalizer = hht.HHDataNormalizer
    preferences = reader.parameters
    preferences['trainvars'] = preferences['all_trainvar_info'].keys()
    if create_info:
        create_histo_dict(info_dir, preferences)
    if create_profile or fit:
        # Load un-normalized data: fitting/profiling works on raw weights.
        loader = hht.HHDataLoader(normalizer,
                                  preferences,
                                  global_settings,
                                  normalize=False)
        data = loader.data
        if not os.path.exists(weight_dir):
            os.makedirs(weight_dir)
        if fit:
            do_fit(info_dir, data, preferences)
            resulting_hadd_file = os.path.join(weight_dir, 'all_fitFunc.root')
            print('Creating a single fit file with "hadd" to: ' +
                  str(resulting_hadd_file))
            create_all_fitFunc_file(global_settings)
        if create_profile:
            create_TProfiles(info_dir, data, preferences, label='raw')
            try:
                # Reweighing needs the fitted functions; fall back to raw
                # plots only if the fits are missing.
                data = loader.prepare_data(data)
                create_TProfiles(info_dir,
                                 data,
                                 preferences,
                                 label='reweighed')
            except ReferenceError:
                print('No fit for variables found')
                print('Please fit the variables for plots after reweighing')
# Example #5 (0)
def split_data(global_settings, preferences):
    """Split the event sample into even/odd event-number halves.

    Events whose 'key' matches one of the channel-specific exceptions are
    kept in both halves instead of being split.

    Parameters:
    -----------
    global_settings : dict
        Global run settings ('channel' is read here).
    preferences : dict
        Channel parameters ('data_csv' and 'trainvars' are read here).

    Returns:
    --------
    even_data, odd_data : pandas.DataFrame
        The two halves of the sample.
    """
    print('============ Starting evaluation ============')
    if os.path.exists(preferences['data_csv']):
        # Prefer the cached CSV when it exists.
        data = pandas.read_csv(preferences['data_csv'])
    else:
        loader = hht.HHDataLoader(
            hht.HHDataNormalizer, preferences, global_settings)
        data = loader.data
    hhvt.plot_correlations(data, preferences['trainvars'], global_settings)
    no_split_keys = []
    if '3l_1tau' in global_settings['channel']:
        no_split_keys = ['WZ', 'DY', 'TTTo']
        print('These keys are excluded from splitting: ', no_split_keys)
    keep_in_both = data['key'].isin(no_split_keys)
    event_numbers = data['event'].values
    is_even = event_numbers % 2 == 0
    is_odd = event_numbers % 2 != 0
    even_data = data.loc[np.logical_or(is_even, keep_in_both)]
    odd_data = data.loc[np.logical_or(is_odd, keep_in_both)]
    return even_data, odd_data