コード例 #1
0
ファイル: lhe_reader.py プロジェクト: rbarrue/madminer
    def save(self, filename_out, shuffle=True):
        """
        Write observables, observations, and event weights to a MadMiner HDF5 file.

        The parameter, benchmark, and morphing setup is copied over from the
        file supplied at initialization; any nuisance benchmarks found in the
        LHE file are added on top.

        Parameters
        ----------
        filename_out : str
            Destination path for the output file.

        shuffle : bool, optional
            If True, events are shuffled before being saved. This matters when
            several distinct samples (e.g. signal and background) are present.
            Default value: True.

        Returns
        -------
            None

        """

        # Nothing to write if no events were loaded / analysed
        if self.observations is None or self.weights is None:
            logger.warning("No events to save!")
            return

        logger.debug(
            "Loading HDF5 data from %s and saving file to %s", self.filename, filename_out
        )

        # The weight names double as the nuisance benchmark names
        benchmark_names = list(self.weights.keys())
        logger.debug("Weight names: %s", benchmark_names)

        # Save nuisance parameters and benchmarks, copying the existing setup
        save_nuisance_setup(
            file_name=filename_out,
            file_override=True,
            nuisance_benchmarks=benchmark_names,
            nuisance_parameters=self.nuisance_parameters,
            reference_benchmark=self.reference_benchmark,
            copy_from_path=self.filename,
        )

        # Save the per-event observations and weights
        save_events(
            file_name=filename_out,
            file_override=True,
            observables=self.observables,
            observations=self.observations,
            weights=self.weights,
            sampling_benchmarks=self.events_sampling_benchmark_ids,
            num_signal_events=self.signal_events_per_benchmark,
            num_background_events=self.background_events,
        )

        # In-place shuffle: input and output are the same file
        if shuffle:
            combine_and_shuffle([filename_out], filename_out)
コード例 #2
0
    'delta_phi_zz',
    '(lep1ZZ+lep2ZZ).deltaphi(lep3ZZ+lep4ZZ) * (-1. + 2.*float((lep1ZZ+lep2ZZ).eta > (lep3ZZ+lep4ZZ).eta))',
    required=False,
    default=float('nan'),
    #default=0,
)

# Required observables: jet multiplicity and missing transverse momentum
for observable_name, observable_expr in [
    ('n_jets', 'len(j)'),
    ('met', 'met.pt'),
]:
    delphes.add_observable(
        observable_name,
        observable_expr,
        required=True,
    )

# Event selection: one ZZ candidate, at least two jets, high dijet mass
for cut_expr in ['isZZcand == 1', 'n_jets >= 2', 'm_jj > 700.']:
    delphes.add_cut(cut_expr)

# Evaluate observables and cuts over the Delphes samples, then persist
delphes.analyse_delphes_samples()

data_file = '/data_CMS/cms/cortinovis/ewdim6/data_ew_1M_az/delphes_data.h5'
delphes.save(data_file)

# Shuffle into a separate file so any inherent event ordering cannot bias
# a later train/test split
combine_and_shuffle(
    [data_file],
    '/data_CMS/cms/cortinovis/ewdim6/data_ew_1M_az/delphes_data_shuffled.h5')
コード例 #3
0
# run this from terminal with madminer stuff installed to be safe
from __future__ import absolute_import, division, print_function, unicode_literals

import logging
from madminer.sampling import combine_and_shuffle



# MadMiner logging: verbose, with compact timestamps
logging.basicConfig(
    format='%(asctime)-5.5s %(name)-20.20s %(levelname)-7.7s %(message)s',
    datefmt='%H:%M',
    level=logging.DEBUG,
)

# Quieten every non-MadMiner logger (e.g. matplotlib)
for logger_name in logging.Logger.manager.loggerDict:
    if "madminer" not in logger_name:
        logging.getLogger(logger_name).setLevel(logging.WARNING)

mg_dir = '/home/software/MG5_aMC_v2_6_2/'

# One hundred per-run Delphes HDF5 files: delphes_data1.h5 ... delphes_data100.h5
delphesDatasetList = ['data/delphes_data{}.h5'.format(run) for run in range(1, 101)]

# Merge and shuffle all runs into a single big sample
combine_and_shuffle(
    delphesDatasetList,
    'data/delphes_data_shuffledBig.h5',
)
コード例 #4
0
# Inference methods requested in the run configuration; coerce each entry to a
# plain str (a list, not a one-shot map iterator, so it can be re-iterated)
methods = inputs['methods']
print(methods)
methods = [str(method) for method in methods]

test_split = float(inputs['test_split'])  # training-test split

# Number of theory parameters, read from the MadMiner HDF5 setup.
# Use a context manager so the file handle is closed (the old code leaked it).
with h5py.File(h5_file, 'r') as hf:
    parameters = len(hf['parameters']['names'])

# To shuffle or not to shuffle: important when the file concatenates several
# distinct samples (e.g. signal and background)
if inputs['shuffle']:
    h5shuffle_file = '/home/data/madminer_example_shuffled.h5'

    combine_and_shuffle([h5_file], h5shuffle_file)

    sampler = SampleAugmenter(h5shuffle_file,
                              include_nuisance_parameters=nuisance)
else:
    sampler = SampleAugmenter(h5_file, include_nuisance_parameters=nuisance)
for method in methods:
    print('sampling from method ', method)

    for i in range(n_trainsamples):

        # creates training samples
コード例 #5
0
# We can also add cuts, again as parse-able strings. In addition to the objects
# discussed above, they may reference the observables defined earlier.

# In[335]:

# Selection: one ZZ candidate, a hard leading jet, and at least two jets
for selection in ('isZZcand == 1', 'pt_j1 > 20.', 'n_jets >= 2'):
    delphes.add_cut(selection)

# ## 4. Analyse events and store data

# `analyse_delphes_samples` computes all observables from the Delphes file(s)
# generated before and checks which events pass the cuts:

# In[336]:

delphes.analyse_delphes_samples()

# In[337]:

delphes.save('data_hel_hw_hb/delphes_data.h5')

# ## 6. Combine and shuffle different samples

# Several small samples generated with the same setup (same benchmark points /
# morphing basis) could be combined here to reduce disk usage.
#
# Shuffling is good practice even for a single sample: events may carry an
# inherent ordering (e.g. from sampling different hypotheses) that would bias
# a later train/test split.

# In[340]:

combine_and_shuffle(['data_hel_hw_hb/delphes_data.h5'],
                    'data_hel_hw_hb/delphes_data_shuffled.h5')
コード例 #6
0
# Compute all observables from the Delphes samples and apply the cuts
delphes.analyse_delphes_samples()

# In[238]:

data_h5 = 'data_ew_wphi2/delphes_data.h5'
delphes.save(data_h5)

# ## 6. Combine and shuffle different samples

# Several small samples generated with the same setup (same benchmark points /
# morphing basis) could be combined here to reduce disk usage. Shuffling is
# good practice even for one sample: an inherent event ordering (e.g. from
# sampling different hypotheses) could bias a later train/test split.

# In[245]:

combine_and_shuffle([data_h5],
                    'data_ew_wphi2/delphes_data_shuffled.h5')

# In[ ]:

#import h5py

#def get_all(name):
#   print(name)

#with h5py.File('data_ew/delphes_data.h5', 'r') as f:
#   g_name = f.visit(get_all)
#   d = f['samples/observations']
#   print(len([a for a in d[:,0] if a!=0]))
#   print(len(d[:,0]))
#for item in d[:]:
#print(item[1])
コード例 #7
0

# Selection: one ZZ candidate and at least two jets
for cut in ('isZZcand == 1', 'n_jets >= 2'):
    delphes.add_cut(cut)


# ## 4. Analyse events and store data

# Compute all observables from the Delphes file(s) generated before and check
# which events pass the cuts:

# In[237]:


delphes.analyse_delphes_samples()


# In[238]:


output_h5 = 'data_sme_hw_hbox/delphes_data.h5'
delphes.save(output_h5)


# Shuffle into a separate file so inherent event ordering cannot bias a later
# train/test split
combine_and_shuffle(
    [output_h5],
    'data_sme_hw_hbox/delphes_data_shuffled.h5'
)




コード例 #8
0
# MadMiner output
logging.basicConfig(
    format='%(asctime)-5.5s %(name)-20.20s %(levelname)-7.7s %(message)s',
    datefmt='%H:%M',
    level=logging.DEBUG,
)

# Output of all other modules (e.g. matplotlib): warnings and above only
for key in logging.Logger.manager.loggerDict:
    if "madminer" not in key:
        logging.getLogger(key).setLevel(logging.WARNING)

mg_dir = '/home/software/MG5_aMC_v2_6_2/'

path = "./data/"
# Collect delphes_data<N>.h5 for 1- to 4-digit N. glob.glob already returns a
# list, so the former [f for f in glob.glob(...)] wrappers were redundant
# copies; a loop over the patterns removes the four-fold repetition.
delphesDatasetList = []
for pattern in ("delphes_data?.h5", "delphes_data??.h5",
                "delphes_data???.h5", "delphes_data????.h5"):
    delphesDatasetList += glob.glob(path + pattern)

combine_and_shuffle(
    delphesDatasetList,
    'data/delphes_data_shuffled.h5',
    k_factors=0.00029507,  # specific to 1k events in run card and suboptimal simulating!!!
)
print("Files combined: ", len(delphesDatasetList))
コード例 #9
0
# We can also add cuts, again as parse-able strings. In addition to the objects
# discussed above, they may reference the observables defined earlier.

# In[335]:

# Selection: one ZZ candidate, at least two jets, and a hard leading jet
for cut_string in ('isZZcand == 1', 'n_jets >= 2', 'pt_j1 > 20.'):
    delphes.add_cut(cut_string)

# ## 4. Analyse events and store data

# `analyse_delphes_samples` computes all observables from the Delphes file(s)
# generated before and checks which events pass the cuts:

# In[336]:

delphes.analyse_delphes_samples()

# In[337]:

delphes.save('data_dim6_3/delphes_data.h5')

# ## 6. Combine and shuffle different samples

# Several small samples generated with the same setup (same benchmark points /
# morphing basis) could be combined here to reduce disk usage. Shuffling is
# good practice even for one sample: an inherent event ordering (e.g. from
# sampling different hypotheses) could bias a later train/test split.

# In[340]:

combine_and_shuffle(['data_dim6_3/delphes_data.h5'],
                    'data_dim6_3/delphes_data_shuffled.h5')