Example no. 1
                    epochs=100,
                    read_n=None)

# set directories for saving and loading, with an extra 'envelope' subdirectory for the QR models
experiment = ex.Experiment(run_n=params.run_n).setup(model_dir_qr=True,
                                                     analysis_dir_qr=True)
experiment.model_dir_qr = os.path.join(experiment.model_dir_qr, 'envelope')
pathlib.Path(experiment.model_dir_qr).mkdir(parents=True, exist_ok=True)
result_dir = '/eos/user/k/kiwoznia/data/QR_results/analysis/run_' + str(params.run_n) + '/envelope'
pathlib.Path(result_dir).mkdir(parents=True, exist_ok=True)

#****************************************#
#           read in qcd data
#****************************************#
paths = sf.SamplePathDirFactory(sdfr.path_dict).update_base_path(
    {'$run$': 'run_' + str(params.run_n)})

data_qcd_all = dapr.merge_qcd_base_and_ext_datasets(params, paths)
print('qcd all: min mjj = {}, max mjj = {}'.format(
    np.min(data_qcd_all['mJJ']), np.max(data_qcd_all['mJJ'])))
# split qcd data
data_qcd_parts = slice_datasample_n_parts(data_qcd_all, parts_n)

cut_results = {}

#****************************************#
#           for each quantile
#****************************************#
for quantile in quantiles:
Example no. 2
# loss strategies
strategy_ids_total_loss = ['s1', 's2', 's3', 's4', 's5']
strategy_ids_reco_kl_loss = ['rk5', 'rk5_1', 'rk5_01']
strategy_ids_kl_loss = ['kl1', 'kl2', 'kl3', 'kl4', 'kl5']

# set background sample to use
BG_sample = samp.BG_SR_sample
SIG_samples = samp.SIG_samples_na
mass_centers = [1500, 2500, 3500, 4500]
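# resonance mass hypotheses in GeV (1.5 - 4.5 TeV), zipped element-wise with SIG_samples in the ROC loop below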
plot_name_suffix = BG_sample + '_vs_' + (
    'narrow' if SIG_samples == samp.SIG_samples_na else 'broad') + '_sig'

# set up analysis outputs
experiment = ex.Experiment(run_n).setup(model_analysis_dir=True)
paths = sf.SamplePathDirFactory(sdfr.path_dict).update_base_path(
    {'$run$': experiment.run_dir})
print('Running analysis on experiment {}, plotting results to {}'.format(
    run_n, experiment.model_analysis_dir))
# read in data
data = sf.read_inputs_to_jet_sample_dict_from_dir(samp.all_samples, paths)
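# `data` is a dict mapping sample id -> JetSample for every id in samp.all_samples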

# *****************************************
#					ROC
# *****************************************
if 'roc' in do_analyses:

    # for each signal
    for SIG_sample, mass_center in zip(SIG_samples, mass_centers):
        # for each type of loss strategy
        for loss_ids, loss_name in zip([
                strategy_ids_reco_kl_loss, strategy_ids_total_loss,
Example no. 3
                ]
                kernels_mu = list(map(np.mean, kernels))
                kernels_std = list(map(np.std, kernels))

            grads = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))

        loss_per_epoch /= (step + 1)
        print('### [Epoch {}]: train loss {:.3f} ###'.format(
            epoch, loss_per_epoch))


# ************************ #
#        get data          #
# ************************ #
paths = sf.SamplePathDirFactory(sdfr.path_dict).update_base_path(
    {'$run$': 'run_106'})
qcd_sig_sample = js.JetSample.from_input_dir(
    'qcdSigReco', paths.sample_dir_path('qcdSigReco'), read_n=int(2e6))
print('training on {} events'.format(len(qcd_sig_sample)))
x_train = qcd_sig_sample['mJJ']
y_train = combine_loss_min(qcd_sig_sample)
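# `combine_loss_min` is defined elsewhere in the repository; judging by its name it
# presumably reduces the two per-jet anomaly scores to a single per-event target,
# roughly along the lines of (key names here are illustrative only):
#   np.minimum(qcd_sig_sample['j1TotalLoss'], qcd_sig_sample['j2TotalLoss'])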

# ************************ #
#        train model       #
# ************************ #
debug = False
quantile = 0.5
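# `quantile_loss` is not shown in this snippet; as a stand-in, here is a minimal sketch
# of a standard pinball (quantile) loss (assumes tensorflow imported as tf, as used
# below) -- the repository's own implementation may differ in weighting or reduction.
def quantile_loss(quantile):
    def loss(target, prediction):
        err = target - prediction
        # under-prediction is weighted by `quantile`, over-prediction by `1 - quantile`
        return tf.reduce_mean(tf.maximum(quantile * err, (quantile - 1.) * err))
    return loss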
loss_fn = quantile_loss(quantile)
initializer = 'he_uniform'
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001,
                                     beta_1=0.9,
Example no. 4
Parameters = recordtype('Parameters','run_n, qcd_sample_id, qcd_ext_sample_id, qcd_train_sample_id, qcd_test_sample_id, sig_sample_id, strategy_id, epochs, read_n')
params = Parameters(run_n=113, 
                    qcd_sample_id='qcdSigReco', 
                    qcd_ext_sample_id='qcdSigExtReco',
                    qcd_train_sample_id='qcdSigAllTrainReco', 
                    qcd_test_sample_id='qcdSigAllTestReco',
                    sig_sample_id=None, # set sig id later in loop
                    strategy_id='rk5_05',
                    epochs=100,
                    read_n=None)


#****************************************#
#           read in qcd data
#****************************************#
paths = sf.SamplePathDirFactory(sdfr.path_dict).update_base_path({'$run$': 'run_'+str(params.run_n)})

if do_qr:
    # if datasets are not yet prepared, prepare, dump and return them (same qcd train and test sample for all signals and all xsecs)
    if make_qcd_train_test_datasample:
        qcd_train_sample, qcd_test_sample_ini = dapr.make_qcd_train_test_datasets(params, paths, **cuts.signalregion_cuts)
    # else read from file
    else:
        qcd_train_sample = js.JetSample.from_input_dir(params.qcd_train_sample_id, paths.sample_dir_path(params.qcd_train_sample_id), read_n=params.read_n) 
        qcd_test_sample_ini = js.JetSample.from_input_dir(params.qcd_test_sample_id, paths.sample_dir_path(params.qcd_test_sample_id), read_n=params.read_n)


#****************************************#
#      for each signal: QR & dijet fit
#****************************************#
Example no. 5
#test_samples = ['GtoWW15na', 'GtoWW15br', 'GtoWW25na', 'GtoWW25br', 'GtoWW35na', 'GtoWW35br', 'GtoWW45na', 'GtoWW45br']
test_samples = ['GtoWW35na', 'GtoWW35br']
#test_samples = ['qcdSig']

run_n = 101

experiment = ex.Experiment(run_n=run_n)

# ********************************************
#               load model
# ********************************************

vae = VAE_3D(run=run_n, model_dir=experiment.model_dir)
vae.load()

input_paths = sf.SamplePathDirFactory(sdi.path_dict)
result_paths = sf.SamplePathDirFactory(sdr.path_dict).extend_base_path(experiment.run_dir)

for sample_id in test_samples:

    # ********************************************
    #               read test data (events)
    # ********************************************


    list_ds = tf.data.Dataset.list_files(input_paths.sample_path(sample_id)+'/*')

    for file_path in list_ds:

        file_path_str = file_path.numpy().decode('utf-8')
        file_name = os.path.basename(file_path_str)
        test_sample = es.EventSample.from_input_file(sample_id, file_path_str)
Example no. 6
if __name__ == '__main__':

    # sample ids
    sample_ids_grav_na = ['GtoWW15na', 'GtoWW25na', 'GtoWW35na', 'GtoWW45na']
    sample_ids_grav_br = ['GtoWW15br', 'GtoWW25br', 'GtoWW35br', 'GtoWW45br']
    #sample_ids_azzz = ['AtoHZ15', 'AtoHZ20', 'AtoHZ25', 'AtoHZ30', 'AtoHZ35', 'AtoHZ40', 'AtoHZ45']
    sample_ids_qcd = ['qcdSide', 'qcdSideExt', 'qcdSig', 'qcdSigExt']
    sample_ids_all = sample_ids_qcd + sample_ids_grav_na + sample_ids_grav_br

    # output paths
    fig_dir = 'fig/thesis/sample_analysis'
    print('plotting to ' + fig_dir)

    # input paths for all samples
    paths = safa.SamplePathDirFactory(sdfi.path_dict)

    # *****************************************
    #         read in data
    # *****************************************

    read_n = int(1e4)
    # read gravitons into sample dictionary
    data = safa.read_inputs_to_event_sample_dict_from_dir(
        sample_ids_grav_na, paths, read_n=read_n)  # , mJJ=1200.
    # merge main and ext data for qcd
    qcd_side = evsa.EventSample.from_input_dir(
        'qcdSide', paths.sample_dir_path('qcdSide'), read_n=read_n)  # , **cuts
    qcd_side_ext = evsa.EventSample.from_input_dir(
        'qcdSideExt', paths.sample_dir_path('qcdSideExt'), read_n=read_n)
    qcd_sig = evsa.EventSample.from_input_dir('qcdSig',
Example no. 7
        description='run mjj spectrum analysis with QR cuts applied')
    parser.add_argument('-x',
                        dest='sig_xsec',
                        type=float,
                        default=100.,
                        help='signal injection cross section')
    args = parser.parse_args()

    run = 113
    sample_ids = ['qcdSigAllTestReco', 'GtoWW35brReco']
    quantiles = [0.1, 0.3, 0.5, 0.7, 0.9, 0.99]
    # quantiles = [0.9]
    mjj_key = 'mJJ'
    param_dict = {
        '$run$': str(run),
        '$sig_name$': sample_ids[1],
        '$sig_xsec$': str(int(args.sig_xsec))
    }

    input_paths = sf.SamplePathDirFactory(sdfs.path_dict).update_base_path(
        param_dict)  # selection paths use the new format with run_x, sig_x, ...
    fig_dir = exp.Experiment(
        run_n=run,
        param_dict=param_dict).setup(analysis_dir_qr=True).analysis_dir_qr_mjj

    for sample_id in sample_ids:
        for quantile in quantiles:
            sample = js.JetSample.from_input_file(
                sample_id, input_paths.sample_file_path(sample_id))
            plot_mjj_spectrum(sample, quantile, fig_dir)
Example no. 8
strategy = lost.loss_strategy_dict['s5']  # L1 & L2 > LT

run_model101 = 101
run_model502 = 502
run_model701 = 701

experiment101 = ex.Experiment(run_model101)
experiment502 = ex.Experiment(run_model502)
experiment701 = ex.Experiment(run_model701)
experiment_result = ex.Experiment(param_dict={
    '$run1$': experiment101.run_dir,
    '$run2$': experiment502.run_dir
}).setup(model_comparison_dir=True)

# read run 101 data
paths102 = sf.SamplePathDirFactory(sdfr.path_dict).update_base_path(
    {'$run$': experiment101.run_dir})
data102 = sf.read_inputs_to_jet_sample_dict_from_dir(all_samples, paths102)

# read run 502 data
paths501 = sf.SamplePathDirFactory(sdfr.path_dict).update_base_path(
    {'$run$': experiment502.run_dir})
data501 = sf.read_inputs_to_jet_sample_dict_from_dir(all_samples, paths501)

# read run 701 data
paths701 = sf.SamplePathDirFactory(sdfr.path_dict).update_base_path(
    {'$run$': experiment701.run_dir})
data701 = sf.read_inputs_to_jet_sample_dict_from_dir(all_samples, paths701)

if 'roc' in do_analyses:
    # *****************************************
    #					ROC
Example no. 9

# read in qcd signal region sample
run_n = 101
SM_sample = 'qcdSigAllReco'
#BSM_samples = ['GtoWW15naReco', 'GtoWW15brReco', 'GtoWW25naReco', 'GtoWW25brReco','GtoWW35naReco', 'GtoWW35brReco', 'GtoWW45naReco', 'GtoWW45brReco']
BSM_samples = [
    'GtoWW15naReco', 'GtoWW25naReco', 'GtoWW35naReco', 'GtoWW45naReco'
]
all_samples = [SM_sample] + BSM_samples
mjj_key = 'mJJ'
reco_loss_j1_key = 'j1RecoLoss'
QR_train_share = 0.3

experiment = ex.Experiment(run_n)
paths = sf.SamplePathDirFactory(sd.path_dict).update_base_path(
    {'$run$': experiment.run_dir})

data = sf.read_inputs_to_jet_sample_dict_from_dir(all_samples, paths)

# define quantile and loss-strategy for discrimination
quantiles = [0.01, 0.05, 0.1, 0.3, 0.5, 0.7, 0.9]
strategy = lost.loss_strategy_dict['rk5']  # rk5: combined reco + KL loss
qcd_sig_sample = data[SM_sample]
#split qcd sample into training and testing
qcd_train, qcd_test = js.split_jet_sample_train_test(qcd_sig_sample,
                                                     QR_train_share)
# update data_dictionary
data[SM_sample] = qcd_test
print(qcd_sig_sample.features())

for quantile in quantiles:
    for sample_name, jet_events in data.items():

        # cut in dEta
        jet_events_dEta_cut = jet_events.cut(np.abs(jet_events['DeltaEtaJJ']) > 1.4)  # cut on |dEta| > 1.4

        # count jet_events
        n_cut_mjj = len(jet_events)
        n_cut_mjj_dEta = len(jet_events_dEta_cut)

        with np.printoptions(precision=5, suppress=True):

            print("{: <12}: {: >7} n_mjj_cut, {: >7} n_mjj_dEta_cut, {: >5} n_mjj_dEta_cut / n_cut_mjj".format(
                sample_name, n_cut_mjj, n_cut_mjj_dEta, n_cut_mjj_dEta / float(n_cut_mjj)))

            # ff: csv output file handle, opened outside of this snippet
            ff.write(','.join([sample_name] + [str(n) for n in [
                n_cut_mjj, n_cut_mjj_dEta,
                np.min(jet_events['DeltaEtaJJ']), np.max(jet_events['DeltaEtaJJ']),
                np.min(jet_events['mJJ']), np.max(jet_events['mJJ'])]]))
            ff.write('\n')


if __name__ == '__main__':

	full_mjj = False

	sample_ids = sdi.path_dict['sample_dir'].keys()
	paths = sf.SamplePathDirFactory(sdi.path_dict)
	data = sf.read_inputs_to_jet_sample_dict_from_dir(sample_ids, paths, **cuts.signalregion_cuts)
	file_path = './data/event_counts_after_mjj_1200_jetEta_2.4_jetPt_200_cut.csv'

	if full_mjj:
		count_before_mjj_cut(data, file_path)
	else:
		count_after_mjj_cut(data, file_path)

import sarewt.data_reader as dare
import pofah.jet_sample as jesa
import pofah.util.sample_factory as safa
import pofah.path_constants.sample_dict_file_parts_input as sadi
import os
import glob
import copy

path_dict = copy.deepcopy(sadi.path_dict)

base_dir = '/eos/project/d/dshep/TOPCLASS/DijetAnomaly/VAE_results/run_101/sample_results/minRecoKL_loss'

quantiles = ['q1', 'q5', 'q10', 'q30', 'q50', 'q70', 'q90']
sample_ids = ['qcdSigAll', 'GtoWW15na', 'GtoWW25na', 'GtoWW35na', 'GtoWW45na']
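# assumption: each quantile subdirectory of base_dir holds the per-sample selection
# results, and accepted()/rejected() below split events by the stored QR cut decision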

for quantile in quantiles:
    path_dict.update(dict(base_dir=os.path.join(base_dir, quantile)))
    paths = safa.SamplePathDirFactory(path_dict)
    data = safa.read_inputs_to_jet_sample_dict_from_dir(sample_ids, paths)
    print('*'*10+'\n'+quantile+'\n'+'*'*10)
    for sample_id in sample_ids:
        accepted_n = len(data[sample_id].accepted())
        rejected_n = len(data[sample_id].rejected())
        print('{:<10}: {:>9} accepted, {:>9} rejected. ratio acc/total: {:.5f}'.format(
            sample_id, accepted_n, rejected_n, accepted_n / float(len(data[sample_id]))))