Example #1
def test_training():
    x = 'tests/data/dilepton/QSFUP/X_train_10.npy'
    y = 'tests/data/dilepton/QSFUP/y_train_10.npy'
    x0 = 'tests/data/dilepton/QSFUP/X0_train_10.npy'
    x1 = 'tests/data/dilepton/QSFUP/X1_train_10.npy'
    print("Loaded existing datasets ")

    estimator = RatioEstimator(n_hidden=(10, 10), activation="relu")
    estimator.train(
        method='carl',
        batch_size=1024,
        n_epochs=1,
        x=x,
        y=y,
        x0=x0,
        x1=x1,
        scale_inputs=True,
    )
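    # a minimal follow-up sketch (not in the original test): evaluate the
    # trained estimator and derive per-event weights, mirroring the other
    # examples in this collection
    r_hat, s_hat = estimator.evaluate(x=x0)
    weights = 1. / r_hat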
Example #2
if X0.shape[0] != Y0.shape[0]:
    print('problem when loading, #labels does not match #events, exit')
    exit(1)
if X0_eventnum is not None:
    if X0_eventnum.shape[0] != X0.shape[0]:
        print(
            'problem when loading, #eventnumbers does not match #events, exit')
        exit(1)
else:
    # some samples won't have an event number, but ATLAS ones should,
    # since we use it to propagate the weight to reco-level events
    if DEBUG:
        print("No eventnumber found in dataset.")

# load model and evaluate weights:
carl = RatioEstimator()
if DEBUG:
    print('Loading model from:', model_out_path)
carl.load(model_out_path + '/carl/')
r_hat, s_hat = carl.evaluate(X0)
# prevent negative weights (should only arise from rounding):
r_hat = ensure_positive_weight(r_hat)

# prevent division by zero
r_hat = force_nonzero(r_hat, zero_w_bound)

weights = 1. / r_hat
# renormalize so the average weight <w> = 1 after cropping
weights = weights * len(weights) / weights.sum()

maxweight = -1
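# hypothetical sketches of the two helpers used above (their real definitions
# are not part of this excerpt); assumes numpy is imported as np:
def ensure_positive_weight(r):
    # clip tiny negative values that can arise from rounding up to zero
    return np.clip(r, 0., None)

def force_nonzero(r, bound):
    # raise any value below `bound` to `bound` to avoid division by zero
    return np.maximum(r, bound)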
Example #3
p = opts.datapath
logger = logging.getLogger(__name__)
if os.path.exists('data/' + sample + '/' + var + '/X_train_' + str(n) +
                  '.npy'):
    logger.info(
        " Doing evaluation of model trained with datasets: %s, generator variation: %s with %s events.",
        sample, var, n)
else:
    logger.info(
        " No datasets available for evaluation of model trained with datasets: %s, generator variation: %s with %s events.",
        sample, var, n)
    logger.info("ABORTING")
    sys.exit()

loading = Loader()
carl = RatioEstimator()
carl.load('models/' + sample + '/' + var + '_carl_' + str(n))
evaluate = ['train', 'val']
for i in evaluate:
    r_hat, _ = carl.evaluate(x='data/' + sample + '/' + var + '/X0_' + i +
                             '_' + str(n) + '.npy')
    w = 1. / r_hat
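    # note: other examples in this collection (see Example #2) renormalize the
    # weights afterwards so that their mean is 1; that step is omitted here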
    loading.load_result(
        x0='data/' + sample + '/' + var + '/X0_' + i + '_' + str(n) + '.npy',
        x1='data/' + sample + '/' + var + '/X1_' + i + '_' + str(n) + '.npy',
        weights=w,
        label=i,
        do=sample,
        var=var,
        plot=True,
        n=n,
Example #4
        weightFeature=weightFeature,
        TreeName=treename,
        randomize=False,
        save=True,
        correlation=True,
        preprocessing=True,
        nentries=n,
        pathA=p + nominal + ".root",
        pathB=p + variation + ".root",
    )
    logger.info(" Loaded new datasets ")
#######################################

#######################################
# Estimate the likelihood ratio
estimator = RatioEstimator(n_hidden=(10, 10, 10), activation="relu")
estimator.train(
    method='carl',
    batch_size=1024,
    n_epochs=100,
    x=x,
    y=y,
    x0=x0,
    x1=x1,
    scale_inputs=True,
)
estimator.save('models/' + global_name + '_carl_' + str(n),
               x,
               metaData,
               export_model=True)
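# a follow-up sketch (not in the original excerpt): the saved model can be
# reloaded with RatioEstimator.load, as the other examples here do
carl = RatioEstimator()
carl.load('models/' + global_name + '_carl_' + str(n))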
########################################
Example #5
logger = logging.getLogger(__name__)
if os.path.exists('data/' + global_name + '/X_train_' + str(n) +
                  '.npy') and os.path.exists('data/' + global_name +
                                             '/metaData_' + str(n) + '.pkl'):
    logger.info(
        " Doing calibration of model trained with datasets: [{},{}], with {} events."
        .format(nominal, variation, n))
else:
    logger.info(
        " No datasets available for evaluation of model trained with datasets: [{},{}] with {} events."
        .format(nominal, variation, n))
    logger.info("ABORTING")
    sys.exit()

carl = RatioEstimator()
carl.load('models/' + global_name + '_carl_' + str(n))
#load
evaluate = ['train']
X = 'data/' + global_name + '/X_train_' + str(n) + '.npy'
y = 'data/' + global_name + '/y_train_' + str(n) + '.npy'
w = 'data/' + global_name + '/w_train_' + str(n) + '.npy'
r_hat, s_hat = carl.evaluate(X)
calib = CalibratedClassifier(carl, global_name=global_name)
calib.fit(X=X, y=y, w=w)
p0, p1, r_cal = calib.predict(X=X)
w_cal = 1 / r_cal
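# hypothetical sanity check (not in the original excerpt): compare the
# calibrated weights against the raw, uncalibrated ones from carl.evaluate
w_raw = 1. / r_hat
print("mean raw weight: {:.4f}, mean calibrated weight: {:.4f}".format(
    w_raw.mean(), w_cal.mean()))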
loading.load_calibration(
    y_true=y,
    p1_raw=s_hat,
    p1_cal=p1,
Example #6
### carl-torch inference ###
# get the weight from carl-torch (weightCT), evaluated with the same model
# used by carlAthena, on the ROOT file produced by carlAthena
eventVarsCT = ['Njets', 'MET']
eventVarsCA = ['Njets', 'MET', 'weight']
jetVars = ['Jet_Pt', 'Jet_Mass']
lepVars = ['Lepton_Pt']
xCT, _ = load(f=p + '/test.root',
              events=eventVarsCT,
              jets=jetVars,
              leps=lepVars,
              n=int(n),
              t='Tree',
              do=sample)
xCT = xCT[sorted(xCT.columns)]
carl = RatioEstimator()
carl.load('models/' + sample + '/' + var + '_carl_2000001')
r_hat, s_hat = carl.evaluate(x=xCT.to_numpy())
weightCT = 1. / r_hat

### carlAthena inference ###
# load the sample with the weight inferred by carlAthena
xCA, _ = load(f=p + '/test.root',
              events=eventVarsCA,
              jets=jetVars,
              leps=lepVars,
              n=int(n),
              t='Tree')
weightCA = xCA.weight

###compare weights###
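# a hedged sketch of the comparison (the original excerpt ends here): report
# how closely the carl-torch and carlAthena weights agree per event
import numpy as np

diff = weightCT - weightCA.to_numpy()
print("max |weightCT - weightCA| = {:.6f}".format(np.abs(diff).max()))
print("weights agree within 1e-6: {}".format(
    np.allclose(weightCT, weightCA.to_numpy(), atol=1e-6)))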
Example #7
logger = logging.getLogger(__name__)
if os.path.exists('data/' + global_name + '/X_train_' + str(n) +
                  '.npy') and os.path.exists('data/' + global_name +
                                             '/metaData_' + str(n) + '.pkl'):
    logger.info(
        " Doing evaluation of model trained with datasets: [{}, {}], with {} events."
        .format(nominal, variation, n))
else:
    logger.info(
        " No datasets available for evaluation of model trained with datasets: [{},{}] with {} events."
        .format(nominal, variation, n))
    logger.info("ABORTING")
    sys.exit()

loading = Loader()
carl = RatioEstimator()
if model:
    carl.load(model)
else:
    carl.load('models/' + global_name + '_carl_' + str(n))
evaluate = ['train', 'val']
for i in evaluate:
    print("<evaluate.py::__init__>::   Running evaluation for {}".format(i))
    r_hat, s_hat = carl.evaluate(x='data/' + global_name + '/X0_' + i + '_' +
                                 str(n) + '.npy')
    print("s_hat = {}".format(s_hat))
    print("r_hat = {}".format(r_hat))
    w = 1. / r_hat  # I thought r_hat = p_{1}(x) / p_{0}(x) ???
    print("w = {}".format(w))
    print("<evaluate.py::__init__>::   Loading Result for {}".format(i))
    loading.load_result(
Example #8
                                         random_seed=random_seed)

    # load samples into carl-torch format
    loading = Loader_edb()
    x, y, x0, x1 = loading.loading(x0=data_x0,
                                   x1=data_x1,
                                   save=True,
                                   folder=data_out_path,
                                   randomize=False,
                                   random_seed=random_seed,
                                   val_frac=0.25,
                                   filter_outliers=True)
    print("Loaded new datasets ")

# now the carl-torch part
estimator = RatioEstimator(n_hidden=n_hidden, activation="relu")

# pop event number, as this should not be used for training
train_loss, val_loss = estimator.train(
    method='carl',
    batch_size=4096,
    n_epochs=n_epochs,
    x=x,
    y=y,
    x0=x0,
    x1=x1,
    scale_inputs=True,
    #early_stopping = True,
    #early_stopping_patience = 10
)
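# a minimal follow-up sketch (not in the original excerpt): plot the returned
# loss curves to check for overtraining; assumes matplotlib is available
import matplotlib.pyplot as plt

plt.plot(train_loss, label='train')
plt.plot(val_loss, label='validation')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend()
plt.savefig('loss_curves.png')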
Example #9
p = opts.datapath
loading = Loader()
logger = logging.getLogger(__name__)
if os.path.exists('data/' + sample + '/' + var + '/X_train_' + str(n) +
                  '.npy'):
    logger.info(
        " Doing calibration of model trained with datasets: %s, generator variation: %s with %s events.",
        sample, var, n)
else:
    logger.info(
        " No datasets available for calibration of model trained with datasets: %s, generator variation: %s with %s events.",
        sample, var, n)
    logger.info("ABORTING")
    sys.exit()

carl = RatioEstimator()
carl.load('models/' + sample + '/' + var + '_carl_' + str(n))
#load
evaluate = ['train']
X = 'data/' + sample + '/' + var + '/X_train_' + str(n) + '.npy'
y = 'data/' + sample + '/' + var + '/y_train_' + str(n) + '.npy'
r_hat, s_hat = carl.evaluate(X)
calib = CalibratedClassifier(carl)
calib.fit(X=X, y=y)
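# note: unlike Example #5, no per-event weight array w is passed to fit here,
# so the calibration presumably treats all events with equal weight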
p0, p1, r_cal = calib.predict(X=X)
w_cal = 1 / r_cal
loading.load_calibration(
    y_true=y,
    p1_raw=s_hat,
    p1_cal=p1,
    label='calibrated',
Example #10
        nentries=n,
        pathA=p + nominal + ".root",
        pathB=p + variation + ".root",
        noTar=True,
        normalise=False,
        debug=False,
    )
    logger.info(" Loaded new datasets ")
#######################################

#######################################
# Estimate the likelihood ratio using a NN model
#   -> derive the hidden-layer widths from the number of input variables
#      as a rudimentary guess
structure = (len(features) * 3, ) * 5
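# e.g. with 7 input features this yields (21, 21, 21, 21, 21):
# five hidden layers, each three times the input width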
# use that structure for the hidden layers
estimator = RatioEstimator(n_hidden=structure, activation="relu")
estimator.scaling_method = scale_method

# per epoch plotting
intermediate_train_plot = None
intermediate_save = None
if per_epoch_plot:
    # arguments for training and validation sets for loading.load_result
    train_args = {
        "x0": x0,
        "x1": x1,
        "w0": w0,
        "w1": w1,
        "metaData": metaData,
        "features": features,
        "label": "train",
Example #11
# prevent division by zero:
# set the bound as low as machine epsilon, since oversized weights are
# filtered separately below
zero_w_bound = np.finfo(float).eps

# crop outlier weights more than N sigma from the average
crop_weight_sigma = 5.

# alternatively: crop the largest X% of weights (negative = disabled)
crop_weight_perc = -1.
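# a hypothetical sketch of the cropping step (the actual implementation is
# not part of this excerpt); assumes `weights` is a numpy array:
def crop_weights(weights, n_sigma=crop_weight_sigma, perc=crop_weight_perc):
    if perc > 0.:
        # cap the largest perc% of weights at the (100 - perc)th percentile
        cap = np.percentile(weights, 100. - perc)
    else:
        # cap weights more than n_sigma standard deviations above the mean
        cap = weights.mean() + n_sigma * weights.std()
    return np.minimum(weights, cap)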

#-----------------------------------------------------------------------------

if not os.path.exists(out_csv_dir):
    os.makedirs(out_csv_dir)

carl = RatioEstimator()
carl.load(model_out_path + '/carl/')

evaluate = ['train', 'val']
for i in evaluate:

    x0 = data_out_path + '/X0_' + i + '.npy'
    r_hat, s_hat = carl.evaluate(x=x0)
    ##    print('what is Carl returning?')
    ##    r=r_hat[0]
    ##    s=s_hat[0]
    ##    print('r=p0/p1,s=p0/(p0+p1)')
    ##    print(r,s,r/(1+r))
    ##    print('r=p1/p0,s=p0/(p0+p1)') # this
    ##    print(r,s,1/(1+r))
    ##    print('r=p0/p1,s=p1/(p0+p1)') # this