예제 #1
0
                        label="ttZ",
                        isTrainSample=False,
                        normalization_weight=2.,
                        signalSample=True)

# path to output directory (adjust NAMING)
savepath = basedir + "/workdir/ttZStudies_allVariables_" + str(JTcategory)

# collect all constructor arguments for the DNN training class
dnn_settings = {
    "save_path": savepath,
    "input_samples": input_samples,
    "event_category": JTcategory,
    "train_variables": variables,
    # number of epochs
    "train_epochs": 500,
    # number of epochs without decrease in loss before stopping
    "early_stopping": 20,
    # metrics for evaluation (c.f. KERAS metrics)
    "eval_metrics": ["acc"],
    # percentage of train set to be used for testing (i.e. evaluating/plotting after training)
    "test_percentage": 0.2,
}

# initializing DNN training class
dnn = DNN.DNN(**dnn_settings)

#dnn.data.get_non_train_samples()

# build default model
dnn.build_model()
# perform the training
dnn.train_model()
# evaluate the trained model
dnn.eval_model()
예제 #2
0
#input_samples.addSample("ttcc"+naming,  label = "ttcc")
#input_samples.addSample("ttlf"+naming,  label = "ttlf")

# read the sample definitions from the configured input file
# (pputils is project-local; presumably returns a dict describing the samples — verify)
sampleDict = pputils.readSampleFile(inPath)

# debug output: dump the parsed sample dictionary between visual separators.
# NOTE: print is written in function-call form so this single-argument usage
# behaves identically under Python 2 and Python 3 (original bare `print`
# statements were Python-2-only syntax).
print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
print(sampleDict)
print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")

# register the loaded samples with the framework's input-sample container
pputils.addToInputSamples(input_samples, sampleDict)

# configuration handed to the DNN training class
training_options = {
    "save_path": outputdir,
    "input_samples": input_samples,
    "category_name": options.category,
    "train_variables": variables,
    # number of training epochs
    "train_epochs": int(options.train_epochs),
    # metrics for evaluation (c.f. KERAS metrics)
    "eval_metrics": ["acc"],
    # fraction of the train set used for testing (i.e. evaluating/plotting after training)
    "test_percentage": 0.2,
    # balance samples per epoch so the amount of samples per category is roughly equal
    "balanceSamples": options.balanceSamples,
}

# initializing DNN training class
dnn = DNN.DNN(**training_options)

# config dictionary for DNN architecture
config = {
    "layers": [200, 200],
    "loss_function": "categorical_crossentropy",
    "Dropout": 0.5,
    "L2_Norm": 1e-5,
    "batch_size": 5000,
    "optimizer": optimizers.Adagrad(decay=0.99),
예제 #3
0
                          save_path=savepath,
                          event_classes=event_classes,
                          event_category=categories[key],
                          train_variables=category_vars[key],
                          batch_size=5000,
                          train_epochs=20,
                          early_stopping=5,
                          optimizer="adam",
                          test_percentage=0.2,
                          eval_metrics=["acc"],
                          phi_padding=0)

# look up the event selection and variable set for the current category key
selected_category = categories[key]
selected_variables = category_vars[key]

# initialize the DNN training class: at most 500 epochs, stop early after
# 20 epochs without improvement, track accuracy during training
dnn = DNN.DNN(in_path=inPath,
              save_path=savepath,
              event_classes=event_classes,
              event_category=selected_category,
              train_variables=selected_variables,
              train_epochs=500,
              early_stopping=20,
              eval_metrics=["acc"])


def plot_confusion_matrix(confusion_matrix,
                          error_confusion_matrix,
                          xticklabels,
                          yticklabels,
                          title,
                          roc,
                          roc_err,
                          save_path,
                          norm_matrix=True,
                          difference=False):
                        label="ttlf",
                        normalization_weight=1.)

# for binary classification, attach the signal/background target labels
if options.isBinary():
    input_samples.addBinaryLabel(options.getSignal(),
                                 options.getBinaryBkgTarget())

# assemble all settings for the DNN training class
dnn_kwargs = {
    "save_path": options.getOutputDir(),
    "input_samples": input_samples,
    "category_name": options.getCategory(),
    "train_variables": options.getTrainVariables(),
    # number of epochs
    "train_epochs": options.getTrainEpochs(),
    # metrics for evaluation (c.f. KERAS metrics)
    "eval_metrics": ["acc"],
    # fraction of the train set used for testing (i.e. evaluating/plotting after training)
    "test_percentage": options.getTestPercentage(),
    # balance samples per epoch so the amount of samples per category is roughly equal
    "balanceSamples": options.doBalanceSamples(),
    "evenSel": options.doEvenSelection(),
    "norm_variables": options.doNormVariables(),
}

# initializing DNN training class
dnn = DNN.DNN(**dnn_kwargs)

# build DNN model
dnn.build_model(options.getNetConfig())

# perform the training
dnn.train_model()

# evaluate the trained model
예제 #5
0
# variable set for the given jet/tag category
variables = variable_set.variables[JTcategory]

# output classes of the multi-class DNN
event_classes = ["ttHbb", "ttbb", "tt2b", "ttb", "ttcc", "ttlf"]

# input data location and output paths
inPath = "/ceph/vanderlinden/MLFoyTrainData/DNN/"
savepath = basedir + "/workdir/DNN_allVariables_" + str(JTcategory)
cmatrix_file = (basedir + "/workdir/confusionMatrixData/allVariables_"
                + str(JTcategory) + ".h5")

# make sure the directory for the confusion matrix file exists
cmatrix_dir = os.path.dirname(cmatrix_file)
if not os.path.exists(cmatrix_dir):
    os.makedirs(cmatrix_dir)

# set up the DNN training class (500 epochs max, early stopping after 20
# epochs without improvement, 20% of the train set reserved for testing)
dnn_args = dict(in_path=inPath,
                save_path=savepath,
                event_classes=event_classes,
                event_category=JTcategory,
                train_variables=variables,
                train_epochs=500,
                early_stopping=20,
                eval_metrics=["acc"],
                test_percentage=0.2)
dnn = DNN.DNN(**dnn_args)

# build, train and evaluate the network
dnn.build_model()
dnn.train_model()
dnn.eval_model()
dnn.get_input_weights()
dnn.rank_input_features()
dnn.plot_metrics()

# plotting
#dnn.save_confusionMatrix(location = cmatrix_file, save_roc = True)
dnn.plot_confusionMatrix(norm_matrix=True)
예제 #6
0
    'ge4j_' + '2t': 'N_jets \\geq 4, N_btags = 2',
    'ge4j_' + '3t': 'N_jets \\geq 4, N_btags = 3',
    'ge4j_' + 'ge4t': 'N_jets \\geq 4, N_btags \\geq 4',
    'ge4j_' + 'ge3t': 'N_jets \\geq 4, N_btags \\geq 3',
}

# binary classification requires a designated signal class — abort otherwise
if options.binary and not signal:
    sys.exit(
        "ERROR: need to specify signal class if binary classification is activated"
    )

# load the previously trained DNN for the selected category
dnn = DNN.loadDNN(
    inPath,
    outPath,
    binary=options.binary,
    signal=signal,
    binary_target=options.binary_bkg_target,
    total_weight_expr=options.total_weight_expr,
    category_cutString=category_cutString_dict[options.category],
    category_label=category_label_dict[options.category])

# plotting
if options.plot:
    if options.binary:
        # binary mode: plot the distribution of the single output node,
        # restricted to [background target, 1]
        bin_range = [options.binary_bkg_target, 1.]
        dnn.plot_binaryOutput(log=options.log,
                              privateWork=options.privateWork,
                              printROC=options.printROC,
                              bin_range=bin_range)

    else:
예제 #7
0
# choose the work directory depending on the host (NAF cluster vs. local)
if "naf" in socket.gethostname():
    workpath = "/nfs/dust/cms/user/vdlinden/DRACO-MLfoy/workdir/"
else:
    workpath = "/ceph/vanderlinden/DRACO-MLfoy/workdir/"


# path to input data files
inPath = workpath + "/AachenDNN_files"
# output path in workdir
outpath = workpath + "/top10_DNN_" + str(key) + "/"

# set up the DNN wrapper for the stored training of this category
dnn = DNN.DNN(in_path=inPath,
              save_path=outpath,
              event_classes=event_classes,
              event_category=categories[key],
              train_variables=category_vars[key],
              additional_cut=None)

# restore the previously trained model from disk
dnn.load_trained_model()

# evaluate the network prediction for a few selected events
for event_id in (7230872, 7230984, 7231382, 7231690):
    dnn.predict_event_query("(Evt_ID == {})".format(event_id))

#dnn.plot_class_differences()
#dnn.plot_discriminators()
#dnn.plot_classification()
#dnn.plot_confusion_matrix()
#dnn.plot_output_output_correlation(plot=True)
#dnn.plot_input_output_correlation(plot=False)