label="ttZ", isTrainSample=False, normalization_weight=2., signalSample=True) # path to output directory (adjust NAMING) savepath = basedir + "/workdir/" + "ttZStudies_allVariables_" + str(JTcategory) # initializing DNN training class dnn = DNN.DNN( save_path=savepath, input_samples=input_samples, event_category=JTcategory, train_variables=variables, # number of epochs train_epochs=500, # number of epochs without decrease in loss before stopping early_stopping=20, # metrics for evaluation (c.f. KERAS metrics) eval_metrics=["acc"], # percentage of train set to be used for testing (i.e. evaluating/plotting after training) test_percentage=0.2) #dnn.data.get_non_train_samples() # build default model dnn.build_model() # perform the training dnn.train_model() # evalute the trained model dnn.eval_model()
#input_samples.addSample("ttcc"+naming, label = "ttcc") #input_samples.addSample("ttlf"+naming, label = "ttlf") sampleDict = pputils.readSampleFile(inPath) print "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" print sampleDict print "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" pputils.addToInputSamples(input_samples, sampleDict) # initializing DNN training class dnn = DNN.DNN( save_path=outputdir, input_samples=input_samples, category_name=options.category, train_variables=variables, # number of epochs train_epochs=int(options.train_epochs), # metrics for evaluation (c.f. KERAS metrics) eval_metrics=["acc"], # percentage of train set to be used for testing (i.e. evaluating/plotting after training) test_percentage=0.2, # balance samples per epoch such that there amount of samples per category is roughly equal balanceSamples=options.balanceSamples) # config dictionary for DNN architecture config = { "layers": [200, 200], "loss_function": "categorical_crossentropy", "Dropout": 0.5, "L2_Norm": 1e-5, "batch_size": 5000, "optimizer": optimizers.Adagrad(decay=0.99),
save_path=savepath, event_classes=event_classes, event_category=categories[key], train_variables=category_vars[key], batch_size=5000, train_epochs=20, early_stopping=5, optimizer="adam", test_percentage=0.2, eval_metrics=["acc"], phi_padding=0) dnn = DNN.DNN(in_path=inPath, save_path=savepath, event_classes=event_classes, event_category=categories[key], train_variables=category_vars[key], train_epochs=500, early_stopping=20, eval_metrics=["acc"]) def plot_confusion_matrix(confusion_matrix, error_confusion_matrix, xticklabels, yticklabels, title, roc, roc_err, save_path, norm_matrix=True, difference=False):
label="ttlf", normalization_weight=1.) if options.isBinary(): input_samples.addBinaryLabel(options.getSignal(), options.getBinaryBkgTarget()) # initializing DNN training class dnn = DNN.DNN( save_path=options.getOutputDir(), input_samples=input_samples, category_name=options.getCategory(), train_variables=options.getTrainVariables(), # number of epochs train_epochs=options.getTrainEpochs(), # metrics for evaluation (c.f. KERAS metrics) eval_metrics=["acc"], # percentage of train set to be used for testing (i.e. evaluating/plotting after training) test_percentage=options.getTestPercentage(), # balance samples per epoch such that there amount of samples per category is roughly equal balanceSamples=options.doBalanceSamples(), evenSel=options.doEvenSelection(), norm_variables=options.doNormVariables()) # build DNN model dnn.build_model(options.getNetConfig()) # perform the training dnn.train_model() # evalute the trained model
# Train, evaluate and plot a DNN for one jet-tag category (JTcategory),
# using the input variables registered for that category in variable_set.
variables = variable_set.variables[JTcategory]

# class names handed to the DNN as event_classes
event_classes = ["ttHbb", "ttbb", "tt2b", "ttb", "ttcc", "ttlf"]

# location of the input data
inPath = "/ceph/vanderlinden/MLFoyTrainData/DNN/"

# all outputs live below <basedir>/workdir/ and carry the category in their name
_workdir = basedir + "/workdir/"
_category_tag = str(JTcategory)
savepath = _workdir + "DNN_allVariables_" + _category_tag
cmatrix_file = (
    _workdir + "confusionMatrixData/allVariables_" + _category_tag + ".h5"
)

# make sure the directory that would hold the confusion matrix file exists
_cmatrix_dir = os.path.dirname(cmatrix_file)
if not os.path.exists(_cmatrix_dir):
    os.makedirs(_cmatrix_dir)

# settings passed to the DNN constructor as keyword arguments
_dnn_settings = {
    "in_path": inPath,
    "save_path": savepath,
    "event_classes": event_classes,
    "event_category": JTcategory,
    "train_variables": variables,
    "train_epochs": 500,
    "early_stopping": 20,
    "eval_metrics": ["acc"],
    "test_percentage": 0.2,
}
dnn = DNN.DNN(**_dnn_settings)

# build the default model, train it and evaluate it
dnn.build_model()
dnn.train_model()
dnn.eval_model()

# input weight / feature ranking and metric plots
dnn.get_input_weights()
dnn.rank_input_features()
dnn.plot_metrics()

# plotting
# (writing the confusion matrix to cmatrix_file is currently disabled)
#dnn.save_confusionMatrix(location = cmatrix_file, save_roc = True)
dnn.plot_confusionMatrix(norm_matrix=True)
'ge4j_' + '2t': 'N_jets \\geq 4, N_btags = 2', 'ge4j_' + '3t': 'N_jets \\geq 4, N_btags = 3', 'ge4j_' + 'ge4t': 'N_jets \\geq 4, N_btags \\geq 4', 'ge4j_' + 'ge3t': 'N_jets \\geq 4, N_btags \\geq 3', } if options.binary: if not signal: sys.exit( "ERROR: need to specify signal class if binary classification is activated" ) dnn = DNN.loadDNN(inPath, outPath, binary=options.binary, signal=signal, binary_target=options.binary_bkg_target, total_weight_expr=options.total_weight_expr, category_cutString=category_cutString_dict[options.category], category_label=category_label_dict[options.category]) # plotting if options.plot: if options.binary: # plot output node bin_range = [options.binary_bkg_target, 1.] dnn.plot_binaryOutput(log=options.log, privateWork=options.privateWork, printROC=options.printROC, bin_range=bin_range) else:
# Load an already trained DNN for the category selected by `key` and print
# its predictions for a handful of individual events.

# the working directory depends on the machine: hosts with "naf" in their
# name use the /nfs/dust area, every other host uses /ceph
workpath = (
    "/nfs/dust/cms/user/vdlinden/DRACO-MLfoy/workdir/"
    if "naf" in socket.gethostname()
    else "/ceph/vanderlinden/DRACO-MLfoy/workdir/"
)

# path to input data files
inPath = workpath + "/AachenDNN_files"

# output path in workdir
outpath = workpath + "/top10_DNN_" + str(key) + "/"

dnn = DNN.DNN(
    in_path=inPath,
    save_path=outpath,
    event_classes=event_classes,
    event_category=categories[key],
    train_variables=category_vars[key],
    additional_cut=None,
)

# no training here: load the previously trained model
dnn.load_trained_model()

# query the prediction for each selected event, one query per event ID
for _evt_id in (7230872, 7230984, 7231382, 7231690):
    dnn.predict_event_query("(Evt_ID == %d)" % _evt_id)

# further plotting calls, currently disabled
#dnn.plot_class_differences()
#dnn.plot_discriminators()
#dnn.plot_classification()
#dnn.plot_confusion_matrix()
#dnn.plot_output_output_correlation(plot=True)
#dnn.plot_input_output_correlation(plot=False)