def Evaluate(outdir):
    """Train, test and evaluate a TMVA RuleFit method using files in ``outdir``.

    Inputs read from ``outdir``:
      * ``inputvars.txt`` -- one variable expression per line, each registered
        with the factory as a float ('F') variable;
      * ``signals.txt`` / ``backgrounds.txt`` -- passed to
        ``factory.SetInputTrees`` as the signal and background inputs.

    Outputs written to ``outdir``:
      * ``tmva.root`` -- the ROOT file handed to the TMVA factory;
      * ``tmva.log`` -- receives everything written to ``sys.stdout`` while
        this function runs.

    The TMVA weight-file directory is set to ``<outdir>/weights``.

    ``sys.stdout`` is restored and the log file is closed when the function
    returns or raises, so the redirection no longer leaks into the caller.
    """
    log_file = open(outdir + '/tmva.log', 'w')
    previous_stdout = sys.stdout
    sys.stdout = log_file
    try:
        # Output file
        output = TFile(outdir + '/tmva.root', 'RECREATE')

        # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C
        # for more factory options).  All TMVA output can be suppressed by
        # removing the "!" (not) in front of the "Silent" argument in the
        # option string.
        factory = TMVA.Factory("TMVARuleFit", output, "!V:!Silent:Color")

        # Set the variables used for the analysis.  Only the line terminator
        # is stripped: the previous ``line[:-1]`` also removed the last
        # character of a final line that had no trailing newline.
        with open(outdir + '/inputvars.txt') as var_file:
            for line in var_file:
                var_name = line.rstrip('\r\n')
                if var_name:  # ignore blank lines
                    factory.AddVariable(var_name, 'F')

        # Set the weight directory
        TMVA.gConfig().GetIONames().fWeightFileDir = outdir + "/weights"

        # Limit the creation of correlation plots
        TMVA.gConfig().GetVariablePlotting().fMaxNumOfAllowedVariablesForScatterPlots = 20

        # Set the input files with signal and background events
        factory.SetInputTrees(outdir + '/signals.txt',
                              outdir + '/backgrounds.txt')

        # Empty cuts: no additional selection on signal or background.
        cutsig = TCut('')
        cutbkg = TCut('')
        factory.PrepareTrainingAndTestTree(
            cutsig, cutbkg, "SplitMode=Random:NormMode=NumEvents:!V")

        factory.BookMethod(
            TMVA.Types.kRuleFit, "RuleFit",
            "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.00001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02")

        # Train MVAs
        factory.TrainAllMethods()
        # Test MVAs
        factory.TestAllMethods()
        # Evaluate MVAs
        factory.EvaluateAllMethods()

        # Save the output.
        output.Close()
    finally:
        sys.stdout = previous_stdout
        log_file.close()
def fit(self, X, y, X_test=None, y_test=None, weights=None,
        weights_test=None, signal_label=None, **kwargs):
    """Load events into ``self.factory``, book ``self.method`` and train it.

    Parameters
    ----------
    X, y : training inputs and targets/labels, forwarded to the root_numpy
        ``add_*_events`` helper selected by ``self.task``.
    X_test, y_test : optional test sample; when ``X_test`` is None the
        training sample (including ``weights``) is reused as the test sample.
    weights, weights_test : optional per-event weights.
    signal_label : forwarded as ``signal_label`` for classification tasks only.
    **kwargs : method options.  ``True`` becomes ``name``, ``False`` becomes
        ``!name`` and anything else ``name=value``; the pieces are joined
        with ``:`` into the option string passed to ``BookMethod``.

    Before loading the events the helper is also called with deliberately
    malformed arguments and is expected to raise (checked via
    ``assert_raises``).
    """
    # (re)configure settings since deleting a previous Factory resets all
    # this. This is poor design, TMVA.
    config = TMVA.gConfig()
    config.GetIONames().fWeightFileDir = self.tmpdir
    config.SetSilent(True)
    config.SetDrawProgressBar(False)
    self.factory.DeleteAllMethods()

    extra_kwargs = dict()
    if self.task == "Regression":
        func = rnp.tmva.add_regression_events
    else:
        func = rnp.tmva.add_classification_events
        extra_kwargs["signal_label"] = signal_label

    # test exceptions
    # Slice bounds use floor division: ``/`` produces a float under true
    # division, which is not a valid slice index.
    assert_raises(TypeError, func, object(), X, y)
    assert_raises(ValueError, func, self.factory, X, y[:y.shape[0] // 2])
    if weights is not None:
        assert_raises(ValueError, func, self.factory, X, y,
                      weights=weights[:weights.shape[0] // 2])
        assert_raises(ValueError, func, self.factory, X, y,
                      weights=weights[:, np.newaxis])
    assert_raises(ValueError, func, self.factory, [[[1, 2]]], [1])
    assert_raises(ValueError, func, self.factory, [[1, 2]], [[[1]]])

    func(self.factory, X, y, weights=weights, **extra_kwargs)
    if X_test is None:
        # No dedicated test sample: test on the training sample.
        X_test = X
        y_test = y
        weights_test = weights
    func(self.factory, X_test, y_test, weights=weights_test, test=True,
         **extra_kwargs)

    self.factory.PrepareTrainingAndTestTree(TCut("1"),
                                            "NormMode=EqualNumEvents")

    options = []
    for param, value in kwargs.items():
        if value is True:
            options.append(param)
        elif value is False:
            options.append("!{0}".format(param))
        else:
            options.append("{0}={1}".format(param, value))
    options = ":".join(options)

    self.factory.BookMethod(self.method, self.method, options)
    self.factory.TrainAllMethods()
def fit(self, X, y, X_test=None, y_test=None, weights=None,
        weights_test=None, signal_label=None, **kwargs):
    """Load events into ``self.factory``, book ``self.method`` and train it.

    Parameters
    ----------
    X, y : training inputs and targets/labels, forwarded to the root_numpy
        ``add_*_events`` helper selected by ``self.task``.
    X_test, y_test : optional test sample; test events are only added when
        both are given.
    weights, weights_test : optional per-event weights.
    signal_label : forwarded as ``signal_label`` for classification tasks only.
    **kwargs : method options, each rendered as ``name=value``.  The pieces
        are joined with ``:`` and, when there is at least one, prefixed with
        a ``:`` before being passed to ``BookMethod``.

    Before loading the events the helper is also called with deliberately
    malformed arguments and is expected to raise (checked via
    ``assert_raises``).
    """
    # (re)configure settings since deleting a previous Factory resets all
    # this. This is poor design, TMVA.
    config = TMVA.gConfig()
    config.GetIONames().fWeightFileDir = self.tmpdir
    config.SetSilent(True)
    config.SetDrawProgressBar(False)
    self.factory.DeleteAllMethods()

    extra_kwargs = dict()
    if self.task == 'Regression':
        func = rnp.tmva.add_regression_events
    else:
        func = rnp.tmva.add_classification_events
        extra_kwargs['signal_label'] = signal_label

    # test exceptions
    # Slice bounds use floor division: ``/`` produces a float under true
    # division, which is not a valid slice index.
    assert_raises(TypeError, func, object(), X, y)
    assert_raises(ValueError, func, self.factory, X, y[:y.shape[0] // 2])
    if weights is not None:
        assert_raises(ValueError, func, self.factory, X, y,
                      weights=weights[:weights.shape[0] // 2])
        assert_raises(ValueError, func, self.factory, X, y,
                      weights=weights[:, np.newaxis])
    assert_raises(ValueError, func, self.factory, [[[1, 2]]], [1])
    assert_raises(ValueError, func, self.factory, [[1, 2]], [[[1]]])

    func(self.factory, X, y, weights=weights, **extra_kwargs)
    if X_test is not None and y_test is not None:
        func(self.factory, X_test, y_test, weights=weights_test, test=True,
             **extra_kwargs)

    self.factory.PrepareTrainingAndTestTree(
        TCut('1'), 'NormMode=EqualNumEvents')

    options = ':'.join(['{0}={1}'.format(param, value)
                        for param, value in kwargs.items()])
    if options:
        options = ':' + options

    self.factory.BookMethod(self.method, self.method, options)
    self.factory.TrainAllMethods()
def main():
    """Book, train, test and evaluate a single TMVA BDT variant.

    Command-line options (parsed with ``getopt``):

    * ``-m/--methods``: method to book; only the exact values ``BDT``,
      ``BDTG``, ``BDTMitFisher``, ``BDTB`` and ``BDTD`` book anything.
    * ``-n/--nTrees``, ``-d/--maxDepth``: inserted into the BDT option strings.
    * ``-k/--mass``: mass point; selects the signal file name and is part of
      the run label.
    * ``-l/--varListKey``: key into ``varsList.varList``.
    * ``-t/--inputtrees``: must name exactly two trees, otherwise the script
      exits with status 1; the names are not used afterwards (all trees are
      read under the name ``ljmet``).
    * ``-i/--inputfile``, ``-o/--outputfile``: accepted, but both values are
      recomputed below from the mass point and the run label.
    * ``-v/--verbose``: accepted; verbosity is already on by default.
    * ``-h/-?/--help/--usage``: print usage and exit.

    Relies on names defined outside this function: ``usage``, ``varsList``,
    the ``DEFAULT_*`` constants, ``weightStrS``/``weightStrB`` and
    ``cutStrS``/``cutStrB``.

    Side effects: creates ``weights/TMVA_<label>.root``, sets the TMVA weight
    directory to ``weights/<label>``, changes the working directory to that
    directory and calls the TMVA plotting helpers there.
    """
    try:
        # retrieve command line options
        shortopts = "m:i:n:d:k:l:t:o:vh?"
        longopts = [
            "methods=", "inputfile=", "nTrees=", "maxDepth=", "mass=",
            "varListKey=", "inputtrees=", "outputfile=", "verbose", "help",
            "usage"
        ]
        opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts)
    except getopt.GetoptError:
        # print help information and exit:
        print("ERROR: unknown options in argument %s" % sys.argv[1:])
        usage()
        sys.exit(1)

    infname = DEFAULT_INFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname = DEFAULT_OUTFNAME
    methods = DEFAULT_METHODS
    nTrees = DEFAULT_NTREES
    mDepth = DEFAULT_MDEPTH
    mass = DEFAULT_MASS
    varListKey = DEFAULT_VARLISTKEY
    verbose = True

    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-d", "--maxDepth"):
            mDepth = a
        elif o in ("-k", "--mass"):
            mass = a
        elif o in ("-l", "--varListKey"):
            varListKey = a
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-n", "--nTrees"):
            nTrees = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            # split() without arguments discards surrounding and repeated
            # whitespace (the previous ``a.strip()`` result was thrown away).
            trees = sorted(a.split(), reverse=True)
            if len(trees) != 2:
                print("ERROR: need to give two trees (each one for signal and background)")
                print(trees)
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True

    # Command-line values are strings; coerce so that non-string defaults
    # cannot break the concatenation that builds the run label.
    mDepth = str(mDepth)
    mass = str(mass)

    varList = varsList.varList[varListKey]
    nVars = str(len(varList)) + 'vars'
    Note = methods + '_' + varListKey + '_' + nVars + '_mDepth' + mDepth + '_M' + mass
    outfname = "weights/TMVA_" + Note + ".root"

    # Print methods
    mlist = methods.replace(' ', ',').split(',')
    print("=== TMVAClassification: use method(s)...")
    for m in mlist:
        if m.strip() != '':
            print("=== - <%s>" % m.strip())

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut

    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print("*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA")
        print("*** does not run properly (function calls with enums in the argument are ignored).")
        print("*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),")
        print("*** or use another ROOT version (e.g., ROOT 5.19).")
        sys.exit(1)

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile(outfname, 'RECREATE')

    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C
    # for more factory options).  All TMVA output can be suppressed by
    # removing the "!" (not) in front of the "Silent" argument in the option
    # string.
    factory = TMVA.Factory(
        "TMVAClassification", outputFile,
        "!V:!Silent:Color:DrawProgressBar:Transformations=I;:AnalysisType=Classification")

    # Set verbosity
    factory.SetVerbose(verbose)

    # If you wish to modify default settings
    # (please check "src/Config.h" to see all available global options)
    (TMVA.gConfig().GetIONames()).fWeightFileDir = "weights/" + Note

    # Define the input variables that shall be used for the classifier
    # training.  Each varList entry supplies the first three AddVariable
    # arguments; NJets_singleLepCalc is registered as integer ('I'), all
    # others as float ('F').
    for iVar in varList:
        var_type = 'I' if iVar[0] == 'NJets_singleLepCalc' else 'F'
        factory.AddVariable(iVar[0], iVar[1], iVar[2], var_type)

    inputDir = varsList.inputDir
    print('mass point ' + mass)
    infname = "ChargedHiggs_HplusTB_HplusToTB_M-%s_13TeV_amcatnlo_pythia8_hadd.root" % (mass)
    iFileSig = TFile.Open(inputDir + infname)
    sigChain = iFileSig.Get("ljmet")
    factory.AddSignalTree(sigChain)

    # The opened files and their trees are collected in lists so that their
    # Python references stay alive for the rest of the function.
    bkg_list = []
    bkg_trees_list = []
    for bkg_name in varsList.bkg:
        bkg_file = TFile.Open(inputDir + bkg_name)
        bkg_list.append(bkg_file)
        print(inputDir + bkg_name)
        bkg_tree = bkg_file.Get("ljmet")
        bkg_trees_list.append(bkg_tree)
        bkg_tree.GetEntry(0)
        if bkg_tree.GetEntries() == 0:
            # Empty samples are not registered with the factory.
            continue
        factory.AddBackgroundTree(bkg_tree, 1)

    # Set individual event weights (the variables must exist in the original
    # TTree).
    factory.SetSignalWeightExpression(weightStrS)
    factory.SetBackgroundWeightExpression(weightStrB)

    # Apply additional cuts on the signal and background sample.
    # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )
    mycutSig = TCut(cutStrS)
    mycutBkg = TCut(cutStrB)

    # Here, the relevant variables are copied over in new, slim trees that
    # are used for TMVA training and testing.  "SplitMode=Random" means that
    # the input events are randomly shuffled before splitting them into
    # training and test samples.
    factory.PrepareTrainingAndTestTree(
        mycutSig, mycutBkg,
        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V")

    # ---- Book MVA methods
    #
    # please lookup the various method configuration options in the
    # corresponding cxx files, or here:
    # http://tmva.sourceforge.net/optionRef.html

    # bdtSetting for "BDT"
    bdtSetting = '!H:!V:NTrees=%s:MaxDepth=%s' % (nTrees, mDepth)
    bdtSetting += ':MinNodeSize=2.5%:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20'
    bdtSetting += ':IgnoreNegWeightsInTraining=True'

    # bdtSetting for "BDTMitFisher"
    bdtFSetting = '!H:!V:NTrees=%s' % nTrees
    bdtFSetting += ':MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20'
    bdtFSetting += ':IgnoreNegWeightsInTraining=True'

    # bdtSetting for "BDTG"
    bdtGSetting = '!H:!V:NTrees=%s:MaxDepth=%s' % (nTrees, mDepth)
    bdtGSetting += ':MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20'
    # Pray takes into account the effect of negative bins in BDTG
    bdtGSetting += ':Pray'

    # bdtSetting for "BDTB"
    bdtBSetting = '!H:!V:NTrees=%s' % nTrees
    bdtBSetting += ':MinNodeSize=2.5%:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20'
    bdtBSetting += ':IgnoreNegWeightsInTraining=True'

    # bdtSetting for "BDTD"
    bdtDSetting = '!H:!V:NTrees=%s' % nTrees
    bdtDSetting += ':MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate'
    bdtDSetting += ':IgnoreNegWeightsInTraining=True'

    # BOOKING AN ALGORITHM
    # Only an exact match of the full --methods value books a method; the
    # method title passed to TMVA is the same string as the key.
    bdt_options = {
        "BDT": bdtSetting,
        "BDTG": bdtGSetting,
        "BDTMitFisher": bdtFSetting,
        "BDTB": bdtBSetting,
        "BDTD": bdtDSetting,
    }
    if methods in bdt_options:
        factory.BookMethod(TMVA.Types.kBDT, methods, bdt_options[methods])

    # ---- Now you can tell the factory to train, test, and evaluate the MVAs.

    # Train MVAs
    factory.TrainAllMethods()
    # Test MVAs
    factory.TestAllMethods()
    # Evaluate MVAs
    factory.EvaluateAllMethods()

    # Save the output.
    outputFile.Close()

    # save plots: the helpers run from inside the weight directory, hence
    # the "../../" prefix to reach the output file again.
    os.chdir('weights/' + Note)
    gROOT.SetBatch(1)
    # Classifier Background Rejection vs Signal Efficiency (ROC curve)
    TMVA.efficiencies("../../" + outfname)
    # Classifier Output Distributions (test sample)
    TMVA.mvas("../../" + outfname, 0)
    # Input Variable Linear Correlation Coefficients
    TMVA.correlations("../../" + outfname)
    # Input variables (training sample)
    TMVA.variables("../../" + outfname)

    # Batch mode was switched on just above, so the GUI is only opened if
    # that call is removed.
    if not gROOT.IsBatch():
        TMVA.TMVAGui("../../" + outfname)

    print("DONE")
'BDTD': 0, 'BDTF': 0, 'MLP': 0, 'MLPBFGS': 0, 'MLPBNN': 0, 'CFMlpANN': 0, 'TMlpANN': 0 } factory = t.Factory( 'vbf_bdt_combined_james_current', outputFile, '!V:!Silent:Color:DrawProgressBar:Transformations=I;D;G,D:AnalysisType=Classification' ) factory.Print() t.gConfig().GetIONames().fWeightFileDir = outputWeightsDir dataloader = t.DataLoader(".") dataloader.AddVariable('dijetDEta', 'dijetDEta', 'dijetDEta', 'F') dataloader.AddVariable('dijetDPhi', 'dijetDPhi', 'dijetDPhi', 'F') dataloader.AddVariable('llgJJDPhi', 'llgJJDPhi', 'llgJJDPhi', 'F') dataloader.AddVariable('jPhotonDRMin', 'jPhotonDRMin', 'jPhotonDRMin', 'F') dataloader.AddVariable('ptt', 'ptt', 'ptt', 'F') dataloader.AddVariable('jetOnePt', 'jetOnePt', 'jetOnePt', 'F') dataloader.AddVariable('jetTwoPt', 'jetTwoPt', 'jetTwoPt', 'F') dataloader.AddVariable('kin_bdt_james', 'kin_bdt_james', 'kin_bdt_james', 'F') dataloader.AddVariable('vbfPtBalance', 'vbfPtBalance', 'vbfPtBalance', 'F') dataloader.AddVariable('photonZepp', 'photonZepp', 'photonZepp', 'F') # trees for training
# Options for splitting the loaded events into training and test samples.
split_options = ":".join([
    "nTrain_Signal=" + str(NSIG),
    "nTrain_Background=" + str(NBKG),
    "nTest_Signal=" + str(NSIG_TEST),
    "nTest_Background=" + str(NBKG_TEST),
    "SplitMode=Random",
    "NormMode=NumEvents",
    "!V",
])
loader.PrepareTrainingAndTestTree(mycutSig, mycutBkg, split_options)

factory = TMVA.Factory(
    "Optimization",
    '!V:!ROC:!Silent:Color:!DrawProgressBar:Transformations=I;:AnalysisType=Classification'
)

(TMVA.gConfig().GetIONames()).fWeightFileDir = '/weights'

# Options for the PyKeras method, assembled from the model file name and
# the training hyper-parameters.
kerasSetting = ":".join([
    '!H',
    '!V',
    'VarTransform=G',
    'FilenameModel=' + modelName,
    'SaveBestOnly=true',
    'NumEpochs=' + str(epochs),
    'BatchSize=' + str(batchSize),
    'TriesEarlyStopping=' + str(patience),
])

factory.BookMethod(loader, TMVA.Types.kPyKeras, "PyKeras", kerasSetting)

# Train, test and evaluate the booked method, then read back its ROC integral.
factory.TrainAllMethods()
factory.TestAllMethods()
factory.EvaluateAllMethods()

ROC = factory.GetROCIntegral(dataset + "/optimize_" + outf_key, "PyKeras")
def _book_requested_methods(factory, types, requested):
    """Book every known TMVA method whose name appears in ``requested``.

    ``types`` is the object providing the ``k*`` method-type attributes
    (``TMVA.Types``).  A type attribute is only looked up for methods that
    are actually requested, and methods are booked in table order.
    """
    # Shared prefix of all FDA option strings.
    fda_common = "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10)"

    # (method name, TMVA.Types attribute, option string)
    method_table = (
        # Cut optimisation
        ("Cuts", "kCuts", "!H:!V:FitMethod=MC:EffSel:VarProp[0]=FMax:VarProp[1]=FMin"),
        ("CutsD", "kCuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate"),
        ("CutsPCA", "kCuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA"),
        ("CutsGA", "kCuts", "H:!V:FitMethod=GA:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95:VarProp[0]=FMin:VarProp[1]=FMax"),
        ("CutsSA", "kCuts", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale"),
        # Likelihood ("naive Bayes estimator")
        ("Likelihood", "kLikelihood", "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50"),
        # Decorrelated likelihood
        ("LikelihoodD", "kLikelihood", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate"),
        # PCA-transformed likelihood
        ("LikelihoodPCA", "kLikelihood", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA"),
        # Use a kernel density estimator to approximate the PDFs
        ("LikelihoodKDE", "kLikelihood", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50"),
        # Use a variable-dependent mix of splines and kernel density estimator
        ("LikelihoodMIX", "kLikelihood", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50"),
        # Multi-dimensional probability density estimator
        ("PDERS", "kPDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600"),
        ("PDERSD", "kPDERS", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate"),
        ("PDERSPCA", "kPDERS", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA"),
        # Multi-dimensional likelihood estimator using self-adapting phase-space binning
        ("PDEFoam", "kPDEFoam", "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T"),
        ("PDEFoamBoost", "kPDEFoam", "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T"),
        # K-Nearest Neighbour classifier (KNN)
        ("KNN", "kKNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim"),
        # H-Matrix (chi2-squared) method
        ("HMatrix", "kHMatrix", "!H:!V"),
        # Linear discriminant (same as Fisher discriminant)
        ("LD", "kLD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10"),
        # Fisher discriminant (same as LD)
        ("Fisher", "kFisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10"),
        # Fisher with Gauss-transformed input variables
        ("FisherG", "kFisher", "H:!V:VarTransform=Gauss"),
        # Composite classifier: ensemble (tree) of boosted Fisher classifiers
        ("BoostedFisher", "kFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2"),
        # Function discrimination analysis (FDA) -- test of various fitters
        ("FDA_MC", "kFDA", fda_common + ":FitMethod=MC:SampleSize=100000:Sigma=0.1"),
        ("FDA_GA", "kFDA", fda_common + ":FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1"),
        ("FDA_SA", "kFDA", fda_common + ":FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale"),
        ("FDA_MT", "kFDA", fda_common + ":FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch"),
        ("FDA_GAMT", "kFDA", fda_common + ":FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim"),
        ("FDA_MCMT", "kFDA", fda_common + ":FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20"),
        # TMVA ANN: MLP (recommended ANN)
        ("MLP", "kMLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator"),
        ("MLPBFGS", "kMLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator"),
        # BFGS training with bayesian regulators
        ("MLPBNN", "kMLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator"),
        # CF(Clermont-Ferrand)ANN
        ("CFMlpANN", "kCFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"),
        # Tmlp(Root)ANN
        ("TMlpANN", "kTMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"),
        # Support Vector Machine
        ("SVM", "kSVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm"),
        # Boosted Decision Trees
        ("BDTG", "kBDT", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5"),
        ("BDT", "kBDT", "!H:!V:NTrees=850:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning"),
        ("BDTB", "kBDT", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning"),
        ("BDTD", "kBDT", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate"),
        # RuleFit -- TMVA implementation of Friedman's method
        ("RuleFit", "kRuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02"),
    )
    for method_name, type_attr, method_options in method_table:
        if method_name in requested:
            factory.BookMethod(getattr(types, type_attr), method_name,
                               method_options)


def main():
    """Run a TMVA classification with the methods named on the command line.

    Command-line options (parsed with ``getopt``):

    * ``-w/--weight_fold``: TMVA weight-file directory (default ``weights``).
    * ``-m/--methods``: space- or comma-separated method names to book.
    * ``-i/--inputfilesig``, ``-j/--inputfilebkg``: signal / background files.
    * ``-f/--friendinputfilesig``, ``-g/--friendinputfilebkg``: accepted; the
      values are not used further in this function.
    * ``-t/--inputtrees``: exactly two tree names; after sorting in reverse
      order the first is used for signal and the second for background.
    * ``-o/--outputfile``: ROOT output file.
    * ``-a/--addedcuts``: cut expression applied to both signal and background.
    * ``-v/--verbose``: verbose factory output.
    * ``--gui``: open the TMVA GUI after the run.  The short form ``-g``
      always belonged to ``--friendinputfilebkg`` (getopt uses the first
      ``g`` entry of the short-option string), so the GUI has no short form.
    * ``-h/-?/--help/--usage``: print usage and exit.

    Relies on ``usage`` and the ``DEFAULT_*`` constants defined outside this
    function.  Exits with status 1 on bad options, on an unsupported ROOT
    version, or when an input file is not accessible.
    """
    try:
        # retrieve command line options
        # "g:" is the friend-background option; the second, argument-less
        # "g" that used to follow "v" could never be selected and was dropped.
        shortopts = "w:m:i:j:f:g:t:o:a:vh?"
        # "addedcuts=" was missing, so --addedcuts was rejected by getopt
        # although the loop below handles it.
        longopts = ["weight_fold=", "methods=", "inputfilesig=",
                    "inputfilebkg=", "friendinputfilesig=",
                    "friendinputfilebkg=", "inputtrees=", "outputfile=",
                    "addedcuts=", "verbose", "gui", "help", "usage"]
        opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts)
    except getopt.GetoptError:
        # print help information and exit:
        print("ERROR: unknown options in argument %s" % sys.argv[1:])
        usage()
        sys.exit(1)

    infnameSig = DEFAULT_INFNAMESIG
    infnameBkg = DEFAULT_INFNAMEBKG
    friendfnameSig = DEFAULT_FRIENDNAMESIG
    friendfnameBkg = DEFAULT_FRIENDNAMEBKG
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname = DEFAULT_OUTFNAME
    methods = DEFAULT_METHODS
    weight_fold = "weights"
    verbose = False
    gui = False
    addedcuts = ""

    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-w", "--weight_fold"):
            weight_fold = a
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-i", "--inputfilesig"):
            infnameSig = a
        elif o in ("-j", "--inputfilebkg"):
            infnameBkg = a
        elif o in ("-f", "--friendinputfilesig"):
            friendfnameSig = a
        elif o in ("-g", "--friendinputfilebkg"):
            friendfnameBkg = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-a", "--addedcuts"):
            addedcuts = a
        elif o in ("-t", "--inputtrees"):
            # split() without arguments discards surrounding and repeated
            # whitespace (the previous ``a.strip()`` result was thrown away).
            trees = sorted(a.split(), reverse=True)
            if len(trees) != 2:
                print("ERROR: need to give two trees (each one for signal and background)")
                print(trees)
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True
        elif o == "--gui":
            gui = True

    # Print methods
    mlist = methods.replace(' ', ',').split(',')
    print("=== TMVAClassification: use method(s)...")
    for m in mlist:
        if m.strip() != '':
            print("=== - <%s>" % m.strip())

    # Print the file
    print("Using file " + infnameSig + " for signal...")
    print("Using file " + infnameBkg + " for background...")

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut

    # check ROOT version, give alarm if 5.18
    print("ROOT version is " + str(gROOT.GetVersionCode()))
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print("*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA")
        print("*** does not run properly (function calls with enums in the argument are ignored).")
        print("*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),")
        print("*** or use another ROOT version (e.g., ROOT 5.19).")
        sys.exit(1)

    # Logon not automatically loaded through PyROOT (logon loads TMVA
    # library); load also GUI.  TMVAlogon.C is not run, ROOT defaults apply.
    gROOT.SetMacroPath("./")
    gROOT.LoadMacro("./TMVAGui.C")

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile(outfname, 'RECREATE')

    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C
    # for more factory options).  All TMVA output can be suppressed by
    # removing the "!" (not) in front of the "Silent" argument in the option
    # string.
    factory = TMVA.Factory(
        "TMVAClassification", outputFile,
        "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification")

    # Set verbosity
    factory.SetVerbose(verbose)

    # If you wish to modify default settings
    # (please check "src/Config.h" to see all available global options)
    (TMVA.gConfig().GetIONames()).fWeightFileDir = weight_fold

    # Define the input variables that shall be used for the classifier
    # training.  Every variable uses its name as title, has an empty unit
    # string and is declared as float ('F').
    for var_name in ("dR_l1l2", "dR_b1b2", "dR_bl", "dR_l1l2b1b2",
                     "MINdR_bl", "dphi_l1l2b1b2", "mass_l1l2", "mass_b1b2",
                     "mass_trans", "MT2", "pt_b1b2",
                     "MMC_h2massweight1_prob"):
        factory.AddVariable(var_name, var_name, "", 'F')

    # Read input data.  The previous fallback tried to download an undefined
    # ``infname`` and therefore crashed with a NameError; report the
    # inaccessible file(s) instead.  AccessPathName is truthy for a path
    # that cannot be accessed.
    missing = [path for path in (infnameSig, infnameBkg)
               if gSystem.AccessPathName(path)]
    if missing:
        print("ERROR: cannot access input file(s): %s" % ", ".join(missing))
        sys.exit(1)

    inputSig = TFile.Open(infnameSig)
    inputBkg = TFile.Open(infnameBkg)

    # Get the signal and background trees for training
    signal = inputSig.Get(treeNameSig)
    background = inputBkg.Get(treeNameBkg)

    # Global event weights (see below for setting event-wise weights).
    # There is no general rule for choosing them: the safest default is
    # equal amounts of signal and background for the training; different
    # weights can be tried to favour purity or efficiency.
    signalWeight = 1.
    backgroundWeight = 1.

    # ====== register trees ==================================================
    # you can add an arbitrary number of signal or background trees
    factory.AddSignalTree(signal, signalWeight)
    factory.AddBackgroundTree(background, backgroundWeight)

    # Apply additional cuts on the signal and background sample.
    # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )
    mycutSig = TCut(addedcuts)
    mycutBkg = TCut(addedcuts)
    print(mycutSig)

    # Here, the relevant variables are copied over in new, slim trees that
    # are used for TMVA training and testing.  "SplitMode=Random" means that
    # the input events are randomly shuffled before splitting them into
    # training and test samples.
    factory.PrepareTrainingAndTestTree(
        mycutSig, mycutBkg,
        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V")

    # ---- Book MVA methods
    #
    # please lookup the various method configuration options in the
    # corresponding cxx files, or here:
    # http://tmva.sourceforge.net/optionRef.html
    _book_requested_methods(factory, TMVA.Types, mlist)

    # ---- Now you can tell the factory to train, test, and evaluate the MVAs.

    # Train MVAs
    factory.TrainAllMethods()
    # Test MVAs
    factory.TestAllMethods()
    # Evaluate MVAs
    factory.EvaluateAllMethods()

    # Save the output.
    outputFile.Close()

    print("=== wrote root file %s\n" % outfname)
    print("=== TMVAClassification is done!\n")

    # open the GUI for the result macros
    if gui:
        gROOT.ProcessLine("TMVAGui(\"%s\")" % outfname)
        # keep the ROOT thread running
        # NOTE(review): the original layout was lost; this assumes the event
        # loop is only entered together with the GUI -- confirm.
        gApplication.Run()
def main():
    """Train a Keras DNN through TMVA's PyKeras method and print its ROC integral.

    Command-line options (all optional):
        -d/--dataset  dataset directory; its first path component names the
                      TMVA DataLoader (default "dataset")
        -w/--where    "lpc" selects the LPC input directories, any other
                      value the BRUX ones (default "lpc")
        -y/--year     2017 or 2018 (default 2017)
        -o/--option   "0": default variable list and hard-coded hyper
                      parameters; "1": read both from files inside the
                      dataset directory (default "0")
        -v/--verbose  TMVA factory verbosity (default on)
        -h/-?/--help/--usage  print usage and exit

    With option "1" the ROC integral is appended to
    <dataset>/varsListHPO.txt after training.
    """
    checkRootVer()  # check that ROOT version is correct

    try:
        shortopts = "d:o:v:w:y:h?"
        longopts = [
            "dataset=", "option=", "where=", "year=", "verbose", "help", "usage"
        ]
        opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts)
    except getopt.GetoptError:
        print("ERROR: unknown options in argument %s" % sys.argv[1:])
        usage()
        sys.exit(1)

    # Defaults, overridden below by whatever was given on the command line.
    settings = {
        "dataset": "dataset",
        "where": "lpc",
        "year": 2017,
        "option": 0,
        "verbose": True,
    }
    flag_to_key = {
        "-d": "dataset", "--dataset": "dataset",
        "-w": "where", "--where": "where",
        "-y": "year", "--year": "year",
        "-o": "option", "--option": "option",
        "-v": "verbose", "--verbose": "verbose",
    }
    for opt, arg in opts:
        if opt in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        settings[flag_to_key[opt]] = arg

    DATASETPATH = settings["dataset"]
    DATASET = DATASETPATH.split("/")[0]
    WHERE = settings["where"]

    # getopt always delivers strings while the defaults are int/bool, so the
    # values are normalised once here; comparing a command-line "2018" with
    # the integer 2018 (or the default 0 with "0") would otherwise never match.
    OPTION = str(settings["option"])
    try:
        YEAR = int(settings["year"])
    except ValueError:
        print("ERROR: year must be an integer, got %r" % (settings["year"],))
        usage()
        sys.exit(1)
    raw_verbose = settings["verbose"]
    if isinstance(raw_verbose, bool):
        VERBOSE = raw_verbose
    else:
        # "--verbose" carries no argument (empty string) and means "on";
        # "-v <value>" is off only for an explicit negative value.
        VERBOSE = str(raw_verbose).strip().lower() not in ("0", "false", "no", "off")

    if YEAR not in (2017, 2018):
        print("ERROR: unsupported year {} (expected 2017 or 2018)".format(YEAR))
        sys.exit(1)
    if OPTION not in ("0", "1"):
        print("ERROR: unsupported option {} (expected 0 or 1)".format(OPTION))
        sys.exit(1)

    if WHERE == "lpc":
        inputDir = varsList.inputDirLPC2017 if YEAR == 2017 else varsList.inputDirLPC2018
    else:
        inputDir = varsList.inputDirBRUX2017 if YEAR == 2017 else varsList.inputDirBRUX2018
    sig_files = varsList.sig2017_2 if YEAR == 2017 else varsList.sig2018_2
    bkg_files = varsList.bkg2017_2 if YEAR == 2017 else varsList.bkg2018_2

    # Choose the input variables.
    if OPTION == "0":
        print("Using Option 0: default varList")
        varList = varsList.varList["DNN"]
    else:
        print("Using Option 1: selected data from {}".format(DATASETPATH))
        with open(DATASETPATH + "/varsListHPO.txt", "r") as hpo_file:
            varsListHPO = hpo_file.readlines()
        varList = []
        START = False
        for line in varsListHPO:
            # every line after the "Variable List:" marker is a variable name
            if START:
                varList.append(str(line.strip()))
            if "Variable List:" in line:
                START = True

    numVars = len(varList)
    outf_key = str("Keras_" + str(numVars) + "vars")
    OUTF_NAME = DATASET + "/weights/TMVA_" + outf_key + ".root"
    outputfile = TFile(OUTF_NAME, "RECREATE")

    # initialize and set-up TMVA factory
    factory = TMVA.Factory(
        "Training", outputfile,
        "!V:!ROC:Silent:Color:!DrawProgressBar:Transformations=I;:AnalysisType=Classification"
    )
    factory.SetVerbose(VERBOSE)
    (TMVA.gConfig().GetIONames()).fWeightFileDir = "weights/" + outf_key

    # initialize and set-up TMVA loader
    loader = TMVA.DataLoader(DATASET)
    for var in varList:
        if OPTION == "0":
            # default list entries are indexable as (name, title, unit)
            name, title, unit = var[0], var[1], var[2]
        else:
            # entries read from varsListHPO.txt are bare variable names
            name, title, unit = var, "", ""
        var_type = "I" if name == "NJets_MultiLepCalc" else "F"
        loader.AddVariable(name, title, unit, var_type)

    # The opened files and trees are collected in lists, as the original
    # code did; presumably this keeps them referenced while TMVA uses the
    # trees -- TODO confirm.
    sig_list = []
    sig_trees_list = []
    bkg_list = []
    bkg_trees_list = []

    # add signal files
    for fname in sig_files:
        sig_file = TFile.Open(inputDir + fname)
        sig_tree = sig_file.Get("ljmet")
        sig_list.append(sig_file)
        sig_trees_list.append(sig_tree)
        sig_tree.GetEntry(0)
        loader.AddSignalTree(sig_tree)

    # add background files, skipping empty trees
    for fname in bkg_files:
        bkg_file = TFile.Open(inputDir + fname)
        bkg_tree = bkg_file.Get("ljmet")
        bkg_list.append(bkg_file)
        bkg_trees_list.append(bkg_tree)
        bkg_tree.GetEntry(0)
        if bkg_tree.GetEntries() == 0:
            continue
        loader.AddBackgroundTree(bkg_tree)

    loader.SetSignalWeightExpression(weightStrS)
    loader.SetBackgroundWeightExpression(weightStrB)

    mycutSig = TCut(cutStrS)
    mycutBkg = TCut(cutStrB)

    loader.PrepareTrainingAndTestTree(
        mycutSig, mycutBkg,
        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V"
    )

    ######################################################
    ######              K E R A S   D N N           ######
    ######################################################
    HIDDEN = 0
    NODES = 0
    LRATE = 0.
    PATTERN = ""
    REGULATOR = ""
    ACTIVATION = ""
    BATCH_SIZE = 0

    # modify this when implementing hyper parameter optimization:
    model_name = 'TTTT_' + str(numVars) + 'vars_model.h5'

    EPOCHS = 100
    PATIENCE = 20

    # edit these based on hyper parameter optimization results
    if OPTION == "0":
        HIDDEN = 3
        NODES = 100
        LRATE = 0.01
        PATTERN = 'static'
        REGULATOR = 'none'
        ACTIVATION = 'relu'
        BATCH_SIZE = 256
    else:
        # the last directory entry whose name contains "params" wins
        optFileName = None
        for entry in os.listdir(DATASETPATH):
            if "params" in entry:
                optFileName = entry
        if optFileName is None:
            print("ERROR: no parameter file found in {}".format(DATASETPATH))
            sys.exit(1)
        with open(DATASETPATH + "/" + optFileName, "r") as opt_handle:
            optFile = opt_handle.readlines()
        START = False
        for line in optFile:
            # only lines after the "Optimized Parameters:" marker are parsed
            if START:
                if "Hidden" in line:
                    HIDDEN = int(line.split(":")[1].strip())
                if "Initial" in line:
                    NODES = int(line.split(":")[1].strip())
                if "Batch" in line:
                    # the file stores the exponent of a power-of-two batch size
                    BATCH_SIZE = 2**int(line.split(":")[1].strip())
                if "Learning" in line:
                    LRATE = float(line.split(":")[1].strip())
                if "Pattern" in line:
                    PATTERN = str(line.split(":")[1].strip())
                if "Regulator" in line:
                    REGULATOR = str(line.split(":")[1].strip())
                if "Activation" in line:
                    ACTIVATION = str(line.split(":")[1].strip())
            if "Optimized Parameters:" in line:
                START = True

    kerasSetting = '!H:!V:VarTransform=G:FilenameModel=' + model_name + \
                   ':SaveBestOnly=true' + \
                   ':NumEpochs=' + str(EPOCHS) + \
                   ':BatchSize=' + str(BATCH_SIZE) + \
                   ':TriesEarlyStopping=' + str(PATIENCE)

    model = build_model(HIDDEN, NODES, LRATE, REGULATOR, PATTERN, ACTIVATION, numVars)
    model.save(model_name)
    model.summary()

    factory.BookMethod(loader, TMVA.Types.kPyKeras, 'PyKeras', kerasSetting)

    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()

    outputfile.Close()

    print("Finished training in " + str((time.time() - START_TIME) / 60.0) + " minutes.")

    ROC = factory.GetROCIntegral(DATASET, 'PyKeras')
    print('ROC value is: {}'.format(ROC))
    if OPTION == "1":
        # Same "/"-separated path as the read above; the original omitted the
        # separator here and wrote to a different file whenever the dataset
        # path did not end with "/".
        with open(DATASETPATH + "/varsListHPO.txt", "a") as varsListHPOtxt:
            varsListHPOtxt.write("ROC Value: {}".format(ROC))
import sys
import os
from ROOT import TMVA, TFile, TCut
from config import cfg, variables_iso_only
from os.path import join
import uproot
import root_pandas

# Initialise TMVA and set the number of x bins used for ROC curves.
TMVA.Tools.Instance()
plotting_options = TMVA.gConfig().GetVariablePlotting()
plotting_options.fNbinsXOfROCCurve = 400

# Every training writes below <out_dir>/<submit_version>/.
out_dir_base = join(cfg["out_dir"], cfg['submit_version'])

max_n_per_class = 200000

for idname in cfg["trainings"]:
    bins_for_id = cfg["trainings"][idname]

    for training_bin in bins_for_id:

        print("Process training pipeline for {0} {1}".format(idname, training_bin))

        # One "legacy" output directory per (id, bin); create it on demand.
        out_dir = join(out_dir_base, idname, training_bin, "legacy")
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

        feature_cols = bins_for_id[training_bin]["variables"]

        outfileName = join(out_dir, "TMVA.root")
        print("---> Working with OutfileName = " + outfileName)
def main():
    """Train and evaluate one BDT on the variable subset encoded by a seed.

    The seed (-s/--seed) is rendered as a 53-character bit string that also
    names the TMVA DataLoader. Variable k of the selected variable list is
    used when the k-th bit, counted from the right-hand end of that string,
    is '1'. After training, the ROC integral of the BDT is printed.

    Other options: -m/--methods, -l/--varListKey, -n/--nTrees,
    -d/--maxDepth, -i/--inputfile, -o/--outputfile, -t/--inputtrees,
    -v/--verbose and -h/-?/--help/--usage.
    """
    try:
        # retrieve command line options
        shortopts = "m:i:n:d:k:l:t:o:s:vh?"
        longopts = [
            "methods=", "inputfile=", "nTrees=", "maxDepth=", "mass=",
            "varListKey=", "inputtrees=", "outputfile=", "seed=", "verbose",
            "help", "usage"
        ]
        opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts)

    except getopt.GetoptError:
        # print help information and exit:
        print("ERROR: unknown options in argument %s" % sys.argv[1:])
        usage()
        sys.exit(1)

    infname = DEFAULT_INFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname = DEFAULT_OUTFNAME
    methods = DEFAULT_METHODS
    nTrees = DEFAULT_NTREES
    mDepth = DEFAULT_MDEPTH
    varListKey = DEFAULT_VARLISTKEY
    verbose = True
    SeedN = DEFAULT_SEED
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-d", "--maxDepth"):
            mDepth = a
        elif o in ("-l", "--varListKey"):
            varListKey = a
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-n", "--nTrees"):
            nTrees = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            # strip() returns a new string; the original discarded the result
            trees = sorted(a.strip().split(' '), reverse=True)
            if len(trees) - trees.count('') != 2:
                print("ERROR: need to give two trees (each one for signal and background)")
                print(trees)
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-s", "--seed"):
            # int() handles arbitrarily large values on Python 2 and 3 alike
            SeedN = int(a)
        elif o in ("-v", "--verbose"):
            verbose = True

    varList = varsList.varList[varListKey]
    nVars = str(len(varList)) + 'vars'
    # str() keeps this working when the default depth is numeric
    Note = methods + '_' + varListKey + '_' + nVars + '_mDepth' + str(mDepth)
    # NOTE(review): this unconditionally replaces any -o/--outputfile value,
    # and outfname is not used again in this function -- confirm intent.
    outfname = "dataset/weights/TMVA_" + Note + ".root"

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut, TRandom3

    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print("*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA")
        print("*** does not run properly (function calls with enums in the argument are ignored).")
        print("*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),")
        print("*** or use another ROOT version (e.g., ROOT 5.19).")
        sys.exit(1)

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    fClassifier = TMVA.Factory(
        "VariableImportance",
        "!V:!ROC:!ModelPersistence:Silent:Color:!DrawProgressBar:AnalysisType=Classification"
    )
    str_xbitset = '{:053b}'.format(SeedN)

    seeddl = TMVA.DataLoader(str_xbitset)

    bdtSetting = '!H:!V:NTrees=%s:MaxDepth=%s' % (nTrees, mDepth)
    bdtSetting += ':MinNodeSize=2.5%:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20'
    bdtSetting += ':IgnoreNegWeightsInTraining=True'

    # Pair variable k with character 52-k of the bit string. zip() stops after
    # 53 variables; the original kept decrementing the index, which wrapped to
    # negative positions and re-read bits belonging to the first variables.
    for iVar, bit in zip(varList, str_xbitset[52::-1]):
        if bit == '1':
            seeddl.AddVariable(iVar[0], iVar[1], iVar[2], 'F')
            print(iVar[0])

    (TMVA.gConfig().GetIONames()).fWeightFileDir = "weights/" + Note

    inputDir = varsList.inputDir
    # NOTE(review): the signal file name is hard-coded here, so -i/--inputfile
    # has no effect -- confirm intent.
    infname = "TTTT_TuneCP5_PSweights_13TeV-amcatnlo-pythia8_hadd.root"
    iFileSig = TFile.Open(inputDir + infname)
    sigChain = iFileSig.Get("ljmet")

    seeddl.AddSignalTree(sigChain)

    # The opened files and trees are collected in lists, as the original code
    # did; presumably this keeps them referenced while TMVA uses the trees --
    # TODO confirm.
    bkg_list = []
    bkg_trees_list = []
    for bkg_name in varsList.bkg:
        bkg_file = TFile.Open(inputDir + bkg_name)
        bkg_tree = bkg_file.Get("ljmet")
        bkg_list.append(bkg_file)
        bkg_trees_list.append(bkg_tree)
        bkg_tree.GetEntry(0)

        # empty trees are skipped
        if bkg_tree.GetEntries() == 0:
            continue
        seeddl.AddBackgroundTree(bkg_tree, 1)

    seeddl.SetSignalWeightExpression(weightStrS)
    seeddl.SetBackgroundWeightExpression(weightStrB)

    mycutSig = TCut(cutStrS)
    mycutBkg = TCut(cutStrB)

    seeddl.PrepareTrainingAndTestTree(
        mycutSig, mycutBkg,
        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V"
    )

    fClassifier.BookMethod(seeddl, TMVA.Types.kBDT, "BDT", bdtSetting)
    fClassifier.TrainAllMethods()
    fClassifier.TestAllMethods()
    fClassifier.EvaluateAllMethods()

    SROC = fClassifier.GetROCIntegral(str_xbitset, "BDT")
    # spacing matches what the Python 2 print statement produced
    print("ROC-integral :  %s   %s" % (str_xbitset, SROC))
    print("SEED " + str_xbitset + " DONE")
    fClassifier.DeleteAllMethods()
    fClassifier.fMethodsMap.clear()
    print("==================================================================")
    print("==================================================================")
# Formula and parameter ranges shared by every FDA configuration below.
_FDA_BASE = "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10)"

# (method name, name of the TMVA.Types enum member, option string), in booking
# order. The enum member is looked up by name only for the methods actually
# requested. Option reference: http://tmva.sourceforge.net/optionRef.html
_TMVA_CLASSIFICATION_METHODS = (
    # Cut optimisation
    ("Cuts", "kCuts", "!H:!V:FitMethod=MC:EffSel:VarProp[0]=FMax:VarProp[1]=FMin"),
    ("CutsD", "kCuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate"),
    ("CutsPCA", "kCuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA"),
    ("CutsGA", "kCuts", "H:!V:FitMethod=GA:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95:VarProp[0]=FMin:VarProp[1]=FMax"),
    ("CutsSA", "kCuts", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale"),
    # Likelihood ("naive Bayes estimator"), plain / decorrelated / PCA / KDE / mixed
    ("Likelihood", "kLikelihood", "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50"),
    ("LikelihoodD", "kLikelihood", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate"),
    ("LikelihoodPCA", "kLikelihood", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA"),
    ("LikelihoodKDE", "kLikelihood", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50"),
    ("LikelihoodMIX", "kLikelihood", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50"),
    # Multi-dimensional probability density estimator (PDERS)
    ("PDERS", "kPDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600"),
    ("PDERSD", "kPDERS", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate"),
    ("PDERSPCA", "kPDERS", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA"),
    # Multi-dimensional likelihood estimator using self-adapting phase-space binning
    ("PDEFoam", "kPDEFoam", "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T"),
    ("PDEFoamBoost", "kPDEFoam", "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T"),
    # K-Nearest Neighbour classifier (KNN)
    ("KNN", "kKNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim"),
    # H-Matrix (chi2-squared) method
    ("HMatrix", "kHMatrix", "!H:!V"),
    # Linear discriminant (same as Fisher discriminant)
    ("LD", "kLD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10"),
    # Fisher discriminant (same as LD), Gauss-transformed and boosted variants
    ("Fisher", "kFisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10"),
    ("FisherG", "kFisher", "H:!V:VarTransform=Gauss"),
    ("BoostedFisher", "kFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2"),
    # Function discrimination analysis (FDA) with various fitters
    ("FDA_MC", "kFDA", _FDA_BASE + ":FitMethod=MC:SampleSize=100000:Sigma=0.1"),
    ("FDA_GA", "kFDA", _FDA_BASE + ":FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1"),
    ("FDA_SA", "kFDA", _FDA_BASE + ":FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale"),
    ("FDA_MT", "kFDA", _FDA_BASE + ":FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch"),
    ("FDA_GAMT", "kFDA", _FDA_BASE + ":FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim"),
    ("FDA_MCMT", "kFDA", _FDA_BASE + ":FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20"),
    # TMVA ANN: MLP (recommended ANN); MLPBNN is BFGS training with bayesian regulators
    ("MLP", "kMLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator"),
    ("MLPBFGS", "kMLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator"),
    ("MLPBNN", "kMLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator"),
    # CF(Clermont-Ferrand)ANN
    ("CFMlpANN", "kCFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"),
    # Tmlp(Root)ANN
    ("TMlpANN", "kTMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"),
    # Support Vector Machine
    ("SVM", "kSVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm"),
    # Boosted Decision Trees
    ("BDTG", "kBDT", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5"),
    ("BDT", "kBDT", "!H:!V:NTrees=850:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning"),
    ("BDTB", "kBDT", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning"),
    ("BDTD", "kBDT", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate"),
    # RuleFit -- TMVA implementation of Friedman's method
    ("RuleFit", "kRuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02"),
)


def main():
    """Run a TMVA classification: book the requested methods, train, test, evaluate.

    Command-line options:
        -w/--weight_fold          directory for the TMVA weight files
        -m/--methods              space- or comma-separated method names
        -i/--inputfilesig         ROOT file holding the signal tree
        -j/--inputfilebkg         ROOT file holding the background tree
        -f/--friendinputfilesig   friend file for signal (parsed, currently unused)
        -g/--friendinputfilebkg   friend file for background (parsed, currently unused)
        -t/--inputtrees           two tree names, space separated
        -o/--outputfile           output ROOT file
        -a/--addedcuts            cut expression applied to signal and background
        -v/--verbose              verbose factory output
        --gui                     open TMVAGui on the output when finished
        -h/-?/--help/--usage      print usage and exit
    """
    try:
        # "g" used to be listed twice ("g:" and "g"); getopt only honours the
        # first entry, so -g always takes an argument and the GUI flag is
        # reachable through --gui only. "addedcuts=" was missing from the long
        # options although --addedcuts is handled below.
        shortopts = "w:m:i:j:f:g:t:o:a:vh?"
        longopts = ["weight_fold=", "methods=", "inputfilesig=", "inputfilebkg=",
                    "friendinputfilesig=", "friendinputfilebkg=", "inputtrees=",
                    "outputfile=", "addedcuts=", "verbose", "gui", "help", "usage"]
        opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts)

    except getopt.GetoptError:
        # print help information and exit:
        print("ERROR: unknown options in argument %s" % sys.argv[1:])
        usage()
        sys.exit(1)

    infnameSig = DEFAULT_INFNAMESIG
    infnameBkg = DEFAULT_INFNAMEBKG
    friendfnameSig = DEFAULT_FRIENDNAMESIG
    friendfnameBkg = DEFAULT_FRIENDNAMEBKG
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname = DEFAULT_OUTFNAME
    methods = DEFAULT_METHODS
    weight_fold = "weights"
    verbose = False
    gui = False
    addedcuts = ""
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-w", "--weight_fold"):
            weight_fold = a
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-i", "--inputfilesig"):
            infnameSig = a
        elif o in ("-j", "--inputfilebkg"):
            infnameBkg = a
        elif o in ("-f", "--friendinputfilesig"):
            friendfnameSig = a
        elif o in ("-g", "--friendinputfilebkg"):
            friendfnameBkg = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-a", "--addedcuts"):
            addedcuts = a
        elif o in ("-t", "--inputtrees"):
            # strip() returns a new string; the original discarded the result
            trees = sorted(a.strip().split(' '), reverse=True)
            if len(trees) - trees.count('') != 2:
                print("ERROR: need to give two trees (each one for signal and background)")
                print(trees)
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True
        elif o == "--gui":
            gui = True

    # Print methods
    mlist = methods.replace(' ', ',').split(',')
    print("=== TMVAClassification: use method(s)...")
    for m in mlist:
        if m.strip() != '':
            print("=== - <%s>" % m.strip())

    # Print the file
    print("Using file " + infnameSig + " for signal...")
    print("Using file " + infnameBkg + " for background...")

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut

    # check ROOT version, give alarm if 5.18
    print("ROOT version is " + str(gROOT.GetVersionCode()))
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print("*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA")
        print("*** does not run properly (function calls with enums in the argument are ignored).")
        print("*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),")
        print("*** or use another ROOT version (e.g., ROOT 5.19).")
        sys.exit(1)

    # Logon is not loaded automatically through PyROOT; the local
    # TMVAlogon.C / TMVAGui.C macros are deliberately not loaded here, the
    # ROOT defaults are used instead.
    gROOT.SetMacroPath("./")

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile(outfname, 'RECREATE')

    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for
    # more factory options). All TMVA output can be suppressed by removing the
    # "!" (not) in front of the "Silent" argument in the option string.
    factory = TMVA.Factory("TMVAClassification", outputFile,
                           "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification")

    # Set verbosity
    factory.SetVerbose(verbose)

    # Global TMVA settings (see "src/Config.h" for all available options)
    (TMVA.gConfig().GetIONames()).fWeightFileDir = weight_fold

    # Input variables used for the classifier training. Expressions that
    # TTree::Draw can parse (e.g. "3*var1/var2*abs(var3)") are allowed too.
    # Currently disabled: MMC_h2mass_MaxBin, MMC_h2mass_RMS, MMC_h2mass_prob.
    for var_name in ("dR_l1l2", "dR_b1b2", "dR_bl", "dR_l1l2b1b2", "MINdR_bl",
                     "dphi_l1l2b1b2", "mass_l1l2", "mass_b1b2", "mass_trans",
                     "MT2", "pt_b1b2"):
        factory.AddVariable(var_name, var_name, "", 'F')

    # Spectator variables (not used in training but written to the TestTree)
    # could be added with factory.AddSpectator(expr, title, unit, 'F').

    # Read input data. AccessPathName returns a true value when the path is
    # NOT accessible. The original fallback referenced an undefined name
    # (infname); each missing file is now fetched under its own name.
    for missing_candidate in (infnameSig, infnameBkg):
        if gSystem.AccessPathName(missing_candidate):
            gSystem.Exec("wget http://root.cern.ch/files/" + missing_candidate)

    inputSig = TFile.Open(infnameSig)
    inputBkg = TFile.Open(infnameBkg)
    if not inputSig or not inputBkg:
        print("ERROR: could not open input file(s) %s / %s" % (infnameSig, infnameBkg))
        sys.exit(1)

    # Get the signal and background trees for training
    signal = inputSig.Get(treeNameSig)
    background = inputBkg.Get(treeNameBkg)

    # Global event weights (event-wise weights can be set with
    # factory.SetSignalWeightExpression / SetBackgroundWeightExpression).
    # There is no general rule for choosing them: the safest default gives
    # equal amounts of signal and background in training. Scaling to the
    # expected luminosity ratio for a rare signal lets the classifier see
    # hardly any signal and classify everything as background.
    signalWeight = 1.
    backgroundWeight = 1.

    # ====== register trees ====================================================
    # An arbitrary number of signal or background trees can be added; separate
    # training/test trees can be given with a third "Training"/"Test" argument.
    factory.AddSignalTree(signal, signalWeight)
    factory.AddBackgroundTree(background, backgroundWeight)

    # Apply additional cuts on the signal and background sample,
    # e.g. TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )
    mycutSig = TCut(addedcuts)
    mycutBkg = TCut(addedcuts)
    print(mycutSig)

    # The relevant variables are copied into new, slim trees used for training
    # and testing. "SplitMode=Random" shuffles the input events before they
    # are split into training and test samples.
    factory.PrepareTrainingAndTestTree(mycutSig, mycutBkg,
                                       "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V")

    # ---- Book MVA methods, in table order, for every requested name
    for method_name, type_name, method_options in _TMVA_CLASSIFICATION_METHODS:
        if method_name in mlist:
            factory.BookMethod(getattr(TMVA.Types, type_name), method_name, method_options)

    # ---- Now tell the factory to train, test, and evaluate the MVAs.
    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()

    # Save the output.
    outputFile.Close()

    print("=== wrote root file %s\n" % outfname)
    print("=== TMVAClassification is done!\n")

    # open the GUI for the result macros
    if gui:
        gROOT.ProcessLine("TMVAGui(\"%s\")" % outfname)
        # Keep the ROOT thread running so the GUI stays open.
        # NOTE(review): the event loop is entered only together with the GUI,
        # so batch runs return after training -- confirm this matches intent.
        gApplication.Run()
## * Train a BDT with TMVA
##
## \macro_image
## \macro_output
## \macro_code
##
## \author Lailin XU
## Modified from [RegressionKeras.py](https://root.cern/doc/master/RegressionKeras_8py.html) and [TMVARegression.C](https://root.cern/doc/master/TMVARegression_8C.html)

from ROOT import TMVA, TFile, TTree, TCut
from subprocess import call
from os.path import isfile

# Setup TMVA
TMVA.Tools.Instance()
(TMVA.gConfig().GetVariablePlotting()).fMaxNumOfAllowedVariablesForScatterPlots = 5

outfileName = 'TMVA_tutorial_reg_1.root'
output = TFile.Open(outfileName, 'RECREATE')

factory = TMVA.Factory('TMVARegression', output,
                       '!V:!Silent:Color:DrawProgressBar:Transformations=D,G:AnalysisType=Regression')

# Load data
trfile = "SM_ttbar.root"
# NOTE(review): the tutorial sample fetched here is not the file opened below
# (trfile) -- confirm whether this download is still needed.
if not isfile('tmva_reg_example.root'):
    call(['curl', '-L', '-O', 'http://root.cern.ch/files/tmva_reg_example.root'])

data = TFile.Open(trfile)
if not data:
    # TFile.Open yields a null (falsy) object on failure; stop here instead of
    # failing on the data.Get call below with a less helpful error.
    print("Error! file not opened", trfile)
    raise SystemExit(1)

trname = "tree"
tree = data.Get(trname)
if not tree:
    # Get() yields a null (falsy) object when the key does not exist.
    print("Error! tree not found", trname)
    raise SystemExit(1)
test_size=0.2, #train_size=10000, #test_size=5000, random_state=0) # Training if not args.quiet: print 'start training ...' if args.timeit: start = timer() output = TFile(args.outdir + 'tmva_output.root', 'recreate') factory = TMVA.Factory( 'TMVA', output, 'AnalysisType=Classification:' '!V:Silent:!DrawProgressBar') dataloader = TMVA.DataLoader("") TMVA.gConfig().GetIONames().fWeightFileDir = args.outdir + 'weights/' for v in var: vtype = 'I' if v in [ 'nJet', 'tau0_decaymode', 'tau1_decaymode', 'ntags', 'ntags_loose' ] else 'F' dataloader.AddVariable(v, vtype) add_classification_events(dataloader, x_train, y_train, weights=w_train) add_classification_events(dataloader, x_test, y_test, weights=w_test, test=True) norm = 'None'
def main():
    """Train and evaluate one PyKeras DNN through TMVA for a seeded variable subset.

    Command-line options (parsed with ``getopt``):

    * ``-o/--outputfile``  output file name (see note below)
    * ``-v ARG`` / ``--verbose``  TMVA factory verbosity
    * ``-w/--where``       ``"brux"`` selects the BRUX input directory of the
      chosen year, anything else selects ``varsList.inputDirCondor``
    * ``-y/--year``        2017 or 2018
    * ``-s/--seed``        integer whose binary representation, zero-padded to
      ``len(varsList.varList["DNN"])``, selects the input variables: the
      variable at position ``i`` is used when character ``i`` of that bit
      string is ``'1'``
    * ``-h``, ``-?``, ``--help``, ``--usage``  print usage and exit

    The function exits through ``sys.exit`` on invalid options, on a help
    request, and on an unsupported year.
    """
    try:
        # Retrieve command line options.
        shortopts = "o:w:y:v:s:h?"
        longopts = [
            "outputfile=", "where=", "year=", "verbose", "seed=", "help", "usage"
        ]
        opts, _ = getopt.getopt(sys.argv[1:], shortopts, longopts)
    except getopt.GetoptError:
        print("ERROR: unknown options in argument %s" % sys.argv[1:])
        usage()
        sys.exit(1)

    # Options live in a plain dict.  A NumPy array of mixed values would turn
    # every entry into a fixed-width string, which truncates longer values
    # assigned later and makes the boolean default indistinguishable from text.
    settings = {
        'outfname': DEFAULT_OUTFNAME,
        'verbose': True,
        'where': "lpc",
        'year': 2017,
        'SeedN': DEFAULT_SEED,
    }
    option_keys = {
        '-o': 'outfname', '--outputfile': 'outfname',
        '-v': 'verbose', '--verbose': 'verbose',
        '-w': 'where', '--where': 'where',
        '-y': 'year', '--year': 'year',
        '-s': 'SeedN', '--seed': 'SeedN',
    }
    for opt, arg in opts:
        if opt in option_keys:
            # Override the default with the command line argument.
            settings[option_keys[opt]] = arg
        if opt in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)

    # "--verbose" carries no argument (empty string) and means "on";
    # "-v ARG" is switched off only by an explicit negative word.
    verbose = settings['verbose']
    if not isinstance(verbose, bool):
        verbose = str(verbose).strip().lower() not in ("0", "false", "no", "off")

    where = settings['where']
    year = int(settings['year'])
    if year not in (2017, 2018):
        # Only these two years have sample lists below; stop before any
        # output file is created.
        print("ERROR: unsupported year %s (expected 2017 or 2018)" % year)
        sys.exit(1)

    varList = varsList.varList["DNN"]
    var_length = len(varList)
    # int() is arbitrary precision, so it replaces the Python-2-only long().
    str_xbitset = '{:0{}b}'.format(int(settings['SeedN']), var_length)
    nVars = str_xbitset.count('1')
    outf_key = "DNN_" + str(nVars) + "vars"
    # The output name is always derived from the number of selected
    # variables; a value given with -o is replaced here, as before.
    outfname = "dataset/weights/TMVA_" + outf_key + ".root"

    print("Seed: {}".format(str_xbitset))

    outputfile = TFile(outfname, 'RECREATE')

    checkRootVer()  # check that ROOT version is correct

    # ------------------------------------------------------------------
    # TMVA: inputs
    # ------------------------------------------------------------------
    if where == "brux":
        inputDir = getattr(varsList, "inputDirBRUX%d" % year)
    else:
        inputDir = varsList.inputDirCondor
    sig_names = getattr(varsList, "sig%d_0" % year)
    bkg_names = getattr(varsList, "bkg%d_0" % year)

    # Set up TMVA
    ROOT.TMVA.Tools.Instance()
    ROOT.TMVA.PyMethodBase.PyInitialize()

    fClassifier = TMVA.Factory(
        'VariableImportance',
        '!V:!ROC:Silent:!Color:!DrawProgressBar:Transformations=I;:AnalysisType=Classification'
    )
    fClassifier.SetVerbose(verbose)

    loader = TMVA.DataLoader("dataset/" + str_xbitset)

    # Register only the variables whose bit is set in the seed.
    for selected, var in zip(str_xbitset, varList):
        if selected != '1':
            continue
        var_type = "I" if var[0] == "NJets_MultiLepCalc" else "F"
        loader.AddVariable(var[0], var[1], var[2], var_type)

    # Files and trees are collected in lists so the Python references to
    # them stay alive until the end of the function.
    sig_list = []
    sig_trees_list = []
    bkg_list = []
    bkg_trees_list = []

    # add signals to loader
    for sample_name in sig_names:
        sig_file = TFile.Open(inputDir + sample_name)
        sig_tree = sig_file.Get("ljmet")
        sig_list.append(sig_file)
        sig_trees_list.append(sig_tree)
        sig_tree.GetEntry(0)
        loader.AddSignalTree(sig_tree)

    # add backgrounds to loader (empty trees are skipped)
    for sample_name in bkg_names:
        bkg_file = TFile.Open(inputDir + sample_name)
        bkg_tree = bkg_file.Get("ljmet")
        bkg_list.append(bkg_file)
        bkg_trees_list.append(bkg_tree)
        bkg_tree.GetEntry(0)
        if bkg_tree.GetEntries() == 0:
            continue
        loader.AddBackgroundTree(bkg_tree)

    # set signal and background weights
    loader.SetSignalWeightExpression(weightStrS)
    loader.SetBackgroundWeightExpression(weightStrB)

    # set cut thresholds for signal and background
    mycutSig = TCut(cutStrS)
    mycutBkg = TCut(cutStrB)

    NSIG = 0
    NSIG_TEST = 0
    NBKG = 0
    NBKG_TEST = 0

    loader.PrepareTrainingAndTestTree(
        mycutSig, mycutBkg,
        "nTrain_Signal={}:nTrain_Background={}:nTest_Signal={}:nTest_Background={}"
        ":SplitMode=Random:NormMode=NumEvents:!V".format(
            NSIG, NBKG, NSIG_TEST, NBKG_TEST))

    # ------------------------------------------------------------------
    # Keras DNN
    # ------------------------------------------------------------------
    model_name = "TTTT_TMVA_model.h5"

    model = Sequential()
    model.add(
        Dense(100,
              input_dim=nVars,
              kernel_initializer="glorot_normal",
              activation="relu"))
    # NOTE(review): both layers are taken to belong to the loop body --
    # confirm against the original layout.
    for _ in range(2):
        model.add(BatchNormalization())
        model.add(
            Dense(100, kernel_initializer="glorot_normal", activation="relu"))
    model.add(Dense(2, activation="sigmoid"))

    model.compile(loss="categorical_crossentropy",
                  optimizer=Adam(),
                  metrics=["accuracy"])

    # TMVA reads the model back from this file (FilenameModel below).
    model.save(model_name)
    model.summary()

    # ------------------------------------------------------------------
    # TMVA: book, train, test, evaluate
    # ------------------------------------------------------------------
    # The saved model has to be specified in this option string.
    kerasSetting = "!H:!V:VarTransform=G:FilenameModel=" + model_name + \
                   ":NumEpochs=15:BatchSize=512"

    # run the classifier
    fClassifier.BookMethod(loader, TMVA.Types.kPyKeras, "PyKeras", kerasSetting)

    (TMVA.gConfig().GetIONames()).fWeightFileDir = \
        str_xbitset + "/weights/" + outf_key

    fClassifier.TrainAllMethods()
    fClassifier.TestAllMethods()
    fClassifier.EvaluateAllMethods()

    SROC = fClassifier.GetROCIntegral("dataset/" + str_xbitset, "PyKeras")
    print("ROC-integral: {}".format(SROC))

    fClassifier.DeleteAllMethods()
    fClassifier.fMethodsMap.clear()

    outputfile.Close()
##
## \macro_image
## \macro_output
## \macro_code
##
## \author Lailin XU
## Modified from [ClassificationKeras.py](https://root.cern/doc/master/ClassificationKeras_8py.html) and [TMVAClassification.C](https://root.cern/doc/master/TMVAClassification_8C.html)

from ROOT import TMVA, TFile, TTree, TCut
from subprocess import call
from os.path import isfile

# Setup TMVA
# =======================
TMVA.Tools.Instance()

# Cap the number of variables for which scatter plots are produced.
plotting_config = TMVA.gConfig().GetVariablePlotting()
plotting_config.fMaxNumOfAllowedVariablesForScatterPlots = 5

# ROOT file that receives the training results.
outfileName = 'TMVA_tutorial_cla_1.root'
output = TFile.Open(outfileName, 'RECREATE')

# Create the factory object. Later you can choose the methods whose
# performance you'd like to investigate. The factory is the only TMVA object
# you have to interact with.
#
# The first argument is the base of the name of all the weight files in the
# directory weight/
# The second argument is the output file for the training results
factory = TMVA.Factory(
    "TMVAClassification",
    output,
    "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification",
)

# Load data
def main():
    """Train, test and evaluate a TMVA BDT for the "Dijet" or "Subjet" analysis.

    Command-line options (parsed with ``getopt``):

    * ``-o/--outputfile``  name of the ROOT output file (default ``OUTFNAME``)
    * ``-a/--analysis``    ``"Dijet"`` or ``"Subjet"`` (default ``ANALYSIS``)
    * ``-r/--regression``  use the regression-corrected inputs; the option's
      argument is required by the option spec but its value is ignored
    * ``-v/--verbose``     verbose TMVA factory
    * ``-h``, ``-?``, ``--help``, ``--usage``  print usage and exit

    Every entry of ``SAMPLES`` is added as a signal ("S") or background ("B")
    tree weighted by ``cross section / content of bin 1 of "Count"``.

    The function exits through ``sys.exit`` on invalid options, an unknown
    analysis, ROOT 5.18, an unreadable input sample, or an unknown sample
    type.
    """
    try:
        # retrieve command line options
        shortopts = "a:o:r:vh?"
        longopts = ["analysis=", "outputfile=", "regression=", "verbose", "help", "usage"]
        opts, _ = getopt.getopt(sys.argv[1:], shortopts, longopts)
    except getopt.GetoptError:
        # print help information and exit:
        print("ERROR: unknown options in argument %s" % sys.argv[1:])
        usage()
        sys.exit(1)

    _outfname = OUTFNAME
    _analysis = ANALYSIS
    verbose = False
    _regression = REGRESSION

    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-o", "--outputfile"):
            _outfname = a
        elif o in ("-a", "--analysis"):
            _analysis = a
        elif o in ("-r", "--regression"):
            _regression = True
        elif o in ("-v", "--verbose"):
            verbose = True

    # Reject an unknown analysis before the output file is (re)created.
    if _analysis not in ("Dijet", "Subjet"):
        print("Problem specifying analysis. Please choose Dijet or Subjet.")
        sys.exit(1)

    # Import ROOT classes
    from ROOT import gROOT, TFile, TCut

    # check ROOT version, give alarm if 5.18
    version_code = gROOT.GetVersionCode()
    if 332288 <= version_code < 332544:
        print("*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA")
        print("*** does not run properly (function calls with enums in the argument are ignored).")
        print("*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),")
        print("*** or use another ROOT version (e.g., ROOT 5.19).")
        sys.exit(1)

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile(_outfname, 'RECREATE')

    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory(
        "TMVAClassification", outputFile,
        "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification")

    # Set verbosity
    factory.SetVerbose(verbose)

    # If you wish to modify default settings
    # (please check "src/Config.h" to see all available global options)
    #    gConfig().GetVariablePlotting()).fTimesRMS = 8.0
    #    gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"
    TMVA.gConfig().GetIONames().fWeightFileDir = "weights_" + _analysis

    # Define the input variables that shall be used for the classifier training
    # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    # [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
    if _analysis == "Dijet":
        if not _regression:
            variables = [
                ("H_mass := H.mass", 'F'),
                ("H_pt :=H.pt", 'F'),
                ("hJet_pt1 := hJet_pt[0]", 'F'),
                ("hJet_pt2 := hJet_pt[1]", 'F'),
            ]
        else:
            variables = [
                ("HCorr_mass := newHiggsMass", 'F'),
                ("HCorr_pt := newHiggsPt", 'F'),
                ("hJ1Corr_pt := hJet_genPtReg0", 'F'),
                ("hJ2Corr_pt := hJet_genPtReg1", 'F'),
            ]
        variables += [
            ("V_pt :=V.pt", 'F'),
            ("H_dR := H.dR", 'F'),
            ("hJ12_MaxCsv := max(hJet_csv[0],hJet_csv[1])", 'F'),
            ("hJ12_MinCsv := min(hJet_csv[0],hJet_csv[1])", 'F'),
            ("HV_dPhi := HVdPhi", 'F'),
            ("H_dEta := H.dEta", 'F'),
            ("NAddJet:=Sum$(aJet_pt>20 && abs(aJet_eta)<4.5)", 'I'),
            ("dPull := deltaPullAngle", 'F'),
        ]
        # You can add so-called "Spectator variables", which are not used in the MVA training,
        # but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
        # input variables, the response values of all trained MVAs, and the spectator variables
        #factory.AddSpectator("hJet_pt1 := hJet_pt[0]", 'F');
        #factory.AddSpectator("hJet_pt2 := hJet_pt[1]", 'F');
    else:  # "Subjet"
        if not _regression:
            variables = [
                ("H_mass := FatH.filteredmass", 'F'),
                ("H_pt := FatH.filteredpt", 'F'),
                ("SJ1_pt := fathFilterJets_pt[0]", 'F'),
                ("SJ2_pt := fathFilterJets_pt[1]", 'F'),
                ("SJ3_pt := Alt$(fathFilterJets_pt[2],0)", 'F'),
            ]
        else:
            variables = [
                ("HCorr_mass := newfatHiggsMass", 'F'),
                ("HCorr_pt := newfatHiggsPt", 'F'),
                ("SJ1Corr_pt := fathFilterJets_genPtReg0", 'F'),
                # NOTE(review): "fathFilterJetsx" looks like a typo for
                # "fathFilterJets" -- confirm against the tree's branch names.
                ("SJ2Corr_pt := fathFilterJetsx_genPtReg1", 'F'),
                ("SJ3_pt := Alt$(fathFilterJets_pt[2],0)", 'F'),  # change later
            ]
        variables += [
            ("V_pt := V.pt", 'F'),
            ("HV_dPhi := "
             "FatH.filteredphi - V.phi > pi ? "
             "abs(FatH.filteredphi - V.phi - 2*pi) : "
             "FatH.filteredphi - V.phi < -pi ? "
             "abs(FatH.filteredphi - V.phi + 2*pi) : "
             "abs(FatH.filteredphi - V.phi)", 'F'),
            ("SJ1_csv := fathFilterJets_csv[0]", 'F'),
            ("SJ2_csv := fathFilterJets_csv[1]", 'F'),
            ("SJ3_csv := Alt$(fathFilterJets_csv[2],0)", 'F'),
            ("SJ12_dEta := "
             "nfathFilterJets < 2 ? 0 : "
             "fabs(fathFilterJets_eta[0] - fathFilterJets_eta[1] )", 'F'),
            ("SJ13_dEta := "
             "nfathFilterJets < 3 ? 0 : "
             "abs( fathFilterJets_eta[0] - Alt$(fathFilterJets_eta[2],0))", 'F'),
            ("SJ12_dPhi := "
             "nfathFilterJets < 2 ? 0 : "
             "fathFilterJets_phi[0] - fathFilterJets_phi[1] > pi ? "
             "abs( fathFilterJets_phi[0] - fathFilterJets_phi[1] - 2*pi) : "
             "fathFilterJets_phi[0] - fathFilterJets_phi[1] < -pi ? "
             "abs( fathFilterJets_phi[0] - fathFilterJets_phi[1] + 2*pi) : "
             "abs( fathFilterJets_phi[0] - fathFilterJets_phi[1])", 'F'),
            ("SJ13_dPhi := "
             "nfathFilterJets < 3 ? 0 : "
             "fathFilterJets_phi[0] - Alt$(fathFilterJets_phi[2],0) > pi ? "
             "abs(fathFilterJets_phi[0] - "
             "Alt$(fathFilterJets_phi[2],0) - 2*pi) : "
             "fathFilterJets_phi[0] - Alt$(fathFilterJets_phi[2],0) < -pi ? "
             "abs(fathFilterJets_phi[0] - "
             "Alt$(fathFilterJets_phi[2],0) + 2*pi) : "
             "abs(fathFilterJets_phi[0] - Alt$(fathFilterJets_phi[2],0))", 'F'),
            ("SJ12_dR := "
             "nfathFilterJets < 2 ? 0 : "
             "deltaR(fathFilterJets_eta[0],fathFilterJets_phi[0],fathFilterJets_eta[1],fathFilterJets_phi[1])", 'F'),
            ("SJ13_dR := nfathFilterJets < 3 ? 0 : "
             "deltaR(fathFilterJets_eta[0],fathFilterJets_phi[0],Alt$(fathFilterJets_eta[2],0),Alt$(fathFilterJets_phi[2],0))", 'F'),
            ("NAddJet:= "
             "nfathFilterJets < 2 ? 0 : "
             "Sum$(aJet_pt>20 && abs(aJet_eta)<4.5 && deltaR(fathFilterJets_eta[0],fathFilterJets_phi[0],aJet_eta,aJet_phi)>0.3 && deltaR(fathFilterJets_eta[1],fathFilterJets_phi[1],aJet_eta,aJet_phi)>0.3)+Sum$(hJet_pt>20 && abs(hJet_eta)<4.5 && deltaR(fathFilterJets_eta[0],fathFilterJets_phi[0],hJet_eta,hJet_phi)>0.3 && deltaR(fathFilterJets_eta[1],fathFilterJets_phi[1],hJet_eta,hJet_phi)>0.3)", 'I'),
        ]

    for expression, var_type in variables:
        factory.AddVariable(expression, var_type)

    ## Get the Signal and Background trees
    for Sample, SampleInfo in SAMPLES.items():
        SampleType = SampleInfo[0]  # signal or background
        infile = os.path.join(INPUTDIR, SampleInfo[1])
        xs = SampleInfo[2]

        f = TFile.Open(infile)
        if not f or f.IsZombie():
            print("ERROR: could not open input file %s" % infile)
            sys.exit(1)

        ## get number of step 1 events
        h = f.Get("Count")
        if not h:
            print("ERROR: no 'Count' histogram in %s" % infile)
            sys.exit(1)
        nEVT = int(h.GetBinContent(1))
        if nEVT <= 0:
            print("ERROR: 'Count' histogram of %s reports %d events" % (infile, nEVT))
            sys.exit(1)

        # float() keeps an integer cross section from being floor-divided
        # under Python 2.
        wt = float(xs) / nEVT

        print("%s :  %s" % (Sample, infile))
        print("XS:nEVT:wt:  %s %s %s" % (xs, nEVT, wt))

        theTree = f.Get(TREE)
        if SampleType == "S":
            factory.AddSignalTree(theTree, wt)
        elif SampleType == "B":
            factory.AddBackgroundTree(theTree, wt)
        else:
            print("Trouble extracting SampleType for this sample")
            sys.exit(1)

    # table10 AN-2011/430
    if _analysis == "Dijet":
        cuts = [
            "Vtype == 0",
            "vLepton_pt[0]>20.",
            "H.HiggsFlag > 0",
            "V.mass > 75.0",
            "V.mass < 105.0",
            "V.pt > 100.0",
            "max(hJet_csv[0],hJet_csv[1]) > 0.244",
            "min(hJet_csv[0],hJet_csv[1]) > 0.244",
        ]
        if not _regression:
            cuts += [
                "hJet_pt[0] > 20.0",
                "hJet_pt[1] > 20.0",
                "H.mass > 80.0",
                "H.mass < 150.0",
            ]
        else:
            # One cut per corrected jet pT (the second used to repeat
            # hJet_genPtReg0, leaving the second jet uncut).
            cuts += [
                "hJet_genPtReg0 > 20.0",
                "hJet_genPtReg1 > 20.0",
                "newHiggsMass > 80.0",
                "newHiggsMass < 150.0",
            ]
    else:  # "Subjet"
        cuts = [
            "Vtype == 0",
            "vLepton_pt[0]>20.",
            "FatH.FatHiggsFlag > 0",
            "V.mass > 75.0",
            "V.mass < 105.0",
            "V.pt > 100.0",
            "nfathFilterJets >= 2",
            "max(fathFilterJets_csv[0],fathFilterJets_csv[1]) > 0.244",
            "min(fathFilterJets_csv[0],fathFilterJets_csv[1]) > 0.244",
        ]
        if not _regression:
            cuts += [
                "fathFilterJets_pt[0] > 20.0",
                "fathFilterJets_pt[1] > 20.0",
                "FatH.filteredmass > 80.0",
                "FatH.filteredmass < 150.0",
            ]
        else:
            # Same correction as in the Dijet branch: the second cut applies
            # to the second filter jet.
            cuts += [
                "fathFilterJets_genPtReg0 > 20.0",
                "fathFilterJets_genPtReg1 > 20.0",
                "newfatHiggsMass > 80.0",
                "newfatHiggsMass < 150.0",
            ]

    cutString = " && ".join(cuts)
    print(cutString)

    # The same selection is applied to signal and background.
    mycutSig = TCut(cutString)
    mycutBkg = TCut(cutString)

    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
    prepareOptions = "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=None:!V"
    #prepareOptions="SplitMode=Random:!V"
    factory.PrepareTrainingAndTestTree(mycutSig, mycutBkg, prepareOptions)

    bdtOptions = ":".join([
        "!H",
        "!V",
        "NTrees=850",
        "nEventsMin=150",
        "MaxDepth=3",
        "BoostType=AdaBoost",
        "AdaBoostBeta=0.3",
        "SeparationType=GiniIndex",
        "nCuts=20",
        "PruneMethod=NoPruning",  # alternative: "PruneMethod=CostComplexity"
    ])
    # print bdtOptions
    factory.BookMethod(TMVA.Types.kBDT, "BDT", bdtOptions)

    # Train MVAs
    factory.TrainAllMethods()

    # Test MVAs
    factory.TestAllMethods()

    # Evaluate MVAs
    factory.EvaluateAllMethods()

    # Save the output.
    outputFile.Close()

    print("=== wrote root file %s\n" % _outfname)
    print("=== TMVAClassification is done!\n")
def TMVAClassification(methods, sigfname, bkgfname, optname, channel, trees, verbose=False): #="DecayTree,DecayTree" # Print methods mlist = methods.replace(' ', ',').split(',') print "=== TMVAClassification: use method(s)..." for m in mlist: if m.strip() != '': print "=== - <%s>" % m.strip() # Define trees trees = trees.split(",") if len(trees) - trees.count('') != 2: print "ERROR: need to give two trees (each one for signal and background)" print trees sys.exit(1) treeNameSig = trees[0] treeNameBkg = trees[1] # Print output file and directory outfname = "TMVA_%s_%s.root" % (channel, optname) myWeightDirectory = "weights_%s_%s" % (channel, optname) print "=== TMVAClassification: output will be written to:" print "=== %s" % outfname print "=== %s" % myWeightDirectory # Import ROOT classes from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut # check ROOT version, give alarm if 5.18 if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544: print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA" print "*** does not run properly (function calls with enums in the argument are ignored)." print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples)," print "*** or use another ROOT version (e.g., ROOT 5.19)." sys.exit(1) # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI #gROOT.SetMacroPath( "./" ) #gROOT.Macro ( "./tmva/test/TMVAlogon.C" ) #gROOT.LoadMacro ( "./tmva/test/TMVAGui.C" ) ###Is this really necessary?? # Import TMVA classes from ROOT from ROOT import TMVA # Setup TMVA TMVA.Tools.Instance() # Output file outputFile = TFile(outfname, 'RECREATE') # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options) # All TMVA output can be suppressed by removing the "!" 
(not) in # front of the "Silent" argument in the option string factory = TMVA.Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I:AnalysisType=Classification" ) # Set verbosity factory.SetVerbose(verbose) # Load data dataloader = TMVA.DataLoader("dataset") # If you wish to modify default settings # (please check "src/Config.h" to see all available global options) # gConfig().GetVariablePlotting()).fTimesRMS = 8.0 (TMVA.gConfig().GetIONames()).fWeightFileDir = myWeightDirectory # Define the input variables that shall be used for the classifier training # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" # [all types of expressions that can also be parsed by TTree::Draw( "expression" )] print "*** Training on channel:" print "*** %s" % channel print "***" ''' if channel == "1": #dataloader.AddVariable( "pplus_ProbNNp", "Prob(p^{+})", "", 'F' ); #dataloader.AddVariable( "Kminus_ProbNNk", "Prob(K^{-})", "", 'F' ); dataloader.AddVariable( "pplus_PT", "P_{T}(p^{+})", "MeV", 'F' ); dataloader.AddVariable( "Kminus_PT", "P_{T}(K^{-})", "MeV", 'F' ); dataloader.AddVariable( "gamma_PT", "P_{T}(#gamma)", "MeV", 'F' ); dataloader.AddVariable( "Lambda_1520_0_PT", "P_{T}(#Lambda(1520))", "MeV", 'F' ); dataloader.AddVariable( "B_PT", "P_{T}(#Lambda_{b})", "MeV", 'F' ); dataloader.AddVariable( "beta:=(-gamma_P+Kminus_P+pplus_P)/(gamma_P+Kminus_P+pplus_P)","#beta", "MeV", 'F' ); dataloader.AddVariable( "MomCons1:=-B_P+gamma_P+Lambda_1520_0_P","P_{tot,1}", "MeV", 'F' ); dataloader.AddVariable( "MomCons2:=-Lambda_1520_0_P+Kminus_P+pplus_P","P_{tot,2}", "MeV", 'F' ); dataloader.AddVariable( "Sum_Kminus_p_eta:=atanh(pplus_PZ/pplus_P)+atanh(Kminus_PZ/Kminus_P)","#eta(K^{-})+#eta(p^{+})","MeV", 'F' ); dataloader.AddVariable( "Diff_Kminus_p_eta:=atanh(Kminus_PZ/Kminus_P)-atanh(pplus_PZ/pplus_P)","#eta(K^{-})-#eta(p^{+})","MeV", 'F' ); dataloader.AddVariable( "pplus_IPCHI2_OWNPV", "#chi^{2}_{IP}(p^{+})", "" , 'F' ); 
dataloader.AddVariable( "Kminus_IPCHI2_OWNPV", "#chi^{2}_{IP}(K^{-})", "" , 'F' ); dataloader.AddVariable( "B_IPCHI2_OWNPV", "#chi^{2}_{IP}(#Lambda_{b})", "" , 'F' ); #dataloader.AddVariable( "gamma_IPCHI2_OWNPV", "IP #chi^{2}(#gamma)", "" , 'F' ); #dataloader.AddVariable( "Lambda_1520_0_IP_OWNPV", "IP(#Lambda(1520))", "mm", 'F' ); #dataloader.AddVariable( "Lambda_1520_0_IPCHI2_OWNPV", "IP#chi^{2}(#Lambda(1520))", "", 'F' ); dataloader.AddVariable( "Lambda_1520_0_OWNPV_CHI2", "#chi^{2}_{vertex}(#Lambda(1520))", "" , 'F' ); dataloader.AddVariable( "B_OWNPV_CHI2", "#chi^{2}_{vertex}(#Lambda_{b})", "" , 'F' ); dataloader.AddVariable( "B_DIRA_OWNPV", "DIRA(#Lambda_{b})", "" , 'F' ); #dataloader.AddVariable( "Lambda_1520_0_FDCHI2_OWNPV", "FD #chi^{2}(#Lambda(1520))", "", 'F' ); dataloader.AddVariable( "B_FDCHI2_OWNPV", "#chi^{2}_{FD}(#Lambda_{b})", "", 'F' ); ''' if channel == "2": dataloader.AddVariable("pplus_PT", "P_{T}(p^{+})", "MeV", 'F') dataloader.AddVariable("Kminus_PT", "P_{T}(K^{-})", "MeV", 'F') dataloader.AddVariable("gamma_PT", "P_{T}(#gamma)", "MeV", 'F') dataloader.AddVariable("Lambda_1520_0_PT", "P_{T}(#Lambda*)", "MeV", 'F') dataloader.AddVariable("B_PT", "P_{T}(#Lambda_{b})", "MeV", 'F') dataloader.AddVariable( "beta:=(-gamma_P+Kminus_P+pplus_P)/(gamma_P+Kminus_P+pplus_P)", "#beta", "", 'F') #ok #dataloader.AddVariable( "MomCons1:=-B_P+gamma_P+Lambda_1520_0_P","P_{tot,1}", "MeV", 'F' );#BDT learned Mass check1 dataloader.AddVariable("MomCons2:=-Lambda_1520_0_P+Kminus_P+pplus_P", "P_{tot,2}", "MeV", 'F') #ok #dataloader.AddVariable( "Sum_Kminus_p_eta:=atanh(pplus_PZ/pplus_P)+atanh(Kminus_PZ/Kminus_P)","#eta(K^{-})+#eta(p^{+})","", 'F' );#99correlationL_eta dataloader.AddVariable( "Diff_Kminus_p_eta:=atanh(Kminus_PZ/Kminus_P)-atanh(pplus_PZ/pplus_P)", "#eta(K^{-})-#eta(p^{+})", "", 'F') dataloader.AddVariable( "Lambda_1520_0_eta:=atanh(Lambda_1520_0_PZ/Lambda_1520_0_P)", "#eta(#Lambda*)", "", 'F') 
dataloader.AddVariable("gamma_eta:=atanh(gamma_PZ/gamma_P)", "#eta(#gamma)", "", 'F') dataloader.AddVariable("pplus_IPCHI2_OWNPV", "#chi^{2}_{IP}(p^{+})", "", 'F') #dataloader.AddVariable( "Kminus_IPCHI2_OWNPV", "#chi^{2}_{IP}(K^{-})", "" , 'F' ); dataloader.AddVariable("B_IPCHI2_OWNPV", "#chi^{2}_{IP}(#Lambda_{b})", "", 'F') dataloader.AddVariable("Lambda_1520_0_IPCHI2_OWNPV", "#chi^{2}_{IP}(#Lambda*)", "", 'F') dataloader.AddVariable("Lambda_1520_0_OWNPV_CHI2", "#chi^{2}_{vertex}(#Lambda*)", "", 'F') dataloader.AddVariable("B_OWNPV_CHI2", "#chi^{2}_{vertex}(#Lambda_{b})", "", 'F') #dataloader.AddVariable( "B_BMassFit_chi2/B_BMassFit_nDOF", "#chi^{2}_{DTF}/n_{dof}", "" , 'F' );#BDT learned Mass check1 #dataloader.AddVariable( "B_PVFit_chi2/B_PVFit_nDOF", "#chi^{2}_{DTF}/n_{dof}", "" , 'F' );#put it out because array #dataloader.AddVariable( "B_DIRA_OWNPV", "DIRA(#Lambda_{b})", "" , 'F' ); #not used by BDT #dataloader.AddVariable( "Lambda_1520_0_DIRA_OWNPV", "DIRA(#Lambda*)", "" , 'F' ); #not used #dataloader.AddVariable( "Lambda_1520_0_FDCHI2_OWNPV", "FD #chi^{2}(#Lambda*)", "", 'F' ); #not used #dataloader.AddVariable( "B_FDCHI2_OWNPV", "#chi^{2}_{FD}(#Lambda_{b})", "", 'F' ); #not used # Add Spectator Variables: not used for Training but written in final TestTree #dataloader.AddSpectator( "B_M", "M(#Lambda_{b})", "MeV"); #dataloader.AddSpectator( "Lambda_1520_0_M", "M(#Lambda*)", "MeV"); # Read input data if gSystem.AccessPathName(sigfname) != 0: print "Can not find %s" % sigfname if gSystem.AccessPathName(bkgfname) != 0: print "Can not find %s" % bkgfname inputSig = TFile.Open(sigfname) inputBkg = TFile.Open(bkgfname) # Get the signal and background trees for training signal = inputSig.Get(treeNameSig) background = inputBkg.Get(treeNameBkg) # Global event weights (see below for setting event-wise weights) signalWeight = 1.0 backgroundWeight = 1.0 # ====== register trees ==================================================== # # the following method is the 
prefered one: # you can add an arbitrary number of signal or background trees dataloader.AddSignalTree(signal, signalWeight) dataloader.AddBackgroundTree(background, backgroundWeight) # To give different trees for training and testing, do as follows: # dataloader.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ) # dataloader.AddSignalTree( signalTestTree, signalTestWeight, "Test" ) # Use the following code instead of the above two or four lines to add signal and background # training and test events "by hand" # NOTE that in this case one should not give expressions (such as "var1+var2") in the input # variable definition, but simply compute the expression before adding the event # # # --- begin ---------------------------------------------------------- # # ... *** please lookup code in TMVA/macros/TMVAClassification.C *** # # # --- end ------------------------------------------------------------ # # ====== end of register trees ============================================== # Set individual event weights (the variables must exist in the original TTree) # for signal : dataloader.SetSignalWeightExpression ("weight1*weight2"); # for background: dataloader.SetBackgroundWeightExpression("weight1*weight2"); #dataloader.SetBackgroundWeightExpression( "weight" ) # Apply additional cuts on the signal and background sample. 
# example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" ) mycutSig = TCut( "" ) #"pplus_ProbNNp>0.2 && Kminus_ProbNNk>0.2 && B_PT>4000 && Lambda_1520_0_PT>1500 && gamma_PT>3000 && pplus_PT>1000 && B_FDCHI2_OWNPV>100 && pplus_IPCHI2_OWNPV>50 && Kminus_IPCHI2_OWNPV>40")# B_BKGCAT==0 directly applied in root sample #print(sigfname + str( mycutSig ) + treeNameSig) mycutBkg = TCut( "B_M>6120" ) #"pplus_ProbNNp>0.2 && Kminus_ProbNNk>0.2 && B_PT>4000 && Lambda_1520_0_PT>1500 && gamma_PT>3000 && pplus_PT>1000 && B_FDCHI2_OWNPV>100 && pplus_IPCHI2_OWNPV>50 && Kminus_IPCHI2_OWNPV>40 && B_M>6120")#(B_M>6120 || B_M<5120)" ) #print(bkgfname + str( mycutBkg ) + treeNameBkg) # Here, the relevant variables are copied over in new, slim trees that are # used for TMVA training and testing # "SplitMode=Random" means that the input events are randomly shuffled before # splitting them into training and test samples dataloader.PrepareTrainingAndTestTree( mycutSig, mycutBkg, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ) # -------------------------------------------------------------------------------------------------- # ---- Book MVA methods # # please lookup the various method configuration options in the corresponding cxx files, eg: # src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html # it is possible to preset ranges in the option string in which the cut optimisation should be done: # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable # Cut optimisation if "Cuts" in mlist: factory.BookMethod( dataloader, TMVA.Types.kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart") if "CutsD" in mlist: factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ) if "CutsPCA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsPCA", 
"!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ) if "CutsGA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ) if "CutsSA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ) # Likelihood ("naive Bayes estimator") if "Likelihood" in mlist: factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "Likelihood", "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ) # Decorrelated likelihood if "LikelihoodD" in mlist: factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ) # PCA-transformed likelihood if "LikelihoodPCA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ) # Use a kernel density estimator to approximate the PDFs if "LikelihoodKDE" in mlist: factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ) # Use a variable-dependent mix of splines and kernel density estimator if "LikelihoodMIX" in mlist: factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodMIX", 
"!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ) # Test the multi-dimensional probability density estimator # here are the options strings for the MinMax and RMS methods, respectively: # "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); # "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if "PDERS" in mlist: factory.BookMethod( dataloader, TMVA.Types.kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ) if "PDERSD" in mlist: factory.BookMethod( dataloader, TMVA.Types.kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ) if "PDERSPCA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ) # Multi-dimensional likelihood estimator using self-adapting phase-space binning if "PDEFoam" in mlist: factory.BookMethod( dataloader, TMVA.Types.kPDEFoam, "PDEFoam", "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ) if "PDEFoamBoost" in mlist: factory.BookMethod( dataloader, TMVA.Types.kPDEFoam, "PDEFoamBoost", "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" ) # K-Nearest Neighbour classifier (KNN) if "KNN" in mlist: factory.BookMethod( dataloader, TMVA.Types.kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ) # 
H-Matrix (chi2-squared) method if "HMatrix" in mlist: factory.BookMethod(dataloader, TMVA.Types.kHMatrix, "HMatrix", "!H:!V") # Linear discriminant (same as Fisher discriminant) if "LD" in mlist: factory.BookMethod( dataloader, TMVA.Types.kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ) # Fisher discriminant (same as LD) if "Fisher" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ) # Fisher with Gauss-transformed input variables if "FisherG" in mlist: factory.BookMethod(dataloader, TMVA.Types.kFisher, "FisherG", "H:!V:VarTransform=Gauss") # Composite classifier: ensemble (tree) of boosted Fisher classifiers if "BoostedFisher" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" ) # Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if "FDA_MC" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ) if "FDA_GA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ) if "FDA_SA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ) if "FDA_MT" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_MT", 
"H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ) if "FDA_GAMT" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ) if "FDA_MCMT" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ) # TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if "MLP" in mlist: factory.BookMethod( dataloader, TMVA.Types.kMLP, "MLP", #"!H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+3:TestRate=5:!UseRegulator" )#Try "!H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ) #Old if "MLPBFGS" in mlist: factory.BookMethod( dataloader, TMVA.Types.kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ) if "MLPBNN" in mlist: factory.BookMethod( dataloader, TMVA.Types.kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ) # BFGS training with bayesian regulators # CF(Clermont-Ferrand)ANN if "CFMlpANN" in mlist: factory.BookMethod(dataloader, TMVA.Types.kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ) # n_cycles:#nodes:#nodes:... # Tmlp(Root)ANN if "TMlpANN" in mlist: factory.BookMethod( dataloader, TMVA.Types.kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ) # n_cycles:#nodes:#nodes:... 
# Support Vector Machine if "SVM" in mlist: factory.BookMethod(dataloader, TMVA.Types.kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm") # Boosted Decision Trees if "BDTG" in mlist: factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDTG", "!H:!V:NTrees=600:BoostType=Grad:Shrinkage=0.1:UseBaggedGrad:GradBaggingFraction=0.73:SeparationType=GiniIndex:nCuts=15:MaxDepth=2" ) #Settings3 #"!H:!V:NTrees=300:BoostType=Grad:Shrinkage=0.11:UseBaggedGrad:GradBaggingFraction=0.73:SeparationType=GiniIndex:nCuts=17:MaxDepth=4" )#AnaNote #"!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5" )#Old if "BDT" in mlist: factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDT", "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ) if "BDTB" in mlist: factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ) if "BDTD" in mlist: factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDTD", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ) # RuleFit -- TMVA implementation of Friedman's method if "RuleFit" in mlist: factory.BookMethod( dataloader, TMVA.Types.kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ) # -------------------------------------------------------------------------------------------------- # ---- Now you can tell the factory to train, test, and evaluate the MVAs. # Train MVAs print("FLAG 0") factory.TrainAllMethods() # Test MVAs factory.TestAllMethods() # Evaluate MVAs factory.EvaluateAllMethods() # Save the output. 
outputFile.Close() print "=== wrote root file %s\n" % outfname print "=== TMVAClassification is done!\n" # open the GUI for the result macros if not gROOT.IsBatch(): TMVA.TMVAGui(outfname)
def main():
    """Train, test and evaluate a TMVA PyKeras classifier from the command line.

    Steps performed:
      1. Parse the command-line options with getopt.
      2. Derive the run tag ``<methods>_<varListKey>_<N>vars_mDepth<depth>`` and
         open ``dataset/weights/TMVA_<tag>.root`` for the TMVA output.
      3. Register every entry of ``varsList.varList[varListKey]`` as an input
         variable, add the signal tree and all non-empty background trees.
      4. Build a fully connected Keras model, save it to ``model.h5`` and book
         it as a PyKeras method when ``methods == "Keras"``.
      5. Train, test and evaluate all booked methods and close the output.

    Exits through ``sys.exit`` on unknown options, on ``--help``, on a
    malformed ``--inputtrees`` value and on the unsupported ROOT 5.18.
    """
    try:
        # Retrieve command line options.
        shortopts = "m:i:n:d:k:l:t:o:vh?"
        longopts = ["methods=", "inputfile=", "nTrees=", "maxDepth=", "mass=",
                    "varListKey=", "inputtrees=", "outputfile=",
                    "verbose", "help", "usage"]
        opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts)
    except getopt.GetoptError:
        print("ERROR: unknown options in argument %s" % sys.argv[1:])
        usage()
        sys.exit(1)

    # NOTE(review): infname, outfname, nTrees, mass and the two tree names are
    # parsed but not used further down (infname/outfname are overwritten with
    # fixed values) -- confirm whether the options should take precedence.
    infname = DEFAULT_INFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname = DEFAULT_OUTFNAME
    methods = DEFAULT_METHODS
    nTrees = DEFAULT_NTREES
    mDepth = DEFAULT_MDEPTH
    mass = DEFAULT_MASS
    varListKey = DEFAULT_VARLISTKEY
    verbose = True

    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-d", "--maxDepth"):
            mDepth = a
        elif o in ("-k", "--mass"):
            mass = a
        elif o in ("-l", "--varListKey"):
            varListKey = a
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-n", "--nTrees"):
            nTrees = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            # split() drops leading/trailing/repeated whitespace (the previous
            # bare a.strip() discarded its result).  Reverse alphabetical
            # order puts the signal tree first, as before.
            trees = sorted(a.split(), reverse=True)
            if len(trees) != 2:
                print("ERROR: need to give two trees (each one for signal and background)")
                print(trees)
                sys.exit(1)
            treeNameSig, treeNameBkg = trees
        elif o in ("-v", "--verbose"):
            verbose = True

    varList = varsList.varList[varListKey]
    nVars = str(len(varList)) + 'vars'
    # str() keeps the tag construction working for a non-string default depth.
    Note = '_'.join([methods, varListKey, nVars, 'mDepth' + str(mDepth)])
    # The output location is always derived from the run tag (see NOTE above).
    outfname = "dataset/weights/TMVA_" + Note + ".root"

    # Print methods
    mlist = methods.replace(' ', ',').split(',')
    print("=== TMVAClassification: use method(s)...")
    for m in mlist:
        if m.strip() != '':
            print("=== - <%s>" % m.strip())

    # Check ROOT version, give alarm if 5.18
    if 332288 <= gROOT.GetVersionCode() < 332544:
        print("*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA")
        print("*** does not run properly (function calls with enums in the argument are ignored).")
        print("*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),")
        print("*** or use another ROOT version (e.g., ROOT 5.19).")
        sys.exit(1)

    # Output file
    outputFile = TFile(outfname, 'RECREATE')

    factory = TMVA.Factory(
        "TMVAClassification", outputFile,
        "!V:!Silent:Color:DrawProgressBar:Transformations=I;:AnalysisType=Classification")
    factory.SetVerbose(verbose)
    (TMVA.gConfig().GetIONames()).fWeightFileDir = "weights/" + Note

    dataloader = TMVA.DataLoader('dataset')

    # Each varList entry supplies (expression, title, unit); only the jet
    # multiplicity is registered as an integer ('I') variable.
    for iVar in varList:
        varType = 'I' if iVar[0] == 'NJets_singleLepCalc' else 'F'
        dataloader.AddVariable(iVar[0], iVar[1], iVar[2], varType)

    inputDir = varsList.inputDir
    # The signal sample is fixed here (see NOTE above regarding -i).
    infname = "TTTT_TuneCP5_PSweights_13TeV-amcatnlo-pythia8_hadd.root"
    iFileSig = TFile.Open(inputDir + infname)
    sigChain = iFileSig.Get("ljmet")
    dataloader.AddSignalTree(sigChain)

    # Keep references to every opened file and tree until training is done so
    # the Python side does not drop them while TMVA still uses the trees.
    bkg_list = []
    bkg_trees_list = []
    for bkgName in varsList.bkg:
        bkgFile = TFile.Open(inputDir + bkgName)
        bkg_list.append(bkgFile)
        print(inputDir + bkgName)
        bkgTree = bkgFile.Get("ljmet")
        bkg_trees_list.append(bkgTree)
        bkgTree.GetEntry(0)
        # Empty background samples are not handed to the DataLoader.
        if bkgTree.GetEntries() == 0:
            continue
        dataloader.AddBackgroundTree(bkgTree, 1)

    dataloader.SetSignalWeightExpression(weightStrS)
    dataloader.SetBackgroundWeightExpression(weightStrB)

    mycutSig = TCut(cutStrS)
    mycutBkg = TCut(cutStrB)

    dataloader.PrepareTrainingAndTestTree(
        mycutSig, mycutBkg,
        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V")

    kerasSetting = 'H:!V:VarTransform=G:FilenameModel=model.h5:NumEpochs=10:BatchSize=1028'

    # The input layer must match the number of variables registered with the
    # DataLoader, so it follows len(varList) instead of a fixed 53.
    model = Sequential()
    model.add(Dense(100, activation='relu', input_dim=len(varList)))
    for _ in range(3):
        model.add(Dense(100, activation="relu"))
    model.add(Dense(2, activation="sigmoid"))

    # Set loss and optimizer
    model.compile(loss='categorical_crossentropy', optimizer=Adam(), metrics=['accuracy', ])

    # Store model to file; PyKeras loads it through FilenameModel.
    model.save('model.h5')
    model.summary()

    if methods == "Keras":
        factory.BookMethod(dataloader, TMVA.Types.kPyKeras, "PyKeras", kerasSetting)

    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()

    outputFile.Close()
    print("DONE")
def TMVARegression():
    """Run the TMVA BDT regression for the "Dijet" or "Subjet" analysis.

    Command-line options (parsed with getopt): ``-a/--analysis``,
    ``-o/--outputfile``, ``-v/--verbose`` and ``-h/-?/--help/--usage``.
    The function creates the output ROOT file and the TMVA factory,
    registers the input variables and the regression target of the chosen
    analysis, chains the input ntuples, applies the selection cut, books one
    BDT method and finally trains, tests and evaluates it.

    Exits through ``sys.exit`` on unknown options, on a help request and
    when the analysis is neither "Dijet" nor "Subjet".
    """
    # Retrieve command line options.
    try:
        shortopts = "a:o:vh?"
        longopts = ["analysis=","outputfile=", "verbose", "help", "usage"]
        opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts)
    except getopt.GetoptError:
        # Print help information and exit.
        print("ERROR: unknown options in argument %s" % sys.argv[1:])
        usage()
        sys.exit(1)

    _outfname = OUTFNAME
    _analysis = ANALYSIS
    verbose = False
    for flag, value in opts:
        if flag in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        if flag in ("-o", "--outputfile"):
            _outfname = value
        elif flag in ("-a", "--analysis"):
            _analysis = value
        elif flag in ("-v", "--verbose"):
            verbose = True

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file receiving the training results.
    outputFile = TFile(_outfname, 'RECREATE')

    # Factory arguments: job name (base name of the weight files), the output
    # file, and the option string.  Dropping the "!" in front of "Silent"
    # suppresses all TMVA output.
    factory = TMVA.Factory("TMVARegression", outputFile, "!V:!Silent:Color:DrawProgressBar")
    factory.SetVerbose(verbose)
    TMVA.gConfig().GetIONames().fWeightFileDir = "weights_Reg_8TeV" + "_" + _analysis

    # Per-analysis configuration: (expression, title) pairs for the input
    # variables, the regression target and the terms of the selection cut.
    if _analysis == "Dijet":
        variables = [
            ("hJet_pt", "hJet_pt"),
            ("hJet_eta", "hJet_eta"),
            ("hJet_phi", "hJet_phi"),
            ("hJet_e", "hJet_e"),
            ("hJet_ptRaw*((hJet_ptRaw+resolutionBias(fabs(hJet_eta))*(hJet_ptRaw-hJet_genPt))/hJet_ptRaw)",
             "hJet_ptRaw*((hJet_ptRaw+resolutionBias(fabs(hJet_eta))*(hJet_ptRaw-hJet_genPt))/hJet_ptRaw)"),
            ("hJet_Mt:=evalMt(hJet_pt, hJet_eta, hJet_phi, hJet_e)", "hJet_Mt"),
            ("hJet_Et:=evalEt(hJet_pt, hJet_eta, hJet_phi, hJet_e)", "hJet_Et"),
            ("hJet_ptLeadTrack", "hJet_ptLeadTrack"),
            ("hJet_vtxPt", "hJet_vtxPt"),
            ("hJet_vtx3dL", "hJet_vtx3dL"),
            ("hJet_vtx3deL", "hJet_vtx3deL"),
            ("hJet_vtxMass", "hJet_vtxMass"),
            ("hJet_chf", "hJet_chf"),
            ("hJet_nch", "hJet_nch"),
            ("hJet_nconstituents", "hJet_nconstituents"),
            ("hJet_JECUnc", "hJet_JECUnc"),
            ("rho25", "rho25"),
            ("MET.et", "MET.et"),
            ("METdPhi:=METdeltaPhi(MET.phi, hJet_phi[0], hJet_phi[1])", "METdPhi"),
        ]
        # Variable carrying the regression target.
        target = "hJet_genPt"
        cutTerms = [
            "(Vtype == 0 || Vtype == 1)",
            "hJet_pt[0] > 20.0",
            "hJet_pt[1] > 20.0",
            "hJet_genPt[0] > 0.0",
            "hJet_genPt[1] > 0.0",
            "hJet_eta[0] < 2.4",
            "hJet_eta[1] < 2.4",
            "hJet_id[0] > 0.0",
            "hJet_id[1] > 0.0",
            "max(hJet_csv[0],hJet_csv[1]) > 0.0",
            "min(hJet_csv[0],hJet_csv[1]) > 0.0",
            "H.pt > 100",
        ]
    elif _analysis == "Subjet":
        variables = [
            ("fathFilterJets_pt", "fathFilterJets_pt"),
            ("fathFilterJets_eta", "fathFilterJets_eta"),
            ("fathFilterJets_phi", "fathFilterJets_phi"),
            ("fathFilterJets_e", "fathFilterJets_e"),
            ("fathFilterJets_ptRaw*((fathFilterJets_ptRaw+resolutionBias(fabs(fathFilterJets_eta))*(fathFilterJets_ptRaw-fathFilterJets_genPt))/fathFilterJets_ptRaw)",
             "fathFilterJets_ptRaw*((fathFilterJets_ptRaw+resolutionBias(fabs(fathFilterJets_eta))*(fathFilterJets_ptRaw-fathFilterJets_genPt))/fathFilterJets_ptRaw)"),
            ("fathFilterJets_Mt:=evalMt(fathFilterJets_pt, fathFilterJets_eta, fathFilterJets_phi, fathFilterJets_e)", "fathFilterJets_Mt"),
            ("fathFilterJets_Et:=evalEt(fathFilterJets_pt, fathFilterJets_eta, fathFilterJets_phi, fathFilterJets_e)", "fathFilterJets_Et"),
            ("fathFilterJets_ptLeadTrack", "fathFilterJets_ptLeadTrack"),
            ("fathFilterJets_vtxPt", "fathFilterJets_vtxPt"),
            ("fathFilterJets_vtx3dL", "fathFilterJets_vtx3dL"),
            ("fathFilterJets_vtx3deL", "fathFilterJets_vtx3deL"),
            ("fathFilterJets_vtxMass", "fathFilterJets_vtxMass"),
            ("fathFilterJets_chf", "fathFilterJets_chf"),
            ("rho25", "rho25"),
            ("MET.et", "MET.et"),
            ("METdPhi:=METdeltaPhi(MET.phi, fathFilterJets_phi[0], fathFilterJets_phi[1])", "METdPhi"),
        ]
        target = "fathFilterJets_genPt"
        cutTerms = [
            "(Vtype == 0 || Vtype == 1)",
            "fathFilterJets_pt[0] > 20.0",
            "fathFilterJets_pt[1] > 20.0",
            "fathFilterJets_genPt[0] > 0.0",
            "fathFilterJets_genPt[1] > 0.0",
            "fathFilterJets_eta[0] < 2.4",
            "fathFilterJets_eta[1] < 2.4",
            "max(fathFilterJets_csv[0],fathFilterJets_csv[1]) > 0.0",
            "min(fathFilterJets_csv[0],fathFilterJets_csv[1]) > 0.0",
            "FatH.filteredpt > 100",
        ]
    else:
        print("Problem specifying analysis. Please choose Dijet or Subjet.")
        sys.exit(1)

    for expression, title in variables:
        factory.AddVariable(expression, title, "units", 'F')
    factory.AddTarget(target)

    # Input ntuples; the two flags select which set of files is chained.
    en7TeV = False
    en8TeV = True
    regWeight = 1.
    chain = TChain("tree")
    if en7TeV:
        # Ntuple names to be updated later.
        for ntuple in (
                "Step2_output_May11/WH_125_ForRegression.root",
                "Step2_output_May11/WH_115_ForRegression.root",
                "Step2_output_May11/WH_120_ForRegression.root",
                "Step2_output_May11/WH_130_ForRegression.root",
                "Step2_output_May11/WH_135_ForRegression.root"):
            chain.Add(ntuple)
    if en8TeV:
        for ntuple in (
                "/uscmst1b_scratch/lpc1/lpctrig/apana/Higgs/Step2/NtupleV34/CMSSW_5_2_5/src/VHbbAnalysis/VHbbDataFormats/bin/Step2/ZH/ZH_110_summer12_33b.root",
                "/uscmst1b_scratch/lpc1/lpctrig/apana/Higgs/Step2/NtupleV34/CMSSW_5_2_5/src/VHbbAnalysis/VHbbDataFormats/bin/Step2/ZH/ZH_115_summer12_33b.root",
                "/uscmst1b_scratch/lpc1/lpctrig/apana/Higgs/Step2/NtupleV34/CMSSW_5_2_5/src/VHbbAnalysis/VHbbDataFormats/bin/Step2/ZH/ZH_120_summer12_33b.root",
                "/uscmst1b_scratch/lpc1/lpctrig/apana/Higgs/Step2/NtupleV34/CMSSW_5_2_5/src/VHbbAnalysis/VHbbDataFormats/bin/Step2/ZH/ZH_125_summer12_33b.root",
                "/uscmst1b_scratch/lpc1/lpctrig/apana/Higgs/Step2/NtupleV34/CMSSW_5_2_5/src/VHbbAnalysis/VHbbDataFormats/bin/Step2/ZH/ZH_130_summer12_33b.root",
                "/uscmst1b_scratch/lpc1/lpctrig/apana/Higgs/Step2/NtupleV34/CMSSW_5_2_5/src/VHbbAnalysis/VHbbDataFormats/bin/Step2/ZH/ZH_135_summer12_33b.root"):
            chain.Add(ntuple)

    nEntries = chain.GetEntries()
    print("Number of entries on Chain:" + " " + str(nEntries))

    regTree = chain
    factory.AddRegressionTree(regTree, regWeight)
    # Individual event weights could be set with
    # factory.SetWeightExpression("var1", "Regression"); the variables used in
    # the expression need to exist in the original TTree.

    # Selection applied before the training/test split.
    cutString = " && ".join(cutTerms)
    print(cutString)
    mycut = TCut(cutString)

    # Explicit training/test sample sizes per configuration.  Without event
    # numbers ("SplitMode=random:!V") half of the events are used for
    # training and the other half for testing.
    if en7TeV:
        factory.PrepareTrainingAndTestTree(
            mycut,
            "nTrain_Regression=125000:nTest_Regression=125000:SplitMode=Random:NormMode=NumEvents:!V")
    if en8TeV:
        factory.PrepareTrainingAndTestTree(
            mycut,
            "nTrain_Regression=111000:nTest_Regression=111000:SplitMode=Random:NormMode=NumEvents:!V")

    # Book the boosted decision tree regression.  Method options are listed
    # at http://tmva.sourceforge.net/optionRef.html
    factory.BookMethod(
        TMVA.Types.kBDT, "BDT",
        "!H:!V:NTrees=60:nEventsMin=5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30")

    # Train on the training events, then test and compare on the test events.
    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()

    nEntries = regTree.GetEntries()
    print("Number of entries on Tree: " + " " + str(nEntries))

    # Save the output
    outputFile.Close()

    print("==> Wrote root file %s\n" % _outfname)
    print("==> TMVARegression is done!\n")