def checkRootVer(): if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544: print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA" print "*** does not run properly (function calls with enums in the argument are ignored)." print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples)," print "*** or use another ROOT version (e.g., ROOT 5.19)." sys.exit(1)
#!/usr/bin/env python #example: python MyTMVAClassification.py truth akt10truth_trim_pt,akt10truth_trim_mass "pt>0,pt<1000,mass>0,mass<200,pass_selection==1" "truth_tau2_WTA,truth_tau1_WTA" KNN # Standard python import import sys # exit import time # time accounting import getopt # command line parser # Import ROOT classes from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut # check ROOT version, give alarm if 5.18 if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544: print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA" print "*** does not run properly (function calls with enums in the argument are ignored)." print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples)," print "*** or use another ROOT version (e.g., ROOT 5.19)." sys.exit(1) # Import TMVA classes from ROOT from ROOT import TMVA # Print usage help def usage(): print " " print "Usage: python %s [options]"
def main(): try: # retrive command line options shortopts = "m:p:M:C:B:i:t:T:o:vh?" opts, args = getopt.getopt(sys.argv[1:], shortopts) except getopt.GetoptError: # print help information and exit: print "ERROR: unknown options in argument %s" % sys.argv[1:] usage() sys.exit(1) infname = DEFAULT_INFNAME methods = DEFAULT_METHODS mass = DEFAULT_MASS cat = DEFAULT_CAT phil = DEFAULT_PHIL outfname = DEFAULT_OUTFNAME treeNameSig = DEFAULT_TREESIG treeNameBkg = DEFAULT_TREEBKG bkg_method = DEFAULT_BACKGROUND width = DEFAULT_WIDTH verbose = False test = False testType = DEFAULT_TEST_TYPE methTest = False testMethod = DEFAULT_TEST_METHOD for o, a in opts: if o in ("-?", "-h", "--help", "--usage"): usage() sys.exit(0) elif o in ("-m", "--methods"): methods = a elif o in ("-M", "--mass"): mass = int(a) elif o in ("-C", "--cat"): cat = int(a) elif o in ("-p", "--philosophy"): phil = a elif o in ("-B", "--background"): bkg_method = int(a) elif o in ("-i", "--inputfile"): infname = a elif o in ("-o", "--outputfile"): outfname = a elif o in ("-T", "--test"): test = True temp = a.split('_') if len(temp) == 1: testType = temp[0] if testType != "ada" or testType != "grad": print "ERROR: testType must be ada or grad not", testType elif len(temp) - temp.count('') == 2: methTest = True testType = temp[0] testMethod = temp[1] checkTestType(testType, testMethod) else: print "ERROR: need to give one or two test options" print temp sys.exit(1) elif o in ("-t", "--inputtrees"): a.strip() trees = a.rsplit(' ') trees.sort() trees.reverse() if len(trees) - trees.count('') != 2: print "ERROR: need to give two trees (each one for signal and background)" print trees sys.exit(1) treeNameSig = trees[0] treeNameBkg = trees[1] elif o in ("-v", "--verbose"): verbose = True if (width == 0.02): width_str = "_2pt" elif (width == 0.07): width_str = "_7pt" mass_str = "_" + str("%3.1f" % mass) cat_str = "_" + str(cat) if cat < 0: cat_str = "_all" if test: if methTest: outfname = "TMVAStuff/" + outfname + 
"_" + phil + cat_str + "_test_" + testType + "_" + testMethod + ".root" else: outfname = "TMVAStuff/" + outfname + "_" + phil + cat_str + "_test_" + testType + ".root" else: outfname = "TMVAStuff/" + outfname + "_" + phil + cat_str + ".root" #treeNameSig = treeNameSig + mass_str #treeNameBkg = treeNameBkg + mass_str # Print methods mlist = methods.replace(' ', ',').split(',') print "=== TMVAClassification: use method(s)..." for m in mlist: if m.strip() != '': print "=== - <%s>" % m.strip() # Import ROOT classes from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut # check ROOT version, give alarm if 5.18 if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544: print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA" print "*** does not run properly (function calls with enums in the argument are ignored)." print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples)," print "*** or use another ROOT version (e.g., ROOT 5.19)." sys.exit(1) # Logon not automatically loaded through PyROOT (logon loads TMVA library) # load also GUI gROOT.SetMacroPath("/vols/cms03/mk1009/h2g/MVA/tmvaMacros/") gROOT.Macro("/vols/cms03/mk1009/h2g/MVA/tmvaMacros/TMVAlogon.C") gROOT.LoadMacro("/vols/cms03/mk1009/h2g/MVA/tmvaMacros/TMVAGui.C") # Import TMVA classes from ROOT from ROOT import TMVA # Output file outputFile = TFile(outfname, 'RECREATE') # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for # more factory options) # All TMVA output can be suppressed by removing the "!" 
(not) in # front of the "Silent" argument in the option string factory = TMVA.Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ) # Set verbosity factory.SetVerbose(verbose) factory.AddVariable("bdtoutput", "BDT Output", 'F') factory.AddVariable("deltaMOverM", "#DeltaM / M_{Hypth}.", 'F') input = TFile.Open(infname) # Get the signal and background trees for training signal_train = input.Get(treeNameSig + "_train" + mass_str) signal_test = input.Get(treeNameSig + "_test" + mass_str) background_train = input.Get(treeNameBkg + "_train" + width_str + mass_str) background_test = input.Get(treeNameBkg + "_test" + width_str + mass_str) # Global event weights (see below for setting event-wise weights) signalWeight = 1.0 backgroundWeight = 1.0 # ====== register trees ==================================================== factory.AddSignalTree(signal_train, signalWeight, "train") factory.AddBackgroundTree(background_train, backgroundWeight, "train") factory.AddSignalTree(signal_test, signalWeight, "test") factory.AddBackgroundTree(background_test, backgroundWeight, "test") # Set individual event weights (the variables must exist in the original # TTree) factory.SetBackgroundWeightExpression("wt") factory.SetSignalWeightExpression("wt") # Apply additional cuts on the signal and background sample. 
# example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" ) mycut = TCut("fabs(deltaMOverM)<=" + str(width) + " && bdtoutput > -0.5") # # Here, the relevant variables are copied over in new, slim trees that are # used for TMVA training and testing factory.PrepareTrainingAndTestTree( mycut, mycut, "nTrain_Signal=0:nTrain_Background=0:NormMode=NumEvents:!V") # Boosted Decision Trees # NEW PARAMETERS if (not test): # Likelihood factory.BookMethod( TMVA.Types.kLikelihood, "Likelihood" + phil, "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ) factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodD" + phil, "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ) #factory.BookMethod( TMVA.Types.kPDERS, "MultiLikelihood"+phil,"!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); # BDT factory.BookMethod( TMVA.Types.kBDT, "BDTada" + phil, "!H:!V:NTrees=200:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=1.0:SeparationType=GiniIndex:nCuts=50:PruneMethod=NoPruning" ) factory.BookMethod( TMVA.Types.kBDT, "BDTgrad" + phil, "!H:!V:NTrees=200:MaxDepth=3:BoostType=Grad:Shrinkage=0.5:UseBaggedGrad:GradBaggingFraction=1.0:SeparationType=GiniIndex:nCuts=50:NNodesMax=10" ) else: #test # BDT ada if testType == "ada": #if testMethod=="nTrees": for nTrees in [10, 50, 100, 200, 500]: for depth in [2, 3]: factory.BookMethod( TMVA.Types.kBDT, "BDT_ada" + str(phil) + "_" + str(nTrees) + "t_" + str(depth) + "d", "!H:!V:NTrees=" + str(nTrees) + ":nEventsMin=150:MaxDepth=" + str(depth) + ":BoostType=AdaBoost:AdaBoostBeta=1:SeparationType=GiniIndex:nCuts=50:PruneMethod=NoPruning" ) # if testMethod=="depth": # for depth in [2,3]: # factory.BookMethod( TMVA.Types.kBDT, 
"BDT_ada"+str(phil)+"_200t_"+str(depth)+"d_0.05b_50c","!H:!V:NTrees=200:nEventsMin=150:MaxDepth="+str(depth)+":BoostType=AdaBoost:AdaBoostBeta=0.05:SeparationType=GiniIndex:nCuts=50:PruneMethod=NoPruning") #if testMethod=="nCuts": # for nCuts in [5,10,20,50,100,200]: # factory.BookMethod( TMVA.Types.kBDT, "BDT_ada"+str(phil)+"_200t_50d_0.05b_"+str(nCuts)+"c","!H:!V:NTrees=200:nEventsMin=150:MaxDepth=50:BoostType=AdaBoost:AdaBoostBeta=0.05:SeparationType=GiniIndex:nCuts="+str(nCuts)+":PruneMethod=NoPruning") #if testMethod=="beta": # for beta in [0.05,0.5,1.]: # factory.BookMethod( TMVA.Types.kBDT, "BDT_ada"+str(phil)+"_200t_50d_"+str(beta)+"b_50c","!H:!V:NTrees=200:nEventsMin=150:MaxDepth=50:BoostType=AdaBoost:AdaBoostBeta="+str(beta)+":SeparationType=GiniIndex:nCuts=50:PruneMethod=NoPruning") # BDT grad if testType == "grad": if testMethod == "nTrees": for nTrees in [10, 50, 100, 200, 500]: for depth in [2, 3]: for shrinkage in [0.05, 0.5, 1.]: factory.BookMethod( TMVA.Types.kBDT, "BDT_grad" + str(phil) + "_" + str(nTrees) + "t_" + str(depth) + "d_" + str(shrinkage) + "s", "!H:!V:NTrees=" + str(nTrees) + ":MaxDepth=" + str(depth) + ":BoostType=Grad:Shrinkage=" + str(shrinkage) + ":UseBaggedGrad:GradBaggingFraction=1:SeparationType=GiniIndex:nCuts=50:NNodesMax=10" ) #if testMethod=="depth": # for depth in [2,3]: # factory.BookMethod( TMVA.Types.kBDT, "BDT_ada"+str(phil)+"_200t_"+str(depth)+"d_0.05b_50c","!H:!V:NTrees=200:nEventsMin=150:MaxDepth="+str(depth)+":BoostType=AdaBoost:AdaBoostBeta=0.05:SeparationType=GiniIndex:nCuts=50:PruneMethod=NoPruning") #if testMethod=="shrinkage": # for shrinkage in [0.05,0.1,0.5,1.]: # factory.BookMethod( TMVA.Types.kBDT, "BDT_grad"+str(phil)+"_200t_"+str(shrinkage)+"s_1gb_50c_10nm","!H:!V:NTrees=200:BoostType=Grad:Shrinkage="+str(shrinkage)+":UseBaggedGrad:GradBaggingFraction=1:SeparationType=GiniIndex:nCuts=50:NNodesMax=10") #if testMethod=="bagFrac": # for bagFrac in [0.05,0.1,0.5,1.]: # factory.BookMethod( TMVA.Types.kBDT, 
"BDT_grad"+str(phil)+"_200t_1s_"+str(bagFrac)+"gb_50c_10nm","!H:!V:NTrees=200:BoostType=Grad:Shrinkage=1:UseBaggedGrad:GradBaggingFraction="+str(bagFrac)+":SeparationType=GiniIndex:nCuts=50:NNodesMax=10") #if testMethod=="nCuts": # for nCuts in [5,10,20,50,100,200]: # factory.BookMethod( TMVA.Types.kBDT, "BDT_grad"+str(phil)+"_200t_1s_1gb_"+str(nCuts)+"c_10nm","!H:!V:NTrees=200:BoostType=Grad:Shrinkage=1:UseBaggedGrad:GradBaggingFraction=1:SeparationType=GiniIndex:nCuts="+str(nCuts)+":NNodesMax=10") #if testMethod=="nNM": # for nNM in [10,100,500,1000,10000]: # factory.BookMethod( TMVA.Types.kBDT, "BDT_grad"+str(phil)+"_200t_1s_1gb_50c_"+str(nNM)+"nm","!H:!V:NTrees=200:BoostType=Grad:Shrinkage=1:UseBaggedGrad:GradBaggingFraction=1:SeparationType=GiniIndex:nCuts=50:NNodesMax"+str(nNM)) # -------------------------------------------------------------------------------------------------- # ---- Now you can tell the factory to train, test, and evaluate the MVAs. # Train MVAs #factory.OptimizeAllMethods() factory.TrainAllMethods() # Test MVAs factory.TestAllMethods() # Evaluate MVAs factory.EvaluateAllMethods() # Save the output. outputFile.Close() print "=== wrote root file %s\n" % outfname print "=== TMVAClassification is done!\n"
def main(): try: # retrive command line options shortopts = "m:M:C:B:i:t:o:vh?" opts, args = getopt.getopt(sys.argv[1:], shortopts) except getopt.GetoptError: # print help information and exit: print "ERROR: unknown options in argument %s" % sys.argv[1:] usage() sys.exit(1) infname = DEFAULT_INFNAME methods = DEFAULT_METHODS mass = DEFAULT_MASS cat = DEFAULT_CAT outfname = DEFAULT_OUTFNAME treeNameSig = DEFAULT_TREESIG treeNameBkg = DEFAULT_TREEBKG bkg_method = DEFAULT_BACKGROUND width = DEFAULT_WIDTH verbose = False for o, a in opts: if o in ("-?", "-h", "--help", "--usage"): usage() sys.exit(0) elif o in ("-m", "--methods"): methods = a elif o in ("-M", "--mass"): mass = int(a) elif o in ("-C", "--cat"): cat = int(a) elif o in ("-B", "--background"): bkg_method = int(a) elif o in ("-i", "--inputfile"): infname = a elif o in ("-o", "--outputfile"): outfname = a elif o in ("-t", "--inputtrees"): a.strip() trees = a.rsplit(' ') trees.sort() trees.reverse() if len(trees) - trees.count('') != 2: print "ERROR: need to give two trees (each one for signal and background)" print trees sys.exit(1) treeNameSig = trees[0] treeNameBkg = trees[1] elif o in ("-v", "--verbose"): verbose = True if (width == 0.02): width_str = "_2pt" elif (width == 0.07): width_str = "_7pt" mass_str = "_" + str(mass) cat_str = "_" + str(cat) if cat < 0: cat_str = "_all" outfname = outfname + mass_str + cat_str + ".root" #treeNameSig = treeNameSig + mass_str #treeNameBkg = treeNameBkg + mass_str # Print methods mlist = methods.replace(' ', ',').split(',') print "=== TMVAClassification: use method(s)..." 
for m in mlist: if m.strip() != '': print "=== - <%s>" % m.strip() # Import ROOT classes from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut # check ROOT version, give alarm if 5.18 if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544: print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA" print "*** does not run properly (function calls with enums in the argument are ignored)." print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples)," print "*** or use another ROOT version (e.g., ROOT 5.19)." sys.exit(1) # Logon not automatically loaded through PyROOT (logon loads TMVA library) # load also GUI gROOT.SetMacroPath("./") #gROOT.Macro ( "./TMVAlogon.C" ) #gROOT.LoadMacro ( "./TMVAGui.C" ) # Import TMVA classes from ROOT from ROOT import TMVA # Output file outputFile = TFile(outfname, 'RECREATE') # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for # more factory options) # All TMVA output can be suppressed by removing the "!" 
(not) in # front of the "Silent" argument in the option string factory = TMVA.Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ) # Set verbosity factory.SetVerbose(verbose) # Define the input variables that shall be used for the classifier training factory.AddVariable("pho1_ptOverM", "P_{T}^{lead} / M_{H}", "", 'F') factory.AddVariable("pho2_ptOverM", "P_{T}^{sublead} / M_{H}", "", 'F') factory.AddVariable("pho1_eta", "#eta^{lead}", "", 'F') factory.AddVariable("pho2_eta", "#eta^{sublead}", "", 'F') factory.AddVariable("d_phi", "#Delta #phi", "rad", 'F') #should this be cos delta Phi factory.AddVariable("H_ptOverM", "P_{T}^{Higgs}/M_{H}", "", 'F') factory.AddVariable("H_eta", "#eta^{Higgs}", "", 'F') factory.AddVariable("sigmaMOverM", "#sigmaM_{cor} / M", 'F') factory.AddVariable("sigmaMOverM_wrongVtx", "#sigmaM_{wrong} / M", 'F') factory.AddVariable("vtx_prob", "P_{vertex}", "", 'F') #factory.AddVariable( "cos_theta_star","cos(#theta)*", "", 'F' ); #factory.AddVariable( "max_eta","max(#eta^{lead},#eta^{sub.})", "", 'F' ); #factory.AddVariable( "min_r9","min(r9^{lead},r9^{sub.})", "", 'F' ); factory.AddVariable("deltaMOverM", "#DeltaM / M_{Hypth}.", 'F') input = TFile.Open(infname) # Get the signal and background trees for training signal_train = input.Get(treeNameSig + "_train" + mass_str + ".0") signal_test = input.Get(treeNameSig + "_test" + mass_str + ".0") background_train = input.Get(treeNameBkg + "_train" + width_str + mass_str + ".0") background_test = input.Get(treeNameBkg + "_test" + width_str + mass_str + ".0") # Global event weights (see below for setting event-wise weights) signalWeight = 1.0 backgroundWeight = 1.0 # ====== register trees ==================================================== factory.AddSignalTree(signal_train, signalWeight, "train") factory.AddBackgroundTree(background_train, backgroundWeight, "train") factory.AddSignalTree(signal_test, signalWeight, 
"test") factory.AddBackgroundTree(background_test, backgroundWeight, "test") # Set individual event weights (the variables must exist in the original # TTree) factory.SetBackgroundWeightExpression("wt") factory.SetSignalWeightExpression("wt") # Apply additional cuts on the signal and background sample. # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" ) mycut = TCut("fabs(deltaMOverM)<=" + str(width)) # # Here, the relevant variables are copied over in new, slim trees that are # used for TMVA training and testing factory.PrepareTrainingAndTestTree( mycut, mycut, "nTrain_Signal=0:nTrain_Background=0:NormMode=NumEvents:!V") # Boosted Decision Trees # NEW PARAMETERS factory.BookMethod( TMVA.Types.kBDT, "BDT_ada" + mass_str + cat_str, "!H:!V:NTrees=400:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.05:SeparationType=GiniIndex:nCuts=50:PruneMethod=NoPruning" ) factory.BookMethod( TMVA.Types.kBDT, "BDT_grad" + mass_str + cat_str, "!H:!V:NTrees=500:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=50:NNodesMax=5" ) #test # -------------------------------------------------------------------------------------------------- # ---- Now you can tell the factory to train, test, and evaluate the MVAs. # Train MVAs #factory.OptimizeAllMethods() factory.TrainAllMethods() # Test MVAs factory.TestAllMethods() # Evaluate MVAs factory.EvaluateAllMethods() # Save the output. outputFile.Close() print "=== wrote root file %s\n" % outfname print "=== TMVAClassification is done!\n"
def main(): try: # retrive command line options shortopts = "m:i:t:o:vh?" longopts = [ "methods=", "inputfile=", "inputtrees=", "outputfile=", "verbose", "help", "usage" ] opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts) except getopt.GetoptError: # print help information and exit: print "ERROR: unknown options in argument %s" % sys.argv[1:] usage() sys.exit(1) infname = DEFAULT_INFNAME treeNameSig = DEFAULT_TREESIG treeNameBkg = DEFAULT_TREEBKG outfname = DEFAULT_OUTFNAME methods = DEFAULT_METHODS verbose = False for o, a in opts: if o in ("-?", "-h", "--help", "--usage"): usage() sys.exit(0) elif o in ("-m", "--methods"): methods = a elif o in ("-i", "--inputfile"): infname = a elif o in ("-o", "--outputfile"): outfname = a elif o in ("-t", "--inputtrees"): a.strip() trees = a.rsplit(' ') trees.sort() trees.reverse() if len(trees) - trees.count('') != 2: print "ERROR: need to give two trees (each one for signal and background)" print trees sys.exit(1) treeNameSig = trees[0] treeNameBkg = trees[1] elif o in ("-v", "--verbose"): verbose = True # Print methods mlist = methods.replace(' ', ',').split(',') print "=== TMVAClassification: use method(s)..." for m in mlist: if m.strip() != '': print "=== - <%s>" % m.strip() # Import ROOT classes from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut # check ROOT version, give alarm if 5.18 if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544: print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA" print "*** does not run properly (function calls with enums in the argument are ignored)." print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples)," print "*** or use another ROOT version (e.g., ROOT 5.19)." 
sys.exit(1) # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI gROOT.SetMacroPath("./") gROOT.Macro("./TMVAlogon.C") gROOT.LoadMacro("./TMVAGui.C") # Import TMVA classes from ROOT from ROOT import TMVA # Output file outputFile = TFile(outfname, 'RECREATE') # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options) # All TMVA output can be suppressed by removing the "!" (not) in # front of the "Silent" argument in the option string factory = TMVA.Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ) # Set verbosity factory.SetVerbose(verbose) # If you wish to modify default settings # (please check "src/Config.h" to see all available global options) # gConfig().GetVariablePlotting()).fTimesRMS = 8.0 # gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory" # Define the input variables that shall be used for the classifier training # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" # [all types of expressions that can also be parsed by TTree::Draw( "expression" )] #varList = ['svMass', 'mJJ', 'met/fMass','pZ - pZV'] # varList = ['svMass', 'fMass', 'dRTauTau', 'dRJJ', 'svPt', 'dRhh', 'met', 'mJJReg', # 'metTau1DPhi', 'metTau2DPhi', 'metJ1DPhi', 'metJ2DPhi', 'metTauPairDPhi', 'metSvTauPairDPhi', 'metJetPairDPhi','CSVJ1', 'CSVJ2'] # varList = ['svMass', 'dRTauTau', 'svPt', 'dRhh', 'met', 'mJJReg', # 'metTau1DPhi', 'metTau2DPhi', 'metJ2DPhi', 'metJetPairDPhi','CSVJ1', 'CSVJ2'] varList = [ 'svMass', 'dRTauTau', 'dRJJ', 'svPt', 'dRhh', 'met', 'mJJReg', 'metTau1DPhi', 'metTau2DPhi', 'metJ1DPhi', 'metJ2DPhi', 'metTauPairDPhi', 'metSvTauPairDPhi', 'metJetPairDPhi', 'CSVJ1', 'CSVJ2' ] for iVar in varList: factory.AddVariable(iVar, 'F') #factory.AddVariable( "NBTags",'I' ) # You can add so-called "Spectator variables", which are not used in the MVA training, # but will appear in the 
final "TestTree" produced by TMVA. This TestTree will contain the # input variables, the response values of all trained MVAs, and the spectator variables # factory.AddSpectator( "fMass") # factory.AddSpectator( "spec2:=var1*3", "Spectator 2", "units", 'F' ) # Read input data # sigChain = r.TChain("ttTreeFinal/eventTree") # bkg1Chain = r.TChain("ttTreeFinal/eventTree") # bkg2Chain = r.TChain("ttTreeFinal/eventTree") # Get the signal and background trees for training iFileSig = TFile.Open("/scratch/zmao/relaxed_regression/%s" % (infname)) iFileBkg = TFile.Open( "/scratch/zmao/relaxed_regression/trainSample_relaxedsamebTag.root") sigChain = iFileSig.Get("eventTree") bkgChain = iFileBkg.Get("eventTree") # Global event weights (see below for setting event-wise weights) signalWeight = 1 #0.0159/sigChain.GetEntries() #xs (pb) # ====== register trees ==================================================== # # the following method is the prefered one: # you can add an arbitrary number of signal or background trees factory.AddSignalTree(sigChain, signalWeight) factory.AddBackgroundTree(bkgChain, 1) factory.SetSignalWeightExpression('triggerEff') # To give different trees for training and testing, do as follows: # factory.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ) # factory.AddSignalTree( signalTestTree, signalTestWeight, "Test" ) # Use the following code instead of the above two or four lines to add signal and background # training and test events "by hand" # NOTE that in this case one should not give expressions (such as "var1+var2") in the input # variable definition, but simply compute the expression before adding the event # # # --- begin ---------------------------------------------------------- # # ... 
*** please lookup code in TMVA/macros/TMVAClassification.C *** # # # --- end ------------------------------------------------------------ # # ====== end of register trees ============================================== # Set individual event weights (the variables must exist in the original TTree) # for signal : factory.SetSignalWeightExpression ("weight1*weight2"); # for background: factory.SetBackgroundWeightExpression("weight1*weight2"); #factory.SetBackgroundWeightExpression( "weight" ) # Apply additional cuts on the signal and background sample. # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" ) mycutSig = TCut( "iso1<1.5 && iso2<1.5 && CSVJ1 > 0.679 && CSVJ2 > 0.244 && abs(eta1)<2.1 && abs(eta2)<2.1 && charge1 + charge2 == 0" ) mycutBkg = TCut("") # Here, the relevant variables are copied over in new, slim trees that are # used for TMVA training and testing # "SplitMode=Random" means that the input events are randomly shuffled before # splitting them into training and test samples factory.PrepareTrainingAndTestTree( mycutSig, mycutBkg, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ) # -------------------------------------------------------------------------------------------------- # ---- Book MVA methods # # please lookup the various method configuration options in the corresponding cxx files, eg: # src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html # it is possible to preset ranges in the option string in which the cut optimisation should be done: # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable # Cut optimisation # Fisher discriminant (same as LD) # if "Fisher" in mlist: #factory.BookMethod( TMVA.Types.kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ) # factory.BookMethod( TMVA.Types.kFisher, "Fisher") if "BDT" in mlist: factory.BookMethod( TMVA.Types.kBDT, "BDT", 
"!H:!V:NTrees=150:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=-1" ) # -------------------------------------------------------------------------------------------------- # ---- Now you can tell the factory to train, test, and evaluate the MVAs. # Train MVAs factory.TrainAllMethods() # Test MVAs factory.TestAllMethods() # Evaluate MVAs factory.EvaluateAllMethods() # Save the output. outputFile.Close() print "=== wrote root file %s\n" % outfname print "=== TMVAClassification is done!\n" # open the GUI for the result macros # gROOT.ProcessLine( "TMVAGui(\"%s\")" % outfname ) ChangeWeightName = 'mv /afs/hep.wisc.edu/home/zmao/CMSSW_5_3_15/src/TMVA-v4.2.0/test/weights/TMVAClassification_BDT.weights.xml /afs/hep.wisc.edu/home/zmao/CMSSW_5_3_15/src/TMVA-v4.2.0/test/weights/TMVAClassification_BDT.weights_QCD_%i.xml' % len( varList) os.system(ChangeWeightName)
def main(): try: # retrive command line options shortopts = "m:i:n:d:k:l:t:o:s:vh?" longopts = [ "methods=", "inputfile=", "nTrees=", "maxDepth=", "mass=", "varListKey=", "inputtrees=", "outputfile=", "seed=", "verbose", "help", "usage" ] opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts) except getopt.GetoptError: # print help information and exit: print "ERROR: unknown options in argument %s" % sys.argv[1:] usage() sys.exit(1) infname = DEFAULT_INFNAME treeNameSig = DEFAULT_TREESIG treeNameBkg = DEFAULT_TREEBKG outfname = DEFAULT_OUTFNAME methods = DEFAULT_METHODS nTrees = DEFAULT_NTREES mDepth = DEFAULT_MDEPTH varListKey = DEFAULT_VARLISTKEY verbose = True SeedN = DEFAULT_SEED for o, a in opts: if o in ("-?", "-h", "--help", "--usage"): usage() sys.exit(0) elif o in ("-m", "--methods"): methods = a elif o in ("-d", "--maxDepth"): mDepth = a elif o in ("-l", "--varListKey"): varListKey = a elif o in ("-i", "--inputfile"): infname = a elif o in ("-n", "--nTrees"): nTrees = a elif o in ("-o", "--outputfile"): outfname = a elif o in ("-t", "--inputtrees"): a.strip() trees = a.rsplit(' ') trees.sort() trees.reverse() if len(trees) - trees.count('') != 2: print "ERROR: need to give two trees (each one for signal and background)" print trees sys.exit(1) treeNameSig = trees[0] treeNameBkg = trees[1] elif o in ("-s", "--seed"): SeedN = long(a) elif o in ("-v", "--verbose"): verbose = True varList = varsList.varList[varListKey] nVars = str(len(varList)) + 'vars' Note = methods + '_' + varListKey + '_' + nVars + '_mDepth' + mDepth outfname = "dataset/weights/TMVA_" + Note + ".root" # Import ROOT classes from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut, TRandom3 # check ROOT version, give alarm if 5.18 if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544: print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA" print "*** does not run properly (function calls with enums in the argument are 
ignored)." print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples)," print "*** or use another ROOT version (e.g., ROOT 5.19)." sys.exit(1) # Import TMVA classes from ROOT from ROOT import TMVA fClassifier = TMVA.Factory( "VariableImportance", "!V:!ROC:!ModelPersistence:Silent:Color:!DrawProgressBar:AnalysisType=Classification" ) str_xbitset = '{:053b}'.format(SeedN) seeddl = TMVA.DataLoader(str_xbitset) bdtSetting = '!H:!V:NTrees=%s:MaxDepth=%s' % (nTrees, mDepth) bdtSetting += ':MinNodeSize=2.5%:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20' bdtSetting += ':IgnoreNegWeightsInTraining=True' index = 52 for iVar in varList: if (str_xbitset[index] == '1'): seeddl.AddVariable(iVar[0], iVar[1], iVar[2], 'F') print iVar[0] index = index - 1 (TMVA.gConfig().GetIONames()).fWeightFileDir = "weights/" + Note inputDir = varsList.inputDir infname = "TTTT_TuneCP5_PSweights_13TeV-amcatnlo-pythia8_hadd.root" iFileSig = TFile.Open(inputDir + infname) sigChain = iFileSig.Get("ljmet") seeddl.AddSignalTree(sigChain) bkg_list = [] bkg_trees_list = [] bkgList = varsList.bkg for i in range(len(bkgList)): bkg_list.append(TFile.Open(inputDir + bkgList[i])) bkg_trees_list.append(bkg_list[i].Get("ljmet")) bkg_trees_list[i].GetEntry(0) if bkg_trees_list[i].GetEntries() == 0: continue seeddl.AddBackgroundTree(bkg_trees_list[i], 1) signalWeight = 1 seeddl.SetSignalWeightExpression(weightStrS) seeddl.SetBackgroundWeightExpression(weightStrB) mycutSig = TCut(cutStrS) mycutBkg = TCut(cutStrB) seeddl.PrepareTrainingAndTestTree( mycutSig, mycutBkg, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ) fClassifier.BookMethod(seeddl, TMVA.Types.kBDT, "BDT", bdtSetting) fClassifier.TrainAllMethods() fClassifier.TestAllMethods() fClassifier.EvaluateAllMethods() SROC = fClassifier.GetROCIntegral(str_xbitset, "BDT") print "ROC-integral : ", str_xbitset, " ", SROC print "SEED 
" + str_xbitset + " DONE" fClassifier.DeleteAllMethods() fClassifier.fMethodsMap.clear() print "==================================================================" print "=================================================================="
def main(): try: # retrive command line options shortopts = "m:i:t:o:vh?" longopts = [ "methods=", "inputfile=", "inputtrees=", "outputfile=", "verbose", "help", "usage" ] opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts) except getopt.GetoptError: # print help information and exit: print "ERROR: unknown options in argument %s" % sys.argv[1:] usage() sys.exit(1) infname = DEFAULT_INFNAME treeNameSig = DEFAULT_TREESIG treeNameBkg = DEFAULT_TREEBKG outfname = DEFAULT_OUTFNAME methods = DEFAULT_METHODS verbose = False for o, a in opts: if o in ("-?", "-h", "--help", "--usage"): usage() sys.exit(0) elif o in ("-m", "--methods"): methods = a elif o in ("-i", "--inputfile"): infname = a elif o in ("-o", "--outputfile"): outfname = a elif o in ("-t", "--inputtrees"): a.strip() trees = a.rsplit(' ') trees.sort() trees.reverse() if len(trees) - trees.count('') != 2: print "ERROR: need to give two trees (each one for signal and background)" print trees sys.exit(1) treeNameSig = trees[0] treeNameBkg = trees[1] elif o in ("-v", "--verbose"): verbose = True # Print methods mlist = methods.replace(' ', ',').split(',') print "=== TMVAClassification: use method(s)..." for m in mlist: if m.strip() != '': print "=== - <%s>" % m.strip() # Import ROOT classes from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut # check ROOT version, give alarm if 5.18 if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544: print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA" print "*** does not run properly (function calls with enums in the argument are ignored)." print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples)," print "*** or use another ROOT version (e.g., ROOT 5.19)." 
sys.exit(1) # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI gROOT.SetMacroPath("./") #gROOT.Macro ( "./TMVAlogon.C" ) #gROOT.LoadMacro ( "./TMVAGui.C" ) # Import TMVA classes from ROOT from ROOT import TMVA # Output file outputFile = TFile("TMVA.root", "RECREATE") #outputFile = TFile( outfname, 'RECREATE' ) # Create dataloader dataloader = TMVA.DataLoader("dataset") # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options) # All TMVA output can be suppressed by removing the "!" (not) in # front of the "Silent" argument in the option string factory = TMVA.Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ) # Set verbosity factory.SetVerbose(verbose) # If you wish to modify default settings # (please check "src/Config.h" to see all available global options) # gConfig().GetVariablePlotting()).fTimesRMS = 8.0 # gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory" # Define the input variables that shall be used for the classifier training # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" # [all types of expressions that can also be parsed by TTree::Draw( "expression" )] # You can add so-called "Spectator variables", which are not used in the MVA training, # but will appear in the final "TestTree" produced by TMVA. 
This TestTree will contain the # input variables, the response values of all trained MVAs, and the spectator variables # Read input data # if gSystem.AccessPathName( infname ) != 0: gSystem.Exec( "wget http://root.cern.ch/files/" + infname ) file1 = TFile.Open("~/Test/Testy/out_sig.root") signal = file1.Get("ntuple") signalWeight = 1.0 dataloader.AddSignalTree(signal, signalWeight) file2 = TFile.Open("~/Test/Testy/out_bkg.root") background = file2.Get("ntuple") backgroundWeight = 1.0 dataloader.AddBackgroundTree(background, backgroundWeight) # Get the signal and background trees for training dataloader.AddVariable("dist", "Vardist", "units", 'F') dataloader.AddVariable("mu_likep", "Varmup", "units", 'F') dataloader.AddVariable("mu_likem", "Varmum", "units", 'F') dataloader.AddVariable("DeltPhi", "VarDel", "units", 'F') dataloader.AddSpectator("minv", "Varminv", 'F') # Global event weights (see below for setting event-wise weights) # ====== register trees ==================================================== # # the following method is the prefered one: # you can add an arbitrary number of signal or background trees # To give different trees for training and testing, do as follows: # factory.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ) # factory.AddSignalTree( signalTestTree, signalTestWeight, "Test" ) # Use the following code instead of the above two or four lines to add signal and background # training and test events "by hand" # NOTE that in this case one should not give expressions (such as "var1+var2") in the input # variable definition, but simply compute the expression before adding the event # # # --- begin ---------------------------------------------------------- # # ... 
*** please lookup code in TMVA/macros/TMVAClassification.C *** # # # --- end ------------------------------------------------------------ # # ====== end of register trees ============================================== # Set individual event weights (the variables must exist in the original TTree) # for signal : factory.SetSignalWeightExpression ("weight1*weight2"); # for background: factory.SetBackgroundWeightExpression("weight1*weight2"); #dataloader.SetBackgroundWeightExpression( "weight" ) # Apply additional cuts on the signal and background sample. # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" ) mycutSig = TCut("") mycutBkg = TCut("") # Here, the relevant variables are copied over in new, slim trees that are # used for TMVA training and testing # "SplitMode=Random" means that the input events are randomly shuffled before # splitting them into training and test samples dataloader.PrepareTrainingAndTestTree( mycutSig, mycutBkg, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ) # -------------------------------------------------------------------------------------------------- # ---- Book MVA methods # # please lookup the various method configuration options in the corresponding cxx files, eg: # src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html # it is possible to preset ranges in the option string in which the cut optimisation should be done: # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable # Cut optimisation if "Cuts" in mlist: factory.BookMethod( dataloader, TMVA.Types.kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart") if "CutsD" in mlist: factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ) if "CutsPCA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsPCA", 
"!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ) if "CutsGA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ) if "CutsSA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ) # Likelihood ("naive Bayes estimator") if "Likelihood" in mlist: factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "Likelihood", "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ) # Decorrelated likelihood if "LikelihoodD" in mlist: factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ) # PCA-transformed likelihood if "LikelihoodPCA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ) # Use a kernel density estimator to approximate the PDFs if "LikelihoodKDE" in mlist: factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ) # Use a variable-dependent mix of splines and kernel density estimator if "LikelihoodMIX" in mlist: factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodMIX", 
"!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ) # Test the multi-dimensional probability density estimator # here are the options strings for the MinMax and RMS methods, respectively: # "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); # "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if "PDERS" in mlist: factory.BookMethod( dataloader, TMVA.Types.kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ) if "PDERSD" in mlist: factory.BookMethod( dataloader, TMVA.Types.kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ) if "PDERSPCA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ) # Multi-dimensional likelihood estimator using self-adapting phase-space binning if "PDEFoam" in mlist: factory.BookMethod( dataloader, TMVA.Types.kPDEFoam, "PDEFoam", "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ) if "PDEFoamBoost" in mlist: factory.BookMethod( dataloader, TMVA.Types.kPDEFoam, "PDEFoamBoost", "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" ) # K-Nearest Neighbour classifier (KNN) if "KNN" in mlist: factory.BookMethod( dataloader, TMVA.Types.kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ) # 
H-Matrix (chi2-squared) method if "HMatrix" in mlist: factory.BookMethod(dataloader, TMVA.Types.kHMatrix, "HMatrix", "!H:!V") # Linear discriminant (same as Fisher discriminant) if "LD" in mlist: factory.BookMethod( dataloader, TMVA.Types.kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ) # Fisher discriminant (same as LD) if "Fisher" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ) # Fisher with Gauss-transformed input variables if "FisherG" in mlist: factory.BookMethod(dataloader, TMVA.Types.kFisher, "FisherG", "H:!V:VarTransform=Gauss") # Composite classifier: ensemble (tree) of boosted Fisher classifiers if "BoostedFisher" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" ) # Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if "FDA_MC" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ) if "FDA_GA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ) if "FDA_SA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ) if "FDA_MT" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_MT", 
"H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ) if "FDA_GAMT" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ) if "FDA_MCMT" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ) # TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if "MLP" in mlist: factory.BookMethod( dataloader, TMVA.Types.kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ) if "MLPBFGS" in mlist: factory.BookMethod( dataloader, TMVA.Types.kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ) if "MLPBNN" in mlist: factory.BookMethod( dataloader, TMVA.Types.kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ) # BFGS training with bayesian regulators # CF(Clermont-Ferrand)ANN if "CFMlpANN" in mlist: factory.BookMethod(dataloader, TMVA.Types.kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ) # n_cycles:#nodes:#nodes:... # Tmlp(Root)ANN if "TMlpANN" in mlist: factory.BookMethod( dataloader, TMVA.Types.kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ) # n_cycles:#nodes:#nodes:... 
# Support Vector Machine if "SVM" in mlist: factory.BookMethod(dataloader, TMVA.Types.kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm") # Boosted Decision Trees if "BDTG" in mlist: factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDTG", "!H:!V:NTrees=1000:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" ) if "BDT" in mlist: factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDT", "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" ) if "BDTB" in mlist: factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" ) if "BDTD" in mlist: factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDTD", "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" ) # RuleFit -- TMVA implementation of Friedman's method if "RuleFit" in mlist: factory.BookMethod( dataloader, TMVA.Types.kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ) from subprocess import call from os.path import isfile from keras.models import Sequential from keras.layers.core import Dense, Activation from keras.regularizers import l2 from keras.optimizers import SGD TMVA.Tools.Instance() TMVA.PyMethodBase.PyInitialize() model = Sequential() model.add(Dense(64, activation='relu', W_regularizer=l2(1e-5), input_dim=4)) model.add(Dense(2, activation='softmax')) # Set loss and optimizer model.compile(loss='categorical_crossentropy', optimizer=SGD(lr=0.01), metrics=[ 'accuracy', ]) # Store model to file model.save('model.h5') model.summary() # Book methods if "Keras" in mlist: factory.BookMethod(dataloader, TMVA.Types.kFisher, "Fisher", 
"!H:!V:Fisher:VarTransform=D,G") factory.BookMethod( dataloader, TMVA.Types.kPyKeras, "PyKeras", "H:!V:VarTransform=D,G:FilenameModel=model.h5:NumEpochs=20:BatchSize=32" ) # -------------------------------------------------------------------------------------------------- # ---- Now you can tell the factory to train, test, and evaluate the MVAs. # Train MVAs factory.TrainAllMethods() # Test MVAs factory.TestAllMethods() # Evaluate MVAs factory.EvaluateAllMethods() # Save the output. outputFile.Close() print "=== wrote root file %s\n" % outfname print "=== TMVAClassification is done!\n"
def main(): try: # retrive command line options shortopts = "w:m:i:j:f:g:t:o:a:vgh?" longopts = ["weight_fold=", "methods=", "inputfilesig=", "inputfilebkg=", "friendinputfilesig=", "friendinputfilebkg=", "inputtrees=", "outputfile=", "verbose", "gui", "help", "usage"] opts, args = getopt.getopt( sys.argv[1:], shortopts, longopts ) except getopt.GetoptError: # print help information and exit: print "ERROR: unknown options in argument %s" % sys.argv[1:] usage() sys.exit(1) infnameSig = DEFAULT_INFNAMESIG infnameBkg = DEFAULT_INFNAMEBKG friendfnameSig = DEFAULT_FRIENDNAMESIG friendfnameBkg = DEFAULT_FRIENDNAMEBKG treeNameSig = DEFAULT_TREESIG treeNameBkg = DEFAULT_TREEBKG outfname = DEFAULT_OUTFNAME methods = DEFAULT_METHODS weight_fold = "weights" verbose = False gui = False addedcuts = "" for o, a in opts: if o in ("-?", "-h", "--help", "--usage"): usage() sys.exit(0) elif o in ("-w", "--weight_fold"): weight_fold = a elif o in ("-m", "--methods"): methods = a elif o in ("-i", "--inputfilesig"): infnameSig = a elif o in ("-j", "--inputfilebkg"): infnameBkg = a elif o in ("-f", "--friendinputfilesig"): friendfnameSig = a elif o in ("-g", "--friendinputfilebkg"): friendfnameBkg = a elif o in ("-o", "--outputfile"): outfname = a elif o in ("-a", "--addedcuts"): addedcuts = a elif o in ("-t", "--inputtrees"): a.strip() trees = a.rsplit( ' ' ) trees.sort() trees.reverse() if len(trees)-trees.count('') != 2: print "ERROR: need to give two trees (each one for signal and background)" print trees sys.exit(1) treeNameSig = trees[0] treeNameBkg = trees[1] elif o in ("-v", "--verbose"): verbose = True elif o in ("-g", "--gui"): gui = True # Print methods mlist = methods.replace(' ',',').split(',') print "=== TMVAClassification: use method(s)..." for m in mlist: if m.strip() != '': print "=== - <%s>" % m.strip() # Print the file print "Using file " + infnameSig + " for signal..." print "Using file " + infnameBkg + " for background..." 
# Import ROOT classes from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut # check ROOT version, give alarm if 5.18 print "ROOT version is " + str(gROOT.GetVersionCode()) if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544: print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA" print "*** does not run properly (function calls with enums in the argument are ignored)." print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples)," print "*** or use another ROOT version (e.g., ROOT 5.19)." sys.exit(1) # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI gROOT.SetMacroPath( "./" ) ## SO I TAKE DEFAULT FORM ROOT# gROOT.Macro ( "./TMVAlogon.C" ) #! gROOT.LoadMacro ( "./TMVAGui.C" ) # Import TMVA classes from ROOT from ROOT import TMVA # Output file outputFile = TFile( outfname, 'RECREATE' ) # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options) # All TMVA output can be suppressed by removing the "!" 
(not) in # front of the "Silent" argument in the option string factory = TMVA.Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ) # Set verbosity factory.SetVerbose( verbose ) # If you wish to modify default settings # (please check "src/Config.h" to see all available global options) # gConfig().GetVariablePlotting()).fTimesRMS = 8.0 (TMVA.gConfig().GetIONames()).fWeightFileDir = weight_fold; # Define the input variables that shall be used for the classifier training # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" # [all types of expressions that can also be parsed by TTree::Draw( "expression" )] factory.AddVariable( "dR_l1l2", "dR_l1l2", "", 'F' ) factory.AddVariable( "dR_b1b2", "dR_b1b2", "", 'F' ) factory.AddVariable( "dR_bl", "dR_bl", "", 'F' ) factory.AddVariable( "dR_l1l2b1b2", "dR_l1l2b1b2", "", 'F' ) factory.AddVariable( "MINdR_bl", "MINdR_bl", "", 'F' ) factory.AddVariable( "dphi_l1l2b1b2", "dphi_l1l2b1b2", "", 'F' ) factory.AddVariable( "mass_l1l2", "mass_l1l2", "", 'F' ) factory.AddVariable( "mass_b1b2", "mass_b1b2", "", 'F' ) factory.AddVariable( "mass_trans", "mass_trans", "", 'F' ) factory.AddVariable( "MT2", "MT2", "", 'F' ) factory.AddVariable( "pt_b1b2", "pt_b1b2", "", 'F' ) #factory.AddVariable( "MMC_h2mass_MaxBin", "MMC_h2mass_MaxBin", "", 'F' ) #factory.AddVariable( "MMC_h2mass_RMS", "MMC_h2mass_RMS", "", 'F' ) #factory.AddVariable( "MMC_h2mass_prob", "MMC_h2mass_prob", "", 'F' ) # You can add so-called "Spectator variables", which are not used in the MVA training, # but will appear in the final "TestTree" produced by TMVA. 
This TestTree will contain the # input variables, the response values of all trained MVAs, and the spectator variables # factory.AddSpectator( "spec1:=var1*2", "Spectator 1", "units", 'F' ) # factory.AddSpectator( "spec2:=var1*3", "Spectator 2", "units", 'F' ) # Read input data if gSystem.AccessPathName( infnameSig ) != 0 or gSystem.AccessPathName( infnameBkg ): gSystem.Exec( "wget http://root.cern.ch/files/" + infname ) inputSig = TFile.Open( infnameSig ) inputBkg = TFile.Open( infnameBkg ) # Get the signal and background trees for training signal = inputSig.Get( treeNameSig ) background = inputBkg.Get( treeNameBkg ) ##signal.AddFriend( "eleIDdir/isoT1 = eleIDdir/T1", friendfnameSig ) ##background.AddFriend( "eleIDdir/isoT1 = eleIDdir/T1", friendfnameBkg ) # Global event weights (see below for setting event-wise weights) signalWeight = 1. backgroundWeight = 1. #I don't think there's a general answer to this. The safest 'default' #is to use the envent weight such that you have equal amounts of signal #and background #for the training, otherwise for example: if you look for a rare #signal and you use the weight to scale the number of events according #to the expected ratio of signal and background #according to the luminosity... the classifier sees hardly any signal #events and "thinks" .. Oh I just classify everything background and do #a good job! # #One can try to 'optimize' the training a bit more in either 'high #purity' or 'high efficiency' by choosing different weights, but as I #said, there's no fixed rule. You'd have #to 'try' and see if you get better restults by playing with the weights. 
# ====== register trees ==================================================== # # the following method is the prefered one: # you can add an arbitrary number of signal or background trees factory.AddSignalTree ( signal, signalWeight ) factory.AddBackgroundTree( background, backgroundWeight ) # To give different trees for training and testing, do as follows: # factory.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ) # factory.AddSignalTree( signalTestTree, signalTestWeight, "Test" ) # Use the following code instead of the above two or four lines to add signal and background # training and test events "by hand" # NOTE that in this case one should not give expressions (such as "var1+var2") in the input # variable definition, but simply compute the expression before adding the event # # # --- begin ---------------------------------------------------------- # # ... *** please lookup code in TMVA/macros/TMVAClassification.C *** # # # --- end ------------------------------------------------------------ # # ====== end of register trees ============================================== # Set individual event weights (the variables must exist in the original TTree) # for signal : factory.SetSignalWeightExpression ("weight1*weight2"); # for background: factory.SetBackgroundWeightExpression("weight1*weight2"); # Apply additional cuts on the signal and background sample. 
# example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" ) #mycutSig = TCut( "nu1and2_diBaxis_t>-900 && met_diBaxis_t>-900&& hasb1jet && hasb2jet && hasMET && hasGenMET && hasdRljet && hastwomuons" ) mycutSig = TCut( addedcuts ) #mycutBkg = TCut( "event_n%2!=0 && " + addedcuts ) mycutBkg = TCut( addedcuts ) #mycutBkg = TCut( "nu1and2_diBaxis_t>-900 && met_diBaxis_t>-900&& hasb1jet && hasb2jet && hasMET && hasGenMET && hasdRljet && hastwomuons" ) print mycutSig # Here, the relevant variables are copied over in new, slim trees that are # used for TMVA training and testing # "SplitMode=Random" means that the input events are randomly shuffled before # splitting them into training and test samples factory.PrepareTrainingAndTestTree( mycutSig, mycutBkg, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ) # -------------------------------------------------------------------------------------------------- # ---- Book MVA methods # # please lookup the various method configuration options in the corresponding cxx files, eg: # src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html # it is possible to preset ranges in the option string in which the cut optimisation should be done: # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable # Cut optimisation if "Cuts" in mlist: factory.BookMethod( TMVA.Types.kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:VarProp[0]=FMax:VarProp[1]=FMin" ) if "CutsD" in mlist: factory.BookMethod( TMVA.Types.kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ) if "CutsPCA" in mlist: factory.BookMethod( TMVA.Types.kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ) if "CutsGA" in mlist: factory.BookMethod( TMVA.Types.kCuts, "CutsGA", "H:!V:FitMethod=GA:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95:VarProp[0]=FMin:VarProp[1]=FMax" ) if "CutsSA" 
in mlist: factory.BookMethod( TMVA.Types.kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ) # Likelihood ("naive Bayes estimator") if "Likelihood" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "Likelihood", "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ) # Decorrelated likelihood if "LikelihoodD" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ) # PCA-transformed likelihood if "LikelihoodPCA" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ) # Use a kernel density estimator to approximate the PDFs if "LikelihoodKDE" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ) # Use a variable-dependent mix of splines and kernel density estimator if "LikelihoodMIX" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ) # Test the multi-dimensional probability density estimator # here are the options strings for the MinMax and RMS methods, respectively: # "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); # "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if "PDERS" in mlist: factory.BookMethod( TMVA.Types.kPDERS, 
"PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ) if "PDERSD" in mlist: factory.BookMethod( TMVA.Types.kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ) if "PDERSPCA" in mlist: factory.BookMethod( TMVA.Types.kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ) # Multi-dimensional likelihood estimator using self-adapting phase-space binning if "PDEFoam" in mlist: factory.BookMethod( TMVA.Types.kPDEFoam, "PDEFoam", "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ) if "PDEFoamBoost" in mlist: factory.BookMethod( TMVA.Types.kPDEFoam, "PDEFoamBoost", "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" ) # K-Nearest Neighbour classifier (KNN) if "KNN" in mlist: factory.BookMethod( TMVA.Types.kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ) # H-Matrix (chi2-squared) method if "HMatrix" in mlist: factory.BookMethod( TMVA.Types.kHMatrix, "HMatrix", "!H:!V" ) # Linear discriminant (same as Fisher discriminant) if "LD" in mlist: factory.BookMethod( TMVA.Types.kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ) # Fisher discriminant (same as LD) if "Fisher" in mlist: factory.BookMethod( TMVA.Types.kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ) # Fisher with Gauss-transformed input variables if "FisherG" in mlist: factory.BookMethod( TMVA.Types.kFisher, "FisherG", "H:!V:VarTransform=Gauss" ) # Composite classifier: ensemble (tree) of 
boosted Fisher classifiers if "BoostedFisher" in mlist: factory.BookMethod( TMVA.Types.kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" ) # Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if "FDA_MC" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ); if "FDA_GA" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if "FDA_SA" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if "FDA_MT" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if "FDA_GAMT" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if "FDA_MCMT" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); # TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if "MLP" in mlist: 
factory.BookMethod( TMVA.Types.kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ) if "MLPBFGS" in mlist: factory.BookMethod( TMVA.Types.kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ) if "MLPBNN" in mlist: factory.BookMethod( TMVA.Types.kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ) # BFGS training with bayesian regulators # CF(Clermont-Ferrand)ANN if "CFMlpANN" in mlist: factory.BookMethod( TMVA.Types.kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ) # n_cycles:#nodes:#nodes:... # Tmlp(Root)ANN if "TMlpANN" in mlist: factory.BookMethod( TMVA.Types.kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ) #n_cycles:#nodes:#nodes:... # Support Vector Machine if "SVM" in mlist: factory.BookMethod( TMVA.Types.kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ) # Boosted Decision Trees if "BDTG" in mlist: factory.BookMethod( TMVA.Types.kBDT, "BDTG", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5" ) if "BDT" in mlist: factory.BookMethod( TMVA.Types.kBDT, "BDT", "!H:!V:NTrees=850:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ) if "BDTB" in mlist: factory.BookMethod( TMVA.Types.kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ) if "BDTD" in mlist: factory.BookMethod( TMVA.Types.kBDT, "BDTD", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ) # RuleFit -- TMVA implementation of Friedman's method if "RuleFit" in mlist: factory.BookMethod( TMVA.Types.kRuleFit, "RuleFit", 
"H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ) # -------------------------------------------------------------------------------------------------- # ---- Now you can tell the factory to train, test, and evaluate the MVAs. # Train MVAs factory.TrainAllMethods() # Test MVAs factory.TestAllMethods() # Evaluate MVAs factory.EvaluateAllMethods() # Save the output. outputFile.Close() print "=== wrote root file %s\n" % outfname print "=== TMVAClassification is done!\n" # open the GUI for the result macros if( gui ): gROOT.ProcessLine( "TMVAGui(\"%s\")" % outfname ) # keep the ROOT thread running gApplication.Run()
def main(): try: # retrive command line options shortopts = "m:i:t:o:vh?" longopts = ["methods=", "inputfile=", "inputtrees=", "outputfile=", "verbose", "help", "usage"] opts, args = getopt.getopt( sys.argv[1:], shortopts, longopts ) except getopt.GetoptError: # print help information and exit: print "ERROR: unknown options in argument %s" % sys.argv[1:] usage() sys.exit(1) infname = DEFAULT_INFNAME treeNameSig = DEFAULT_TREESIG treeNameBkg = DEFAULT_TREEBKG outfname = DEFAULT_OUTFNAME methods = DEFAULT_METHODS verbose = False for o, a in opts: if o in ("-?", "-h", "--help", "--usage"): usage() sys.exit(0) elif o in ("-m", "--methods"): methods = a elif o in ("-i", "--inputfile"): infname = a elif o in ("-o", "--outputfile"): outfname = a elif o in ("-t", "--inputtrees"): a.strip() trees = a.rsplit( ' ' ) trees.sort() trees.reverse() if len(trees)-trees.count('') != 2: print "ERROR: need to give two trees (each one for signal and background)" print trees sys.exit(1) treeNameSig = trees[0] treeNameBkg = trees[1] elif o in ("-v", "--verbose"): verbose = True # Print methods mlist = methods.replace(' ',',').split(',') print "=== TMVAnalysis: use method(s)..." for m in mlist: if m.strip() != '': print "=== - <%s>" % m.strip() # Import ROOT classes from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut # check ROOT version, give alarm if 5.18 if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544: print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA" print "*** does not run properly (function calls with enums in the argument are ignored)." print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples)," print "*** or use another ROOT version (e.g., ROOT 5.19)." 
sys.exit(1) # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI gROOT.SetMacroPath( "../macros/" ) gROOT.Macro ( "../macros/TMVAlogon.C" ) gROOT.LoadMacro ( "../macros/TMVAGui.C" ) # Import TMVA classes from ROOT from ROOT import TMVA # Output file outputFile = TFile( outfname, 'RECREATE' ) # Create instance of TMVA factory (see TMVA/macros/TMVAnalysis.C for more factory options) # All TMVA output can be suppressed by removing the "!" (not) in # front of the "Silent" argument in the option string factory = TMVA.Factory( "TMVAnalysis", outputFile, "!V:!Silent:Color" ) # Set verbosity factory.SetVerbose( verbose ) # Define the input variables that shall be used for the classifier training # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" # [all types of expressions that can also be parsed by TTree::Draw( "expression" )] factory.AddVariable( "var1+var2", 'F' ) factory.AddVariable( "var1-var2", 'F' ) factory.AddVariable( "var3", 'F' ) factory.AddVariable( "var4", 'F' ) # Read input data if not gSystem.AccessPathName( infname ): input = TFile( infname ) else: print "ERROR: could not access data file %s\n" % infname # Get the signal and background trees for training signal = input.Get( treeNameSig ) background = input.Get( treeNameBkg ) # Global event weights (see below for setting event-wise weights) signalWeight = 1.0 backgroundWeight = 1.0 # ====== register trees ==================================================== # # the following method is the prefered one: # you can add an arbitrary number of signal or background trees factory.AddSignalTree ( signal, signalWeight ) factory.AddBackgroundTree( background, backgroundWeight ) # To give different trees for training and testing, do as follows: # factory.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ) # factory.AddSignalTree( signalTestTree, signalTestWeight, "Test" ) # Use the following code instead of the above two or four lines 
to add signal and background # training and test events "by hand" # NOTE that in this case one should not give expressions (such as "var1+var2") in the input # variable definition, but simply compute the expression before adding the event # # # --- begin ---------------------------------------------------------- # # ... *** please lookup code in TMVA/macros/TMVAnalysis.C *** # # # --- end ------------------------------------------------------------ # # ====== end of register trees ============================================== # This would set individual event weights (the variables defined in the # expression need to exist in the original TTree) # for signal : factory.SetSignalWeightExpression("weight1*weight2") # for background: factory.SetBackgroundWeightExpression("weight1*weight2") # Apply additional cuts on the signal and background sample. # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" ) mycutSig = TCut( "" ) mycutBkg = TCut( "" ) # Here, the relevant variables are copied over in new, slim trees that are # used for TMVA training and testing # "SplitMode=Random" means that the input events are randomly shuffled before # splitting them into training and test samples factory.PrepareTrainingAndTestTree( mycutSig, mycutBkg, "NSigTrain=3000:NBkgTrain=3000:SplitMode=Random:NormMode=NumEvents:!V" ) # ... 
and alternative call to use a different number of signal and background training/test event is: # factory.PrepareTrainingAndTestTree( mycut, "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" ) # -------------------------------------------------------------------------------------------------- # Cut optimisation if "Cuts" in mlist: factory.BookMethod( TMVA.Types.kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ) if "CutsD" in mlist: factory.BookMethod( TMVA.Types.kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ) if "CutsPCA" in mlist: factory.BookMethod( TMVA.Types.kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ) if "CutsGA" in mlist: factory.BookMethod( TMVA.Types.kCuts, "CutsGA", "H:!V:FitMethod=GA:EffSel:Steps=30:Cycles=3:PopSize=100:SC_steps=10:SC_rate=5:SC_factor=0.95:VarProp=FSmart" ) if "CutsSA" in mlist: factory.BookMethod( TMVA.Types.kCuts, "CutsSA", "H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemperature=IncreasingAdaptive:InitialTemperature=1e+6:MinTemperature=1e-6:Eps=1e-10:UseDefaultScale" ) # Likelihood if "Likelihood" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "Likelihood", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=10:NSmoothBkg[0]=10:NSmoothBkg[1]=10:NSmooth=10:NAvEvtPerBin=50" ) # test the decorrelated likelihood if "LikelihoodD" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodD", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=10:NSmoothBkg[0]=10:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ) if "LikelihoodPCA" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=10:NSmoothBkg[0]=10:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ) # test the new kernel density estimator if "LikelihoodKDE" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, 
"LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ) # test the mixed splines and kernel density estimator (depending on which variable) if "LikelihoodMIX" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpol[0]=KDE:PDFInterpol[1]=KDE:PDFInterpol[2]=Spline2:PDFInterpol[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ) # PDE - RS method if "PDERS" in mlist: factory.BookMethod( TMVA.Types.kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ) # And the options strings for the MinMax and RMS methods, respectively: # "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" # "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" if "PDERSkNN" in mlist: factory.BookMethod( TMVA.Types.kPDERS, "PDERSkNN", "!H:!V:VolumeRangeMode=kNN:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ) if "PDERSD" in mlist: factory.BookMethod( TMVA.Types.kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ) if "PDERSPCA" in mlist: factory.BookMethod( TMVA.Types.kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ) # K-Nearest Neighbour class ifier (KNN) if "KNN" in mlist: factory.BookMethod( TMVA.Types.kKNN, "KNN", "nkNN=400:TreeOptDepth=6:ScaleFrac=0.8:!UseKernel:!Trim" ) # H-Matrix (chi2-squared) method if "HMatrix" in mlist: factory.BookMethod( TMVA.Types.kHMatrix, "HMatrix", "!H:!V" ) # Fisher discriminant if "Fisher" in mlist: factory.BookMethod( TMVA.Types.kFisher, "Fisher", "H:!V:!Normalise:CreateMVAPdfs:Fisher:NbinsMVAPdf=50:NsmoothMVAPdf=1" ) # Function discrimination analysis (FDA) -- test of various fitters - the 
recommended one is Minuit (or GA or SA) if "FDA_MC" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ) if "FDA_GA" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=100:Cycles=3:Steps=20:Trim=True:SaveBestGen=0" ) if "FDA_MT" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ) if "FDA_GAMT" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ) if "FDA_MCMT" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ) # TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if "MLP" in mlist: factory.BookMethod( TMVA.Types.kMLP, "MLP", "H:!V:!Normalise:NeuronType=tanh:NCycles=200:HiddenLayers=N+1,N:TestRate=5" ) # CF(Clermont-Ferrand)ANN if "CFMlpANN" in mlist: factory.BookMethod( TMVA.Types.kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=500:HiddenLayers=N+1,N" ) # n_cycles:#nodes:#nodes:... # Tmlp(Root)ANN if "TMlpANN" in mlist: factory.BookMethod( TMVA.Types.kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ) # n_cycles:#nodes:#nodes:.. 
# Support Vector Machines using three d ifferent Kernel types (Gauss, polynomial and linear) if "SVM_Gauss" in mlist: factory.BookMethod( TMVA.Types.kSVM, "SVM_Gauss", "Sigma=2:C=1:Tol=0.001:Kernel=Gauss" ) if "SVM_Poly" in mlist: factory.BookMethod( TMVA.Types.kSVM, "SVM_Poly", "Order=4:Theta=1:C=0.1:Tol=0.001:Kernel=Polynomial" ) if "SVM_Lin" in mlist: factory.BookMethod( TMVA.Types.kSVM, "SVM_Lin", "!H:!V:Kernel=Linear:C=1:Tol=0.001" ) # Boosted Decision Trees (second one with decorrelation) if "BDT" in mlist: factory.BookMethod( TMVA.Types.kBDT, "BDT", "!H:!V:NTrees=400:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=CostComplexity:PruneStrength=1.5" ) if "BDTD" in mlist: factory.BookMethod( TMVA.Types.kBDT, "BDTD", "!H:!V:NTrees=400:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=CostComplexity:PruneStrength=2.5:VarTransform=Decorrelate" ) # RuleFit -- TMVA implementation of Friedman's method if "RuleFit" in mlist: factory.BookMethod( TMVA.Types.kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ) # Friedman's RuleFit method, implementation by J. Friedman if "RuleFitJF" in mlist: factory.BookMethod( TMVA.Types.kRuleFit, "RuleFitJF", "!V:RuleFitModule=RFFriedman:Model=ModRuleLinear:GDStep=0.01:GDNSteps=10000:GDErrScale=1.1:RFNendnodes=4" ) # -------------------------------------------------------------------------------------------------- # ---- Now you can tell the factory to train, test, and evaluate the MVAs. # Train MVAs factory.TrainAllMethods() # Test MVAs factory.TestAllMethods() # Evaluate MVAs factory.EvaluateAllMethods() # Save the output. 
outputFile.Close() print "=== wrote root file %s\n" % outfname print "=== TMVAnalysis is done!\n" # open the GUI for the result macros gROOT.ProcessLine( "TMVAGui(\"%s\")" % outfname ) # keep the ROOT thread running gApplication.Run()
def main(): try: # retrive command line options shortopts = "m:i:n:t:o:vh?" longopts = [ "methods=", "inputfile=", "nTrees=", "inputtrees=", "outputfile=", "verbose", "help", "usage" ] opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts) except getopt.GetoptError: # print help information and exit: print "ERROR: unknown options in argument %s" % sys.argv[1:] usage() sys.exit(1) infname = DEFAULT_INFNAME treeNameSig = DEFAULT_TREESIG treeNameBkg = DEFAULT_TREEBKG outfname = DEFAULT_OUTFNAME methods = DEFAULT_METHODS nTrees = DEFAULT_NTREES verbose = False for o, a in opts: if o in ("-?", "-h", "--help", "--usage"): usage() sys.exit(0) elif o in ("-m", "--methods"): methods = a elif o in ("-i", "--inputfile"): infname = a elif o in ("-n", "--nTrees"): nTrees = a elif o in ("-o", "--outputfile"): outfname = a elif o in ("-t", "--inputtrees"): a.strip() trees = a.rsplit(' ') trees.sort() trees.reverse() if len(trees) - trees.count('') != 2: print "ERROR: need to give two trees (each one for signal and background)" print trees sys.exit(1) treeNameSig = trees[0] treeNameBkg = trees[1] elif o in ("-v", "--verbose"): verbose = True # Print methods mlist = methods.replace(' ', ',').split(',') print "=== TMVAClassification: use method(s)..." for m in mlist: if m.strip() != '': print "=== - <%s>" % m.strip() # Import ROOT classes from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut # check ROOT version, give alarm if 5.18 if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544: print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA" print "*** does not run properly (function calls with enums in the argument are ignored)." print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples)," print "*** or use another ROOT version (e.g., ROOT 5.19)." 
sys.exit(1) # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI gROOT.SetMacroPath("./") gROOT.Macro("./TMVAlogon.C") gROOT.LoadMacro("./TMVAGui.C") # Import TMVA classes from ROOT from ROOT import TMVA # Output file outputFile = TFile(outfname, 'RECREATE') # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options) # All TMVA output can be suppressed by removing the "!" (not) in # front of the "Silent" argument in the option string factory = TMVA.Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ) # Set verbosity factory.SetVerbose(verbose) # If you wish to modify default settings # (please check "src/Config.h" to see all available global options) # gConfig().GetVariablePlotting()).fTimesRMS = 8.0 # gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory" # Define the input variables that shall be used for the classifier training # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" # [all types of expressions that can also be parsed by TTree::Draw( "expression" )] varList = varsList.varList for iVar in varList: factory.AddVariable(iVar, 'F') #factory.AddVariable( "NBTags",'I' ) # You can add so-called "Spectator variables", which are not used in the MVA training, # but will appear in the final "TestTree" produced by TMVA. 
This TestTree will contain the # input variables, the response values of all trained MVAs, and the spectator variables print "here" massPoint = infname preFix = varsList.preFix infname = "ZPrime_%s_all_SYNC_%s_noIso%s" % (massPoint, varsList.fs, varsList.tail) iFileSig = TFile.Open(preFix + infname) sigChain = iFileSig.Get("eventTree_train") factory.AddSignalTree(sigChain) bkg_list = [] bkg_trees_list = [] hist_list = [] weightsList = [] for i in range(len(varsList.bkg)): bkg_list.append(TFile.Open(preFix + varsList.bkg[i][1])) print preFix + varsList.bkg[i][1] bkg_trees_list.append(bkg_list[i].Get("eventTree")) bkg_trees_list[i].GetEntry(0) if bkg_trees_list[i].GetEntries() == 0: continue factory.AddBackgroundTree(bkg_trees_list[i], 1) print "%s:\t\t%.2f" % (varsList.bkg[i][0], bkg_trees_list[i].GetEntries()) signalWeight = 1 #0.0159/sigChain.GetEntries() #xs (pb) # ====== register trees ==================================================== factory.SetSignalWeightExpression('weightWithPU') factory.SetBackgroundWeightExpression('weightWithPU') # To give different trees for training and testing, do as follows: # factory.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ) # factory.AddSignalTree( signalTestTree, signalTestWeight, "Test" ) # Use the following code instead of the above two or four lines to add signal and background # training and test events "by hand" # NOTE that in this case one should not give expressions (such as "var1+var2") in the input # variable definition, but simply compute the expression before adding the event # # # --- begin ---------------------------------------------------------- # # ... 
*** please lookup code in TMVA/macros/TMVAClassification.C *** # # # --- end ------------------------------------------------------------ # # ====== end of register trees ============================================== # Set individual event weights (the variables must exist in the original TTree) # for signal : factory.SetSignalWeightExpression ("weight1*weight2"); # for background: factory.SetBackgroundWeightExpression("weight1*weight2"); #factory.SetBackgroundWeightExpression( "weight" ) # Apply additional cuts on the signal and background sample. # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" ) mycutSig = TCut("") mycutBkg = TCut("") # Here, the relevant variables are copied over in new, slim trees that are # used for TMVA training and testing # "SplitMode=Random" means that the input events are randomly shuffled before # splitting them into training and test samples factory.PrepareTrainingAndTestTree( mycutSig, mycutBkg, "nTrain_Signal=0:nTrain_Background=0:nTest_Signal=1:nTest_Background=1:SplitMode=Block:NormMode=NumEvents:!V" ) # "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ) # -------------------------------------------------------------------------------------------------- # ---- Book MVA methods # # please lookup the various method configuration options in the corresponding cxx files, eg: # src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html # it is possible to preset ranges in the option string in which the cut optimisation should be done: # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable # Cut optimisation # Fisher discriminant (same as LD) # if "Fisher" in mlist: #factory.BookMethod( TMVA.Types.kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ) # factory.BookMethod( TMVA.Types.kFisher, "Fisher") bdtSetting = '!H:!V:NTrees=%s' % nTrees bdtSetting += 
':MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=100' if "BDT" in mlist: factory.BookMethod(TMVA.Types.kBDT, "BDT", bdtSetting) # -------------------------------------------------------------------------------------------------- # ---- Now you can tell the factory to train, test, and evaluate the MVAs. # Train MVAs factory.TrainAllMethods() # Test MVAs factory.TestAllMethods() # Evaluate MVAs factory.EvaluateAllMethods() # Save the output. outputFile.Close() # # print "=== wrote root file %s\n" % outfname # print "=== TMVAClassification is done!\n" # open the GUI for the result macros # gROOT.ProcessLine( "TMVAGui(\"%s\")" % outfname ) ChangeWeightName = 'mv %s/weights/TMVAClassification_BDT.weights.xml %s/weights/TMVAClassification_BDT.weights_both_%s.xml' % ( os.getcwd(), os.getcwd(), massPoint) os.system(ChangeWeightName)
def main(): try: # retrive command line options shortopts = "m:i:t:o:vh?" longopts = ["methods=", "inputfile=", "inputtrees=", "outputfile=", "verbose", "help", "usage"] opts, args = getopt.getopt( sys.argv[1:], shortopts, longopts ) except getopt.GetoptError: # print help information and exit: print "ERROR: unknown options in argument %s" % sys.argv[1:] usage() sys.exit(1) infname = DEFAULT_INFNAME treeNameSig = DEFAULT_TREESIG treeNameBkg = DEFAULT_TREEBKG outfname = DEFAULT_OUTFNAME methods = DEFAULT_METHODS verbose = False for o, a in opts: if o in ("-?", "-h", "--help", "--usage"): usage() sys.exit(0) elif o in ("-m", "--methods"): methods = a elif o in ("-i", "--inputfile"): infname = a elif o in ("-o", "--outputfile"): outfname = a elif o in ("-t", "--inputtrees"): a.strip() trees = a.rsplit( ' ' ) trees.sort() trees.reverse() if len(trees)-trees.count('') != 2: print "ERROR: need to give two trees (each one for signal and background)" print trees sys.exit(1) treeNameSig = trees[0] treeNameBkg = trees[1] elif o in ("-v", "--verbose"): verbose = True # Print methods mlist = methods.replace(' ',',').split(',') print "=== TMVAClassification: use method(s)..." for m in mlist: if m.strip() != '': print "=== - <%s>" % m.strip() # Import ROOT classes from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut # check ROOT version, give alarm if 5.18 if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544: print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA" print "*** does not run properly (function calls with enums in the argument are ignored)." print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples)," print "*** or use another ROOT version (e.g., ROOT 5.19)." 
sys.exit(1) # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI gROOT.SetMacroPath( "./" ) gROOT.Macro ( "./TMVAlogon.C" ) gROOT.LoadMacro ( "./TMVAGui.C" ) # Import TMVA classes from ROOT from ROOT import TMVA # Output file outputFile = TFile( outfname, 'RECREATE' ) # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options) # All TMVA output can be suppressed by removing the "!" (not) in # front of the "Silent" argument in the option string factory = TMVA.Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ) # Set verbosity factory.SetVerbose( verbose ) # If you wish to modify default settings # (please check "src/Config.h" to see all available global options) # gConfig().GetVariablePlotting()).fTimesRMS = 8.0 # gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory" # Define the input variables that shall be used for the classifier training # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" # [all types of expressions that can also be parsed by TTree::Draw( "expression" )] #varList = ['svMass', 'mJJ', 'met/fMass','pZ - pZV'] varList = ['svMass', 'fMass', 'dRTauTau', 'dRJJ', 'svPt', 'dRhh', 'met', 'mJJReg', 'metTau1DPhi', 'metTau2DPhi', 'metJ1DPhi', 'metJ2DPhi', 'metTauPairDPhi', 'metSvTauPairDPhi', 'metJetPairDPhi',]
def main(): # Default settings for command line arguments DEFAULT_OUTFNAME = "TMVAXi2.root" DEFAULT_INFNAME = "MC_Xic0_2015_filtered.root" DEFAULT_TREESIG = "DecayTree" DEFAULT_TREEBKG = "DecayTree" DEFAULT_METHODS = "Cuts,CutsD,CutsPCA,CutsGA,CutsSA,Likelihood,LikelihoodD,LikelihoodPCA,LikelihoodKDE,LikelihoodMIX,PDERS,PDERSD,PDERSPCA,PDEFoam,PDEFoamBoost,KNN,LD,Fisher,FisherG,BoostedFisher,HMatrix,FDA_GA,FDA_SA,FDA_MC,FDA_MT,FDA_GAMT,FDA_MCMT,MLP,MLPBFGS,MLPBNN,CFMlpANN,TMlpANN,SVM,BDT,BDTD,BDTG,BDTB,RuleFit" import argparse argparser = argparse.ArgumentParser() argparser.add_argument("-m", "--methods", default=repr(DEFAULT_METHODS.split(','))) argparser.add_argument("-o", "--outputfile", default=DEFAULT_OUTFNAME) argparser.add_argument('--variables') argparser.add_argument('-s', '--spectators', default='()') argparser.add_argument('--signalfile', default=DEFAULT_INFNAME) argparser.add_argument('--signaltree', default=DEFAULT_TREESIG) argparser.add_argument('--signalsel', default='') argparser.add_argument('--signalweight', default='') argparser.add_argument('--bkgfile', default=DEFAULT_INFNAME) argparser.add_argument('--bkgtree', default=DEFAULT_TREEBKG) argparser.add_argument('--bkgsel', default='') argparser.add_argument('--bkgweight', default='') argparser.add_argument('--factoryname', default="TMVAClassification") argparser.add_argument('-v', '--verbose', action='store_true', default=False) argparser.add_argument('--weightsdir', default='weights') argparser.add_argument('--datasetname', default='dataset') args = argparser.parse_args() weightsdir = args.weightsdir ROOT.TMVA.Config.Instance().GetIONames().fWeightFileDir = weightsdir # Print methods mlist = eval(args.methods) print "=== TMVAClassification: use method(s)..." 
for m in mlist: if m.strip() != '': print "=== - <%s>" % m.strip() # Import ROOT classes from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut # check ROOT version, give alarm if 5.18 if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544: print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA" print "*** does not run properly (function calls with enums in the argument are ignored)." print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples)," print "*** or use another ROOT version (e.g., ROOT 5.19)." sys.exit(1) # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI if os.path.exists('./TMVAlogon.C'): gROOT.Macro("./TMVAlogon.C") # Import TMVA classes from ROOT from ROOT import TMVA # Output file outputFile = TFile(args.outputfile, 'RECREATE') # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options) # All TMVA output can be suppressed by removing the "!" (not) in # front of the "Silent" argument in the option string factory = TMVA.Factory( args.factoryname, outputFile, "!V:!Silent:Color:!DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ) # Set verbosity factory.SetVerbose(args.verbose) # If you wish to modify default settings # (please check "src/Config.h" to see all available global options) # gConfig().GetVariablePlotting()).fTimesRMS = 8.0 # gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory" # Define the input variables that shall be used for the classifier training # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" # [all types of expressions that can also be parsed by TTree::Draw( "expression" )] # For ROOT v6 compatibility. 
root6 = not hasattr(factory, 'AddVariable') if root6: dataloader = ROOT.TMVA.DataLoader(args.datasetname) else: dataloader = factory for var in eval(args.variables): if not isinstance(var, (tuple, list)): var = (var, ) try: dataloader.AddVariable(*var) except: print 'Failed to call dataloader.AddVariable with args', var raise # dataloader.AddVariable( "myvar1 := var1+var2", 'F' ) # dataloader.AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' ) # dataloader.AddVariable( "var3", "Variable 3", "units", 'F' ) # dataloader.AddVariable( "var4", "Variable 4", "units", 'F' ) # You can add so-called "Spectator variables", which are not used in the MVA training, # but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the # input variables, the response values of all trained MVAs, and the spectator variables for var in eval(args.spectators): if not isinstance(var, (tuple, list)): var = (var, ) try: dataloader.AddSpectator(*var) except: print 'Failed to call dataloader.AddSpectator with args', var raise # dataloader.AddSpectator( "spec1:=var1*2", "Spectator 1", "units", 'F' ) # dataloader.AddSpectator( "spec2:=var1*3", "Spectator 2", "units", 'F' ) # Read input data # if gSystem.AccessPathName( infname ) != 0: gSystem.Exec( "wget http://root.cern.ch/files/" + infname ) # input = TFile.Open( infname ) # # Get the signal and background trees for training # signal = input.Get( treeNameSig ) # background = input.Get( treeNameBkg ) signalfile = TFile.Open(args.signalfile) if signalfile.IsZombie(): raise OSError("Couldn't find signal file " + repr(args.signalfile)) signal = signalfile.Get(args.signaltree) if not signal: raise ValueError("Couldn't find signal TTree " + repr(args.signaltree) + " in file " + repr(args.signalfile)) bkgfile = TFile.Open(args.bkgfile) if bkgfile.IsZombie(): raise OSError("Couldn't find bkg file " + repr(args.bkgfile)) background = bkgfile.Get(args.bkgtree) if not background: raise ValueError("Couldn't find bkg 
TTree " + repr(args.bkgtree) + " in file " + repr(args.bkgfile)) # Global event weights (see below for setting event-wise weights) signalWeight = 1.0 backgroundWeight = 1.0 # ====== register trees ==================================================== # # the following method is the prefered one: # you can add an arbitrary number of signal or background trees dataloader.AddSignalTree(signal, signalWeight) dataloader.AddBackgroundTree(background, backgroundWeight) # To give different trees for training and testing, do as follows: # dataloader.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ) # dataloader.AddSignalTree( signalTestTree, signalTestWeight, "Test" ) # Use the following code instead of the above two or four lines to add signal and background # training and test events "by hand" # NOTE that in this case one should not give expressions (such as "var1+var2") in the input # variable definition, but simply compute the expression before adding the event # # # --- begin ---------------------------------------------------------- # # ... *** please lookup code in TMVA/macros/TMVAClassification.C *** # # # --- end ------------------------------------------------------------ # # ====== end of register trees ============================================== # Set individual event weights (the variables must exist in the original TTree) # for signal : dataloader.SetSignalWeightExpression ("weight1*weight2"); # for background: dataloader.SetBackgroundWeightExpression("weight1*weight2"); if args.signalweight: dataloader.SetSignalWeightExpression(args.signalweight) if args.bkgweight: dataloader.SetBackgroundWeightExpression(args.bkgweight) # Apply additional cuts on the signal and background sample. 
# example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" ) mycutSig = TCut(args.signalsel) mycutBkg = TCut(args.bkgsel) # Here, the relevant variables are copied over in new, slim trees that are # used for TMVA training and testing # "SplitMode=Random" means that the input events are randomly shuffled before # splitting them into training and test samples dataloader.PrepareTrainingAndTestTree( mycutSig, mycutBkg, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ) # -------------------------------------------------------------------------------------------------- # ---- Book MVA methods # # please lookup the various method configuration options in the corresponding cxx files, eg: # src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html # it is possible to preset ranges in the option string in which the cut optimisation should be done: # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable # Cut optimisation if root6: # Bit of an ugly hack, but does the job. factory._BookMethod = factory.BookMethod # Don't know why 'self' isn't passed here? 
def BookMethod(*args): factory._BookMethod(dataloader, *args) factory.BookMethod = BookMethod if "Cuts" in mlist: factory.BookMethod( TMVA.Types.kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart") if "CutsD" in mlist: factory.BookMethod( TMVA.Types.kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ) if "CutsPCA" in mlist: factory.BookMethod( TMVA.Types.kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ) if "CutsGA" in mlist: factory.BookMethod( TMVA.Types.kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ) if "CutsSA" in mlist: factory.BookMethod( TMVA.Types.kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ) # Likelihood ("naive Bayes estimator") if "Likelihood" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "Likelihood", "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ) # Decorrelated likelihood if "LikelihoodD" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ) # PCA-transformed likelihood if "LikelihoodPCA" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ) # Use a kernel density estimator to approximate the PDFs if "LikelihoodKDE" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ) # Use a variable-dependent mix of splines and kernel 
density estimator if "LikelihoodMIX" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ) # Test the multi-dimensional probability density estimator # here are the options strings for the MinMax and RMS methods, respectively: # "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); # "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if "PDERS" in mlist: factory.BookMethod( TMVA.Types.kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ) if "PDERSD" in mlist: factory.BookMethod( TMVA.Types.kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ) if "PDERSPCA" in mlist: factory.BookMethod( TMVA.Types.kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ) # Multi-dimensional likelihood estimator using self-adapting phase-space binning if "PDEFoam" in mlist: factory.BookMethod( TMVA.Types.kPDEFoam, "PDEFoam", "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ) if "PDEFoamBoost" in mlist: factory.BookMethod( TMVA.Types.kPDEFoam, "PDEFoamBoost", "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" ) # K-Nearest Neighbour classifier (KNN) if "KNN" in mlist: factory.BookMethod( TMVA.Types.kKNN, "KNN", 
"H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ) # H-Matrix (chi2-squared) method if "HMatrix" in mlist: factory.BookMethod(TMVA.Types.kHMatrix, "HMatrix", "!H:!V") # Linear discriminant (same as Fisher discriminant) if "LD" in mlist: factory.BookMethod( TMVA.Types.kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ) # Fisher discriminant (same as LD) if "Fisher" in mlist: factory.BookMethod( TMVA.Types.kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ) # Fisher with Gauss-transformed input variables if "FisherG" in mlist: factory.BookMethod(TMVA.Types.kFisher, "FisherG", "H:!V:VarTransform=Gauss") # Composite classifier: ensemble (tree) of boosted Fisher classifiers if "BoostedFisher" in mlist: factory.BookMethod( TMVA.Types.kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" ) # Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if "FDA_MC" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ) if "FDA_GA" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ) if "FDA_SA" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ) if "FDA_MT" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_MT", 
"H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ) if "FDA_GAMT" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ) if "FDA_MCMT" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ) # TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if "MLP" in mlist: factory.BookMethod( TMVA.Types.kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ) if "MLPBFGS" in mlist: factory.BookMethod( TMVA.Types.kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ) if "MLPBNN" in mlist: factory.BookMethod( TMVA.Types.kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ) # BFGS training with bayesian regulators # CF(Clermont-Ferrand)ANN if "CFMlpANN" in mlist: factory.BookMethod(TMVA.Types.kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ) # n_cycles:#nodes:#nodes:... # Tmlp(Root)ANN if "TMlpANN" in mlist: factory.BookMethod( TMVA.Types.kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ) # n_cycles:#nodes:#nodes:... 
# Support Vector Machine if "SVM" in mlist: factory.BookMethod(TMVA.Types.kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm") # Boosted Decision Trees if "BDTG" in mlist: factory.BookMethod( TMVA.Types.kBDT, "BDTG", "!H:!V:NTrees=1000:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:MaxDepth=2" ) if "BDT" in mlist: factory.BookMethod( TMVA.Types.kBDT, "BDT", "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" ) if "BDTB" in mlist: factory.BookMethod( TMVA.Types.kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" ) if "BDTD" in mlist: factory.BookMethod( TMVA.Types.kBDT, "BDTD", "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" ) # RuleFit -- TMVA implementation of Friedman's method if "RuleFit" in mlist: factory.BookMethod( TMVA.Types.kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ) # -------------------------------------------------------------------------------------------------- # ---- Now you can tell the factory to train, test, and evaluate the MVAs. # Train MVAs factory.TrainAllMethods() # Test MVAs factory.TestAllMethods() # Evaluate MVAs factory.EvaluateAllMethods() # Save the output. 
outputFile.Close() print "=== wrote root file %s\n" % outputFile.GetName() print "=== TMVAClassification is done!\n" # open the GUI for the result macros if not ROOT.gROOT.IsBatch(): if hasattr(TMVA, 'TMVAGui'): TMVA.TMVAGui(outputFile.GetName()) raw_input('Hit enter to quit.') elif 'ROOTSYS' in os.environ: tmvaguipath = os.path.join(os.environ['ROOTSYS'], 'tutorials', 'tmva') if os.path.exists(os.path.join(tmvaguipath, 'TMVAGui.C')): gROOT.SetMacroPath(tmvaguipath) gROOT.LoadMacro("TMVAGui.C") try: gROOT.ProcessLine("TMVAGui(\"%s\")" % outputFile.GetName()) raw_input('Hit enter to quit.') except RuntimeError: print "Couldn't run TMVAGui!" outputfilename = outputFile.GetName() weightsfiles = dict( (m, os.path.join(weightsdir, args.factoryname + '_' + m + '.weights.xml')) for m in mlist) classfiles = dict( (m, os.path.join(weightsdir, args.factoryname + '_' + m + '.class.C')) for m in mlist) # keep the ROOT thread running (this makes the function hang). #gApplication.Run() # TMVA disables unused branches when copying the trees then doesn't change them back. background.SetBranchStatus('*', 1) signal.SetBranchStatus('*', 1) if 'signalfile' in locals(): signalfile.Close() if 'bkgfile' in locals(): bkgfile.Close() return locals()
def TMVAClassification(methods, sigfname, bkgfname, optname, channel, trees, verbose=False): #="DecayTree,DecayTree" # Print methods mlist = methods.replace(' ', ',').split(',') print "=== TMVAClassification: use method(s)..." for m in mlist: if m.strip() != '': print "=== - <%s>" % m.strip() # Define trees trees = trees.split(",") if len(trees) - trees.count('') != 2: print "ERROR: need to give two trees (each one for signal and background)" print trees sys.exit(1) treeNameSig = trees[0] treeNameBkg = trees[1] # Print output file and directory outfname = "TMVA_%s_%s.root" % (channel, optname) myWeightDirectory = "weights_%s_%s" % (channel, optname) print "=== TMVAClassification: output will be written to:" print "=== %s" % outfname print "=== %s" % myWeightDirectory # Import ROOT classes from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut # check ROOT version, give alarm if 5.18 if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544: print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA" print "*** does not run properly (function calls with enums in the argument are ignored)." print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples)," print "*** or use another ROOT version (e.g., ROOT 5.19)." sys.exit(1) # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI #gROOT.SetMacroPath( "./" ) #gROOT.Macro ( "./tmva/test/TMVAlogon.C" ) #gROOT.LoadMacro ( "./tmva/test/TMVAGui.C" ) ###Is this really necessary?? # Import TMVA classes from ROOT from ROOT import TMVA # Setup TMVA TMVA.Tools.Instance() # Output file outputFile = TFile(outfname, 'RECREATE') # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options) # All TMVA output can be suppressed by removing the "!" 
(not) in # front of the "Silent" argument in the option string factory = TMVA.Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I:AnalysisType=Classification" ) # Set verbosity factory.SetVerbose(verbose) # Load data dataloader = TMVA.DataLoader("dataset") # If you wish to modify default settings # (please check "src/Config.h" to see all available global options) # gConfig().GetVariablePlotting()).fTimesRMS = 8.0 (TMVA.gConfig().GetIONames()).fWeightFileDir = myWeightDirectory # Define the input variables that shall be used for the classifier training # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" # [all types of expressions that can also be parsed by TTree::Draw( "expression" )] print "*** Training on channel:" print "*** %s" % channel print "***" ''' if channel == "1": #dataloader.AddVariable( "pplus_ProbNNp", "Prob(p^{+})", "", 'F' ); #dataloader.AddVariable( "Kminus_ProbNNk", "Prob(K^{-})", "", 'F' ); dataloader.AddVariable( "pplus_PT", "P_{T}(p^{+})", "MeV", 'F' ); dataloader.AddVariable( "Kminus_PT", "P_{T}(K^{-})", "MeV", 'F' ); dataloader.AddVariable( "gamma_PT", "P_{T}(#gamma)", "MeV", 'F' ); dataloader.AddVariable( "Lambda_1520_0_PT", "P_{T}(#Lambda(1520))", "MeV", 'F' ); dataloader.AddVariable( "B_PT", "P_{T}(#Lambda_{b})", "MeV", 'F' ); dataloader.AddVariable( "beta:=(-gamma_P+Kminus_P+pplus_P)/(gamma_P+Kminus_P+pplus_P)","#beta", "MeV", 'F' ); dataloader.AddVariable( "MomCons1:=-B_P+gamma_P+Lambda_1520_0_P","P_{tot,1}", "MeV", 'F' ); dataloader.AddVariable( "MomCons2:=-Lambda_1520_0_P+Kminus_P+pplus_P","P_{tot,2}", "MeV", 'F' ); dataloader.AddVariable( "Sum_Kminus_p_eta:=atanh(pplus_PZ/pplus_P)+atanh(Kminus_PZ/Kminus_P)","#eta(K^{-})+#eta(p^{+})","MeV", 'F' ); dataloader.AddVariable( "Diff_Kminus_p_eta:=atanh(Kminus_PZ/Kminus_P)-atanh(pplus_PZ/pplus_P)","#eta(K^{-})-#eta(p^{+})","MeV", 'F' ); dataloader.AddVariable( "pplus_IPCHI2_OWNPV", "#chi^{2}_{IP}(p^{+})", "" , 'F' ); 
dataloader.AddVariable( "Kminus_IPCHI2_OWNPV", "#chi^{2}_{IP}(K^{-})", "" , 'F' ); dataloader.AddVariable( "B_IPCHI2_OWNPV", "#chi^{2}_{IP}(#Lambda_{b})", "" , 'F' ); #dataloader.AddVariable( "gamma_IPCHI2_OWNPV", "IP #chi^{2}(#gamma)", "" , 'F' ); #dataloader.AddVariable( "Lambda_1520_0_IP_OWNPV", "IP(#Lambda(1520))", "mm", 'F' ); #dataloader.AddVariable( "Lambda_1520_0_IPCHI2_OWNPV", "IP#chi^{2}(#Lambda(1520))", "", 'F' ); dataloader.AddVariable( "Lambda_1520_0_OWNPV_CHI2", "#chi^{2}_{vertex}(#Lambda(1520))", "" , 'F' ); dataloader.AddVariable( "B_OWNPV_CHI2", "#chi^{2}_{vertex}(#Lambda_{b})", "" , 'F' ); dataloader.AddVariable( "B_DIRA_OWNPV", "DIRA(#Lambda_{b})", "" , 'F' ); #dataloader.AddVariable( "Lambda_1520_0_FDCHI2_OWNPV", "FD #chi^{2}(#Lambda(1520))", "", 'F' ); dataloader.AddVariable( "B_FDCHI2_OWNPV", "#chi^{2}_{FD}(#Lambda_{b})", "", 'F' ); ''' if channel == "2": dataloader.AddVariable("pplus_PT", "P_{T}(p^{+})", "MeV", 'F') dataloader.AddVariable("Kminus_PT", "P_{T}(K^{-})", "MeV", 'F') dataloader.AddVariable("gamma_PT", "P_{T}(#gamma)", "MeV", 'F') dataloader.AddVariable("Lambda_1520_0_PT", "P_{T}(#Lambda*)", "MeV", 'F') dataloader.AddVariable("B_PT", "P_{T}(#Lambda_{b})", "MeV", 'F') dataloader.AddVariable( "beta:=(-gamma_P+Kminus_P+pplus_P)/(gamma_P+Kminus_P+pplus_P)", "#beta", "", 'F') #ok #dataloader.AddVariable( "MomCons1:=-B_P+gamma_P+Lambda_1520_0_P","P_{tot,1}", "MeV", 'F' );#BDT learned Mass check1 dataloader.AddVariable("MomCons2:=-Lambda_1520_0_P+Kminus_P+pplus_P", "P_{tot,2}", "MeV", 'F') #ok #dataloader.AddVariable( "Sum_Kminus_p_eta:=atanh(pplus_PZ/pplus_P)+atanh(Kminus_PZ/Kminus_P)","#eta(K^{-})+#eta(p^{+})","", 'F' );#99correlationL_eta dataloader.AddVariable( "Diff_Kminus_p_eta:=atanh(Kminus_PZ/Kminus_P)-atanh(pplus_PZ/pplus_P)", "#eta(K^{-})-#eta(p^{+})", "", 'F') dataloader.AddVariable( "Lambda_1520_0_eta:=atanh(Lambda_1520_0_PZ/Lambda_1520_0_P)", "#eta(#Lambda*)", "", 'F') 
dataloader.AddVariable("gamma_eta:=atanh(gamma_PZ/gamma_P)", "#eta(#gamma)", "", 'F') dataloader.AddVariable("pplus_IPCHI2_OWNPV", "#chi^{2}_{IP}(p^{+})", "", 'F') #dataloader.AddVariable( "Kminus_IPCHI2_OWNPV", "#chi^{2}_{IP}(K^{-})", "" , 'F' ); dataloader.AddVariable("B_IPCHI2_OWNPV", "#chi^{2}_{IP}(#Lambda_{b})", "", 'F') dataloader.AddVariable("Lambda_1520_0_IPCHI2_OWNPV", "#chi^{2}_{IP}(#Lambda*)", "", 'F') dataloader.AddVariable("Lambda_1520_0_OWNPV_CHI2", "#chi^{2}_{vertex}(#Lambda*)", "", 'F') dataloader.AddVariable("B_OWNPV_CHI2", "#chi^{2}_{vertex}(#Lambda_{b})", "", 'F') #dataloader.AddVariable( "B_BMassFit_chi2/B_BMassFit_nDOF", "#chi^{2}_{DTF}/n_{dof}", "" , 'F' );#BDT learned Mass check1 #dataloader.AddVariable( "B_PVFit_chi2/B_PVFit_nDOF", "#chi^{2}_{DTF}/n_{dof}", "" , 'F' );#put it out because array #dataloader.AddVariable( "B_DIRA_OWNPV", "DIRA(#Lambda_{b})", "" , 'F' ); #not used by BDT #dataloader.AddVariable( "Lambda_1520_0_DIRA_OWNPV", "DIRA(#Lambda*)", "" , 'F' ); #not used #dataloader.AddVariable( "Lambda_1520_0_FDCHI2_OWNPV", "FD #chi^{2}(#Lambda*)", "", 'F' ); #not used #dataloader.AddVariable( "B_FDCHI2_OWNPV", "#chi^{2}_{FD}(#Lambda_{b})", "", 'F' ); #not used # Add Spectator Variables: not used for Training but written in final TestTree #dataloader.AddSpectator( "B_M", "M(#Lambda_{b})", "MeV"); #dataloader.AddSpectator( "Lambda_1520_0_M", "M(#Lambda*)", "MeV"); # Read input data if gSystem.AccessPathName(sigfname) != 0: print "Can not find %s" % sigfname if gSystem.AccessPathName(bkgfname) != 0: print "Can not find %s" % bkgfname inputSig = TFile.Open(sigfname) inputBkg = TFile.Open(bkgfname) # Get the signal and background trees for training signal = inputSig.Get(treeNameSig) background = inputBkg.Get(treeNameBkg) # Global event weights (see below for setting event-wise weights) signalWeight = 1.0 backgroundWeight = 1.0 # ====== register trees ==================================================== # # the following method is the 
prefered one: # you can add an arbitrary number of signal or background trees dataloader.AddSignalTree(signal, signalWeight) dataloader.AddBackgroundTree(background, backgroundWeight) # To give different trees for training and testing, do as follows: # dataloader.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ) # dataloader.AddSignalTree( signalTestTree, signalTestWeight, "Test" ) # Use the following code instead of the above two or four lines to add signal and background # training and test events "by hand" # NOTE that in this case one should not give expressions (such as "var1+var2") in the input # variable definition, but simply compute the expression before adding the event # # # --- begin ---------------------------------------------------------- # # ... *** please lookup code in TMVA/macros/TMVAClassification.C *** # # # --- end ------------------------------------------------------------ # # ====== end of register trees ============================================== # Set individual event weights (the variables must exist in the original TTree) # for signal : dataloader.SetSignalWeightExpression ("weight1*weight2"); # for background: dataloader.SetBackgroundWeightExpression("weight1*weight2"); #dataloader.SetBackgroundWeightExpression( "weight" ) # Apply additional cuts on the signal and background sample. 
# example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" ) mycutSig = TCut( "" ) #"pplus_ProbNNp>0.2 && Kminus_ProbNNk>0.2 && B_PT>4000 && Lambda_1520_0_PT>1500 && gamma_PT>3000 && pplus_PT>1000 && B_FDCHI2_OWNPV>100 && pplus_IPCHI2_OWNPV>50 && Kminus_IPCHI2_OWNPV>40")# B_BKGCAT==0 directly applied in root sample #print(sigfname + str( mycutSig ) + treeNameSig) mycutBkg = TCut( "B_M>6120" ) #"pplus_ProbNNp>0.2 && Kminus_ProbNNk>0.2 && B_PT>4000 && Lambda_1520_0_PT>1500 && gamma_PT>3000 && pplus_PT>1000 && B_FDCHI2_OWNPV>100 && pplus_IPCHI2_OWNPV>50 && Kminus_IPCHI2_OWNPV>40 && B_M>6120")#(B_M>6120 || B_M<5120)" ) #print(bkgfname + str( mycutBkg ) + treeNameBkg) # Here, the relevant variables are copied over in new, slim trees that are # used for TMVA training and testing # "SplitMode=Random" means that the input events are randomly shuffled before # splitting them into training and test samples dataloader.PrepareTrainingAndTestTree( mycutSig, mycutBkg, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ) # -------------------------------------------------------------------------------------------------- # ---- Book MVA methods # # please lookup the various method configuration options in the corresponding cxx files, eg: # src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html # it is possible to preset ranges in the option string in which the cut optimisation should be done: # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable # Cut optimisation if "Cuts" in mlist: factory.BookMethod( dataloader, TMVA.Types.kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart") if "CutsD" in mlist: factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ) if "CutsPCA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsPCA", 
"!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ) if "CutsGA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ) if "CutsSA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ) # Likelihood ("naive Bayes estimator") if "Likelihood" in mlist: factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "Likelihood", "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ) # Decorrelated likelihood if "LikelihoodD" in mlist: factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ) # PCA-transformed likelihood if "LikelihoodPCA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ) # Use a kernel density estimator to approximate the PDFs if "LikelihoodKDE" in mlist: factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ) # Use a variable-dependent mix of splines and kernel density estimator if "LikelihoodMIX" in mlist: factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodMIX", 
"!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ) # Test the multi-dimensional probability density estimator # here are the options strings for the MinMax and RMS methods, respectively: # "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); # "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if "PDERS" in mlist: factory.BookMethod( dataloader, TMVA.Types.kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ) if "PDERSD" in mlist: factory.BookMethod( dataloader, TMVA.Types.kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ) if "PDERSPCA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ) # Multi-dimensional likelihood estimator using self-adapting phase-space binning if "PDEFoam" in mlist: factory.BookMethod( dataloader, TMVA.Types.kPDEFoam, "PDEFoam", "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ) if "PDEFoamBoost" in mlist: factory.BookMethod( dataloader, TMVA.Types.kPDEFoam, "PDEFoamBoost", "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" ) # K-Nearest Neighbour classifier (KNN) if "KNN" in mlist: factory.BookMethod( dataloader, TMVA.Types.kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ) # 
H-Matrix (chi2-squared) method if "HMatrix" in mlist: factory.BookMethod(dataloader, TMVA.Types.kHMatrix, "HMatrix", "!H:!V") # Linear discriminant (same as Fisher discriminant) if "LD" in mlist: factory.BookMethod( dataloader, TMVA.Types.kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ) # Fisher discriminant (same as LD) if "Fisher" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ) # Fisher with Gauss-transformed input variables if "FisherG" in mlist: factory.BookMethod(dataloader, TMVA.Types.kFisher, "FisherG", "H:!V:VarTransform=Gauss") # Composite classifier: ensemble (tree) of boosted Fisher classifiers if "BoostedFisher" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" ) # Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if "FDA_MC" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ) if "FDA_GA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ) if "FDA_SA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ) if "FDA_MT" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_MT", 
"H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ) if "FDA_GAMT" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ) if "FDA_MCMT" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ) # TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if "MLP" in mlist: factory.BookMethod( dataloader, TMVA.Types.kMLP, "MLP", #"!H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+3:TestRate=5:!UseRegulator" )#Try "!H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ) #Old if "MLPBFGS" in mlist: factory.BookMethod( dataloader, TMVA.Types.kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ) if "MLPBNN" in mlist: factory.BookMethod( dataloader, TMVA.Types.kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ) # BFGS training with bayesian regulators # CF(Clermont-Ferrand)ANN if "CFMlpANN" in mlist: factory.BookMethod(dataloader, TMVA.Types.kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ) # n_cycles:#nodes:#nodes:... # Tmlp(Root)ANN if "TMlpANN" in mlist: factory.BookMethod( dataloader, TMVA.Types.kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ) # n_cycles:#nodes:#nodes:... 
# Support Vector Machine if "SVM" in mlist: factory.BookMethod(dataloader, TMVA.Types.kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm") # Boosted Decision Trees if "BDTG" in mlist: factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDTG", "!H:!V:NTrees=600:BoostType=Grad:Shrinkage=0.1:UseBaggedGrad:GradBaggingFraction=0.73:SeparationType=GiniIndex:nCuts=15:MaxDepth=2" ) #Settings3 #"!H:!V:NTrees=300:BoostType=Grad:Shrinkage=0.11:UseBaggedGrad:GradBaggingFraction=0.73:SeparationType=GiniIndex:nCuts=17:MaxDepth=4" )#AnaNote #"!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5" )#Old if "BDT" in mlist: factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDT", "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ) if "BDTB" in mlist: factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ) if "BDTD" in mlist: factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDTD", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ) # RuleFit -- TMVA implementation of Friedman's method if "RuleFit" in mlist: factory.BookMethod( dataloader, TMVA.Types.kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ) # -------------------------------------------------------------------------------------------------- # ---- Now you can tell the factory to train, test, and evaluate the MVAs. # Train MVAs print("FLAG 0") factory.TrainAllMethods() # Test MVAs factory.TestAllMethods() # Evaluate MVAs factory.EvaluateAllMethods() # Save the output. 
outputFile.Close() print "=== wrote root file %s\n" % outfname print "=== TMVAClassification is done!\n" # open the GUI for the result macros if not gROOT.IsBatch(): TMVA.TMVAGui(outfname)
def main(): try: # Retrive command line options shortopts = "m:i:t:o:vh?" longopts = [ "methods=", "inputfile=", "inputtrees=", "outputfile=", "verbose", "help", "usage" ] opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts) except getopt.GetoptError: # Print help information and exit: print "ERROR: unknown options in argument %s" % sys.argv[1:] usage() sys.exit(1) infname = DEFAULT_INFNAME treeNameSig = DEFAULT_TREESIG treeNameBkg = DEFAULT_TREEBKG outfname = DEFAULT_OUTFNAME methods = DEFAULT_METHODS verbose = False for o, a in opts: if o in ("-?", "-h", "--help", "--usage"): usage() sys.exit(0) elif o in ("-m", "--methods"): methods = a elif o in ("-i", "--inputfile"): infname = a elif o in ("-o", "--outputfile"): outfname = a elif o in ("-t", "--inputtrees"): a.strip() trees = a.rsplit(' ') trees.sort() trees.reverse() if len(trees) - trees.count('') != 2: print "ERROR: need to give two trees (each one for signal and background)" print trees sys.exit(1) treeNameSig = trees[0] treeNameBkg = trees[1] elif o in ("-v", "--verbose"): verbose = True # Print methods mlist = methods.replace(' ', ',').split(',') print "=== TMVApplication: use method(s)..." for m in mlist: if m.strip() != '': print "=== - <%s>" % m.strip() # Import ROOT classes from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut, TH1F, TStopwatch # check ROOT version, give alarm if 5.18 if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544: print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA" print "*** does not run properly (function calls with enums in the argument are ignored)." print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples)," print "*** or use another ROOT version (e.g., ROOT 5.19)." 
sys.exit(1) # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI gROOT.SetMacroPath("../macros/") gROOT.Macro("../macros/TMVAlogon.C") # Import TMVA classes from ROOT from ROOT import TMVA # Create the Reader object reader = TMVA.Reader("!Color") # Create a set of variables and declare them to the reader # - the variable names must corresponds in name and type to # those given in the weight file(s) that you use # what to do ??? var1 = array('f', [0]) var2 = array('f', [0]) var3 = array('f', [0]) var4 = array('f', [0]) reader.AddVariable("var1+var2", var1) reader.AddVariable("var1-var2", var2) reader.AddVariable("var3", var3) reader.AddVariable("var4", var4) # book the MVA methods dir = "weights/" prefix = "TMVAnalysis_" for m in mlist: reader.BookMVA(m + " method", dir + prefix + m + ".weights.txt") ####################################################################### # For an example how to apply your own plugin method, please see # TMVA/macros/TMVApplication.C ####################################################################### # Book output histograms nbin = 80 histList = [] for m in mlist: histList.append(TH1F(m, m, nbin, -3, 3)) # Book example histogram for probability (the other methods would be done similarly) if "Fisher" in mlist: probHistFi = TH1F("PROBA_MVA_Fisher", "PROBA_MVA_Fisher", nbin, 0, 1) rarityHistFi = TH1F("RARITY_MVA_Fisher", "RARITY_MVA_Fisher", nbin, 0, 1) # Prepare input tree (this must be replaced by your data source) # in this example, there is a toy tree with signal and one with background events # we'll later on use only the "signal" events for the test in this example. 
# fname = "./tmva_example.root" print "--- Accessing data file: %s" % fname input = TFile.Open(fname) if not input: print "ERROR: could not open data file: %s" % fname sys.exit(1) # # Prepare the analysis tree # - here the variable names have to corresponds to your tree # - you can use the same variables as above which is slightly faster, # but of course you can use different ones and copy the values inside the event loop # print "--- Select signal sample" theTree = input.Get("TreeS") userVar1 = array('f', [0]) userVar2 = array('f', [0]) theTree.SetBranchAddress("var1", userVar1) theTree.SetBranchAddress("var2", userVar2) theTree.SetBranchAddress("var3", var3) theTree.SetBranchAddress("var4", var4) # Efficiency calculator for cut method nSelCuts = 0 effS = 0.7 # Process the events print "--- Processing: %i events" % theTree.GetEntries() sw = TStopwatch() sw.Start() for ievt in range(theTree.GetEntries()): if ievt % 1000 == 0: print "--- ... Processing event: %i" % ievt # Fill event in memory theTree.GetEntry(ievt) # Compute MVA input variables var1[0] = userVar1[0] + userVar2[0] var2[0] = userVar1[0] - userVar2[0] # Return the MVAs and fill to histograms if "CutsGA" in mlist: passed = reader.EvaluateMVA("CutsGA method", effS) if passed: nSelCuts = nSelCuts + 1 # Fill histograms with MVA outputs for h in histList: h.Fill(reader.EvaluateMVA(h.GetName() + " method")) # Retrieve probability instead of MVA output if "Fisher" in mlist: probHistFi.Fill(reader.GetProba("Fisher method")) rarityHistFi.Fill(reader.GetRarity("Fisher method")) # Get elapsed time sw.Stop() print "--- End of event loop: %s" % sw.Print() # Return computed efficeincies if "CutsGA" in mlist: eff = float(nSelCuts) / theTree.GetEntries() deff = math.sqrt(eff * (1.0 - eff) / theTree.GetEntries()) print "--- Signal efficiency for Cuts method : %.5g +- %.5g (required was: %.5g)" % ( eff, deff, effS) # Test: retrieve cuts for particular signal efficiency mcuts = reader.FindMVA("CutsGA method") cutsMin = 
array('d', [0, 0, 0, 0]) cutsMax = array('d', [0, 0, 0, 0]) mcuts.GetCuts(0.7, cutsMin, cutsMax) print "--- -------------------------------------------------------------" print "--- Retrieve cut values for signal efficiency of 0.7 from Reader" for ivar in range(4): print "... Cut: %.5g < %s <= %.5g" % ( cutsMin[ivar], reader.GetVarName(ivar), cutsMax[ivar]) print "--- -------------------------------------------------------------" # # write histograms # target = TFile("TMVApp.root", "RECREATE") for h in histList: h.Write() # Write also probability hists if "Fisher" in mlist: probHistFi.Write() rarityHistFi.Write() target.Close() print "--- Created root file: \"TMVApp.root\" containing the MVA output histograms" print "==> TMVApplication is done!"
def main(): try: # retrive command line options shortopts = "m:i:t:o:vh?" longopts = [ "methods=", "inputfile=", "inputtrees=", "outputfile=", "verbose", "help", "usage" ] opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts) except getopt.GetoptError: # print help information and exit: print "ERROR: unknown options in argument %s" % sys.argv[1:] usage() sys.exit(1) infname = DEFAULT_INFNAME treeNameSig = DEFAULT_TREESIG treeNameBkg = DEFAULT_TREEBKG outfname = DEFAULT_OUTFNAME methods = DEFAULT_METHODS verbose = False for o, a in opts: if o in ("-?", "-h", "--help", "--usage"): usage() sys.exit(0) elif o in ("-m", "--methods"): methods = a elif o in ("-i", "--inputfile"): infname = a elif o in ("-o", "--outputfile"): outfname = a elif o in ("-t", "--inputtrees"): a.strip() trees = a.rsplit(' ') trees.sort() trees.reverse() if len(trees) - trees.count('') != 2: print "ERROR: need to give two trees (each one for signal and background)" print trees sys.exit(1) treeNameSig = trees[0] treeNameBkg = trees[1] elif o in ("-v", "--verbose"): verbose = True # Print methods mlist = methods.replace(' ', ',').split(',') print "=== TMVAClassification: use method(s)..." for m in mlist: if m.strip() != '': print "=== - <%s>" % m.strip() # Import ROOT classes from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut # check ROOT version, give alarm if 5.18 if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544: print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA" print "*** does not run properly (function calls with enums in the argument are ignored)." print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples)," print "*** or use another ROOT version (e.g., ROOT 5.19)." 
sys.exit(1) # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI gROOT.SetMacroPath("./") gROOT.Macro("./TMVAlogon.C") gROOT.LoadMacro("./TMVAGui.C") # Import TMVA classes from ROOT from ROOT import TMVA # Output file outputFile = TFile(outfname, 'RECREATE') # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options) # All TMVA output can be suppressed by removing the "!" (not) in # front of the "Silent" argument in the option string # factory = TMVA.Factory( "TMVAClassification", outputFile, # "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ) jobname = DEFAULT_OUTFNAME factory = TMVA.Factory( jobname.replace(".root", ""), outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I:AnalysisType=Classification" ) # pascal # Set verbosity factory.SetVerbose(verbose) # Adjust variables if old sample is used if IsOLD: SPECTATORS.remove("JisPU") SPECTATORS.remove("JisHS") # Define the input variables that shall be used for the classifier training # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" theCat1Vars = "" theCat2Vars = "" theCat3Vars = "" for var in VARIABLES: factory.AddVariable(var, 'F') theCat1Vars += var + ":" theCat2Vars += var + ":" theCat3Vars += var + ":" theCat1Vars = theCat1Vars.rstrip(":") theCat2Vars = theCat2Vars.rstrip(":") theCat3Vars = theCat3Vars.rstrip(":") # You can add so-called "Spectator variables", which are not used in the MVA training, for spect in SPECTATORS: factory.AddSpectator(spect, spect) # Apply additional cuts on the signal and background sample. 
mycutSig = "" mycutBkg = TCut(SELECTION + "&&JisPU") if doJTruthMatchPt10Cut: mycutSig = TCut(SELECTION + "&&JisHS && Jtruthpt>10") else: mycutSig = TCut(SELECTION + "&&JisHS") cat1cuts = TCut("Jpt >20 && Jpt <30") cat2cuts = TCut("Jpt >30 && Jpt <40") cat3cuts = TCut("Jpt >40 && Jpt <50") # open file input = TFile.Open(infname) # Get the signal and background trees for training signal = input.Get(treeNameSig) background = input.Get(treeNameBkg) # Global event weights (see below for setting event-wise weights) signalWeight = 1.0 backgroundWeight = 1.0 # ====== register trees ==================================================== factory.AddSignalTree(signal, signalWeight) factory.AddBackgroundTree(background, backgroundWeight) # To give different trees for training and testing, do as follows: # factory.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ) # factory.AddSignalTree( signalTestTree, signalTestWeight, "Test" ) # Set individual event weights (the variables must exist in the original TTree) # for signal : factory.SetSignalWeightExpression ("weight1*weight2"); # for background: factory.SetBackgroundWeightExpression("weight1*weight2"); # Here, the relevant variables are copied over in new, slim trees that are # used for TMVA training and testing # "SplitMode=Random" means that the input events are randomly shuffled before # splitting them into training and test samples TrainingAndTestTreeStr= "nTrain_Signal="+str(DEFAULT_NEVENTS_TRAIN_S)+\ ":nTrain_Background="+str(DEFAULT_NEVENTS_TRAIN_B)+\ ":nTest_Signal="+str(DEFAULT_NEVENTS_TEST_S)+\ ":nTest_Background="+str(DEFAULT_NEVENTS_TEST_B)+\ ":SplitMode=Random:NormMode=EqualNumEvents:!V" factory.PrepareTrainingAndTestTree(mycutSig, mycutBkg, TrainingAndTestTreeStr) # -------------------------------------------------------------------------------------------------- # ---- Book MVA methods # # multidim likelihood --- kNN if "kNN100" in mlist: factory.BookMethod( TMVA.Types.kKNN, "KNN100", 
"!V:H:nkNN=100:ScaleFrac=0.8:UseKernel=F:UseWeight=F:Trim=False:BalanceDepth=6" ) if "kNN100trim" in mlist: factory.BookMethod( TMVA.Types.kKNN, "KNN100trim", "!V:H:nkNN=100:ScaleFrac=0.8:UseKernel=F:UseWeight=F:Trim=True:BalanceDepth=6" ) if "likelihood" in mlist: factory.BookMethod(TMVA.Types.kLikelihood, "Likelihood", "H:!V:") if "BDT" in mlist: BDToptions = "!H:NTrees=850:nEventsMin=150:MaxDepth=5:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VerbosityLevel=Error" factory.BookMethod(TMVA.Types.kBDT, "BDT", BDToptions) # ---- Now you can tell the factory to train, test, and evaluate the MVAs. # Train MVAs factory.TrainAllMethods() # Test MVAs factory.TestAllMethods() # Evaluate MVAs factory.EvaluateAllMethods() # Save the output. outputFile.Close() print "=== wrote root file %s\n" % outfname print "=== TMVAClassification is done!\n" # open the GUI for the result macros gROOT.ProcessLine("TMVAGui(\"%s\")" % outfname) # keep the ROOT thread running gApplication.Run()
def main(): try: # retrive command line options shortopts = "m:i:n:d:k:l:t:o:vh?" longopts = [ "methods=", "inputfile=", "nTrees=", "maxDepth=", "mass=", "varListKey=", "inputtrees=", "outputfile=", "verbose", "help", "usage" ] opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts) except getopt.GetoptError: # print help information and exit: print "ERROR: unknown options in argument %s" % sys.argv[1:] usage() sys.exit(1) infname = DEFAULT_INFNAME treeNameSig = DEFAULT_TREESIG treeNameBkg = DEFAULT_TREEBKG outfname = DEFAULT_OUTFNAME methods = DEFAULT_METHODS nTrees = DEFAULT_NTREES mDepth = DEFAULT_MDEPTH mass = DEFAULT_MASS varListKey = DEFAULT_VARLISTKEY verbose = True for o, a in opts: if o in ("-?", "-h", "--help", "--usage"): usage() sys.exit(0) elif o in ("-m", "--methods"): methods = a elif o in ("-d", "--maxDepth"): mDepth = a elif o in ("-k", "--mass"): mass = a elif o in ("-l", "--varListKey"): varListKey = a elif o in ("-i", "--inputfile"): infname = a elif o in ("-n", "--nTrees"): nTrees = a elif o in ("-o", "--outputfile"): outfname = a elif o in ("-t", "--inputtrees"): a.strip() trees = a.rsplit(' ') trees.sort() trees.reverse() if len(trees) - trees.count('') != 2: print "ERROR: need to give two trees (each one for signal and background)" print trees sys.exit(1) treeNameSig = trees[0] treeNameBkg = trees[1] elif o in ("-v", "--verbose"): verbose = True varList = varsList.varList[varListKey] nVars = str(len(varList)) + 'vars' Note = methods + '_' + varListKey + '_' + nVars + '_mDepth' + mDepth + '_M' + mass outfname = "weights/TMVA_" + Note + ".root" # Print methods mlist = methods.replace(' ', ',').split(',') print "=== TMVAClassification: use method(s)..." 
for m in mlist: if m.strip() != '': print "=== - <%s>" % m.strip() # Import ROOT classes from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut # check ROOT version, give alarm if 5.18 if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544: print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA" print "*** does not run properly (function calls with enums in the argument are ignored)." print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples)," print "*** or use another ROOT version (e.g., ROOT 5.19)." sys.exit(1) # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI # gROOT.SetMacroPath( "./" ) # gROOT.Macro ( "./TMVAlogon.C" ) # gROOT.LoadMacro ( "./TMVAGui.C" ) # Import TMVA classes from ROOT from ROOT import TMVA # Output file outputFile = TFile(outfname, 'RECREATE') # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options) # All TMVA output can be suppressed by removing the "!" 
(not) in # front of the "Silent" argument in the option string # factory = TMVA.Factory( "TMVAClassification", outputFile, # "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ) factory = TMVA.Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;:AnalysisType=Classification" ) # Set verbosity factory.SetVerbose(verbose) # If you wish to modify default settings # (please check "src/Config.h" to see all available global options) # gConfig().GetVariablePlotting()).fTimesRMS = 8.0 (TMVA.gConfig().GetIONames()).fWeightFileDir = "weights/" + Note # Define the input variables that shall be used for the classifier training # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" # [all types of expressions that can also be parsed by TTree::Draw( "expression" )] for iVar in varList: if iVar[0] == 'NJets_singleLepCalc': factory.AddVariable(iVar[0], iVar[1], iVar[2], 'I') else: factory.AddVariable(iVar[0], iVar[1], iVar[2], 'F') # You can add so-called "Spectator variables", which are not used in the MVA training, # but will appear in the final "TestTree" produced by TMVA. 
This TestTree will contain the # input variables, the response values of all trained MVAs, and the spectator variables inputDir = varsList.inputDir print 'mass point ' + mass infname = "ChargedHiggs_HplusTB_HplusToTB_M-%s_13TeV_amcatnlo_pythia8_hadd.root" % ( mass) iFileSig = TFile.Open(inputDir + infname) sigChain = iFileSig.Get("ljmet") # os.exits(1) #BDT machinary factory.AddSignalTree(sigChain) bkg_list = [] bkg_trees_list = [] hist_list = [] weightsList = [] for i in range(len(varsList.bkg)): bkg_list.append(TFile.Open(inputDir + varsList.bkg[i])) print inputDir + varsList.bkg[i] bkg_trees_list.append(bkg_list[i].Get("ljmet")) bkg_trees_list[i].GetEntry(0) if bkg_trees_list[i].GetEntries() == 0: continue factory.AddBackgroundTree(bkg_trees_list[i], 1) signalWeight = 1 #0.0159/sigChain.GetEntries() #xs (pb) # ====== register trees ==================================================== # To give different trees for training and testing, do as follows: # factory.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ) # factory.AddSignalTree( signalTestTree, signalTestWeight, "Test" ) # Use the following code instead of the above two or four lines to add signal and background # training and test events "by hand" # NOTE that in this case one should not give expressions (such as "var1+var2") in the input # variable definition, but simply compute the expression before adding the event # # # --- begin ---------------------------------------------------------- # # ... 
*** please lookup code in TMVA/macros/TMVAClassification.C *** # # # --- end ------------------------------------------------------------ # # ====== end of register trees ============================================== # Set individual event weights (the variables must exist in the original TTree) # for signal : factory.SetSignalWeightExpression ("weight1*weight2"); # for background: factory.SetBackgroundWeightExpression("weight1*weight2"); #factory.SetBackgroundWeightExpression( "weight" ) factory.SetSignalWeightExpression(weightStrS) factory.SetBackgroundWeightExpression(weightStrB) # Apply additional cuts on the signal and background sample. # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" ) mycutSig = TCut(cutStrS) mycutBkg = TCut(cutStrB) # Here, the relevant variables are copied over in new, slim trees that are # used for TMVA training and testing # "SplitMode=Random" means that the input events are randomly shuffled before # splitting them into training and test samples factory.PrepareTrainingAndTestTree( mycutSig, mycutBkg, # "nTrain_Signal=0:nTrain_Background=0:nTest_Signal=10:nTest_Background=100:SplitMode=Random:NormMode=NumEvents:!V" ) "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ) # -------------------------------------------------------------------------------------------------- # ---- Book MVA methods # # please lookup the various method configuration options in the corresponding cxx files, eg: # src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html # it is possible to preset ranges in the option string in which the cut optimisation should be done: # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable # Cut optimisation # bdtSetting for "BDT" bdtSetting = '!H:!V:NTrees=%s:MaxDepth=%s' % (nTrees, mDepth) bdtSetting += ':MinNodeSize=2.5%:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20' 
bdtSetting += ':IgnoreNegWeightsInTraining=True' # bdtSetting for "BDTMitFisher" bdtFSetting = '!H:!V:NTrees=%s' % nTrees bdtFSetting += ':MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20' bdtFSetting += ':IgnoreNegWeightsInTraining=True' # bdtSetting for "BDTG" bdtGSetting = '!H:!V:NTrees=%s:MaxDepth=%s' % (nTrees, mDepth) bdtGSetting += ':MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20' bdtGSetting += ':Pray' #Pray takes into account the effect of negative bins in BDTG #bdtGSetting += ':IgnoreNegWeightsInTraining=True' # bdtSetting for "BDTB" bdtBSetting = '!H:!V:NTrees=%s' % nTrees bdtBSetting += ':MinNodeSize=2.5%:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20' bdtBSetting += ':IgnoreNegWeightsInTraining=True' # bdtSetting for "BDTD" bdtDSetting = '!H:!V:NTrees=%s' % nTrees bdtDSetting += ':MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate' bdtDSetting += ':IgnoreNegWeightsInTraining=True' #Note also that explicitly setting *nEventsMin* so far OVERWRITES the option recomeded ^[[0m #BOOKING AN ALGORITHM if methods == "BDT": factory.BookMethod(TMVA.Types.kBDT, "BDT", bdtSetting) if methods == "BDTG": factory.BookMethod(TMVA.Types.kBDT, "BDTG", bdtGSetting) if methods == "BDTMitFisher": factory.BookMethod(TMVA.Types.kBDT, "BDTMitFisher", bdtFSetting) if methods == "BDTB": factory.BookMethod(TMVA.Types.kBDT, "BDTB", bdtBSetting) if methods == "BDTD": factory.BookMethod(TMVA.Types.kBDT, "BDTD", bdtDSetting) # -------------------------------------------------------------------------------------------------- # ---- Now you can tell the factory to train, test, and evaluate the MVAs. # Train MVAs factory.TrainAllMethods() # Test MVAs factory.TestAllMethods() # Evaluate MVAs factory.EvaluateAllMethods() # Save the output. 
outputFile.Close() # # print "=== wrote root file %s\n" % outfname # print "=== TMVAClassification is done!\n" # save plots: os.chdir('weights/' + Note) #TMVA.mvaeffs( "../../"+outfname ) #Classifier Cut Efficiencies gROOT.SetBatch(1) TMVA.efficiencies( "../../" + outfname ) #Classifier Background Rejection vs Signal Efficiency (ROC curve) #TMVA.efficiencies( "weights/TMVA_BDTG_APR9_33vars_mDepth3_MLow.root", 3 ) #Classifier 1/(Backgr. Efficiency) vs Signal Efficiency (ROC curve) TMVA.mvas("../../" + outfname, 0) #Classifier Output Distributions (test sample) TMVA.correlations( "../../" + outfname) #Input Variable Linear Correlation Coefficients TMVA.variables("../../" + outfname) #Input variables (training sample) #TMVA.mvas( "../../"+outfname ) #Classifier Output Distributions (test and training samples superimposed) #gROOT.ProcessLine( ".x ../../mvas.C(\"%s\",3)" % ("../../"+outfname) ) #Classifier Output Distributions (test and training samples superimposed) if not gROOT.IsBatch(): TMVA.TMVAGui("../../" + outfname) # os.chdir('plots') # try: os.system('convert CorrelationMatrixS.eps CorrelationMatrixS_'+Note+'.png') # except: pass # try: os.system('convert CorrelationMatrixB.eps CorrelationMatrixB_'+Note+'.png') # except: pass # #try: os.system('convert invBeffvsSeff.eps invBeffvsSeff_'+Note+'.png') # #except: pass # try: os.system('convert mva_'+Note.split('_')[0]+'.eps mva_'+Note+'.png') # except: pass # try: os.system('convert mvaeffs_'+Note.split('_')[0]+'.eps mvaeffs_'+Note+'.png') # except: pass # try: os.system('convert overtrain_'+Note.split('_')[0]+'.eps overtrain_'+Note+'.png') # except: pass # try: os.system('convert rejBvsS.eps rejBvsS_'+Note+'.png') # except: pass # try: os.system('convert variables_id_c1.eps variables_id_c1_'+Note+'.png') # except: pass # try: os.system('convert variables_id_c2.eps variables_id_c2_'+Note+'.png') # except: pass # try: os.system('convert variables_id_c3.eps variables_id_c3_'+Note+'.png') # except: pass # try: 
os.system('convert variables_id_c4.eps variables_id_c4_'+Note+'.png') # except: pass # try: os.system('convert variables_id_c5.eps variables_id_c5_'+Note+'.png') # except: pass # try: os.system('convert variables_id_c6.eps variables_id_c6_'+Note+'.png') # except: pass # os.system('rm *.eps') print "DONE"
def main(): try: # retrive command line options shortopts = "m:i:t:o:vh?" longopts = [ "methods=", "inputfile=", "inputtrees=", "outputfile=", "verbose", "help", "usage" ] opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts) except getopt.GetoptError: # print help information and exit: print "ERROR: unknown options in argument %s" % sys.argv[1:] usage() sys.exit(1) infname = DEFAULT_INFNAME treeNameSig = DEFAULT_TREESIG treeNameBkg = DEFAULT_TREEBKG outfname = DEFAULT_OUTFNAME methods = DEFAULT_METHODS verbose = False for o, a in opts: if o in ("-?", "-h", "--help", "--usage"): usage() sys.exit(0) elif o in ("-m", "--methods"): methods = a elif o in ("-i", "--inputfile"): infname = a elif o in ("-o", "--outputfile"): outfname = a elif o in ("-t", "--inputtrees"): a.strip() trees = a.rsplit(' ') trees.sort() trees.reverse() if len(trees) - trees.count('') != 2: print "ERROR: need to give two trees (each one for signal and background)" print trees sys.exit(1) treeNameSig = trees[0] treeNameBkg = trees[1] elif o in ("-v", "--verbose"): verbose = True # Print methods mlist = methods.replace(' ', ',').split(',') print "=== TMVAClassification: use method(s)..." for m in mlist: if m.strip() != '': print "=== - <%s>" % m.strip() # Import ROOT classes from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut # check ROOT version, give alarm if 5.18 if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544: print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA" print "*** does not run properly (function calls with enums in the argument are ignored)." print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples)," print "*** or use another ROOT version (e.g., ROOT 5.19)." 
sys.exit(1) # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI gROOT.SetMacroPath("./") gROOT.Macro("./TMVAlogon.C") gROOT.LoadMacro("./TMVAGui.C") # Import TMVA classes from ROOT from ROOT import TMVA # Output file outputFile = TFile(outfname, 'RECREATE') # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options) # All TMVA output can be suppressed by removing the "!" (not) in # front of the "Silent" argument in the option string factory = TMVA.Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ) # Set verbosity factory.SetVerbose(verbose) # If you wish to modify default settings # (please check "src/Config.h" to see all available global options) # gConfig().GetVariablePlotting()).fTimesRMS = 8.0 # gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory" # Define the input variables that shall be used for the classifier training # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" # [all types of expressions that can also be parsed by TTree::Draw( "expression" )] varList = varsList.varList for iVar in varList: factory.AddVariable(iVar, 'F') #factory.AddVariable( "NBTags",'I' ) # You can add so-called "Spectator variables", which are not used in the MVA training, # but will appear in the final "TestTree" produced by TMVA. 
This TestTree will contain the # input variables, the response values of all trained MVAs, and the spectator variables factory.AddSpectator("sampleName2") factory.AddSpectator("EVENT") factory.AddSpectator("svMass") factory.AddSpectator("met") factory.AddSpectator("fMass") factory.AddSpectator("CSVJ2") factory.AddSpectator("chi2KinFit") factory.AddSpectator("mJJ") factory.AddSpectator("category") factory.AddSpectator("triggerEff") factory.AddSpectator("initEvents") factory.AddSpectator("xs") # factory.AddSpectator( "spec2:=var1*3", "Spectator 2", "units", 'F' ) # Read input data # sigChain = r.TChain("ttTreeFinal/eventTree") # bkg1Chain = r.TChain("ttTreeFinal/eventTree") # bkg2Chain = r.TChain("ttTreeFinal/eventTree") # Get the signal and background trees for training # tool.addFiles(ch=sigChain, dirName="/hdfs/store/user/zmao/H2hh260_3-SUB-TT", knownEventNumber=0, maxFileNumber=-1) # tool.addFiles(ch=bkg1Chain, dirName="/hdfs/store/user/zmao/tt_3-SUB-TT", knownEventNumber=0, maxFileNumber=-1) # tool.addFiles(ch=bkg2Chain, dirName="/hdfs/store/user/zmao/ZZ_3-SUB-TT", knownEventNumber=0, maxFileNumber=-1) massPoint = infname Lumi = varsList.Lumi preFix = varsList.preFix infname = "H2hh%s_all_tightopposite%s3rdLepVeto.root" % (massPoint, varsList.region) iFileSig = TFile.Open(preFix + infname) sigChain = iFileSig.Get("eventTree") signalWeight = 1 factory.AddSignalTree(sigChain, signalWeight) bkg_list = [] bkg_trees_list = [] hist_list = [] weightsList = [] for i in range(len(varsList.bkg)): bkg_list.append(TFile.Open(preFix + varsList.bkg[i][1])) bkg_trees_list.append(bkg_list[i].Get("eventTree")) hist_list.append(bkg_list[i].Get('preselection')) bkg_trees_list[i].GetEntry(0) weightsList.append( (bkg_trees_list[i].xs) / hist_list[i].GetBinContent(1)) if bkg_trees_list[i].GetEntries() == 0: continue if varsList.bkg[i][0] != 'QCD': factory.AddBackgroundTree(bkg_trees_list[i], weightsList[i] * Lumi * 1000) print "%s:\t\t%.2f" % (varsList.bkg[i][0], 
bkg_trees_list[i].GetEntries() * weightsList[i] * Lumi * 1000) print "%s:\t\t%.2f" % (varsList.bkg[i][0], bkg_trees_list[i].GetEntries()) else: factory.AddBackgroundTree( bkg_trees_list[i], weightsList[i] * hist_list[i].GetBinContent(1)) print "%s:\t\t%.2f" % ( varsList.bkg[i][0], bkg_trees_list[i].GetEntries() * weightsList[i] * hist_list[i].GetBinContent(1)) # iFileBkg1 = TFile.Open(location+"TMVARegApp_tt_eff_all%s_tightoppositebTag.root" %postName) # iFileBkg2 = TFile.Open(location+"TMVARegApp_ZZ_eff_all%s_tightoppositebTag.root" %postName) # iFileBkg3 = TFile.Open(location+"TMVARegApp_tt_semi_eff_all%s_tightoppositebTag.root" %postName) # iFileBkg4 = TFile.Open(location+"TMVARegApp_DY2JetsToLL_all_tightoppositebTag.root") # iFileBkg5 = TFile.Open(location+"TMVARegApp_DY3JetsToLL_all_tightoppositebTag.root") # # iFileBkg6 = TFile.Open(location+"TMVARegApp_W1JetsToLNu_eff2_all_tightoppositebTag.root") # # iFileBkg7 = TFile.Open(location+"TMVARegApp_W2JetsToLNu_eff2_all_tightoppositebTag.root") # iFileBkg8 = TFile.Open(location+"TMVARegApp_W3JetsToLNu_all_tightoppositebTag.root") # # iFileBkg9 = TFile.Open(location+"TMVARegApp_WZJetsTo2L2Q_eff_all_tightoppositebTag.root") # iFileBkg = TFile.Open(location+"TMVARegApp_dataTotal_all%s_relaxedsamebTag.root" %postName) # bkg1Chain = iFileBkg1.Get("eventTree") # bkg2Chain = iFileBkg2.Get("eventTree") # bkg3Chain = iFileBkg3.Get("eventTree") # bkg4Chain = iFileBkg4.Get("eventTree") # bkg5Chain = iFileBkg5.Get("eventTree") # bkg6Chain = iFileBkg6.Get("eventTree") # bkg7Chain = iFileBkg7.Get("eventTree") # bkg8Chain = iFileBkg8.Get("eventTree") # bkg9Chain = iFileBkg9.Get("eventTree") # bkgChain = iFileBkg.Get("eventTree") # Global event weights (see below for setting event-wise weights) signalWeight = 1 #0.0159/sigChain.GetEntries() #xs (pb) # tmpHist1 = iFileBkg1.Get('preselection') # ttWeight = 26.2/tmpHist1.GetBinContent(1) # tmpHist2 = iFileBkg2.Get('preselection') # ZZWeight = 2.5/tmpHist2.GetBinContent(1) # 
tmpHist3 = iFileBkg3.Get('preselection') # tt_semiWeight = 109.3/tmpHist3.GetBinContent(1) # tmpHist4 = iFileBkg4.Get('preselection') # DY2JetsWeight = 181/tmpHist4.GetBinContent(1) # tmpHist5 = iFileBkg5.Get('preselection') # DY3JetsWeight = 51.1/tmpHist5.GetBinContent(1) # tmpHist6 = iFileBkg6.Get('preselection') # W1JetsToLNu = 5400/tmpHist6.GetBinContent(1) # tmpHist7 = iFileBkg7.Get('preselection') # W2JetsToLNu = 1750/tmpHist7.GetBinContent(1) # tmpHist8 = iFileBkg8.Get('preselection') # W3JetsToLNu = 519/tmpHist8.GetBinContent(1) # tmpHist9 = iFileBkg9.Get('preselection') # WZJetsTo2L2Q = 2.207/tmpHist9.GetBinContent(1) # print "tt:\t\t%.2f" %(bkg1Chain.GetEntries()*ttWeight*Lumi*1000) # print "ZZ:\t\t%.2f" %(bkg2Chain.GetEntries()*ZZWeight*Lumi*1000) # print "tt semi:\t%.2f" %(bkg3Chain.GetEntries()*tt_semiWeight*Lumi*1000) # print "DY2:\t\t%.2f" %(bkg4Chain.GetEntries()*DY2JetsWeight*Lumi*1000) # print "DY3:\t\t%.2f" %(bkg5Chain.GetEntries()*DY3JetsWeight*Lumi*1000) # print "WJ1:\t\t%.2f" %(bkg6Chain.GetEntries()*W1JetsToLNu*Lumi*1000) # print "WJ2:\t\t%.2f" %(bkg7Chain.GetEntries()*W2JetsToLNu*Lumi*1000) # print "WJ3:\t\t%.2f" %(bkg8Chain.GetEntries()*W3JetsToLNu*Lumi*1000) # print "WZJ:\t\t%.2f" %(bkg9Chain.GetEntries()*WZJetsTo2L2Q*Lumi*1000) # print "QCD:\t\t%.2f" %(bkgChain.GetEntries()*0.05) # ====== register trees ==================================================== # # the following method is the prefered one: # you can add an arbitrary number of signal or background trees # factory.AddBackgroundTree( bkgChain, 0.05) # factory.AddBackgroundTree( bkg1Chain, ttWeight*Lumi*1000) # factory.AddBackgroundTree( bkg2Chain, ZZWeight*Lumi*1000) # factory.AddBackgroundTree( bkg3Chain, tt_semiWeight*Lumi*1000) # factory.AddBackgroundTree( bkg4Chain, DY2JetsWeight*Lumi*1000) # factory.AddBackgroundTree( bkg5Chain, DY3JetsWeight*Lumi*1000) # factory.AddBackgroundTree( bkg6Chain, W1JetsToLNu*Lumi*1000) # factory.AddBackgroundTree( bkg7Chain, 
W2JetsToLNu*Lumi*1000) # factory.AddBackgroundTree( bkg8Chain, W3JetsToLNu*Lumi*1000) # factory.AddBackgroundTree( bkg9Chain, WZJetsTo2L2Q*Lumi*1000) factory.SetSignalWeightExpression('triggerEff') factory.SetBackgroundWeightExpression('triggerEff') # To give different trees for training and testing, do as follows: # factory.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ) # factory.AddSignalTree( signalTestTree, signalTestWeight, "Test" ) # Use the following code instead of the above two or four lines to add signal and background # training and test events "by hand" # NOTE that in this case one should not give expressions (such as "var1+var2") in the input # variable definition, but simply compute the expression before adding the event # # # --- begin ---------------------------------------------------------- # # ... *** please lookup code in TMVA/macros/TMVAClassification.C *** # # # --- end ------------------------------------------------------------ # # ====== end of register trees ============================================== # Set individual event weights (the variables must exist in the original TTree) # for signal : factory.SetSignalWeightExpression ("weight1*weight2"); # for background: factory.SetBackgroundWeightExpression("weight1*weight2"); #factory.SetBackgroundWeightExpression( "weight" ) # Apply additional cuts on the signal and background sample. 
# example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" ) mycutSig = TCut("chi2KinFit > -10") mycutBkg = TCut("chi2KinFit > -10") # Here, the relevant variables are copied over in new, slim trees that are # used for TMVA training and testing # "SplitMode=Random" means that the input events are randomly shuffled before # splitting them into training and test samples factory.PrepareTrainingAndTestTree( mycutSig, mycutBkg, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ) # -------------------------------------------------------------------------------------------------- # ---- Book MVA methods # # please lookup the various method configuration options in the corresponding cxx files, eg: # src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html # it is possible to preset ranges in the option string in which the cut optimisation should be done: # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable # Cut optimisation # Fisher discriminant (same as LD) # if "Fisher" in mlist: #factory.BookMethod( TMVA.Types.kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ) # factory.BookMethod( TMVA.Types.kFisher, "Fisher") if "BDT" in mlist: factory.BookMethod( TMVA.Types.kBDT, "BDT", "!H:!V:NTrees=150:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=100" ) # -------------------------------------------------------------------------------------------------- # ---- Now you can tell the factory to train, test, and evaluate the MVAs. # Train MVAs factory.TrainAllMethods() # Test MVAs factory.TestAllMethods() # Evaluate MVAs factory.EvaluateAllMethods() # Save the output. 
outputFile.Close() print "=== wrote root file %s\n" % outfname print "=== TMVAClassification is done!\n" # open the GUI for the result macros # gROOT.ProcessLine( "TMVAGui(\"%s\")" % outfname ) ChangeWeightName = 'mv /nfs_scratch/zmao/test/CMSSW_5_3_15/src/TMVA-v4.2.0/test/weights/TMVAClassification_BDT.weights.xml /nfs_scratch/zmao/test/CMSSW_5_3_15/src/TMVA-v4.2.0/test/weights/TMVAClassification_BDT.weights_both_%s.xml' % massPoint os.system(ChangeWeightName)
def main():
    """Command-line driver: train a PyKeras classifier through TMVA.

    Parses options (-m methods, -i input file, -n nTrees, -d maxDepth,
    -k mass, -l variable-list key, -t input trees, -o output file,
    -v verbose), builds a TMVA DataLoader from the signal/background
    trees described by the external ``varsList`` module, writes a small
    Keras model to ``model.h5`` and runs the TMVA
    train/test/evaluate cycle.

    NOTE(review): relies on module-level names not visible in this block
    (DEFAULT_* constants, usage(), varsList, gROOT/TFile/TCut/TMVA,
    Sequential/Dense/Adam, weightStrS/weightStrB, cutStrS/cutStrB) --
    confirm they are defined at file scope.
    """
    try:
        # retrieve command line options
        shortopts = "m:i:n:d:k:l:t:o:vh?"
        longopts = ["methods=", "inputfile=", "nTrees=", "maxDepth=", "mass=", "varListKey=", "inputtrees=", "outputfile=", "verbose", "help", "usage"]
        opts, args = getopt.getopt( sys.argv[1:], shortopts, longopts )
    except getopt.GetoptError:
        # unknown option on the command line: show usage and abort
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    # defaults, overridden by the command-line options parsed below
    infname = DEFAULT_INFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname = DEFAULT_OUTFNAME
    methods = DEFAULT_METHODS
    nTrees = DEFAULT_NTREES
    mDepth = DEFAULT_MDEPTH
    mass = DEFAULT_MASS
    varListKey = DEFAULT_VARLISTKEY
    # NOTE(review): default is already True, so -v/--verbose below is a no-op
    verbose = True
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-d", "--maxDepth"):
            mDepth = a
        elif o in ("-k", "--mass"):
            mass = a
        elif o in ("-l", "--varListKey"):
            varListKey = a
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-n", "--nTrees"):
            nTrees = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            # expect exactly two space-separated tree names: signal background
            # NOTE(review): a.strip() discards its result -- the stripped
            # value is never used
            a.strip()
            trees = a.rsplit( ' ' )
            trees.sort()
            trees.reverse()
            if len(trees)-trees.count('') != 2:
                print "ERROR: need to give two trees (each one for signal and background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True

    # variable list for this training, keyed into the external varsList module
    varList = varsList.varList[varListKey]
    nVars = str(len(varList))+'vars'
    # tag used for both the weight-file directory and the output ROOT file name
    Note=''+methods+'_'+varListKey+'_'+nVars+'_mDepth'+mDepth
    # NOTE(review): this overrides any -o/--outputfile value given above
    outfname = "dataset/weights/TMVA_"+Note+".root"

    # Print methods
    mlist = methods.replace(' ',',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Import ROOT classes
    # check ROOT version, give alarm if 5.18 (PyROOT drops enum arguments there)
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)

    # Output file
    outputFile = TFile( outfname, 'RECREATE' )

    factory = TMVA.Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;:AnalysisType=Classification" )
    factory.SetVerbose( verbose )
    # direct the weight files into a per-training subdirectory named by Note
    (TMVA.gConfig().GetIONames()).fWeightFileDir = "weights/"+Note

    dataloader = TMVA.DataLoader('dataset')
    for iVar in varList:
        # NJets is an integer count; every other variable is registered as float
        if iVar[0]=='NJets_singleLepCalc':
            dataloader.AddVariable(iVar[0],iVar[1],iVar[2],'I')
        else:
            dataloader.AddVariable(iVar[0],iVar[1],iVar[2],'F')

    # signal: four-top MC sample (NOTE(review): overrides the -i infname above)
    inputDir = varsList.inputDir
    infname = "TTTT_TuneCP5_PSweights_13TeV-amcatnlo-pythia8_hadd.root"
    iFileSig = TFile.Open(inputDir+infname)
    sigChain = iFileSig.Get("ljmet")
    dataloader.AddSignalTree(sigChain)

    # backgrounds: one "ljmet" tree per file in varsList.bkg; empty trees skipped
    bkg_list = []
    bkg_trees_list = []
    hist_list = []
    weightsList = []
    for i in range(len(varsList.bkg)):
        bkg_list.append(TFile.Open(inputDir+varsList.bkg[i]))
        print inputDir+varsList.bkg[i]
        bkg_trees_list.append(bkg_list[i].Get("ljmet"))
        bkg_trees_list[i].GetEntry(0)
        if bkg_trees_list[i].GetEntries() == 0:
            continue
        # global weight 1: per-event weights come from weightStrB below
        dataloader.AddBackgroundTree( bkg_trees_list[i], 1)

    signalWeight = 1

    # per-event weight expressions and selection cuts (defined at file scope)
    dataloader.SetSignalWeightExpression( weightStrS )
    dataloader.SetBackgroundWeightExpression( weightStrB )
    mycutSig = TCut( cutStrS )
    mycutBkg = TCut( cutStrB )
    dataloader.PrepareTrainingAndTestTree( mycutSig, mycutBkg, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" )

    kerasSetting = 'H:!V:VarTransform=G:FilenameModel=model.h5:NumEpochs=10:BatchSize=1028'

    # Build and persist the Keras model that PyKeras will load from model.h5.
    # NOTE(review): input_dim=53 is hard-coded -- presumably equals
    # len(varList); confirm, otherwise PyKeras will fail at train time.
    model = Sequential()
    model.add(Dense(100, activation='relu', input_dim=53))
    model.add((Dense(100, activation="relu")))
    model.add((Dense(100, activation="relu")))
    model.add((Dense(100, activation="relu")))
    model.add((Dense(2, activation="sigmoid")))
    # Set loss and optimizer
    model.compile(loss='categorical_crossentropy', optimizer=Adam(), metrics=['accuracy',])
    # Store model to file
    model.save('model.h5')
    model.summary()

    # book only if the method string is exactly "Keras"
    if methods=="Keras":
        factory.BookMethod(dataloader, TMVA.Types.kPyKeras, "PyKeras",kerasSetting)

    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()

    outputFile.Close()
    # save plots:
    print "DONE"
def main(): try: # retrive command line options shortopts = "m:i:t:b:s:a:o:vh?" longopts = [ "methods=", "inputfile=", "inputtrees=", "bkgList=", "sigList=", "anaType=", "outputfile=", "verbose", "help", "usage" ] opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts) except getopt.GetoptError: # print help information and exit: print "ERROR: unknown options in argument %s" % sys.argv[1:] usage() sys.exit(1) infname = DEFAULT_INFNAME bkgList = DEFAULT_BKGLIST sigList = DEFAULT_SIGLIST anaType = DEFAULT_ANATYPE treeNameSig = DEFAULT_TREESIG treeNameBkg = DEFAULT_TREEBKG outfname = DEFAULT_OUTFNAME methods = DEFAULT_METHODS verbose = False for o, a in opts: if o in ("-?", "-h", "--help", "--usage"): usage() sys.exit(0) elif o in ("-m", "--methods"): methods = a elif o in ("-i", "--inputfile"): infname = a elif o in ("-b", "--bkgList"): bkgList = a elif o in ("-s", "--sigList"): sigList = a elif o in ("-a", "--anaType"): anaType = a elif o in ("-o", "--outputfile"): outfname = a elif o in ("-t", "--inputtrees"): a.strip() trees = a.rsplit(' ') trees.sort() trees.reverse() if len(trees) - trees.count('') != 2: print "ERROR: need to give two trees (each one for signal and background)" print trees sys.exit(1) treeNameSig = trees[0] treeNameBkg = trees[1] elif o in ("-v", "--verbose"): verbose = True # Print methods mlist = methods.replace(' ', ',').split(',') print "=== TMVAClassification: use method(s)..." for m in mlist: if m.strip() != '': print "=== - <%s>" % m.strip() # Import ROOT classes from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut # check ROOT version, give alarm if 5.18 if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544: print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA" print "*** does not run properly (function calls with enums in the argument are ignored)." 
print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples)," print "*** or use another ROOT version (e.g., ROOT 5.19)." sys.exit(1) #Edited setupTMVA(pathToTMVA) # Import TMVA classes from ROOT from ROOT import TMVA # Output file outputFile = TFile(outfname, 'RECREATE') # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options) # All TMVA output can be suppressed by removing the "!" (not) in # front of the "Silent" argument in the option string suffix = outfname.split("/")[-1].split(".")[0] factory = TMVA.Factory( "TMVAClassification_%s" % suffix, outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P:AnalysisType=Classification" ) # Set verbosity factory.SetVerbose(verbose) # If you wish to modify default settings # (please check "src/Config.h" to see all available global options) # gConfig().GetVariablePlotting()).fTimesRMS = 8.0 # gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory" # Define the input variables that shall be used for the classifier training # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" # [all types of expressions that can also be parsed by TTree::Draw( "expression" )] #Edited #follow 2LSS note Ch5.2: Disciminating variables factory.AddVariable("mT2 := sig.mT2", 'F') factory.AddVariable("pt := l12.pt", 'F') factory.AddVariable("MET := sig.MetRel", 'F') factory.AddVariable("Ht := Sum$(jets.pt) + Sum$(leps.pt)", 'F') factory.AddVariable("mTl1 := leps.mT[0]", 'F') factory.AddVariable("mTl2 := leps.mT[1]", 'F') factory.AddVariable("ll_dPhi:= l12.dPhi", 'F') factory.AddVariable( "l12m := (int(abs(leps.ID[0]))!=int(abs(leps.ID[1])))*100 + l12.m", 'F') #ISR region if (anaType == "doISR"): factory.AddVariable("JetMET_dPhi := jets.MET_dPhi[0]", 'F') factory.AddVariable("MET_JetPt_R := sig.MetRel/jets.pt[0]", 'F') factory.AddVariable("l1Pt_JetPt_R := leps.pt[0]/jets.pt[0]", 'F') #factory.AddSpectator( "pt1 := leps.pt[0]" 
, 'F' ) #factory.AddSpectator( "pt2 := leps.pt[1]" , 'F' ) #factory.AddSpectator( "ID1 := int(leps.ID[0])" , 'I' ) #factory.AddSpectator( "ID2 := int(leps.ID[1])" , 'I' ) #factory.AddSpectator( "nCentralJets := Sum$(jets.pt>20 && abs(jets.eta)<2.4)" , 'I' ) #FIXME setupXsecDB(pathToSUSYTools) from ROOT.SUSY import CrossSectionDB xsecDB = CrossSectionDB(pathToSUSYTools + "data/mc15_13TeV/") #read in training data openedInFileList = [] # Read input sig sigList = open(sigList, "r") for infname in sigList: inFile = TFile.Open(infname[:-1]) openedInFileList.append(inFile) hCutFlow = inFile.FindObjectAny("hCutFlow") mcEntry = hCutFlow.GetBinContent(1) #FIXME: hard coded extract runNum from filePath m = re.match(".*\.([0-9]{6})\..*", infname) runNum = int(m.groups()[0]) xSECxEff = xsecDB.xsectTimesEff(runNum, 125) + xsecDB.xsectTimesEff( runNum, 127) #125,127 is channel no. # Get the trees for training signal = inFile.Get("Data_") # Global event weights (see below for setting event-wise weights) #signalWeight = getXSECxEff(xsecDB, infname) * tarLumi / mcEntry #signalWeight = xSECxEff * tarLumi / mcEntry signalWeight = 1.0 * tarLumi / mcEntry #treat diff SUSY scenario with equal weight if signalWeight <= 0: print "Encounter <=0 weight sample %s , skipped" % infname continue print "mc sig ", runNum, mcEntry, xSECxEff factory.AddSignalTree(signal, signalWeight) sigList.close() # Read input bkg bkgList = open(bkgList, "r") for infname in bkgList: inFile = TFile.Open(infname[:-1]) openedInFileList.append(inFile) if "physics" in infname: #its real data print "data bkg", infname[:-1] background = inFile.Get("CFlip_") if background: factory.AddBackgroundTree(background, 1.0) background = inFile.Get("FakeLep_") if background: factory.AddBackgroundTree(background, 1.0) else: #its MC data hCutFlow = inFile.FindObjectAny("hCutFlow") mcEntry = hCutFlow.GetBinContent(1) #FIXME: hard coded extract runNum from filePath m = re.match(".*\.([0-9]{6})\..*", infname) runNum = 
int(m.groups()[0]) xSECxEff = xsecDB.xsectTimesEff(runNum) # Get trees for training background = inFile.Get("Data_") # Global event weights (see below for setting event-wise weights) backgroundWeight = xSECxEff * tarLumi / mcEntry if backgroundWeight <= 0: print "Encounter <=0 weight sample %s , skipped" % infname print "mc bkg ", runNum, mcEntry, xSECxEff factory.AddBackgroundTree(background, backgroundWeight) bkgList.close() # event-wise weights #factory.SetSignalWeightExpression( "weight" ) #factory.SetBackgroundWeightExpression( "weight" ) factory.SetSignalWeightExpression("ElSF*MuSF") factory.SetBackgroundWeightExpression( "(CFlipWeight0*FakeLepWeight0)!=1.0 ? CFlipWeight0*FakeLepWeight0 : !TMath::IsNaN(weight)? ElSF*MuSF*weight: 0.0" ) # Apply additional cuts on the signal and background sample. # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" ) # trigCut = "sig.trigCode!=0" #"HLT_mu24_iloose_L1MU15" for mumu emu, "HLT_e24_lhmedium_iloose_L1EM20VH" for ee trigCut = "((nMu>0) && (sig.trigCode & (1<<2))) || ((nMu==0) && (sig.trigCode & (1<<26)))" grlCut = "evtInfo.passGRL==1" wCut = "weight>0 && weight<1e9" tauCut = "1" # "nTau==0" FIXME nTau not properly filled in NTUP yet.. 
bjetCut = "Sum$(jets.isBJet)==0" cosmicCut = "Sum$(leps.isCosmic)==0" htCut = "(Sum$(jets.pt) + Sum$(leps.pt))>40" posWCut = "FakeLepWeight0>0" isrCut = "Sum$(jets.pt>20 && abs(jets.eta)<2.4) %s" % ( ">0" if anaType == "doISR" else "==0") #nCentralJets>0 or ==0 zMassCut = "!(int(abs(leps.ID[0])) == int(abs(leps.ID[1])) && fabs(l12.m - 91.1876)<=5)" #commonCut = "&&".join(["(%s)"%cut for cut in [trigCut , grlCut , bjetCut, cosmicCut]]) commonCut = "&&".join([ "(%s)" % cut for cut in [trigCut, grlCut, wCut, zMassCut, isrCut, tauCut, bjetCut, cosmicCut] ]) commonCut = TCut(commonCut) sigCut = "&&".join([ "(%s)" % cut for cut in [trigCut, grlCut, zMassCut, isrCut, tauCut, bjetCut, cosmicCut] ]) sigCut = TCut(sigCut) bkgCut = "&&".join([ "(%s)" % cut for cut in [ trigCut, grlCut, wCut, zMassCut, isrCut, tauCut, bjetCut, cosmicCut, posWCut ] ]) bkgCut = TCut(bkgCut) # Here, the relevant variables are copied over in new, slim trees that are # used for TMVA training and testing # "SplitMode=Random" means that the input events are randomly shuffled before # splitting them into training and test samples factory.PrepareTrainingAndTestTree( sigCut, bkgCut, "nTrain_Signal=0:nTrain_Background=0:nTest_Background=0:SplitMode=Random:NormMode=EqualNumEvents:!V" ) #"nTrain_Signal=0:nTrain_Background=2000:SplitMode=Random:NormMode=EqualNumEvents:!V" ) # -------------------------------------------------------------------------------------------------- # ---- Book MVA methods # # please lookup the various method configuration options in the corresponding cxx files, eg: # src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html # it is possible to preset ranges in the option string in which the cut optimisation should be done: # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable # Cut optimisation if "Cuts" in mlist: factory.BookMethod( TMVA.Types.kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart") if "CutsD" in 
mlist: factory.BookMethod( TMVA.Types.kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ) if "CutsPCA" in mlist: factory.BookMethod( TMVA.Types.kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ) if "CutsGA" in mlist: factory.BookMethod( TMVA.Types.kCuts, "CutsGA", "H:!V:FitMethod=GA:VarProp=FSmart:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ) if "CutsSA" in mlist: factory.BookMethod( TMVA.Types.kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ) # Likelihood ("naive Bayes estimator") if "Likelihood" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "Likelihood", "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ) # Decorrelated likelihood if "LikelihoodD" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ) # PCA-transformed likelihood if "LikelihoodPCA" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ) # Use a kernel density estimator to approximate the PDFs if "LikelihoodKDE" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ) # Use a variable-dependent mix of splines and kernel density estimator if "LikelihoodMIX" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodMIX", 
"!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ) # Test the multi-dimensional probability density estimator # here are the options strings for the MinMax and RMS methods, respectively: # "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); # "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if "PDERS" in mlist: factory.BookMethod( TMVA.Types.kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ) if "PDERSD" in mlist: factory.BookMethod( TMVA.Types.kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ) if "PDERSPCA" in mlist: factory.BookMethod( TMVA.Types.kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ) # Multi-dimensional likelihood estimator using self-adapting phase-space binning if "PDEFoam" in mlist: factory.BookMethod( TMVA.Types.kPDEFoam, "PDEFoam", "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ) if "PDEFoamBoost" in mlist: factory.BookMethod( TMVA.Types.kPDEFoam, "PDEFoamBoost", "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" ) # K-Nearest Neighbour classifier (KNN) if "KNN" in mlist: factory.BookMethod( TMVA.Types.kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ) # H-Matrix (chi2-squared) method if "HMatrix" in mlist: 
factory.BookMethod(TMVA.Types.kHMatrix, "HMatrix", "!H:!V") # Linear discriminant (same as Fisher discriminant) if "LD" in mlist: factory.BookMethod( TMVA.Types.kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ) # Fisher discriminant (same as LD) if "Fisher" in mlist: factory.BookMethod( TMVA.Types.kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ) # Fisher with Gauss-transformed input variables if "FisherG" in mlist: factory.BookMethod(TMVA.Types.kFisher, "FisherG", "H:!V:VarTransform=Gauss") # Composite classifier: ensemble (tree) of boosted Fisher classifiers if "BoostedFisher" in mlist: factory.BookMethod( TMVA.Types.kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" ) # Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if "FDA_MC" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ) if "FDA_GA" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ) if "FDA_SA" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ) if "FDA_MT" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ) if "FDA_GAMT" in mlist: factory.BookMethod( 
TMVA.Types.kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ) if "FDA_MCMT" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ) # TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if "MLP" in mlist: factory.BookMethod( TMVA.Types.kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ) if "MLPBFGS" in mlist: factory.BookMethod( TMVA.Types.kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ) if "MLPBNN" in mlist: factory.BookMethod( TMVA.Types.kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ) # BFGS training with bayesian regulators # CF(Clermont-Ferrand)ANN if "CFMlpANN" in mlist: factory.BookMethod(TMVA.Types.kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ) # n_cycles:#nodes:#nodes:... # Tmlp(Root)ANN if "TMlpANN" in mlist: factory.BookMethod( TMVA.Types.kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ) # n_cycles:#nodes:#nodes:... 
# Support Vector Machine if "SVM" in mlist: factory.BookMethod(TMVA.Types.kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm") # Boosted Decision Trees if "BDTG" in mlist: factory.BookMethod( TMVA.Types.kBDT, "BDTG", "!H:!V:NTrees=1000:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" ) if "BDT" in mlist: factory.BookMethod( TMVA.Types.kBDT, "BDT", "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" ) if "BDTB" in mlist: factory.BookMethod( TMVA.Types.kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" ) if "BDTD" in mlist: factory.BookMethod( TMVA.Types.kBDT, "BDTD", "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=2:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" ) # RuleFit -- TMVA implementation of Friedman's method if "RuleFit" in mlist: factory.BookMethod( TMVA.Types.kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ) # -------------------------------------------------------------------------------------------------- # ---- Now you can tell the factory to train, test, and evaluate the MVAs. # Train MVAs factory.TrainAllMethods() # Test MVAs factory.TestAllMethods() # Evaluate MVAs factory.EvaluateAllMethods() # Save the output. outputFile.Close() print "=== wrote root file %s\n" % outfname print "=== TMVAClassification is done!\n"
def main():
    """Apply trained TMVA BDT weights to IceCube HDF5 (PyTables) files.

    Parses options (-m methods, -d data directory, -v verbose), books the
    BDT weight files found in a hard-coded weights directory into a TMVA
    Reader, then loops over Level4b*.hdf files: clones placeholder nodes,
    evaluates the MVA per event, and writes the scores back into the files.
    Finally writes summary histograms to TMVApp1.root.

    NOTE(review): relies on module-level names not visible in this block
    (DEFAULT_* constants, usage(), os/re/time/glob/tables/numpy/array,
    IsDescription/Float64Col, and apparently a stopwatch `sw` that is not
    defined anywhere visible -- the final timing print would raise
    NameError; confirm).
    """
    try:
        # Retrieve command line options
        shortopts = "m:i:o:d:vh?"
        longopts = ["methods=", "inputfile=", "outputfile=", "datatype=", "verbose", "help", "usage"]
        opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts)
    except getopt.GetoptError:
        # Print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    # defaults, overridden by the options below
    # NOTE(review): -i/--inputfile and -o/--outputfile are accepted by
    # getopt but never handled in the option loop
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    methods = DEFAULT_METHODS
    directory = DEFAULT_DATA
    verbose = False
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-d", "--datatype"):
            directory = a
        elif o in ("-v", "--verbose"):
            verbose = True

    # Print methods
    #take leading and trailing white space out
    methods = methods.strip(" ")
    mlist = methods.replace(' ', ',').split(',')
    print "=== TMVApplication: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut, TH1F, TStopwatch
    print("ROOT classes successfully imported!\n")
    # DCS 17/06/2016
    # check ROOT version, give alarm if 5.18 (PyROOT drops enum arguments there)
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)

    # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI
    # NOTE(review): hard-coded local install path -- not portable
    tmvadir = "/home/dean/software/tmva/TMVA-v4.2.0/test"
    macro = os.path.join(tmvadir, "TMVAlogon.C")
    loadmacro = os.path.join(tmvadir, "TMVAGui.C")
    gROOT.SetMacroPath(tmvadir)
    gROOT.Macro(macro)
    gROOT.LoadMacro(loadmacro)
    print("ROOT macro path loaded correctly!\n")

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Create the Reader object
    reader = TMVA.Reader("!Color")

    # one single-element float array per input variable, shared with the Reader
    var1 = array('f', [0])
    var2 = array('f', [0])
    var3 = array('f', [0])
    var4 = array('f', [0])
    var5 = array('f', [0])
    var6 = array('f', [0])
    var7 = array('f', [0])
    var8 = array('f', [0])
    var9 = array('f', [0])
    var10 = array('f', [0])
    variables = [var1, var2, var3, var4, var5, var6, var7, var8, var9, var10]
    var_names = ['peaks', 'mean_peaks', 'integral', 'integral_over_peaks', 'max', 'mean', 'max_over_mean', 'std_dev_peaks', 'entropy', 'ps_integral']
    #variables = [var1, var2, var3, var4]
    #var_names = ['var1', 'var2', 'var3', 'var4']
    for name, var in zip(var_names, variables):
        reader.AddVariable(name, var)
    print("Variables successfully loaded!\n")

    #reader.AddVariable("Nclusters.value", var1)
    #reader.AddVariable("(TMath::Log10(eventinfo_ALLOfflinePulseSeriesReco.tot_charge))*1000/eventinfo_ALLOfflinePulseSeriesReco.length" ,var2)
    #reader.AddVariable("MDCOGLaunches.value*1000./eventinfo_ALLOfflinePulseSeriesReco.length",var3)
    #reader.AddVariable("Nclusters.value*1000./eventinfo_ALLOfflinePulseSeriesReco.length" ,var4)
    #reader.AddVariable("NSMT8TRIGGER.value/eventinfo_ALLOfflinePulseSeriesReco.nstrings",var5)
    #reader.AddVariable("MedianCluster.value",var6)

    # book the MVA methods
    #dir    = "weights/"
    #prefix = "TMVAClassification_"
    #
    #for m in mlist:
    #    print( m + " method", dir + prefix + m + ".weights.xml")
    #    reader.BookMVA( m + " method", dir + prefix + m + ".weights.xml" )
    # NOTE(review): methods are booked as "BDT_0", "BDT_1", ... here, but
    # below EvaluateMVA is called with "<histname> method" -- these names
    # do not match the booked ones; confirm intended naming.
    weight_dir = "/home/dean/capstone/TMVA-classifier/weights/"
    weights = [f for f in os.listdir(weight_dir) if ".xml" in f]
    for i, f in enumerate(weights):
        reader.BookMVA("BDT_{}".format(i), os.path.join(weight_dir, f))  #only care about BDT
    # reader.BookMVA("BDT","weights/TMVAClassification_BDT.weights.xml")

    #######################################################################
    # For an example how to apply your own plugin method, please see
    # TMVA/macros/TMVApplication.C
    #######################################################################

    # Book output histograms
    nbin = 100
    histList = []
    for m in mlist:
        histList.append(TH1F(m, m, nbin, -3, 3))
    # NOTE(review): this Fill happens before any input variables are set
    # (var1..var10 still hold 0) -- presumably a leftover; confirm.
    for h in histList:
        h.Fill(reader.EvaluateMVA(h.GetName() + " method"))

    # Book example histogram for probability (the other methods would be done similarly)
    if "Fisher" in mlist:
        probHistFi = TH1F("PROBA_MVA_Fisher", "PROBA_MVA_Fisher", nbin, 0, 1)
        rarityHistFi = TH1F("RARITY_MVA_Fisher", "RARITY_MVA_Fisher", nbin, 0, 1)

    # process every Level4b HDF file in the chosen data directory
    filelist = glob(directory + "/" + "Level4b*.hdf")
    print 30 * "#"
    print "the filelist, ", filelist
    print 30 * "--"
    for file in filelist:
        try:
            startfile = tables.openFile(file, "a")
            #DELETE BDTs if they exist
            for var in startfile.root._v_children.keys():
                if re.match("BDT_", var):
                    startfile.removeNode("/", var)
                    startfile.removeNode("/__I3Index__", var)
            #NOW CLONE THE NODE
            # clone an existing node as a template for each score column
            for name in histList:
                startfile.copyNode("/__I3Index__/StdDCOGLaunches", "/__I3Index__", str(name.GetName()))
                startfile.copyNode("/StdDCOGLaunches", "/", str(name.GetName()))
            startfile.close()

            # derive the per-event input quantities from the HDF columns
            h5 = tables.openFile(file, 'r')
            mcog_over_t = numpy.divide(h5.root.MDCOGLaunches.cols.value[:], h5.root.eventinfo_ALLOfflinePulseSeriesReco.cols.length[:]/1000.)
            q_over_t = numpy.divide(numpy.log10(h5.root.eventinfo_ALLOfflinePulseSeriesReco.cols.tot_charge[:]), h5.root.eventinfo_ALLOfflinePulseSeriesReco.cols.length[:]/1000.)
            ncluster_over_t = numpy.divide(h5.root.Nclusters.cols.value[:], h5.root.eventinfo_ALLOfflinePulseSeriesReco.cols.length[:]/1000.)
            nsmt8_over_string = numpy.divide(h5.root.NSMT8TRIGGER.cols.value[:], h5.root.eventinfo_ALLOfflinePulseSeriesReco.cols.nstrings[:])
            s1 = array('f', h5.root.Nclusters.cols.value[:])
            s2 = array('f', q_over_t)
            s3 = array('f', mcog_over_t[:])
            s4 = array('f', ncluster_over_t[:])
            s5 = array('f', nsmt8_over_string[:])
            s6 = array('f', h5.root.MedianCluster.cols.value[:])
            h5.close()

            # one classifier score per (method, event)
            result = numpy.zeros((len(histList), len(s1)), numpy.dtype([('Classifier', numpy.double)]))
            for ievt in range(len(s1)):
                #if ievt%1000 == 0:
                #    print "--- ... Processing event: %i" % ievt
                # Fill event in memory
                # Compute MVA input variables
                # NOTE(review): only 6 of the 10 registered variables are
                # filled per event (var7..var10 stay 0) -- confirm intended.
                var1[0] = s1[ievt]
                var2[0] = s2[ievt]
                var3[0] = s3[ievt]
                var4[0] = s4[ievt]
                var5[0] = s5[ievt]
                var6[0] = s6[ievt]
                # Fill histograms with MVA outputs
                for j, h in enumerate(histList):
                    h.Fill(reader.EvaluateMVA(h.GetName() + " method"))
                    result[j][ievt]["Classifier"] = reader.EvaluateMVA(h.GetName() + " method")
                # write this event's scores back into the HDF file
                # NOTE(review): reconstructed nesting -- the file appears to
                # be reopened and closed once per event (ievt is used in the
                # column index); confirm against the original layout.
                endfile = tables.openFile(file, 'a')
                for k, name in enumerate(histList):
                    modifiedNode = endfile.getNode("/", str(name.GetName()))
                    modifiedNode.cols.value[ievt] = result[k][ievt]["Classifier"]
                endfile.close()
            print time.strftime('Elapsed time - %H:%M:%S', time.gmtime(time.clock()))

            #sanity check of the mva values writen in the hdf files
            #ifile=tables.openFile(file,'r')
            # if len(ifile.root.BDT_400_20.cols.BDT) != len(ifile.root.MPEFit.cols.Zenith):
            #     ifile.close()
            #     print "Something wrong with file: ", k, j+1
            #exit()
            #os.system("rm "+"/data/icecube01/users/redlpete/IC59L2/TableIOL3/H5FilesIncludingScores/H5%0.2d%0.2d.hd5"%(k,j+1))
            # ifile.close()
        except ImportError as exce:
            # NOTE(review): k and j are loop variables leaked from the inner
            # loops above; if the failure happens before those loops ran this
            # print itself raises NameError. Catching ImportError here also
            # looks too narrow for I/O failures -- confirm intent.
            print "file does not exist", k, j + 1
            print exce
            exit()

    # append the scores of the last processed file to a summary table
    # NOTE(review): `result` leaks out of the file loop, so only the last
    # file's scores end up in test.h5; ifile is never closed -- confirm.
    ifile = tables.openFile("test.h5", mode='a')

    class Score(IsDescription):
        # single double-precision score column
        score = Float64Col()

    group = ifile.createGroup("/", 'Background', 'Scoreinfo')
    table = ifile.createTable(group, 'score', Score, "Example")
    particle = table.row
    for n in range(len(result)):
        particle['score'] = result[n]
        particle.append()

    # NOTE(review): `sw` is not defined in this function -- this line would
    # raise NameError as written; confirm a TStopwatch was meant to be started.
    print "--- End of event loop: %s" % sw.Print()

    # write the MVA output histograms
    target = TFile("TMVApp1.root", "RECREATE")
    for h in histList:
        h.Write()
    target.Close()
    print "--- Created root file: \"TMVApp.root\" containing the MVA output histograms"
    print "==> TMVApplication is done!"
def main(): try: # retrive command line options shortopts = "m:o:l:s:vh?" longopts = ["methods=", "outputfile=", "lepton=", "signal=", "verbose", "help", "usage"] opts, args = getopt.getopt( sys.argv[1:], shortopts, longopts ) except getopt.GetoptError: # print help information and exit: print "ERROR: unknown options in argument %s" % sys.argv[1:] usage() sys.exit(1) lepton = DEFAULT_LEPTON outfname = DEFAULT_OUTFNAME methods = DEFAULT_METHODS verbose = False signal = DEFAULT_SIGNAL for o, a in opts: if o in ("-?", "-h", "--help", "--usage"): usage() sys.exit(0) elif o in ("-m", "--methods"): methods = a elif o in ("-o", "--outputfile"): outfname = a elif o in ("-s", "--signal"): signal = a elif o in ("-v", "--verbose"): verbose = True elif o in ("-l", "--lepton"): if a == "electron": lepton = ELECTRON elif a == "muon": lepton = MUON else: print "*** Lepton must be 'electron' or 'muon ****" sys.exit(1) # Print methods mlist = methods.replace(' ',',').split(',') print "=== TMVAClassification: use method(s)..." for m in mlist: if m.strip() != '': print "=== - <%s>" % m.strip() # Import ROOT classes from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut # check ROOT version, give alarm if 5.18 if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544: print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA" print "*** does not run properly (function calls with enums in the argument are ignored)." print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples)," print "*** or use another ROOT version (e.g., ROOT 5.19)." 
sys.exit(1) # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI gROOT.SetMacroPath( "./" ) gROOT.Macro ( "./TMVAlogon.C" ) gROOT.LoadMacro ( "./TMVAGui.C" ) # Import TMVA classes from ROOT from ROOT import TMVA # Output file outputFile = TFile( outfname, 'RECREATE' ) # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options) # All TMVA output can be suppressed by removing the "!" (not) in # front of the "Silent" argument in the option string factory = TMVA.Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ) # Set verbosity factory.SetVerbose( verbose ) # let's open the input files if lepton == ELECTRON: print "Lepton is ELECTRON." path = "/data3/jmitrevs/lepphoton/elphoton_ntuple2/mergedFiles/" wino_600_200FileName = path + "wino_600_200_el.root" wino_600_500FileName = path + "wino_600_500_el.root" wino_1000_200FileName = path + "wino_1000_200_el.root" wino_1500_300FileName = path + "wino_1500_300_el.root" WlepnuFileName_Np0 = path + "Wenu_Np0.root" WlepnuFileName_Np1 = path + "Wenu_Np1.root" WlepnuFileName_Np2 = path + "Wenu_Np2.root" WlepnuFileName_Np3 = path + "Wenu_Np3.root" WlepnuFileName_Np4 = path + "Wenu_Np4.root" WlepnuFileName_Np5 = path + "Wenu_Np5.root" ZleplepFileName_Np0 = path + "Zee_Np0.root" ZleplepFileName_Np1 = path + "Zee_Np1.root" ZleplepFileName_Np2 = path + "Zee_Np2.root" ZleplepFileName_Np3 = path + "Zee_Np3.root" ZleplepFileName_Np4 = path + "Zee_Np4.root" ZleplepFileName_Np5 = path + "Zee_Np5.root" st_tchan_lepnuFileName = path + "st_tchan_enu.root" st_schan_lepnuFileName = path + "st_schan_enu.root" ZleplepgammaFileName = path + "Zeegamma.root" elif lepton == MUON: print "Lepton is MUON." 
path = "/data3/jmitrevs/lepphoton/muphoton2/mergedFiles/" wino_600_200FileName = path + "wino_600_200_mu.root" wino_600_500FileName = path + "wino_600_500_mu.root" wino_1000_200FileName = path + "wino_1000_200_mu.root" wino_1500_300FileName = path + "wino_1500_300_mu.root" WlepnuFileName_Np0 = path + "Wmunu_Np0.root" WlepnuFileName_Np1 = path + "Wmunu_Np1.root" WlepnuFileName_Np2 = path + "Wmunu_Np2.root" WlepnuFileName_Np3 = path + "Wmunu_Np3.root" WlepnuFileName_Np4 = path + "Wmunu_Np4.root" WlepnuFileName_Np5 = path + "Wmunu_Np5.root" ZleplepFileName_Np0 = path + "Zmumu_Np0.root" ZleplepFileName_Np1 = path + "Zmumu_Np1.root" ZleplepFileName_Np2 = path + "Zmumu_Np2.root" ZleplepFileName_Np3 = path + "Zmumu_Np3.root" ZleplepFileName_Np4 = path + "Zmumu_Np4.root" ZleplepFileName_Np5 = path + "Zmumu_Np5.root" st_tchan_lepnuFileName = path + "st_tchan_munu.root" st_schan_lepnuFileName = path + "st_schan_munu.root" ZleplepgammaFileName = path + "Zmumugamma.root" else: raise ValueError("Lepton has to be ELECTRON or MUON.") WtaunuFileName_Np0 = path + "Wtaunu_Np0.root" WtaunuFileName_Np1 = path + "Wtaunu_Np1.root" WtaunuFileName_Np2 = path + "Wtaunu_Np2.root" WtaunuFileName_Np3 = path + "Wtaunu_Np3.root" WtaunuFileName_Np4 = path + "Wtaunu_Np4.root" WtaunuFileName_Np5 = path + "Wtaunu_Np5.root" ZtautauFileName_Np0 = path + "Ztautau_Np0.root" ZtautauFileName_Np1 = path + "Ztautau_Np1.root" ZtautauFileName_Np2 = path + "Ztautau_Np2.root" ZtautauFileName_Np3 = path + "Ztautau_Np3.root" ZtautauFileName_Np4 = path + "Ztautau_Np4.root" ZtautauFileName_Np5 = path + "Ztautau_Np5.root" st_tchan_taunuFileName = path + "st_tchan_taunu.root" st_schan_taunuFileName = path + "st_schan_taunu.root" st_WtFileName = path + "st_Wt.root" WgammaFileName_Np0 = path + "Wgamma_Np0.root" WgammaFileName_Np1 = path + "Wgamma_Np1.root" WgammaFileName_Np2 = path + "Wgamma_Np2.root" WgammaFileName_Np3 = path + "Wgamma_Np3.root" WgammaFileName_Np4 = path + "Wgamma_Np4.root" WgammaFileName_Np5 = path 
+ "Wgamma_Np5.root" ttbarFileName = path + "ttbar.root" WWFileName = path + "WW.root" WZFileName = path + "WZ.root" ZZFileName = path + "ZZ.root" ZtautaugammaFileName = path + "Ztautaugamma.root" ########################################### wino_600_200File = TFile(wino_600_200FileName) wino_600_500File = TFile(wino_600_500FileName) wino_1000_200File = TFile(wino_1000_200FileName) wino_1500_300File = TFile(wino_1500_300FileName) WlepnuFile_Np0 = TFile(WlepnuFileName_Np0) WlepnuFile_Np1 = TFile(WlepnuFileName_Np1) WlepnuFile_Np2 = TFile(WlepnuFileName_Np2) WlepnuFile_Np3 = TFile(WlepnuFileName_Np3) WlepnuFile_Np4 = TFile(WlepnuFileName_Np4) WlepnuFile_Np5 = TFile(WlepnuFileName_Np5) WtaunuFile_Np0 = TFile(WtaunuFileName_Np0) WtaunuFile_Np1 = TFile(WtaunuFileName_Np1) WtaunuFile_Np2 = TFile(WtaunuFileName_Np2) WtaunuFile_Np3 = TFile(WtaunuFileName_Np3) WtaunuFile_Np4 = TFile(WtaunuFileName_Np4) WtaunuFile_Np5 = TFile(WtaunuFileName_Np5) ZleplepFile_Np0 = TFile(ZleplepFileName_Np0) ZleplepFile_Np1 = TFile(ZleplepFileName_Np1) ZleplepFile_Np2 = TFile(ZleplepFileName_Np2) ZleplepFile_Np3 = TFile(ZleplepFileName_Np3) ZleplepFile_Np4 = TFile(ZleplepFileName_Np4) ZleplepFile_Np5 = TFile(ZleplepFileName_Np5) ZtautauFile_Np0 = TFile(ZtautauFileName_Np0) ZtautauFile_Np1 = TFile(ZtautauFileName_Np1) ZtautauFile_Np2 = TFile(ZtautauFileName_Np2) ZtautauFile_Np3 = TFile(ZtautauFileName_Np3) ZtautauFile_Np4 = TFile(ZtautauFileName_Np4) ZtautauFile_Np5 = TFile(ZtautauFileName_Np5) WgammaFile_Np0 = TFile(WgammaFileName_Np0) WgammaFile_Np1 = TFile(WgammaFileName_Np1) WgammaFile_Np2 = TFile(WgammaFileName_Np2) WgammaFile_Np3 = TFile(WgammaFileName_Np3) WgammaFile_Np4 = TFile(WgammaFileName_Np4) WgammaFile_Np5 = TFile(WgammaFileName_Np5) ttbarFile = TFile(ttbarFileName) st_tchan_lepnuFile = TFile(st_tchan_lepnuFileName) st_tchan_taunuFile = TFile(st_tchan_taunuFileName) st_schan_lepnuFile = TFile(st_schan_lepnuFileName) st_schan_taunuFile = TFile(st_schan_taunuFileName) st_WtFile = 
TFile(st_WtFileName) WWFile = TFile(WWFileName) WZFile = TFile(WZFileName) ZZFile = TFile(ZZFileName) ZleplepgammaFile = TFile(ZleplepgammaFileName) ZtautaugammaFile = TFile(ZtautaugammaFileName) ########################################### wino_600_200Tree = wino_600_200File.Get("GammaLepton") wino_600_500Tree = wino_600_500File.Get("GammaLepton") wino_1000_200Tree = wino_1000_200File.Get("GammaLepton") wino_1500_300Tree = wino_1500_300File.Get("GammaLepton") WlepnuTree_Np0 = WlepnuFile_Np0.Get("GammaLepton") WlepnuTree_Np1 = WlepnuFile_Np1.Get("GammaLepton") WlepnuTree_Np2 = WlepnuFile_Np2.Get("GammaLepton") WlepnuTree_Np3 = WlepnuFile_Np3.Get("GammaLepton") WlepnuTree_Np4 = WlepnuFile_Np4.Get("GammaLepton") WlepnuTree_Np5 = WlepnuFile_Np5.Get("GammaLepton") WtaunuTree_Np0 = WtaunuFile_Np0.Get("GammaLepton") WtaunuTree_Np1 = WtaunuFile_Np1.Get("GammaLepton") WtaunuTree_Np2 = WtaunuFile_Np2.Get("GammaLepton") WtaunuTree_Np3 = WtaunuFile_Np3.Get("GammaLepton") WtaunuTree_Np4 = WtaunuFile_Np4.Get("GammaLepton") WtaunuTree_Np5 = WtaunuFile_Np5.Get("GammaLepton") ZleplepTree_Np0 = ZleplepFile_Np0.Get("GammaLepton") ZleplepTree_Np1 = ZleplepFile_Np1.Get("GammaLepton") ZleplepTree_Np2 = ZleplepFile_Np2.Get("GammaLepton") ZleplepTree_Np3 = ZleplepFile_Np3.Get("GammaLepton") ZleplepTree_Np4 = ZleplepFile_Np4.Get("GammaLepton") ZleplepTree_Np5 = ZleplepFile_Np5.Get("GammaLepton") ZtautauTree_Np0 = ZtautauFile_Np0.Get("GammaLepton") ZtautauTree_Np1 = ZtautauFile_Np1.Get("GammaLepton") ZtautauTree_Np2 = ZtautauFile_Np2.Get("GammaLepton") ZtautauTree_Np3 = ZtautauFile_Np3.Get("GammaLepton") ZtautauTree_Np4 = ZtautauFile_Np4.Get("GammaLepton") ZtautauTree_Np5 = ZtautauFile_Np5.Get("GammaLepton") WgammaTree_Np0 = WgammaFile_Np0.Get("GammaLepton") WgammaTree_Np1 = WgammaFile_Np1.Get("GammaLepton") WgammaTree_Np2 = WgammaFile_Np2.Get("GammaLepton") WgammaTree_Np3 = WgammaFile_Np3.Get("GammaLepton") WgammaTree_Np4 = WgammaFile_Np4.Get("GammaLepton") WgammaTree_Np5 = 
WgammaFile_Np5.Get("GammaLepton") ttbarTree = ttbarFile.Get("GammaLepton") st_tchan_lepnuTree = st_tchan_lepnuFile.Get("GammaLepton") st_tchan_taunuTree = st_tchan_taunuFile.Get("GammaLepton") st_schan_lepnuTree = st_schan_lepnuFile.Get("GammaLepton") st_schan_taunuTree = st_schan_taunuFile.Get("GammaLepton") st_WtTree = st_WtFile.Get("GammaLepton") WWTree = WWFile.Get("GammaLepton") WZTree = WZFile.Get("GammaLepton") ZZTree = ZZFile.Get("GammaLepton") ZleplepgammaTree = ZleplepgammaFile.Get("GammaLepton") ZtautaugammaTree = ZtautaugammaFile.Get("GammaLepton") ############################## # and now the weights # wino_600_200_scale = 7.005 # wino_600_500_scale = 3.03021 # wino_1000_200_scale = 4.1325 # wino_1500_300_scale = 0.16 # Wlepnu_Np0_scale = 12.0052623622 # Wlepnu_Np1_scale = 3.13076456857 # Wlepnu_Np2_scale = 0.60296853897 # Wlepnu_Np3_scale = 0.603183318846 # Wlepnu_Np4_scale = 0.62088 # Wlepnu_Np5_scale = 0.600008571551 # Wtaunu_Np0_scale = 12.1457006649 # Wtaunu_Np1_scale = 3.12868868923 # Wtaunu_Np2_scale = 0.602359552172 # Wtaunu_Np3_scale = 0.602586672951 # Wtaunu_Np4_scale = 0.62088496708 # Wtaunu_Np5_scale = 0.638769230769 # Zleplep_Np0_scale = 0.631361988532 # Zleplep_Np1_scale = 0.629541167757 # Zleplep_Np2_scale = 0.625618828688 # Zleplep_Np3_scale = 0.634090909091 # Zleplep_Np4_scale = 0.6 # Zleplep_Np5_scale = 0.51875 # Ztautau_Np0_scale = 0.631228327261 # Ztautau_Np1_scale = 0.631347664299 # Ztautau_Np2_scale = 0.622916409433 # Ztautau_Np3_scale = 0.640077378243 # Ztautau_Np4_scale = 0.581269375646 # Ztautau_Np5_scale = 0.48125 # Wgamma_Np0_scale = 0.0129441737417 # Wgamma_Np1_scale = 0.0635170304401 # Wgamma_Np2_scale = 0.140920227273 # Wgamma_Np3_scale = 0.140622611111 # Wgamma_Np4_scale = 0.134589 # Wgamma_Np5_scale = 0.123308 # ttbar_scale = 0.0384505023442 # st_tchan_lepnu_scale = 0.200916540624 # st_tchan_taunu_scale = 0.201132004918 # st_schan_lepnu_scale = 0.0092735093327 # st_schan_taunu_scale = 0.00926981472204 # st_Wt_scale = 
0.0916407781992 # WW_scale = 0.0342151663714 # WZ_scale = 0.110873818259 # ZZ_scale = 0.0252773011092 # Zleplepgamma_scale = 0.963 # Ztautaugamma_scale = 0.941960800016 #################ntuple_pt25 # wino_600_200_scale = 1.401 # wino_600_500_scale = 3.03021 # wino_1000_200_scale = 4.1325 # wino_1500_300_scale = 0.16 # Wlepnu_Np0_scale = 12.0052623622 # Wlepnu_Np1_scale = 3.13076456857 # Wlepnu_Np2_scale = 0.60296853897 # Wlepnu_Np3_scale = 0.603183318846 # Wlepnu_Np4_scale = 0.62088 # Wlepnu_Np5_scale = 0.600008571551 # Wtaunu_Np0_scale = 12.1457006649 # Wtaunu_Np1_scale = 3.12868868923 # Wtaunu_Np2_scale = 0.602359552172 # Wtaunu_Np3_scale = 0.602586672951 # Wtaunu_Np4_scale = 0.62088496708 # Wtaunu_Np5_scale = 0.638769230769 # Zleplep_Np0_scale = 0.631361988532 # Zleplep_Np1_scale = 0.629541167757 # Zleplep_Np2_scale = 0.625618828688 # Zleplep_Np3_scale = 0.634090909091 # Zleplep_Np4_scale = 0.6 # Zleplep_Np5_scale = 0.51875 # Ztautau_Np0_scale = 0.631228327261 # Ztautau_Np1_scale = 0.631347664299 # Ztautau_Np2_scale = 0.622916409433 # Ztautau_Np3_scale = 0.640077378243 # Ztautau_Np4_scale = 0.581269375646 # Ztautau_Np5_scale = 0.48125 # Wgamma_Np0_scale = 1.08706263428 # Wgamma_Np1_scale = 0.734676952566 # Wgamma_Np2_scale = 0.733754057143 # Wgamma_Np3_scale = 0.149752323594 # Wgamma_Np4_scale = 0.157524392683 # Wgamma_Np5_scale = 0.1281354 # ttbar_scale = 0.0384505023442 # st_tchan_lepnu_scale = 0.200916540624 # st_tchan_taunu_scale = 0.201132004918 # st_Wt_scale = 0.0916407781992 # WW_scale = 0.0342151663714 # WZ_scale = 0.110873818259 # ZZ_scale = 0.0252773011092 # Zleplepgamma_scale = 0.963 # Ztautaugamma_scale = 0.941960800016 # gamma_Np1_scale = 4.06453310851 # gamma_Np2_scale = 3.3709968686 # gamma_Np3_scale = 1.38728943513 # gamma_Np4_scale = 1.41464077802 # gamma_Np5_scale = 1.23661096137 if lepton == ELECTRON: wino_600_200_scale = 0.291875 wino_600_500_scale = 2.69352 wino_1000_200_scale = 4.1325 wino_1500_300_scale = 0.0093269 wino_1000_100_scale = 
69.5 wino_800_700_scale = 0.2328 Wlepnu_Np0_scale = 12.0052623622 Wlepnu_Np1_scale = 3.13076456857 Wlepnu_Np2_scale = 0.60296853897 Wlepnu_Np3_scale = 0.603183318846 Wlepnu_Np4_scale = 0.62088 Wlepnu_Np5_scale = 0.600008571551 Wtaunu_Np0_scale = 12.1457006649 Wtaunu_Np1_scale = 3.12868868923 Wtaunu_Np2_scale = 0.602359552172 Wtaunu_Np3_scale = 0.602586672951 Wtaunu_Np4_scale = 0.62088496708 Wtaunu_Np5_scale = 0.638769230769 Zleplep_Np0_scale = 0.631361988532 Zleplep_Np1_scale = 0.629541167757 Zleplep_Np2_scale = 0.625618828688 Zleplep_Np3_scale = 0.634090909091 Zleplep_Np4_scale = 0.6 Zleplep_Np5_scale = 0.51875 Ztautau_Np0_scale = 0.631228327261 Ztautau_Np1_scale = 0.631347664299 Ztautau_Np2_scale = 0.622916409433 Ztautau_Np3_scale = 0.640077378243 Ztautau_Np4_scale = 0.581269375646 Ztautau_Np5_scale = 0.48125 Wgamma_Np0_scale = 0.0132834003639 Wgamma_Np1_scale = 0.0651816146862 Wgamma_Np2_scale = 0.144613309091 Wgamma_Np3_scale = 0.144307893333 Wgamma_Np4_scale = 0.13811616 Wgamma_Np5_scale = 0.12653952 ttbar_scale = 0.0384505023442 st_tchan_lepnu_scale = 0.200916540624 st_tchan_taunu_scale = 0.201132004918 st_Wt_scale = 0.0916407781992 WW_scale = 0.0342151663714 WZ_scale = 0.110873818259 ZZ_scale = 0.0252773011092 Zleplepgamma_scale = 0.963 Ztautaugamma_scale = 0.941960800016 gamma_Np1_scale = 4.17064063358 gamma_Np2_scale = 3.35244054801 gamma_Np3_scale = 1.36994217452 gamma_Np4_scale = 1.41464077802 gamma_Np5_scale = 1.23661096137 elif lepton == MUON: wino_600_200_scale = 0.291875 Wlepnu_Np0_scale = 11.9925371604 Wlepnu_Np1_scale = 3.13058966 Wlepnu_Np2_scale = 0.601616497017 Wlepnu_Np3_scale = 0.605913424797 Wlepnu_Np4_scale = 0.606001176701 Wlepnu_Np5_scale = 0.593142857143 Wtaunu_Np0_scale = 12.1457006649 Wtaunu_Np1_scale = 3.12868868923 Wtaunu_Np2_scale = 0.602359552172 Wtaunu_Np3_scale = 0.602586672951 Wtaunu_Np4_scale = 0.62088496708 Wtaunu_Np5_scale = 0.638769230769 Zleplep_Np0_scale = 0.631664271554 Zleplep_Np1_scale = 0.628327597475 Zleplep_Np2_scale 
= 0.62551337696 Zleplep_Np3_scale = 0.635795454545 Zleplep_Np4_scale = 0.572916666667 Zleplep_Np5_scale = 0.48125 Ztautau_Np0_scale = 0.631228327261 Ztautau_Np1_scale = 0.631347664299 Ztautau_Np2_scale = 0.622916409433 Ztautau_Np3_scale = 0.640077378243 Ztautau_Np4_scale = 0.581269375646 Ztautau_Np5_scale = 0.48125 Wgamma_Np0_scale = 0.0132834003639 Wgamma_Np1_scale = 0.0651816146862 Wgamma_Np2_scale = 0.144613309091 Wgamma_Np3_scale = 0.144307893333 Wgamma_Np4_scale = 0.13811616 Wgamma_Np5_scale = 0.12653952 ttbar_scale = 0.0384505023442 st_tchan_lepnu_scale = 0.201919368378 st_tchan_taunu_scale = 0.201132004918 st_Wt_scale = 0.0916407781992 WW_scale = 0.0342151663714 WZ_scale = 0.110873818259 ZZ_scale = 0.0252773011092 Zleplepgamma_scale = 0.963963963964 Ztautaugamma_scale = 0.941960800016 gamma_Np1_scale = 4.08704733658 gamma_Np2_scale = 3.35244054801 gamma_Np3_scale = 1.36994217452 gamma_Np4_scale = 1.41464077802 gamma_Np5_scale = 1.23661096137 if signal == "600_200": factory.AddSignalTree(wino_600_200Tree, wino_600_200_scale) elif signal == "600_500": factory.AddSignalTree(wino_600_500Tree, wino_600_500_scale) elif signal == "1000_200": factory.AddSignalTree(wino_1000_200Tree, wino_1000_200_scale) elif signal == "1500_300": factory.AddSignalTree(wino_1500_300Tree, wino_1500_300_scale) else: print "*** signal designation not supported: %s ****" % signal sys.exit(1) factory.AddBackgroundTree(WlepnuTree_Np0, Wlepnu_Np0_scale) factory.AddBackgroundTree(WlepnuTree_Np1, Wlepnu_Np1_scale) factory.AddBackgroundTree(WlepnuTree_Np2, Wlepnu_Np2_scale) factory.AddBackgroundTree(WlepnuTree_Np3, Wlepnu_Np3_scale) factory.AddBackgroundTree(WlepnuTree_Np4, Wlepnu_Np4_scale) factory.AddBackgroundTree(WlepnuTree_Np5, Wlepnu_Np5_scale) #factory.AddBackgroundTree(WtaunuTree_Np0, Wtaunu_Np0_scale) factory.AddBackgroundTree(WtaunuTree_Np1, Wtaunu_Np1_scale) factory.AddBackgroundTree(WtaunuTree_Np2, Wtaunu_Np2_scale) factory.AddBackgroundTree(WtaunuTree_Np3, Wtaunu_Np3_scale) 
factory.AddBackgroundTree(WtaunuTree_Np4, Wtaunu_Np4_scale) factory.AddBackgroundTree(WtaunuTree_Np5, Wtaunu_Np5_scale) # factory.AddBackgroundTree(ZleplepTree_Np0, Zleplep_Np0_scale) # factory.AddBackgroundTree(ZleplepTree_Np1, Zleplep_Np1_scale) # factory.AddBackgroundTree(ZleplepTree_Np2, Zleplep_Np2_scale) # factory.AddBackgroundTree(ZleplepTree_Np3, Zleplep_Np3_scale) # factory.AddBackgroundTree(ZleplepTree_Np4, Zleplep_Np4_scale) # factory.AddBackgroundTree(ZleplepTree_Np5, Zleplep_Np5_scale) # factory.AddBackgroundTree(ZtautauTree_Np0, Ztautau_Np0_scale) # factory.AddBackgroundTree(ZtautauTree_Np1, Ztautau_Np1_scale) # factory.AddBackgroundTree(ZtautauTree_Np2, Ztautau_Np2_scale) # factory.AddBackgroundTree(ZtautauTree_Np3, Ztautau_Np3_scale) # factory.AddBackgroundTree(ZtautauTree_Np4, Ztautau_Np4_scale) # factory.AddBackgroundTree(ZtautauTree_Np5, Ztautau_Np5_scale) factory.AddBackgroundTree(WgammaTree_Np0, Wgamma_Np0_scale) factory.AddBackgroundTree(WgammaTree_Np1, Wgamma_Np1_scale) factory.AddBackgroundTree(WgammaTree_Np2, Wgamma_Np2_scale) factory.AddBackgroundTree(WgammaTree_Np3, Wgamma_Np3_scale) factory.AddBackgroundTree(WgammaTree_Np4, Wgamma_Np4_scale) factory.AddBackgroundTree(WgammaTree_Np5, Wgamma_Np5_scale) factory.AddBackgroundTree(ttbarTree, ttbar_scale) factory.AddBackgroundTree(st_tchan_lepnuTree, st_tchan_lepnu_scale) factory.AddBackgroundTree(st_tchan_taunuTree, st_tchan_taunu_scale) # factory.AddBackgroundTree(st_schan_lepnuTree, st_schan_lepnu_scale) # factory.AddBackgroundTree(st_schan_taunuTree, st_schan_taunu_scale) factory.AddBackgroundTree(st_WtTree, st_Wt_scale) factory.AddBackgroundTree(WWTree, WW_scale) factory.AddBackgroundTree(WZTree, WZ_scale) factory.AddBackgroundTree(ZZTree, ZZ_scale) factory.AddBackgroundTree(ZleplepgammaTree, Zleplepgamma_scale) factory.AddBackgroundTree(ZtautaugammaTree, Ztautaugamma_scale) # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" # [all types of expressions 
that can also be parsed by TTree::Draw( "expression" )] factory.AddVariable( "MET := sqrt(Metx*Metx+Mety*Mety)", 'F' ) # factory.AddVariable( "HT", 'F' ) factory.AddVariable( "PhotonPt[0]", 'F' ) #factory.AddVariable( "ElectronPt[0]", 'F' ) if lepton == ELECTRON: factory.AddVariable( "mTel", 'F' ) else: factory.AddVariable( "mTmu", 'F' ) #factory.AddVariable( "abs(PhotonEta[0])", 'F' ) #factory.AddVariable( "abs(ElectronEta[0])", 'F' ) # Apply additional cuts on the signal and background sample. # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" ) #mycutSig = TCut( "abs(PhotonEta[0]) < 2.01 && abs(ElectronEta[0]) < 2.01" ) if lepton == ELECTRON: mycutSig = TCut( "sqrt((PhotonEta[0]-ElectronEta[0])*(PhotonEta[0]-ElectronEta[0]) + (PhotonPhi[0]-ElectronPhi[0])*(PhotonPhi[0]-ElectronPhi[0])) > 0.7") else: mycutSig = TCut( "sqrt((PhotonEta[0]-MuonEta[0])*(PhotonEta[0]-MuonEta[0]) + (PhotonPhi[0]-MuonPhi[0])*(PhotonPhi[0]-MuonPhi[0])) > 0.7") #mycutSig = TCut( "PhotonPt[0] > 85000" ) mycutBkg = mycutSig # Here, the relevant variables are copied over in new, slim trees that are # used for TMVA training and testing # "SplitMode=Random" means that the input events are randomly shuffled before # splitting them into training and test samples factory.PrepareTrainingAndTestTree( mycutSig, mycutBkg, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ) # -------------------------------------------------------------------------------------------------- # ---- Book MVA methods # # please lookup the various method configuration options in the corresponding cxx files, eg: # src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html # it is possible to preset ranges in the option string in which the cut optimisation should be done: # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable # Cut optimisation if "Cuts" in mlist: factory.BookMethod( TMVA.Types.kCuts, "Cuts", 
"!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ) if "CutsD" in mlist: factory.BookMethod( TMVA.Types.kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ) if "CutsPCA" in mlist: factory.BookMethod( TMVA.Types.kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ) # if "CutsGA" in mlist: # factory.BookMethod( TMVA.Types.kCuts, "CutsGA", # "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ) # if "CutsSA" in mlist: # factory.BookMethod( TMVA.Types.kCuts, "CutsSA", # "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ) # -------------------------------------------------------------------------------------------------- # ---- Now you can tell the factory to train, test, and evaluate the MVAs. # Train MVAs factory.TrainAllMethods() # Test MVAs factory.TestAllMethods() # Evaluate MVAs factory.EvaluateAllMethods() # Save the output. outputFile.Close() print "=== wrote root file %s\n" % outfname print "=== TMVAClassification is done!\n" # open the GUI for the result macros gROOT.ProcessLine( "TMVAGui(\"%s\")" % outfname ) # keep the ROOT thread running gApplication.Run()