def train_factory(self, outputfile):
    """Train all booked MVA methods using TMVA::Factory.

    Prepares the DataLoader, builds the factory from this object's
    configuration, runs the full train/test/evaluate cycle and closes
    the output file.

    Parameters:
        outputfile: an open, writable TFile that receives the TMVA
            output.  It is closed before returning.

    Returns:
        (weightsfiles, classfiles): two dicts mapping each booked method
        name to its weights-file path and to its standalone C++ class
        file path ('.class.C'), respectively.
    """
    # Make the DataLoader.
    self.prepare()
    # Make the factory.
    factory = TMVA.Factory(self.name, outputfile, str(self.factoryoptions))
    factory.SetVerbose(self.verbose)
    methods = self.book_methods(factory)
    # Train, test and evaluate all booked MVAs.
    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()
    # Save the output.
    outputfile.Close()
    # print() form works under both Python 2 and 3 for a single argument.
    print('=== wrote root file {0}\n'.format(outputfile.GetName()))
    print('=== TMVAClassification is done!\n')
    # Map each booked method to the files TMVA wrote for it.
    weightsfiles = {m: self.weights_file(m) for m in methods}
    classfiles = {m: self.weights_file(m, '.class.C') for m in methods}
    return weightsfiles, classfiles
def runJob():
    '''Set up TMVA for a PyMVA classification job (setup portion only).'''
    # Initialise the TMVA toolkit and the PyMVA (Python method) bindings.
    TMVA.Tools.Instance()
    TMVA.PyMethodBase.PyInitialize()
    # Output ROOT file that will hold the TMVA results.
    output = TFile.Open('TMVA.root', 'RECREATE')
    factory = TMVA.Factory('TMVAClassification', output, '!V:!Silent:Color:DrawProgressBar:Transformations=D,G:AnalysisType=Classification')
    dataloader = TMVA.DataLoader('dataset')
    # NOTE(review): the visible body ends after creating the DataLoader;
    # no variables/trees are added and no training is run here —
    # presumably the rest of this function lives elsewhere (truncated).
def runJob():
    """Train gradient-boosted BDTs (an nCuts scan) on the SSSF samples.

    Samples and training variables are taken from the module-level
    ``config``; results go to TMVA_SSSF.root and the DataLoader output
    to the 'datasetSSSF04Feb' directory.
    """
    TMVA.Tools.Instance()
    TMVA.PyMethodBase.PyInitialize()
    output = TFile.Open('TMVA_SSSF.root', 'RECREATE')
    factory = TMVA.Factory(
        'TMVAClassification', output,
        '!V:!Silent:Color:DrawProgressBar:AnalysisType=Classification')
    dataloader = TMVA.DataLoader('datasetSSSF04Feb')
    # Training variables come straight from the configuration module.
    for br in config.mvaVariables:
        dataloader.AddVariable(br)
    for sampleName, sample in config.samples.items():
        # Never train on real data.
        if config.structure[sampleName]['isData'] == 1:
            continue
        print(sampleName)
        sample['tree'] = TChain("Events")
        for f in sample['name']:
            sample['tree'].Add(f)
        if config.structure[sampleName]['isSignal'] == 1:
            dataloader.AddSignalTree(sample['tree'], 1.0)
        else:
            dataloader.AddBackgroundTree(sample['tree'], 1.0)
    dataloader.PrepareTrainingAndTestTree(
        TCut(config.cut), 'SplitMode=Random:NormMode=NumEvents:!V')
    # Four BDTG variants differing only in the number of cut steps (nCuts).
    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDTG4C1",
        "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=100:MaxDepth=2"
    )
    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDTG4C05",
        "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=50:MaxDepth=2"
    )
    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDTG4500",
        "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=500:MaxDepth=2"
    )
    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDTG4750",
        "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=750:MaxDepth=2"
    )
    # Run training, test and evaluation
    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()
    output.Close()
def TMVANN(filenameSTRING, outputDOTrootSTRING, sigtreeSTRINGS, bkgtreeSTRING, variablesSTRING):
    '''Train one low-level MLP classifier per signal tree.

    For each tree name in sigtreeSTRINGS a fresh TMVA factory is built
    against the common background tree and a 3-hidden-layer MLP is
    trained; all results accumulate in outputDOTrootSTRING (created
    empty first, then reopened in "update" mode each iteration).
    '''
    # Create (truncate) the output file once, up front.
    NNfile = R.TFile(outputDOTrootSTRING, "recreate")
    NNfile.Close()
    for sigtreeSTRING in sigtreeSTRINGS:
        from ROOT import TMVA
        file = R.TFile(filenameSTRING)  # importing the input dataset
        signaltree = file.Get(sigtreeSTRING)  # setting signaltree
        backgroundtree = file.Get(bkgtreeSTRING)  # setting backgroundtree
        # NOTE(review): `weights` is not defined in this function — it is
        # presumably a module-level string (tree-name suffix / weight
        # expression); verify before relying on it.
        sigweights = file.Get(sigtreeSTRING + weights)
        bkgweights = file.Get(bkgtreeSTRING + weights)
        TMVA.Tools.Instance()
        # Reopen the output file in update mode for this iteration's results.
        NNfile = R.TFile(
            outputDOTrootSTRING,
            "update")  # Writing the root file required for the TMVA factory
        TMVAfactory = TMVA.Factory(
            "TMVANN", NNfile,
            "V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification"
        )
        TMVAfactory.SetVerbose(False)  # Setting extra info (verbose) to false
        datasetsignalslowlevel = TMVA.DataLoader(
            "datasetsignalslowlevel")  # Instantiating a DataLoader
        datasetsignalslowlevel.AddSignalTree(signaltree, 1.)  # adding signal
        datasetsignalslowlevel.AddBackgroundTree(backgroundtree, 1.)  # adding background
        print(sigweights)
        datasetsignalslowlevel.SetSignalWeightExpression(weights)
        datasetsignalslowlevel.SetBackgroundWeightExpression(weights)
        for i in variablesSTRING:  # adding our training variables to the TMVA
            datasetsignalslowlevel.AddVariable(i)
        signalcut = R.TCut("")  # Variables are already cut
        backgroundcut = R.TCut("")
        # Use all events for training (nTrain=0), random split, NumEvents norm.
        datasetsignalslowlevel.PrepareTrainingAndTestTree(
            signalcut, backgroundcut,
            "nTrain_Signal= 0:nTrain_Background=0:Splitmode=Random:NormMode=NumEvents:!V"
        )
        # MLP with hidden layers 25,20,10; method name is made unique by
        # appending the signal tree name.
        TMVAfactory.BookMethod(
            datasetsignalslowlevel, TMVA.Types.kMLP,
            "LowLevelNN_3layer25,20,10_100Epoch_tanhNeuron" + sigtreeSTRING,
            "H:!V:NeuronType=tanh:VarTransform=N:NCycles=100:HiddenLayers=25,20,10:TestRate=5"
        )
        TMVAfactory.TrainAllMethods()
        TMVAfactory.TestAllMethods()
        TMVAfactory.EvaluateAllMethods()
        NNfile.Close()
        # NOTE(review): the file is immediately reopened here without a
        # matching Close() in the visible code — presumably used by code
        # that follows this chunk; confirm the handle's lifecycle.
        NNfile = R.TFile(outputDOTrootSTRING, "update")
def runJob():
    """Train an AdaBoost BDT with TMVA on samples defined in ``config``.

    Builds a DataLoader from every non-data sample in config.samples
    (signal vs. background decided by config.structure[...]['isSignal']),
    books a single BDT and runs the full train/test/evaluate cycle.
    Results are written to TMVA16.root; DataLoader output goes to the
    'dataset_8Feb' directory.
    """
    TMVA.Tools.Instance()
    TMVA.PyMethodBase.PyInitialize()
    dataloader = TMVA.DataLoader('dataset_8Feb')
    output = TFile.Open('TMVA16.root', 'RECREATE')
    factory = TMVA.Factory(
        'TMVAClassification', output,
        '!V:!Silent:Color:DrawProgressBar:AnalysisType=Classification')
    # Training variables come straight from the configuration module.
    for br in config.mvaVariables:
        dataloader.AddVariable(br)
    for sampleName, sample in config.samples.items():
        # Never train on real data.
        if config.structure[sampleName]['isData'] == 1:
            continue
        sample['tree'] = TChain("Events")
        for f in sample['name']:
            sample['tree'].Add(f)
        if config.structure[sampleName]['isSignal'] == 1:
            dataloader.AddSignalTree(sample['tree'], 1.0)
        else:
            dataloader.AddBackgroundTree(sample['tree'], 1.0)
    # Fixed split seed for reproducible training/test partitions.
    # BUG FIX: the original option string had a stray '::' between
    # SplitMode and SplitSeed, which inserts an empty option token.
    dataloader.PrepareTrainingAndTestTree(
        TCut(config.cut),
        'SplitMode=Random:SplitSeed=10:NormMode=EqualNumEvents')
    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDT",
        "!H:!V:NTrees=500:MinNodeSize=0.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.1:SeparationType=GiniIndex:nCuts=500"
    )
    # Run training, test and evaluation
    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()
    output.Close()
def TMVANN(filenameSTRING, outputDOTrootSTRING, sigtreeSTRINGS, bkgtreeSTRING, variablesSTRING):
    '''Train a set of TMVA classifiers, one factory per signal tree.

    For each tree name in sigtreeSTRINGS a fresh factory is built against
    the common background tree and several methods (Fisher, likelihood,
    MLP variants) are trained and evaluated.
    '''
    for sigtreeSTRING in sigtreeSTRINGS:
        from ROOT import TMVA
        file = R.TFile(filenameSTRING)  # importing the dataset
        signaltree = file.Get(sigtreeSTRING)  # setting signaltree
        backgroundtree = file.Get(bkgtreeSTRING)  # setting backgroundtree
        # NOTE(review): `weights` is not defined in this function — it is
        # presumably a module-level string suffix; verify before use.
        sigweights = file.Get(sigtreeSTRING+weights)
        bkgweights = file.Get(bkgtreeSTRING+weights)
        TMVA.Tools.Instance()
        # NOTE(review): opening with "recreate" inside the loop truncates the
        # file on every iteration, discarding earlier iterations' results.
        # The sibling implementation creates the file once up front and
        # reopens it in "update" mode here — confirm which is intended.
        NNfile = R.TFile(outputDOTrootSTRING,"recreate") #Writing the root file required for the TMVA factory
        TMVAfactory = TMVA.Factory("TMVANN",NNfile,"V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification")
        TMVAfactory.SetVerbose(False) #Setting extra info (verbose) to false
        dataset = TMVA.DataLoader("dataset") #Instantiating a dataset
        dataset.AddSignalTree(signaltree,1.) #adding signal
        dataset.AddBackgroundTree(backgroundtree ,1.) #adding background
        print(sigweights)
        # Event weights currently disabled:
        #dataset.SetSignalWeightExpression(weights)
        #dataset.SetBackgroundWeightExpression(weights)
        for i in variablesSTRING: #adding our training variables to the TMVA
            dataset.AddVariable(i)
        signalcut = R.TCut("") #Variables are already cut
        backgroundcut = R.TCut("")
        # Use all signal and background points to train, random selection,
        # normalised to summed event weights = number of events per tree,
        # no verbose.
        dataset.PrepareTrainingAndTestTree(signalcut,backgroundcut,"nTrain_Signal= 0:nTrain_Background=0:Splitmode=Random:NormMode=NumEvents:!V")
        # Booking some methods
        #TMVAfactory.BookMethod(dataset,TMVA.Types.kMLP,"ArtificialNeuralNetwork_1Layer_500Epoch_tanhNeuron","H:!V:NeuronType=tanh:VarTransform=N:NCycles=500:HiddenLayers=8:TestRate=5") #Artifical Neural Network 1 layers 500 epoch
        TMVAfactory.BookMethod(dataset,TMVA.Types.kFisher, "FisherMethod","H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=60:NsmoothMVAPdf=10") #Fisher Method
        TMVAfactory.BookMethod(dataset,TMVA.Types.kLikelihood, "BayesLikelihood","H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=60") #Bayes likelihood
        # MLP: 4 hidden layers of 8 nodes, 500 epochs, tanh neurons.
        TMVAfactory.BookMethod(dataset,TMVA.Types.kMLP,"ArtificialNeuralNetwork_4Layer8Node_500Epoch_tanhNeuron","H:!V:NeuronType=tanh:VarTransform=N:NCycles=500:HiddenLayers=8,8,8,8:TestRate=5")
        # NOTE(review): the next two method names say "reluNeuron" but the
        # option string books NeuronType=tanh — name/options disagree.
        TMVAfactory.BookMethod(dataset,TMVA.Types.kMLP,"ArtificialNeuralNetwork_1Layer6Node_500Epoch_reluNeuron","H:!V:NeuronType=tanh:VarTransform=N:NCycles=500:HiddenLayers=6:TestRate=5")
        TMVAfactory.BookMethod(dataset,TMVA.Types.kMLP,"ArtificialNeuralNetwork_1Layer3Node_100Epoch_reluNeuron","H:!V:NeuronType=tanh:VarTransform=N:NCycles=100:HiddenLayers=3:TestRate=5")
        # MLP: 2 hidden layers of 5 nodes, 500 epochs, tanh neurons.
        TMVAfactory.BookMethod(dataset,TMVA.Types.kMLP,"ArtificialNeuralNetwork_2Layer5Node_500Epoch_tanhNeuron","H:!V:NeuronType=tanh:VarTransform=N:NCycles=500:HiddenLayers=5,5:TestRate=5")
        TMVAfactory.TrainAllMethods()
        TMVAfactory.TestAllMethods()
        TMVAfactory.EvaluateAllMethods()
        NNfile.Close()
        print ('TMVANN Ran & made ROOT file ' + outputDOTrootSTRING+sigtreeSTRING)
def Evaluate(outdir):
    """Run a TMVA RuleFit training/evaluation in *outdir*.

    Reads the variable list from ``outdir/inputvars.txt`` (one name per
    line) and the signal/background event lists from
    ``outdir/signals.txt`` / ``outdir/backgrounds.txt``; writes
    ``tmva.root``, ``tmva.log`` and the weight files under
    ``outdir/weights``.

    Note: sys.stdout is redirected into the log file and is not
    restored before returning.
    """
    sys.stdout = open(outdir + '/tmva.log', 'w')
    # Output file
    output = TFile(outdir + '/tmva.root', 'RECREATE')
    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C
    # for more factory options).  All TMVA output can be suppressed by
    # removing the "!" (not) in front of the "Silent" argument.
    factory = TMVA.Factory("TMVARuleFit", output, "!V:!Silent:Color")
    # Set the variables used for the analysis.
    # BUG FIX: use a context manager so the file handle is closed (the
    # original leaked it and shadowed the builtin `input`), and rstrip
    # the newline so a missing trailing newline on the last line does
    # not chop a character (the original sliced with [:-1]).
    with open(outdir + '/inputvars.txt') as varfile:
        for variable in varfile:
            factory.AddVariable(variable.rstrip('\n'), 'F')
    # Set the weight directory
    TMVA.gConfig().GetIONames().fWeightFileDir = outdir + "/weights"
    # Limit the creation of correlation plots
    TMVA.gConfig().GetVariablePlotting().fMaxNumOfAllowedVariablesForScatterPlots = 20
    # Set the input files with signal and background events
    factory.SetInputTrees(outdir + '/signals.txt', outdir + '/backgrounds.txt')
    cutsig = TCut('')
    cutbkg = TCut('')
    factory.PrepareTrainingAndTestTree(
        cutsig, cutbkg, "SplitMode=Random:NormMode=NumEvents:!V")
    factory.BookMethod(
        TMVA.Types.kRuleFit, "RuleFit",
        "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.00001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02"
    )
    # Train MVAs
    factory.TrainAllMethods()
    # Test MVAs
    factory.TestAllMethods()
    # Evaluate MVAs
    factory.EvaluateAllMethods()
    # Save the output.
    output.Close()
def __init__(self, name, n_vars, n_targets=1, method='BDT', task='Classification'):
    """Set up a TMVA factory writing into a fresh temporary directory.

    Registers ``n_vars`` float input variables named X_0..X_{n_vars-1}
    and, for a 'Regression' task, ``n_targets`` float targets named
    y_0..y_{n_targets-1}.
    """
    self.name = name
    self.n_vars = n_vars
    self.n_targets = n_targets
    self.method = method
    self.task = task
    # All TMVA output lives in a scratch directory owned by this object.
    self.tmpdir = tempfile.mkdtemp()
    self.output = TFile(os.path.join(self.tmpdir, 'tmva_output.root'), 'recreate')
    self.factory = TMVA.Factory(name, self.output, 'AnalysisType={0}:Silent'.format(task))
    for idx in range(n_vars):
        self.factory.AddVariable('X_{0}'.format(idx), 'F')
    if task != 'Regression':
        return
    # Regression additionally needs explicit target branches.
    for idx in range(n_targets):
        self.factory.AddTarget('y_{0}'.format(idx), 'F')
def train_and_test_MVA(name, signal_tree, background_tree, output_file_name, n_sig, n_bgk):
    """Train and evaluate a TMVA Likelihood classifier.

    Parameters:
        name: factory job name.
        signal_tree / background_tree: TTrees holding the events.
        output_file_name: ROOT file to (re)create for the TMVA output.
        n_sig / n_bgk: numbers of signal/background events used for both
            training and testing.
    """
    outfile = root_open(output_file_name, 'recreate')
    factory = TMVA.Factory(name, outfile, "!V:!Silent:Color:DrawProgressBar")
    for var in variables:
        factory.AddVariable(var, 'F')
    factory.AddSignalTree(signal_tree)
    # BUG FIX: the original passed the undefined name `bkg_tree` here
    # (NameError at runtime); the parameter is `background_tree`.
    factory.AddBackgroundTree(background_tree)
    # Passes selection (currently marked as all variables are defined).
    cut1 = Cut('absolute_eta > 0')
    cut2 = Cut('angle_bl > 0')
    cut3 = Cut('M3 > 0')
    cut = cut1 & cut2 & cut3
    training_options = "nTrain_Signal=%d:nTrain_Background=%d:nTest_Signal=%d:nTest_Background=%d:!V" % (
        n_sig, n_bgk, n_sig, n_bgk)
    factory.PrepareTrainingAndTestTree(cut, cut, training_options)
    # Other available methods: PDE-RS (PDERS), K-Nearest Neighbour (KNN),
    # Linear discriminant (LD), MLP, BDT — see the TMVA manual.
    factory.BookMethod(TMVA.Types.kLikelihood, "Likelihood", "!V:NAvEvtPerBin=50")
    # Train MVAs using the set of training events.
    factory.TrainAllMethods()
    # Evaluate all MVAs using the set of test events.
    factory.TestAllMethods()
    # Evaluate and compare performance of all configured MVAs.
    factory.EvaluateAllMethods()
    outfile.close()
    # NOTE(review): `bkg_file` is not defined in this function — presumably
    # a module-level TFile opened elsewhere; verify before relying on it.
    bkg_file.close()
def add_variables(data_loader, good_variables):
    """Register every variable except 'Bc_M' as a float training input.

    Returns the same DataLoader to allow call chaining.
    """
    for variable in good_variables:
        # Bc_M is the discriminating mass used for the cuts below; it must
        # not be a training input.
        if variable != "Bc_M":
            data_loader.AddVariable(variable, "F")
    return data_loader


if __name__ == '__main__':
    decay_name = decay_names[decay_nb]
    # One output file per decay/run combination.
    output_file = TFile("~/TMVA/TMVAoutput" + decay_name + str(run) + ".root", "RECREATE")
    factory = TMVA.Factory("TMVA_" + decay_name, output_file, "DrawProgressBar=True")
    data_loader = TMVA.DataLoader("dataloader")
    # Monte-Carlo (True) and real-data (False) trees for this run/decay.
    moca_tree = dhand.combine_trees(run, decay_name, True)
    data_tree = dhand.combine_trees(run, decay_name, False)
    branches_to_keep = dhand.branch_selection(data_tree, branches, [])
    moca_tree = dhand.activate_branches(moca_tree, branches_to_keep)
    data_tree = dhand.activate_branches(data_tree, branches_to_keep)
    add_variables(data_loader, branches_to_keep)
    # k-fold cuts on runNumber%5: `kfold` selects the held-out fold.
    # The 5200 < Bc_M < 5400 window is used for signal, the Bc_M > 5400
    # upper sideband for background.
    sgcut_test = TCut("runNumber%5==" + kfold + "&& (Bc_M > 5200 && Bc_M < 5400)")
    sgcut_train = TCut("runNumber%5!=" + kfold + "&& (Bc_M > 5200 && Bc_M < 5400)")
    bgcut_test = TCut("runNumber%5==" + kfold + "&& Bc_M > 5400")
    bgcut_train = TCut("runNumber%5!=" + kfold + "&& Bc_M > 5400")
    # NOTE(review): the visible script ends after defining the cuts —
    # presumably tree registration and training follow (truncated).
from ROOT import TMVA, TFile, TTree, TCut, ObjectProxy
from subprocess import call
from os.path import isfile
import ROOT

from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.regularizers import l2
from keras.optimizers import SGD

# Setup TMVA
TMVA.Tools.Instance()
TMVA.PyMethodBase.PyInitialize()
output = TFile.Open('TMVA.root', 'RECREATE')
factory = TMVA.Factory('TMVAMulticlass', output, '!V:!Silent:Color:DrawProgressBar:Transformations=D,G:AnalysisType=multiclass')

# Load data: a single ttbar resolution tree serves all four classes.
data = TFile.Open("/net/data_cms1b/user/dmeuser/top_analysis/output/ttbar_res100.0_new.root")
t_all = data.Get("ttbar_res100.0/ttbar_res_dilepton_CP5")

dataloader = TMVA.DataLoader('dataset')
# Kinematic training variables (the commented variant also includes MET).
# ~for var in ["PuppiMET","METunc_Puppi","MET","HT","nJets","n_Interactions","Lep1_pt","Lep1_phi","Lep1_eta","Lep1_E","Lep1_flavor","Lep2_pt","Lep2_phi","Lep2_eta","Lep2_E","Lep2_flavor","Jet1_pt","Jet1_phi","Jet1_eta","Jet1_E","Jet2_pt","Jet2_phi","Jet2_eta","Jet2_E"]:
for var in ["PuppiMET","METunc_Puppi","HT","nJets","n_Interactions","Lep1_pt","Lep1_phi","Lep1_eta","Lep1_E","Lep1_flavor","Lep2_pt","Lep2_phi","Lep2_eta","Lep2_E","Lep2_flavor","Jet1_pt","Jet1_phi","Jet1_eta","Jet1_E","Jet2_pt","Jet2_phi","Jet2_eta","Jet2_E"]:
    dataloader.AddVariable(var)
# The same tree is registered once per multiclass category.
# NOTE(review): per-class event selection presumably happens later
# (truncated here) — otherwise all four classes see identical events.
dataloader.AddTree(t_all, 'Bin1')
dataloader.AddTree(t_all, 'Bin2')
dataloader.AddTree(t_all, 'Bin3')
dataloader.AddTree(t_all, 'Bin4')
# Assemble the full dataset from the three per-class arrays.
# NOTE(review): class_0/class_1/class_2, n_events and RNG are defined in
# the earlier (unseen) part of this script.
X = np.concatenate([class_0, class_1, class_2])
y = np.ones(X.shape[0])
# Random integer event weights in [1, 10).
w = RNG.randint(1, 10, n_events * 3)
# Label encoding: first block -> class 0, last block -> class 2,
# the middle block keeps label 1.
y[:class_0.shape[0]] *= 0
y[-class_2.shape[0]:] *= 2
# Shuffle the events.
# NOTE(review): `w` is NOT permuted alongside X and y here, so weights no
# longer line up with their events — confirm whether this is intended
# (the weights are i.i.d. random, so it may not matter statistically).
permute = RNG.permutation(y.shape[0])
X = X[permute]
y = y[permute]

# Split into training and test datasets
X_train, y_train, w_train = X[:n_events], y[:n_events], w[:n_events]
X_test, y_test, w_test = X[n_events:], y[n_events:], w[n_events:]

output = TFile('tmva_output.root', 'recreate')
factory = TMVA.Factory('classifier', output, 'AnalysisType=Multiclass:'
                       '!V:Silent:!DrawProgressBar')
# Newer ROOT versions moved the data-handling API onto a DataLoader.
if ROOT_VERSION >= '6.07/04':
    data = TMVA.DataLoader('.')
else:
    data = factory
for n in range(2):
    data.AddVariable('f{0}'.format(n), 'F')

# Call root_numpy's utility functions to add events from the arrays
add_classification_events(data, X_train, y_train, weights=w_train)
add_classification_events(data, X_test, y_test, weights=w_test, test=True)
# The following line is necessary if events have been added individually:
data.PrepareTrainingAndTestTree(TCut('1'), 'NormMode=EqualNumEvents')
def main():
    """Command-line driver for a TMVA diphoton classification job.

    Parses options (method list, mass point, category, input/output
    files, tree names), registers the photon kinematic variables,
    trains AdaBoost and gradient-boost BDTs, and writes the results to
    <outputfile>_<mass>_<cat>.root.
    """
    try:
        # retrieve command line options
        shortopts = "m:M:C:B:i:t:o:vh?"
        opts, args = getopt.getopt(sys.argv[1:], shortopts)
    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    # Defaults, overridden by the command-line options below.
    infname = DEFAULT_INFNAME
    methods = DEFAULT_METHODS
    mass = DEFAULT_MASS
    cat = DEFAULT_CAT
    outfname = DEFAULT_OUTFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    bkg_method = DEFAULT_BACKGROUND
    width = DEFAULT_WIDTH
    verbose = False
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-M", "--mass"):
            mass = int(a)
        elif o in ("-C", "--cat"):
            cat = int(a)
        elif o in ("-B", "--background"):
            bkg_method = int(a)
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            # Expect exactly two (non-empty) tree names: signal then
            # background after the reverse sort.
            a.strip()
            trees = a.rsplit(' ')
            trees.sort()
            trees.reverse()
            if len(trees) - trees.count('') != 2:
                print "ERROR: need to give two trees (each one for signal and background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True

    # Suffix encoding the mass-window width.
    # NOTE(review): width_str stays undefined (NameError when building the
    # background tree names below) if DEFAULT_WIDTH is neither 0.02 nor
    # 0.07 — confirm the allowed values.
    if (width == 0.02):
        width_str = "_2pt"
    elif (width == 0.07):
        width_str = "_7pt"
    mass_str = "_" + str(mass)
    cat_str = "_" + str(cat)
    if cat < 0:
        cat_str = "_all"
    outfname = outfname + mass_str + cat_str + ".root"
    #treeNameSig = treeNameSig + mass_str
    #treeNameBkg = treeNameBkg + mass_str

    # Print methods
    mlist = methods.replace(' ', ',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut

    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)

    # Logon not automatically loaded through PyROOT (logon loads TMVA library)
    # load also GUI
    gROOT.SetMacroPath("./")
    #gROOT.Macro ( "./TMVAlogon.C" )
    #gROOT.LoadMacro ( "./TMVAGui.C" )

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile(outfname, 'RECREATE')

    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C
    # for more factory options).  All TMVA output can be suppressed by
    # removing the "!" (not) in front of the "Silent" argument in the
    # option string.
    factory = TMVA.Factory(
        "TMVAClassification", outputFile,
        "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification"
    )

    # Set verbosity
    factory.SetVerbose(verbose)

    # Define the input variables that shall be used for the classifier training
    factory.AddVariable("pho1_ptOverM", "P_{T}^{lead} / M_{H}", "", 'F')
    factory.AddVariable("pho2_ptOverM", "P_{T}^{sublead} / M_{H}", "", 'F')
    factory.AddVariable("pho1_eta", "#eta^{lead}", "", 'F')
    factory.AddVariable("pho2_eta", "#eta^{sublead}", "", 'F')
    factory.AddVariable("d_phi", "#Delta #phi", "rad", 'F')  #should this be cos delta Phi
    factory.AddVariable("H_ptOverM", "P_{T}^{Higgs}/M_{H}", "", 'F')
    factory.AddVariable("H_eta", "#eta^{Higgs}", "", 'F')
    factory.AddVariable("sigmaMOverM", "#sigmaM_{cor} / M", 'F')
    factory.AddVariable("sigmaMOverM_wrongVtx", "#sigmaM_{wrong} / M", 'F')
    factory.AddVariable("vtx_prob", "P_{vertex}", "", 'F')
    #factory.AddVariable( "cos_theta_star","cos(#theta)*", "", 'F' );
    #factory.AddVariable( "max_eta","max(#eta^{lead},#eta^{sub.})", "", 'F' );
    #factory.AddVariable( "min_r9","min(r9^{lead},r9^{sub.})", "", 'F' );
    factory.AddVariable("deltaMOverM", "#DeltaM / M_{Hypth}.", 'F')

    input = TFile.Open(infname)

    # Get the signal and background trees for training
    signal_train = input.Get(treeNameSig + "_train" + mass_str + ".0")
    signal_test = input.Get(treeNameSig + "_test" + mass_str + ".0")
    background_train = input.Get(treeNameBkg + "_train" + width_str + mass_str + ".0")
    background_test = input.Get(treeNameBkg + "_test" + width_str + mass_str + ".0")

    # Global event weights (see below for setting event-wise weights)
    signalWeight = 1.0
    backgroundWeight = 1.0

    # ====== register trees ====================================================
    factory.AddSignalTree(signal_train, signalWeight, "train")
    factory.AddBackgroundTree(background_train, backgroundWeight, "train")
    factory.AddSignalTree(signal_test, signalWeight, "test")
    factory.AddBackgroundTree(background_test, backgroundWeight, "test")

    # Set individual event weights (the variables must exist in the original
    # TTree)
    factory.SetBackgroundWeightExpression("wt")
    factory.SetSignalWeightExpression("wt")

    # Apply additional cuts on the signal and background sample.
    # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )
    mycut = TCut("fabs(deltaMOverM)<=" + str(width))

    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    factory.PrepareTrainingAndTestTree(
        mycut, mycut,
        "nTrain_Signal=0:nTrain_Background=0:NormMode=NumEvents:!V")

    # Boosted Decision Trees: AdaBoost and gradient-boost variants.
    # NEW PARAMETERS
    factory.BookMethod(
        TMVA.Types.kBDT, "BDT_ada" + mass_str + cat_str,
        "!H:!V:NTrees=400:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.05:SeparationType=GiniIndex:nCuts=50:PruneMethod=NoPruning"
    )
    factory.BookMethod(
        TMVA.Types.kBDT, "BDT_grad" + mass_str + cat_str,
        "!H:!V:NTrees=500:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=50:NNodesMax=5"
    )  #test

    # --------------------------------------------------------------------------
    # ---- Now you can tell the factory to train, test, and evaluate the MVAs.

    # Train MVAs
    #factory.OptimizeAllMethods()
    factory.TrainAllMethods()
    # Test MVAs
    factory.TestAllMethods()
    # Evaluate MVAs
    factory.EvaluateAllMethods()

    # Save the output.
    outputFile.Close()
    print "=== wrote root file %s\n" % outfname
    print "=== TMVAClassification is done!\n"
from os.path import isfile
import sys
import numpy as np

from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten, Activation, Reshape
from keras.regularizers import l2
from keras.optimizers import SGD

# Setup TMVA
# NOTE(review): TMVA and TFile are not imported in this visible chunk —
# presumably imported from ROOT earlier in the file.
TMVA.Tools.Instance()
TMVA.PyMethodBase.PyInitialize()
output = TFile.Open('TMVA_CNN_PyKeras.root', 'RECREATE')
factory = TMVA.Factory(
    'TMVAClassification', output,
    '!V:!Silent:Color:DrawProgressBar:Transformations=None:AnalysisType=Classification'
)

############################ Loading the data file
data = TFile.Open("/home/jui/Desktop/tmva/sample_images_32x32.root")
# print(data.ls())
# ';2' picks an explicit cycle of each tree in the file.
signal = data.Get('sig_tree;2')
background = data.Get('bkg_tree;2')
dataloader = TMVA.DataLoader('dataset_evaltest')
# Each event is a flat image array of 32*32 = 1024 pixels.
imgSize = 1024
dataloader.AddVariablesArray("vars", imgSize)
dataloader.AddSignalTree(signal, 1.0)
from root_numpy import ROOT_VERSION
from ROOT import TMVA, TFile, TCut
from array import array

plt.style.use('ggplot')
RNG = np.random.RandomState(1)

# Create an example regression dataset: a noisy sum of two sinusoids.
X = np.linspace(0, 6, 100)[:, np.newaxis]
y = np.sin(X).ravel() + \
    np.sin(6 * X).ravel() + \
    RNG.normal(0, 0.1, X.shape[0])

# Fit a regression model
output = TFile('tmva_output.root', 'recreate')
factory = TMVA.Factory('regressor', output, 'AnalysisType=Regression:'
                       '!V:Silent:!DrawProgressBar')
# Newer ROOT versions moved the data-handling API onto a DataLoader.
if ROOT_VERSION >= '6.07/04':
    data = TMVA.DataLoader('.')
else:
    data = factory
data.AddVariable('x', 'F')
data.AddTarget('y', 'F')
# Same events are used for training and testing in this toy example.
add_regression_events(data, X, y)
add_regression_events(data, X, y, test=True)
# The following line is necessary if events have been added individually:
data.PrepareTrainingAndTestTree(TCut('1'), '')
# Version shim so BookMethod can be called uniformly below.
# NOTE(review): the matching else-branch appears to be truncated here.
if ROOT_VERSION >= '6.07/04':
    BookMethod = factory.BookMethod
#ROOT.gROOT.SetBatch(True) # Setup TMVA TMVA.Tools.Instance() TMVA.PyMethodBase.PyInitialize() #r = ROOT.TRInterface.Instance() #TMVA.RMethodBase.Initialize() #MethodRXGB.Init() output = TFile.Open( 'Output_Classification_BsPhiJpsiEE_noPhiM_pTcuts_BOBest.root', 'RECREATE') #factory = TMVA.Factory('TMVAClassification_BsPhiEE', output, # '!V:ROC:!Silent:Color:DrawProgressBar:Transformations=D,G:AnalysisType=Classification') factory = TMVA.Factory( 'TMVAClassification_BsPhiJpsiEE_noPhiM_pTcuts_BOBest', output, '!V:ROC:!Silent:Color:DrawProgressBar:AnalysisType=Classification') bkg_name = "BsPhiJpsiEE_MVATraining_Bkg_pTcuts.root" sig_name = "BsPhiJpsiEE_MVATraining_Sig_pTcuts.root" #bkg_name = "BsPhiJpsiEE_MVATraining_Bkg.root" #sig_name = "BsPhiJpsiEE_MVATraining_Sig.root" #branches = ['elePtLead', 'elePtSublead', 'kaonPtLead', 'kaonPtSublead', 'jpsiPt', 'phiPt', 'bsPt', 'eledR', 'kaondR', 'jpsiPhidR', 'svProb', 'svCosine', 'svLxySig', 'eleD0Lead', 'eleD0Sublead', 'eleDzLead', 'eleDzSublead', 'kaonD0Lead', 'kaonD0Sublead', 'kaonDzLead', 'kaonDzSublead', 'kaonNormChi2Lead', 'kaonNormChi2Sublead'] #branches = ['elePtLead', 'elePtSublead', 'kaonPtLead', 'kaonPtSublead', 'bsPt', 'eledR', 'kaondR', 'jpsiPhidR', 'svProb', 'svCosine', 'svLxySig', 'eleD0Lead', 'eleD0Sublead', 'eleDzLead', 'eleDzSublead', 'kaonD0Lead', 'kaonD0Sublead', 'kaonDzLead', 'kaonDzSublead', 'kaonNormChi2Lead', 'kaonNormChi2Sublead'] branches = [ 'elePtLead', 'elePtSublead', 'kaonPtLead', 'kaonPtSublead', 'bsPt', 'svProb', 'svCosine', 'svLxySig', 'eleD0Lead', 'eleD0Sublead', 'eleDzLead', 'eleDzSublead', 'kaonD0Lead', 'kaonD0Sublead', 'kaonDzLead', 'kaonDzSublead', 'kaonNormChi2Lead', 'kaonNormChi2Sublead' ]
# NOTE(review): loader, weightStrB, cutStrS/cutStrB, NSIG/NBKG(_TEST),
# modelName, epochs, batchSize and patience are all defined earlier in
# the (unseen) part of this script.
loader.SetBackgroundWeightExpression(weightStrB)
mycutSig = TCut(cutStrS)
mycutBkg = TCut(cutStrB)
loader.PrepareTrainingAndTestTree(
    mycutSig, mycutBkg,
    "nTrain_Signal=" + str(NSIG) +\
    ":nTrain_Background=" + str(NBKG) +\
    ":nTest_Signal=" + str(NSIG_TEST) +\
    ":nTest_Background=" + str(NBKG_TEST) +\
    ":SplitMode=Random:NormMode=NumEvents:!V"
)
# Factory built with the no-output-file overload: only the weight files
# matter for this optimisation run.
factory = TMVA.Factory(
    "Optimization",
    '!V:!ROC:!Silent:Color:!DrawProgressBar:Transformations=I;:AnalysisType=Classification'
)
# NOTE(review): '/weights' is an absolute path (filesystem root) —
# presumably './weights' or a run-dir path was intended; verify.
(TMVA.gConfig().GetIONames()).fWeightFileDir = '/weights'
# PyKeras method options: Gaussian variable transform, externally built
# Keras model file, early stopping after `patience` stalled epochs.
kerasSetting = '!H:!V:VarTransform=G:FilenameModel=' + modelName +\
    ':SaveBestOnly=true' +\
    ':NumEpochs=' + str(epochs) +\
    ':BatchSize=' + str(batchSize) +\
    ':TriesEarlyStopping=' + str(patience)
factory.BookMethod(loader, TMVA.Types.kPyKeras, "PyKeras", kerasSetting)
factory.TrainAllMethods()
factory.TestAllMethods()
factory.EvaluateAllMethods()
# Switches for which MVA methods to book (1 = use, 0 = skip).
Use = {
    'BDT': 1,
    'BDTG': 0,
    'BDTRT': 0,
    'BDTB': 0,
    'BDTD': 0,
    'BDTF': 0,
    'MLP': 0,
    'MLPBFGS': 0,
    'MLPBNN': 0,
    'CFMlpANN': 0,
    'TMlpANN': 0
}
# NOTE(review): `t` is presumably the ROOT TMVA module alias and
# outputFile / outputWeightsDir are defined earlier in this (unseen)
# script — verify.
factory = t.Factory(
    'vbf_bdt_combined_james_current', outputFile,
    '!V:!Silent:Color:DrawProgressBar:Transformations=I;D;G,D:AnalysisType=Classification'
)
factory.Print()
# Redirect the weight-file output directory.
t.gConfig().GetIONames().fWeightFileDir = outputWeightsDir
dataloader = t.DataLoader(".")
# Training variables: (name, expression, title, type).
dataloader.AddVariable('dijetDEta', 'dijetDEta', 'dijetDEta', 'F')
dataloader.AddVariable('dijetDPhi', 'dijetDPhi', 'dijetDPhi', 'F')
dataloader.AddVariable('llgJJDPhi', 'llgJJDPhi', 'llgJJDPhi', 'F')
dataloader.AddVariable('jPhotonDRMin', 'jPhotonDRMin', 'jPhotonDRMin', 'F')
dataloader.AddVariable('ptt', 'ptt', 'ptt', 'F')
dataloader.AddVariable('jetOnePt', 'jetOnePt', 'jetOnePt', 'F')
dataloader.AddVariable('jetTwoPt', 'jetTwoPt', 'jetTwoPt', 'F')
dataloader.AddVariable('kin_bdt_james', 'kin_bdt_james', 'kin_bdt_james', 'F')
Authors: A. Ramirez-Morales and J. Salmon-Gamboa
---------------------------------------------------------------
'''
# tmva module
import ROOT
from ROOT import TFile, TMVA, TCut

# Output/input files for the breast-cancer classification example.
m_outputFile = TFile("TMVA.root", 'RECREATE')
m_inputFile = TFile.Open("./data/cancer.root")

TMVA.Tools.Instance()
m_factory = TMVA.Factory(
    "TMVAClassification",
    m_outputFile  #this is optional
    , "!V:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification"
)

m_loader = TMVA.DataLoader("dataset")
# Training variables (all registered as floats).
m_loader.AddVariable("age", 'F')
m_loader.AddVariable("menopause", 'F')
m_loader.AddVariable("tumorSize", 'F')
m_loader.AddVariable("invNodes", 'F')
m_loader.AddVariable("degMalig", 'F')
# Candidate variables currently excluded from training:
# m_loader.AddVariable("nodeCaps", 'F')
# m_loader.AddVariable("breast", 'I')
# m_loader.AddVariable("breastQuad")
# m_loader.AddVariable("irradiat")
# NOTE(review): `treename` and `debug` are defined earlier in the
# (unseen) part of this script.
traintree_name = treename
trainfilename = "proctrain.csv.root"
trainfile = TFile.Open(trainfilename, "read")
traintree = trainfile.Get(traintree_name)
TMVA.Tools.Instance()
# create the tmva output file, which will be full of details about the training
fout = TFile("tmvatest.root", "RECREATE")
# use the default factory
factory = TMVA.Factory("TMVAClassification", fout)
dataloader = TMVA.DataLoader("dataset")
# build the list of variables from the tree's branches
al = traintree.GetListOfBranches()
varlist = []
for i in range(al.GetEntries()):
    varlist += [al[i].GetName()]
if debug:
    print "all variables of ", trainfile, " ", varlist
    print "now stripping EventId Weight and Label "
# these three variables should not be used for training
def main(): NTupName = "JetTree" alg = "TruthRawTrim" cuts = ["eta>-1.2","eta<1.2","pt>300","pt<5000","m>61","m<85"] vars = ["Tau21","T2jet"] #NTupName = "varTree" verbose = True #alg = "AK10LCTRIMF5R20" spectators = ["m"] #cuts = ["eta>-1.2","eta<1.2","pt>200","pt<350","m>61","m<85","TruthRecoMatch==1"] #vars = ["TauWTA2TauWTA1","ZCUT12","Dip23","TJetVol","ActiveArea","PullC10","Angularity"] methods = "Likelihood" print "Starting and getting arguments:" allargs = sys.argv[1:] if len(allargs)<5: print "You input these args" print allargs print "Not enough args, please try again" return 1 else: alg = allargs[0] spectators = allargs[1].split(",") cuts = allargs[2].split(",") vars = allargs[3].split(",") methods = allargs[4] print "Running with args:" print " alg = ",alg print " spectators = ",spectators print " cuts = ",cuts print " vars = ",vars print " methods = ",methods # Print methods mlist = methods.replace(' ',',').split(',') print "=== TMVAClassification: use method(s)..." for m in mlist: if m.strip() != '': print "=== - <%s>" % m.strip() #=============================== #Read training and test data #=============================== #InputDir = "../gen_20170529/" InputDir="../Ana_EventGeneration/" #InputDir="~/Downloads/" print "Getting inputs from: ",InputDir #s1 = TFile(InputDir+"ntuple_ttbar_2000.root"); #b1 = TFile(InputDir+"ntuple_dijet_800_1400.root"); s1 = TFile(InputDir+"ntuple_tt_test10000.root"); b1 = TFile(InputDir+"ntuple_dijet_test10000.root"); # Output file OutFileName="testout.root" outputFile = TFile( OutFileName, 'RECREATE' ) # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options) # All TMVA output can be suppressed by removing the "!" 
(not) in # front of the "Silent" argument in the option string factory = TMVA.Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ) # Set verbosity factory.SetVerbose( verbose ) # data loader dataloader=TMVA.DataLoader("dataset") # weight="" # weight+="pass_selection*EventWeight*CrossSection*(" # weight+=alg+"_pt>"+pt1+" && " # weight+=alg+"_pt<"+pt2 # if m1!="0": # weight+=" && "+alg+"_m>"+m1+" && " # weight+=alg+"_m<"+m2 # weight+=")" # # #Get signal and background histograms # if variable=="mass": # histname = "akt10"+alg+"_trim_"+variable # else: # histname = alg+"_"+variable #====================================== #Predefined cuts - for isntance on M(j1) #====================================== mycuts = "1.0" mycutb = "1.0" for cut in cuts: placecut=cut if cut[:2]=="pt" or cut[:3]=="eta" or cut[:4]=="m": placecut = "* ("+alg+"_"+cut+")" else: placecut="* ("+cut+") " mycuts += placecut mycutb += placecut print "MyCutsSig: ",mycuts print "MyCutsBkg: ",mycutb #=================================== #Spectator variables from tree #===================================== for spec in spectators: dataloader.AddSpectator( spec, 'F' ) #=================================== #MVA variables from tree #===================================== for var in vars: dataloader.AddVariable( var , 'F' ) #=============================== #Read training and test data #=============================== print "Getting trees ... 
",NTupName st1 = s1.Get(NTupName) bt1 = b1.Get(NTupName) #========================================= # global event weights per tree (see below for setting event-wise weights) #========================================= ws1 = 1.0 wb1 = 1.0 #========================================= # You can add an arbitrary number of signal or background trees #========================================= dataloader.AddSignalTree ( st1, ws1 ); #SCHSU #dataloader.SetSignalWeightExpression("EventWeight*CrossSection"); dataloader.AddBackgroundTree( bt1, wb1 ); #dataloader.SetBackgroundWeightExpression("EventWeight*CrossSection"); # Here, the relevant variables are copied over in new, slim trees that are # used for TMVA training and testing # "SplitMode=Random" means that the input events are randomly shuffled before # splitting them into training and test samples mycutSig = TCut(mycuts) mycutBkg = TCut(mycutb) dataloader.PrepareTrainingAndTestTree( mycutSig, mycutBkg, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ) # -------------------------------------------------------------------------------------------------- # ---- Book MVA methods # # please lookup the various method configuration options in the corresponding cxx files, eg: # src/MethoCuts.cxx, etc, or here: http:#tmva.sourceforge.net/optionRef.html # it is possible to preset ranges in the option string in which the cut optimisation should be done: # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable # Cut optimisation if "Cuts" in mlist: factory.BookMethod( dataloader, TMVA.Types.kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ) if "CutsD" in mlist: factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ) if "CutsPCA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsPCA", 
"!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ) if "CutsGA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ) if "CutsSA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ) # Likelihood ("naive Bayes estimator") if "Likelihood" in mlist: factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "Likelihood", "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ) # Decorrelated likelihood if "LikelihoodD" in mlist: factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ) # PCA-transformed likelihood if "LikelihoodPCA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ) # Use a kernel density estimator to approximate the PDFs if "LikelihoodKDE" in mlist: factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ) # Use a variable-dependent mix of splines and kernel density estimator if "LikelihoodMIX" in mlist: factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodMIX", 
"!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ) # Test the multi-dimensional probability density estimator # here are the options strings for the MinMax and RMS methods, respectively: # "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); # "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if "PDERS" in mlist: # DEFAULT # factory.BookMethod( dataloader, TMVA.Types.kPDERS, "PDERS", # "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ) # CHOOSE RIGID VOLUME SO IT DOESNT TAKE SO LONG factory.BookMethod( dataloader, TMVA.Types.kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Unscaled:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ) if "PDERSD" in mlist: factory.BookMethod( dataloader, TMVA.Types.kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ) if "PDERSPCA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ) # Multi-dimensional likelihood estimator using self-adapting phase-space binning if "PDEFoam" in mlist: factory.BookMethod( dataloader, TMVA.Types.kPDEFoam, "PDEFoam", "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ) if "PDEFoamBoost" in mlist: factory.BookMethod( dataloader, TMVA.Types.kPDEFoam, "PDEFoamBoost", 
"!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" ) # K-Nearest Neighbour classifier (KNN) if "KNN" in mlist: factory.BookMethod( dataloader, TMVA.Types.kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ) # H-Matrix (chi2-squared) method if "HMatrix" in mlist: factory.BookMethod( dataloader, TMVA.Types.kHMatrix, "HMatrix", "!H:!V" ) # Linear discriminant (same as Fisher discriminant) if "LD" in mlist: factory.BookMethod( dataloader, TMVA.Types.kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ) # Fisher discriminant (same as LD) if "Fisher" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ) # Fisher with Gauss-transformed input variables if "FisherG" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFisher, "FisherG", "H:!V:VarTransform=Gauss" ) # Composite classifier: ensemble (tree) of boosted Fisher classifiers if "BoostedFisher" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" ) # Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if "FDA_MC" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ); if "FDA_GA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if "FDA_SA" in mlist: 
factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if "FDA_MT" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if "FDA_GAMT" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if "FDA_MCMT" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); # TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if "MLP" in mlist: factory.BookMethod( dataloader, TMVA.Types.kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ) if "MLPBFGS" in mlist: factory.BookMethod( dataloader, TMVA.Types.kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ) if "MLPBNN" in mlist: factory.BookMethod( dataloader, TMVA.Types.kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ) # BFGS training with bayesian regulators # CF(Clermont-Ferrand)ANN if "CFMlpANN" in mlist: factory.BookMethod( dataloader, TMVA.Types.kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" 
) # n_cycles:#nodes:#nodes:... # Tmlp(Root)ANN if "TMlpANN" in mlist: factory.BookMethod( dataloader, TMVA.Types.kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ) # n_cycles:#nodes:#nodes:... # Support Vector Machine if "SVM" in mlist: factory.BookMethod( dataloader, TMVA.Types.kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ) # Boosted Decision Trees if "BDTG" in mlist: factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDTG", "!H:!V:NTrees=1000:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" ) if "BDT" in mlist: factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDT", "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" ) if "BDTB" in mlist: factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" ) if "BDTD" in mlist: factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDTD", "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" ) # RuleFit -- TMVA implementation of Friedman's method if "RuleFit" in mlist: factory.BookMethod( dataloader, TMVA.Types.kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ) # -------------------------------------------------------------------------------------------------- # ---- Now you can tell the factory to train, test, and evaluate the MVAs. # Train MVAs factory.TrainAllMethods() # Test MVAs factory.TestAllMethods() # Evaluate MVAs factory.EvaluateAllMethods() # Save the output. outputFile.Close() print "=== wrote root file %s\n" % OutFileName print "=== TMVAClassification is done!\n"
def main():
    # Entry point: parse command-line options, configure a TMVA classification
    # job on "ljmet" ntuples (TTTT signal vs. the backgrounds in varsList.bkg)
    # and train a PyKeras DNN.  DEFAULT_* constants, usage(), varsList and the
    # weight/cut strings (weightStrS/B, cutStrS/B) are defined elsewhere in
    # this file.
    try:
        # retrive command line options
        shortopts = "m:i:n:d:k:l:t:o:vh?"
        longopts = ["methods=", "inputfile=", "nTrees=", "maxDepth=", "mass=",
                    "varListKey=", "inputtrees=", "outputfile=", "verbose",
                    "help", "usage"]
        opts, args = getopt.getopt( sys.argv[1:], shortopts, longopts )
    except getopt.GetoptError:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    # Defaults, possibly overridden by the options parsed below.
    infname = DEFAULT_INFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname = DEFAULT_OUTFNAME
    methods = DEFAULT_METHODS
    nTrees = DEFAULT_NTREES
    mDepth = DEFAULT_MDEPTH
    mass = DEFAULT_MASS
    varListKey = DEFAULT_VARLISTKEY
    verbose = True

    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-d", "--maxDepth"):
            mDepth = a
        elif o in ("-k", "--mass"):
            mass = a
        elif o in ("-l", "--varListKey"):
            varListKey = a
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-n", "--nTrees"):
            nTrees = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            # NOTE(review): str.strip() returns a new string; this call has no
            # effect on `a` as written.
            a.strip()
            trees = a.rsplit( ' ' )
            trees.sort()
            trees.reverse()
            if len(trees)-trees.count('') != 2:
                print "ERROR: need to give two trees (each one for signal and background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True

    # Derive a label for this training from method/variable-list/depth.
    varList = varsList.varList[varListKey]
    nVars = str(len(varList))+'vars'
    Note=''+methods+'_'+varListKey+'_'+nVars+'_mDepth'+mDepth
    # NOTE(review): this unconditionally overwrites any -o/--outputfile value
    # parsed above -- confirm that is intended.
    outfname = "dataset/weights/TMVA_"+Note+".root"

    # Print methods
    mlist = methods.replace(' ',',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Import ROOT classes
    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)

    # Output file
    outputFile = TFile( outfname, 'RECREATE' )

    factory = TMVA.Factory( "TMVAClassification", outputFile,
                            "!V:!Silent:Color:DrawProgressBar:Transformations=I;:AnalysisType=Classification" )
    factory.SetVerbose( verbose )
    # Redirect the XML weight files into a per-training subdirectory.
    (TMVA.gConfig().GetIONames()).fWeightFileDir = "weights/"+Note

    dataloader = TMVA.DataLoader('dataset')
    # Register the input variables; the jet count is an integer, the rest floats.
    for iVar in varList:
        if iVar[0]=='NJets_singleLepCalc':
            dataloader.AddVariable(iVar[0],iVar[1],iVar[2],'I')
        else:
            dataloader.AddVariable(iVar[0],iVar[1],iVar[2],'F')

    # Signal sample: four-top ntuple, tree name "ljmet".
    inputDir = varsList.inputDir
    infname = "TTTT_TuneCP5_PSweights_13TeV-amcatnlo-pythia8_hadd.root"
    iFileSig = TFile.Open(inputDir+infname)
    sigChain = iFileSig.Get("ljmet")
    dataloader.AddSignalTree(sigChain)

    # Background samples; any empty tree is skipped.
    bkg_list = []
    bkg_trees_list = []
    hist_list = []
    weightsList = []
    for i in range(len(varsList.bkg)):
        bkg_list.append(TFile.Open(inputDir+varsList.bkg[i]))
        print inputDir+varsList.bkg[i]
        bkg_trees_list.append(bkg_list[i].Get("ljmet"))
        bkg_trees_list[i].GetEntry(0)
        if bkg_trees_list[i].GetEntries() == 0:
            continue
        dataloader.AddBackgroundTree( bkg_trees_list[i], 1)

    signalWeight = 1

    # Event-wise weights and selection cuts.
    dataloader.SetSignalWeightExpression( weightStrS )
    dataloader.SetBackgroundWeightExpression( weightStrB )
    mycutSig = TCut( cutStrS )
    mycutBkg = TCut( cutStrB )
    # "SplitMode=Random" shuffles the input events before the train/test split.
    dataloader.PrepareTrainingAndTestTree( mycutSig, mycutBkg,
                                           "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" )

    # PyKeras options; the model is built below and loaded from model.h5.
    # NOTE(review): "BatchSize=1028" looks like a typo for 1024 -- confirm.
    kerasSetting = 'H:!V:VarTransform=G:FilenameModel=model.h5:NumEpochs=10:BatchSize=1028'

    # Fully-connected network: 53 inputs -> four 100-node relu layers ->
    # 2-node sigmoid output (one node per class).
    model = Sequential()
    model.add(Dense(100, activation='relu', input_dim=53))
    model.add((Dense(100, activation="relu")))
    model.add((Dense(100, activation="relu")))
    model.add((Dense(100, activation="relu")))
    model.add((Dense(2, activation="sigmoid")))
    # Set loss and optimizer
    model.compile(loss='categorical_crossentropy',
                  optimizer=Adam(),
                  metrics=['accuracy',])
    # Store model to file
    model.save('model.h5')
    model.summary()

    # Book, then run training, test and evaluation.
    if methods=="Keras":
        factory.BookMethod(dataloader, TMVA.Types.kPyKeras, "PyKeras",kerasSetting)
    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()
    outputFile.Close()
    # save plots:
    print "DONE"
def main():
    # Entry point: parse command-line options and run a TMVA BDT classification
    # for one charged-Higgs mass point against the backgrounds in varsList.bkg.
    # Uses the old (pre-DataLoader) factory API: variables and trees are
    # registered directly on the factory.  DEFAULT_* constants, usage(),
    # varsList and weightStrS/B, cutStrS/B are defined elsewhere in this file.
    try:
        # retrive command line options
        shortopts = "m:i:n:d:k:l:t:o:vh?"
        longopts = [
            "methods=", "inputfile=", "nTrees=", "maxDepth=", "mass=",
            "varListKey=", "inputtrees=", "outputfile=", "verbose", "help",
            "usage"
        ]
        opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts)
    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    # Defaults, possibly overridden by the options parsed below.
    infname = DEFAULT_INFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname = DEFAULT_OUTFNAME
    methods = DEFAULT_METHODS
    nTrees = DEFAULT_NTREES
    mDepth = DEFAULT_MDEPTH
    mass = DEFAULT_MASS
    varListKey = DEFAULT_VARLISTKEY
    verbose = True

    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-d", "--maxDepth"):
            mDepth = a
        elif o in ("-k", "--mass"):
            mass = a
        elif o in ("-l", "--varListKey"):
            varListKey = a
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-n", "--nTrees"):
            nTrees = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            # NOTE(review): str.strip() returns a new string; this call has no
            # effect on `a` as written.
            a.strip()
            trees = a.rsplit(' ')
            trees.sort()
            trees.reverse()
            if len(trees) - trees.count('') != 2:
                print "ERROR: need to give two trees (each one for signal and background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True

    # Derive a label for this training from method/variables/depth/mass point.
    varList = varsList.varList[varListKey]
    nVars = str(len(varList)) + 'vars'
    Note = methods + '_' + varListKey + '_' + nVars + '_mDepth' + mDepth + '_M' + mass
    # NOTE(review): this unconditionally overwrites any -o/--outputfile value
    # parsed above -- confirm that is intended.
    outfname = "weights/TMVA_" + Note + ".root"

    # Print methods
    mlist = methods.replace(' ', ',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut

    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)

    # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI
    # gROOT.SetMacroPath( "./" )
    # gROOT.Macro ( "./TMVAlogon.C" )
    # gROOT.LoadMacro ( "./TMVAGui.C" )

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile(outfname, 'RECREATE')

    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for
    # more factory options).  Remove the "!" in front of "Silent" to suppress
    # all TMVA screen output.
    # factory = TMVA.Factory( "TMVAClassification", outputFile,
    # "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" )
    factory = TMVA.Factory(
        "TMVAClassification", outputFile,
        "!V:!Silent:Color:DrawProgressBar:Transformations=I;:AnalysisType=Classification")

    # Set verbosity
    factory.SetVerbose(verbose)

    # Redirect the XML weight files into a per-training subdirectory
    # (see "src/Config.h" for all available global options).
    (TMVA.gConfig().GetIONames()).fWeightFileDir = "weights/" + Note

    # Register the input variables (expressions parseable by TTree::Draw);
    # the jet count is an integer, the rest floats.
    for iVar in varList:
        if iVar[0] == 'NJets_singleLepCalc':
            factory.AddVariable(iVar[0], iVar[1], iVar[2], 'I')
        else:
            factory.AddVariable(iVar[0], iVar[1], iVar[2], 'F')

    # Signal sample for the requested mass point, tree name "ljmet".
    inputDir = varsList.inputDir
    print 'mass point ' + mass
    infname = "ChargedHiggs_HplusTB_HplusToTB_M-%s_13TeV_amcatnlo_pythia8_hadd.root" % (
        mass)
    iFileSig = TFile.Open(inputDir + infname)
    sigChain = iFileSig.Get("ljmet")

    #BDT machinary
    factory.AddSignalTree(sigChain)

    # Background samples; any empty tree is skipped.
    bkg_list = []
    bkg_trees_list = []
    hist_list = []
    weightsList = []
    for i in range(len(varsList.bkg)):
        bkg_list.append(TFile.Open(inputDir + varsList.bkg[i]))
        print inputDir + varsList.bkg[i]
        bkg_trees_list.append(bkg_list[i].Get("ljmet"))
        bkg_trees_list[i].GetEntry(0)
        if bkg_trees_list[i].GetEntries() == 0:
            continue
        factory.AddBackgroundTree(bkg_trees_list[i], 1)

    signalWeight = 1  #0.0159/sigChain.GetEntries() #xs (pb)

    # Set individual event weights (the expressions must exist in the TTrees).
    factory.SetSignalWeightExpression(weightStrS)
    factory.SetBackgroundWeightExpression(weightStrB)

    # Apply additional cuts on the signal and background samples.
    mycutSig = TCut(cutStrS)
    mycutBkg = TCut(cutStrB)
    # "SplitMode=Random" shuffles the input events before the train/test split.
    factory.PrepareTrainingAndTestTree(
        mycutSig, mycutBkg,
        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V")

    # ---- Book MVA methods: option strings for each BDT flavour ----
    # (see http://tmva.sourceforge.net/optionRef.html)
    # bdtSetting for "BDT"
    bdtSetting = '!H:!V:NTrees=%s:MaxDepth=%s' % (nTrees, mDepth)
    bdtSetting += ':MinNodeSize=2.5%:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20'
    bdtSetting += ':IgnoreNegWeightsInTraining=True'
    # bdtSetting for "BDTMitFisher"
    bdtFSetting = '!H:!V:NTrees=%s' % nTrees
    bdtFSetting += ':MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20'
    bdtFSetting += ':IgnoreNegWeightsInTraining=True'
    # bdtSetting for "BDTG"
    bdtGSetting = '!H:!V:NTrees=%s:MaxDepth=%s' % (nTrees, mDepth)
    bdtGSetting += ':MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20'
    bdtGSetting += ':Pray'  #Pray takes into account the effect of negative bins in BDTG
    #bdtGSetting += ':IgnoreNegWeightsInTraining=True'
    # bdtSetting for "BDTB"
    bdtBSetting = '!H:!V:NTrees=%s' % nTrees
    bdtBSetting += ':MinNodeSize=2.5%:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20'
    bdtBSetting += ':IgnoreNegWeightsInTraining=True'
    # bdtSetting for "BDTD"
    bdtDSetting = '!H:!V:NTrees=%s' % nTrees
    bdtDSetting += ':MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate'
    bdtDSetting += ':IgnoreNegWeightsInTraining=True'
    #Note also that explicitly setting *nEventsMin* so far OVERWRITES the option recomeded

    #BOOKING AN ALGORITHM (exactly one, matching the requested method name)
    if methods == "BDT":
        factory.BookMethod(TMVA.Types.kBDT, "BDT", bdtSetting)
    if methods == "BDTG":
        factory.BookMethod(TMVA.Types.kBDT, "BDTG", bdtGSetting)
    if methods == "BDTMitFisher":
        factory.BookMethod(TMVA.Types.kBDT, "BDTMitFisher", bdtFSetting)
    if methods == "BDTB":
        factory.BookMethod(TMVA.Types.kBDT, "BDTB", bdtBSetting)
    if methods == "BDTD":
        factory.BookMethod(TMVA.Types.kBDT, "BDTD", bdtDSetting)

    # ---- Train, test and evaluate the booked method ----
    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()

    # Save the output.
    outputFile.Close()
    # print "=== wrote root file %s\n" % outfname
    # print "=== TMVAClassification is done!\n"

    # save plots: produced from inside the per-training weights directory.
    os.chdir('weights/' + Note)
    #TMVA.mvaeffs( "../../"+outfname )  #Classifier Cut Efficiencies
    gROOT.SetBatch(1)
    TMVA.efficiencies( "../../" + outfname )  #Classifier Background Rejection vs Signal Efficiency (ROC curve)
    TMVA.mvas("../../" + outfname, 0)  #Classifier Output Distributions (test sample)
    TMVA.correlations( "../../" + outfname)  #Input Variable Linear Correlation Coefficients
    TMVA.variables("../../" + outfname)  #Input variables (training sample)
    if not gROOT.IsBatch():
        TMVA.TMVAGui("../../" + outfname)
    print "DONE"
def main(o, args): # Import TMVA classes from ROOT from ROOT import TMVA, TFile, TCut print o # Output file outputFile = TFile(o.outfile % {"label": o.label}, 'RECREATE') atype = "Classification" if hasattr(o, "type"): atype = str(o.type) factory = TMVA.Factory( "TMVAClassification", outputFile, "!V:!Silent:!Color:!DrawProgressBar:Transformations=I:AnalysisType=%s" % atype) # Set verbosity factory.SetVerbose(o.verbose) TMVA.Config.Instance().GetIONames().fWeightFileDir = o.weightsdir # variables if type(o.variables) == str: o.variables = [ v.lstrip().rstrip() for v in o.variables.split(":") if v != "" ] allvars = "" for v in o.variables: factory.AddVariable(str(v)) if allvars != "": allvars += ":" allvars += v.split(":=")[0].lstrip(" ").rstrip(" ") print "variables %s" % allvars print o.spectators for s in o.spectators: if not s in o.variables: factory.AddSpectator(str(s)) # categories and sub categories categories = [] subcategories = [] if hasattr(o, "subcategories") and len(o.subcategories) > 0: subcategories = o.subcategories[0] for sc in o.subcategories[1:]: subcategories = map( lambda x: (TCut(x[0][0]) * TCut(x[1][0]), "%s_%s" % (x[0][1], x[1][1])), itertools.product(subcategories, sc)) for cut, name, vars in o.categories: myvars = allvars if vars != "": for v in vars.split(":"): myvars = myvars.replace(v, "").replace("::", ":") myvars = myvars.rstrip(":") vars = str(myvars) print vars if len(subcategories) > 0: for subcut, subname in subcategories: if subname == "": subname = subname.replace(" ", "").replace( ">", "_gt_").replace("<", "_lt_").replace( "=", "_eq_").replace("&", "_and_") fullname = "%s_%s" % (name, subname) categories.append( (TCut(cut) * TCut(subcut), str(fullname), vars)) else: categories.append((TCut(cut), str(name), vars)) # load tree selection = TCut(o.selection) for evclass, info in o.classes.iteritems(): samples = info["samples"] for name, weight, cut, ttype in samples: tcut = TCut(cut) * selection 
factory.AddTree(mkChain(getListOfFiles(o.indir, o.files), name), str(evclass), float(weight), tcut, int(ttype)) # weights if "weight" in info: weight = info["weight"] factory.AddSpectator(str("%s_wei := %s" % (evclass, weight))) factory.SetWeightExpression(str(weight), str(evclass)) else: factory.SetWeightExpression("1.", str(evclass)) # "SplitMode=Random" means that the input events are randomly shuffled before # splitting them into training and test samples factory.PrepareTrainingAndTestTree( TCut(""), "SplitMode=Random:NormMode=NumEvents:!V") # -------------------------------------------------------------------------------------------------- # Fisher discriminant (same as LD) defaultSettings = { "BDT": "!H:!V:!CreateMVAPdfs:BoostType=Grad:UseBaggedGrad" ":GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5" ":Shrinkage=0.3:NTrees=1000", "Cuts": "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" } if "FisherD" in o.methods: mname = "FisherD%s" % o.label fcats = factory.BookMethod(TMVA.Types.kCategory, mname) for cut, name, vars in categories: print "booking sub-category classifier : %s %s %s" % (cut, name, vars) fcats.AddMethod(cut, vars, TMVA.Types.kFisher, "%s_%s" % (mname, name), "!H:!V:Fisher:!CreateMVAPdfs:VarTransform=D") if "Fisher" in o.methods: mname = "Fisher%s" % o.label fcats = factory.BookMethod(TMVA.Types.kCategory, mname) for cut, name, vars in categories: print "booking sub-category classifier : %s %s %s" % (cut, name, vars) fcats.AddMethod(cut, vars, TMVA.Types.kFisher, "%s_%s" % (mname, name), "!H:!V:Fisher:!CreateMVAPdfs") if "Likelihood" in o.methods: mname = "Likelihood%s" % o.label fcats = factory.BookMethod(TMVA.Types.kCategory, mname) for cut, name, vars in categories: print "booking sub-category classifier : %s %s %s" % (cut, name, vars) fcats.AddMethod( cut, vars, TMVA.Types.kLikelihood, "%s_%s" % (mname, name), 
"!H:!V:!CreateMVAPdfs:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=150" ) if "LikelihoodD" in o.methods: mname = "LikelihoodD%s" % o.label fcats = factory.BookMethod(TMVA.Types.kCategory, mname) for cut, name, vars in categories: print "booking sub-category classifier : %s %s %s" % (cut, name, vars) fcats.AddMethod( cut, vars, TMVA.Types.kLikelihood, "%s_%s" % (mname, name), "!H:!V:!CreateMVAPdfs:!TransformOutput:VarTransform=D:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=150" ) if "BDT" in o.methods: mname = str("BDT%s" % o.label) settings = defaultSettings["BDT"] if hasattr(o, "settings") and "BDT" in o.settings: settings = str(o.settings["BDT"]) print mname, settings if len(categories) == 0: cats = factory.BookMethod(TMVA.Types.kBDT, mname, settings) else: cats = factory.BookMethod(TMVA.Types.kCategory, mname) for cut, name, vars in categories: print "booking sub-category classifier : %s %s %s" % ( cut, name, vars) cats.AddMethod(cut, vars, TMVA.Types.kBDT, "%s_%s" % (mname, name), settings) if "Cuts" in o.methods: mname = "Cuts%s" % o.label settings = defaultSettings["Cuts"] if hasattr(o, "settings") and "Cuts" in o.settings: settings = str(o.settings["Cuts"]) if len(categories) == 0: cats = factory.BookMethod(TMVA.Types.kCuts, mname, settings) else: cats = factory.BookMethod(TMVA.Types.kCategory, mname) for cut, name, vars in categories: print "booking sub-category classifier : %s %s %s" % ( cut, name, vars) cats.AddMethod(cut, vars, TMVA.Types.kCuts, "%s_%s" % (mname, name), settings) # ---- Now you can tell the factory to train, test, and evaluate the MVAs. if o.optimize: print "Optimizing?" factory.OptimizeAllMethods() factory.TrainAllMethods() factory.TestAllMethods() factory.EvaluateAllMethods() # Save the output. outputFile.Close()
def main(): try: # retrive command line options shortopts = "w:m:i:j:f:g:t:o:a:vgh?" longopts = ["weight_fold=", "methods=", "inputfilesig=", "inputfilebkg=", "friendinputfilesig=", "friendinputfilebkg=", "inputtrees=", "outputfile=", "verbose", "gui", "help", "usage"] opts, args = getopt.getopt( sys.argv[1:], shortopts, longopts ) except getopt.GetoptError: # print help information and exit: print "ERROR: unknown options in argument %s" % sys.argv[1:] usage() sys.exit(1) infnameSig = DEFAULT_INFNAMESIG infnameBkg = DEFAULT_INFNAMEBKG friendfnameSig = DEFAULT_FRIENDNAMESIG friendfnameBkg = DEFAULT_FRIENDNAMEBKG treeNameSig = DEFAULT_TREESIG treeNameBkg = DEFAULT_TREEBKG outfname = DEFAULT_OUTFNAME methods = DEFAULT_METHODS weight_fold = "weights" verbose = False gui = False addedcuts = "" for o, a in opts: if o in ("-?", "-h", "--help", "--usage"): usage() sys.exit(0) elif o in ("-w", "--weight_fold"): weight_fold = a elif o in ("-m", "--methods"): methods = a elif o in ("-i", "--inputfilesig"): infnameSig = a elif o in ("-j", "--inputfilebkg"): infnameBkg = a elif o in ("-f", "--friendinputfilesig"): friendfnameSig = a elif o in ("-g", "--friendinputfilebkg"): friendfnameBkg = a elif o in ("-o", "--outputfile"): outfname = a elif o in ("-a", "--addedcuts"): addedcuts = a elif o in ("-t", "--inputtrees"): a.strip() trees = a.rsplit( ' ' ) trees.sort() trees.reverse() if len(trees)-trees.count('') != 2: print "ERROR: need to give two trees (each one for signal and background)" print trees sys.exit(1) treeNameSig = trees[0] treeNameBkg = trees[1] elif o in ("-v", "--verbose"): verbose = True elif o in ("-g", "--gui"): gui = True # Print methods mlist = methods.replace(' ',',').split(',') print "=== TMVAClassification: use method(s)..." for m in mlist: if m.strip() != '': print "=== - <%s>" % m.strip() # Print the file print "Using file " + infnameSig + " for signal..." print "Using file " + infnameBkg + " for background..." 
# Import ROOT classes from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut # check ROOT version, give alarm if 5.18 print "ROOT version is " + str(gROOT.GetVersionCode()) if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544: print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA" print "*** does not run properly (function calls with enums in the argument are ignored)." print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples)," print "*** or use another ROOT version (e.g., ROOT 5.19)." sys.exit(1) # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI gROOT.SetMacroPath( "./" ) ## SO I TAKE DEFAULT FORM ROOT# gROOT.Macro ( "./TMVAlogon.C" ) #! gROOT.LoadMacro ( "./TMVAGui.C" ) # Import TMVA classes from ROOT from ROOT import TMVA # Output file outputFile = TFile( outfname, 'RECREATE' ) # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options) # All TMVA output can be suppressed by removing the "!" 
(not) in # front of the "Silent" argument in the option string factory = TMVA.Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ) # Set verbosity factory.SetVerbose( verbose ) # If you wish to modify default settings # (please check "src/Config.h" to see all available global options) # gConfig().GetVariablePlotting()).fTimesRMS = 8.0 (TMVA.gConfig().GetIONames()).fWeightFileDir = weight_fold; # Define the input variables that shall be used for the classifier training # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" # [all types of expressions that can also be parsed by TTree::Draw( "expression" )] factory.AddVariable( "dR_l1l2", "dR_l1l2", "", 'F' ) factory.AddVariable( "dR_b1b2", "dR_b1b2", "", 'F' ) factory.AddVariable( "dR_bl", "dR_bl", "", 'F' ) factory.AddVariable( "dR_l1l2b1b2", "dR_l1l2b1b2", "", 'F' ) factory.AddVariable( "MINdR_bl", "MINdR_bl", "", 'F' ) factory.AddVariable( "dphi_l1l2b1b2", "dphi_l1l2b1b2", "", 'F' ) factory.AddVariable( "mass_l1l2", "mass_l1l2", "", 'F' ) factory.AddVariable( "mass_b1b2", "mass_b1b2", "", 'F' ) factory.AddVariable( "mass_trans", "mass_trans", "", 'F' ) factory.AddVariable( "MT2", "MT2", "", 'F' ) factory.AddVariable( "pt_b1b2", "pt_b1b2", "", 'F' ) #factory.AddVariable( "MMC_h2mass_MaxBin", "MMC_h2mass_MaxBin", "", 'F' ) #factory.AddVariable( "MMC_h2mass_RMS", "MMC_h2mass_RMS", "", 'F' ) #factory.AddVariable( "MMC_h2mass_prob", "MMC_h2mass_prob", "", 'F' ) # You can add so-called "Spectator variables", which are not used in the MVA training, # but will appear in the final "TestTree" produced by TMVA. 
This TestTree will contain the # input variables, the response values of all trained MVAs, and the spectator variables # factory.AddSpectator( "spec1:=var1*2", "Spectator 1", "units", 'F' ) # factory.AddSpectator( "spec2:=var1*3", "Spectator 2", "units", 'F' ) # Read input data if gSystem.AccessPathName( infnameSig ) != 0 or gSystem.AccessPathName( infnameBkg ): gSystem.Exec( "wget http://root.cern.ch/files/" + infname ) inputSig = TFile.Open( infnameSig ) inputBkg = TFile.Open( infnameBkg ) # Get the signal and background trees for training signal = inputSig.Get( treeNameSig ) background = inputBkg.Get( treeNameBkg ) ##signal.AddFriend( "eleIDdir/isoT1 = eleIDdir/T1", friendfnameSig ) ##background.AddFriend( "eleIDdir/isoT1 = eleIDdir/T1", friendfnameBkg ) # Global event weights (see below for setting event-wise weights) signalWeight = 1. backgroundWeight = 1. #I don't think there's a general answer to this. The safest 'default' #is to use the envent weight such that you have equal amounts of signal #and background #for the training, otherwise for example: if you look for a rare #signal and you use the weight to scale the number of events according #to the expected ratio of signal and background #according to the luminosity... the classifier sees hardly any signal #events and "thinks" .. Oh I just classify everything background and do #a good job! # #One can try to 'optimize' the training a bit more in either 'high #purity' or 'high efficiency' by choosing different weights, but as I #said, there's no fixed rule. You'd have #to 'try' and see if you get better restults by playing with the weights. 
# ====== register trees ==================================================== # # the following method is the prefered one: # you can add an arbitrary number of signal or background trees factory.AddSignalTree ( signal, signalWeight ) factory.AddBackgroundTree( background, backgroundWeight ) # To give different trees for training and testing, do as follows: # factory.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ) # factory.AddSignalTree( signalTestTree, signalTestWeight, "Test" ) # Use the following code instead of the above two or four lines to add signal and background # training and test events "by hand" # NOTE that in this case one should not give expressions (such as "var1+var2") in the input # variable definition, but simply compute the expression before adding the event # # # --- begin ---------------------------------------------------------- # # ... *** please lookup code in TMVA/macros/TMVAClassification.C *** # # # --- end ------------------------------------------------------------ # # ====== end of register trees ============================================== # Set individual event weights (the variables must exist in the original TTree) # for signal : factory.SetSignalWeightExpression ("weight1*weight2"); # for background: factory.SetBackgroundWeightExpression("weight1*weight2"); # Apply additional cuts on the signal and background sample. 
# example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" ) #mycutSig = TCut( "nu1and2_diBaxis_t>-900 && met_diBaxis_t>-900&& hasb1jet && hasb2jet && hasMET && hasGenMET && hasdRljet && hastwomuons" ) mycutSig = TCut( addedcuts ) #mycutBkg = TCut( "event_n%2!=0 && " + addedcuts ) mycutBkg = TCut( addedcuts ) #mycutBkg = TCut( "nu1and2_diBaxis_t>-900 && met_diBaxis_t>-900&& hasb1jet && hasb2jet && hasMET && hasGenMET && hasdRljet && hastwomuons" ) print mycutSig # Here, the relevant variables are copied over in new, slim trees that are # used for TMVA training and testing # "SplitMode=Random" means that the input events are randomly shuffled before # splitting them into training and test samples factory.PrepareTrainingAndTestTree( mycutSig, mycutBkg, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ) # -------------------------------------------------------------------------------------------------- # ---- Book MVA methods # # please lookup the various method configuration options in the corresponding cxx files, eg: # src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html # it is possible to preset ranges in the option string in which the cut optimisation should be done: # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable # Cut optimisation if "Cuts" in mlist: factory.BookMethod( TMVA.Types.kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:VarProp[0]=FMax:VarProp[1]=FMin" ) if "CutsD" in mlist: factory.BookMethod( TMVA.Types.kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ) if "CutsPCA" in mlist: factory.BookMethod( TMVA.Types.kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ) if "CutsGA" in mlist: factory.BookMethod( TMVA.Types.kCuts, "CutsGA", "H:!V:FitMethod=GA:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95:VarProp[0]=FMin:VarProp[1]=FMax" ) if "CutsSA" 
in mlist: factory.BookMethod( TMVA.Types.kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ) # Likelihood ("naive Bayes estimator") if "Likelihood" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "Likelihood", "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ) # Decorrelated likelihood if "LikelihoodD" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ) # PCA-transformed likelihood if "LikelihoodPCA" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ) # Use a kernel density estimator to approximate the PDFs if "LikelihoodKDE" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ) # Use a variable-dependent mix of splines and kernel density estimator if "LikelihoodMIX" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ) # Test the multi-dimensional probability density estimator # here are the options strings for the MinMax and RMS methods, respectively: # "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); # "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if "PDERS" in mlist: factory.BookMethod( TMVA.Types.kPDERS, 
"PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ) if "PDERSD" in mlist: factory.BookMethod( TMVA.Types.kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ) if "PDERSPCA" in mlist: factory.BookMethod( TMVA.Types.kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ) # Multi-dimensional likelihood estimator using self-adapting phase-space binning if "PDEFoam" in mlist: factory.BookMethod( TMVA.Types.kPDEFoam, "PDEFoam", "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ) if "PDEFoamBoost" in mlist: factory.BookMethod( TMVA.Types.kPDEFoam, "PDEFoamBoost", "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" ) # K-Nearest Neighbour classifier (KNN) if "KNN" in mlist: factory.BookMethod( TMVA.Types.kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ) # H-Matrix (chi2-squared) method if "HMatrix" in mlist: factory.BookMethod( TMVA.Types.kHMatrix, "HMatrix", "!H:!V" ) # Linear discriminant (same as Fisher discriminant) if "LD" in mlist: factory.BookMethod( TMVA.Types.kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ) # Fisher discriminant (same as LD) if "Fisher" in mlist: factory.BookMethod( TMVA.Types.kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ) # Fisher with Gauss-transformed input variables if "FisherG" in mlist: factory.BookMethod( TMVA.Types.kFisher, "FisherG", "H:!V:VarTransform=Gauss" ) # Composite classifier: ensemble (tree) of 
boosted Fisher classifiers if "BoostedFisher" in mlist: factory.BookMethod( TMVA.Types.kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" ) # Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if "FDA_MC" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ); if "FDA_GA" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if "FDA_SA" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if "FDA_MT" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if "FDA_GAMT" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if "FDA_MCMT" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); # TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if "MLP" in mlist: 
factory.BookMethod( TMVA.Types.kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ) if "MLPBFGS" in mlist: factory.BookMethod( TMVA.Types.kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ) if "MLPBNN" in mlist: factory.BookMethod( TMVA.Types.kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ) # BFGS training with bayesian regulators # CF(Clermont-Ferrand)ANN if "CFMlpANN" in mlist: factory.BookMethod( TMVA.Types.kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ) # n_cycles:#nodes:#nodes:... # Tmlp(Root)ANN if "TMlpANN" in mlist: factory.BookMethod( TMVA.Types.kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ) #n_cycles:#nodes:#nodes:... # Support Vector Machine if "SVM" in mlist: factory.BookMethod( TMVA.Types.kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ) # Boosted Decision Trees if "BDTG" in mlist: factory.BookMethod( TMVA.Types.kBDT, "BDTG", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5" ) if "BDT" in mlist: factory.BookMethod( TMVA.Types.kBDT, "BDT", "!H:!V:NTrees=850:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ) if "BDTB" in mlist: factory.BookMethod( TMVA.Types.kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ) if "BDTD" in mlist: factory.BookMethod( TMVA.Types.kBDT, "BDTD", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ) # RuleFit -- TMVA implementation of Friedman's method if "RuleFit" in mlist: factory.BookMethod( TMVA.Types.kRuleFit, "RuleFit", 
"H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ) # -------------------------------------------------------------------------------------------------- # ---- Now you can tell the factory to train, test, and evaluate the MVAs. # Train MVAs factory.TrainAllMethods() # Test MVAs factory.TestAllMethods() # Evaluate MVAs factory.EvaluateAllMethods() # Save the output. outputFile.Close() print "=== wrote root file %s\n" % outfname print "=== TMVAClassification is done!\n" # open the GUI for the result macros if( gui ): gROOT.ProcessLine( "TMVAGui(\"%s\")" % outfname ) # keep the ROOT thread running gApplication.Run()
def _setFactory(self, outFileName):
    """Open the output ROOT file and instantiate the TMVA factory.

    Creates ``outFileName`` in RECREATE mode and builds the factory from
    the ``factory`` section of the configured options, storing both on
    the instance (``self._fout`` / ``self._factory``).
    """
    self._fout = TFile(outFileName, "RECREATE")
    factoryCfg = self._options['factory']
    self._factory = TMVA.Factory(factoryCfg['name'], self._fout,
                                 factoryCfg['options'])
def main(): print "\n", "=" * 80 print "\tDESY 2017 - classification with TMVA" print "=" * 80 # summary root file summaryFilename = 'TMVA.root' # results directory resultsDir = 'results' os.system('mkdir -p %s' % resultsDir) #------------------------------------------------------------------ # get signal file and associated Root tree sigFilename = "../public/ntuple_HZZ4L.root" sigFile, sigTree = getTree(sigFilename) # get background file and associated Root tree bkgFilename = "../public/ntuple_ZZ4L.root" bkgFile, bkgTree = getTree(bkgFilename) #------------------------------------------------------------------ # create a factory for booking machine learning methods #------------------------------------------------------------------ outputFile = TFile("TMVA.root", "recreate") options = ''' !V Color !Silent DrawProgressBar AnalysisType=Classification Transformations=I;D ''' factory = TMVA.Factory("Z1massZ2mass", outputFile, formatOptions(options)) #------------------------------------------------------------------ # set up data set for training and testing #------------------------------------------------------------------ dataLoader = TMVA.DataLoader(resultsDir) # define variables to be used dataLoader.AddVariable("Z1mass", 'D') dataLoader.AddVariable("Z2mass", 'D') # define from which trees data are to be taken # from and the global weights to be assigned to # the training data sigWeight = 1.0 dataLoader.AddSignalTree(sigTree, sigWeight) dataLoader.SetSignalWeightExpression("weight") bkgWeight = 1.0 dataLoader.AddBackgroundTree(bkgTree, bkgWeight) dataLoader.SetBackgroundWeightExpression("weight") # you can apply cuts, if needed cut = TCut("") options = ''' SplitMode=Random NormMode=EqualNumEvents nTrain_Signal=2500 nTest_Signal=2500 nTrain_Background=2500 nTest_Background=2500 !V ''' dataLoader.PrepareTrainingAndTestTree( cut, # signal cut cut, # background cut formatOptions(options)) #------------------------------------------------------------------ # ok, 
almost done, define machine learning methods to be run #------------------------------------------------------------------ options = ''' !H !V VarTransform=None ''' factory.BookMethod(dataLoader, TMVA.Types.kFisher, "Fisher", formatOptions(options)) options = ''' !H !V BoostType=AdaBoost NTrees=200 nEventsMin=100 nCuts=50 ''' factory.BookMethod(dataLoader, TMVA.Types.kBDT, "BDT", formatOptions(options)) options = ''' !H !V NCycles=500 VarTransform=N HiddenLayers=5 TrainingMethod=BFGS ''' factory.BookMethod(dataLoader, TMVA.Types.kMLP, "MLP", formatOptions(options)) #------------------------------------------------------------------ # ok, let's go! #------------------------------------------------------------------ factory.TrainAllMethods() factory.TestAllMethods() factory.EvaluateAllMethods() outputFile.Close()
def main(): try: # retrive command line options shortopts = "m:i:t:b:s:a:o:vh?" longopts = [ "methods=", "inputfile=", "inputtrees=", "bkgList=", "sigList=", "anaType=", "outputfile=", "verbose", "help", "usage" ] opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts) except getopt.GetoptError: # print help information and exit: print "ERROR: unknown options in argument %s" % sys.argv[1:] usage() sys.exit(1) infname = DEFAULT_INFNAME bkgList = DEFAULT_BKGLIST sigList = DEFAULT_SIGLIST anaType = DEFAULT_ANATYPE treeNameSig = DEFAULT_TREESIG treeNameBkg = DEFAULT_TREEBKG outfname = DEFAULT_OUTFNAME methods = DEFAULT_METHODS verbose = False for o, a in opts: if o in ("-?", "-h", "--help", "--usage"): usage() sys.exit(0) elif o in ("-m", "--methods"): methods = a elif o in ("-i", "--inputfile"): infname = a elif o in ("-b", "--bkgList"): bkgList = a elif o in ("-s", "--sigList"): sigList = a elif o in ("-a", "--anaType"): anaType = a elif o in ("-o", "--outputfile"): outfname = a elif o in ("-t", "--inputtrees"): a.strip() trees = a.rsplit(' ') trees.sort() trees.reverse() if len(trees) - trees.count('') != 2: print "ERROR: need to give two trees (each one for signal and background)" print trees sys.exit(1) treeNameSig = trees[0] treeNameBkg = trees[1] elif o in ("-v", "--verbose"): verbose = True # Print methods mlist = methods.replace(' ', ',').split(',') print "=== TMVAClassification: use method(s)..." for m in mlist: if m.strip() != '': print "=== - <%s>" % m.strip() # Import ROOT classes from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut # check ROOT version, give alarm if 5.18 if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544: print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA" print "*** does not run properly (function calls with enums in the argument are ignored)." 
print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples)," print "*** or use another ROOT version (e.g., ROOT 5.19)." sys.exit(1) #Edited setupTMVA(pathToTMVA) # Import TMVA classes from ROOT from ROOT import TMVA # Output file outputFile = TFile(outfname, 'RECREATE') # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options) # All TMVA output can be suppressed by removing the "!" (not) in # front of the "Silent" argument in the option string suffix = outfname.split("/")[-1].split(".")[0] factory = TMVA.Factory( "TMVAClassification_%s" % suffix, outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P:AnalysisType=Classification" ) # Set verbosity factory.SetVerbose(verbose) # If you wish to modify default settings # (please check "src/Config.h" to see all available global options) # gConfig().GetVariablePlotting()).fTimesRMS = 8.0 # gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory" # Define the input variables that shall be used for the classifier training # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" # [all types of expressions that can also be parsed by TTree::Draw( "expression" )] #Edited #follow 2LSS note Ch5.2: Disciminating variables factory.AddVariable("mT2 := sig.mT2", 'F') factory.AddVariable("pt := l12.pt", 'F') factory.AddVariable("MET := sig.MetRel", 'F') factory.AddVariable("Ht := Sum$(jets.pt) + Sum$(leps.pt)", 'F') factory.AddVariable("mTl1 := leps.mT[0]", 'F') factory.AddVariable("mTl2 := leps.mT[1]", 'F') factory.AddVariable("ll_dPhi:= l12.dPhi", 'F') factory.AddVariable( "l12m := (int(abs(leps.ID[0]))!=int(abs(leps.ID[1])))*100 + l12.m", 'F') #ISR region if (anaType == "doISR"): factory.AddVariable("JetMET_dPhi := jets.MET_dPhi[0]", 'F') factory.AddVariable("MET_JetPt_R := sig.MetRel/jets.pt[0]", 'F') factory.AddVariable("l1Pt_JetPt_R := leps.pt[0]/jets.pt[0]", 'F') #factory.AddSpectator( "pt1 := leps.pt[0]" 
, 'F' ) #factory.AddSpectator( "pt2 := leps.pt[1]" , 'F' ) #factory.AddSpectator( "ID1 := int(leps.ID[0])" , 'I' ) #factory.AddSpectator( "ID2 := int(leps.ID[1])" , 'I' ) #factory.AddSpectator( "nCentralJets := Sum$(jets.pt>20 && abs(jets.eta)<2.4)" , 'I' ) #FIXME setupXsecDB(pathToSUSYTools) from ROOT.SUSY import CrossSectionDB xsecDB = CrossSectionDB(pathToSUSYTools + "data/mc15_13TeV/") #read in training data openedInFileList = [] # Read input sig sigList = open(sigList, "r") for infname in sigList: inFile = TFile.Open(infname[:-1]) openedInFileList.append(inFile) hCutFlow = inFile.FindObjectAny("hCutFlow") mcEntry = hCutFlow.GetBinContent(1) #FIXME: hard coded extract runNum from filePath m = re.match(".*\.([0-9]{6})\..*", infname) runNum = int(m.groups()[0]) xSECxEff = xsecDB.xsectTimesEff(runNum, 125) + xsecDB.xsectTimesEff( runNum, 127) #125,127 is channel no. # Get the trees for training signal = inFile.Get("Data_") # Global event weights (see below for setting event-wise weights) #signalWeight = getXSECxEff(xsecDB, infname) * tarLumi / mcEntry #signalWeight = xSECxEff * tarLumi / mcEntry signalWeight = 1.0 * tarLumi / mcEntry #treat diff SUSY scenario with equal weight if signalWeight <= 0: print "Encounter <=0 weight sample %s , skipped" % infname continue print "mc sig ", runNum, mcEntry, xSECxEff factory.AddSignalTree(signal, signalWeight) sigList.close() # Read input bkg bkgList = open(bkgList, "r") for infname in bkgList: inFile = TFile.Open(infname[:-1]) openedInFileList.append(inFile) if "physics" in infname: #its real data print "data bkg", infname[:-1] background = inFile.Get("CFlip_") if background: factory.AddBackgroundTree(background, 1.0) background = inFile.Get("FakeLep_") if background: factory.AddBackgroundTree(background, 1.0) else: #its MC data hCutFlow = inFile.FindObjectAny("hCutFlow") mcEntry = hCutFlow.GetBinContent(1) #FIXME: hard coded extract runNum from filePath m = re.match(".*\.([0-9]{6})\..*", infname) runNum = 
int(m.groups()[0]) xSECxEff = xsecDB.xsectTimesEff(runNum) # Get trees for training background = inFile.Get("Data_") # Global event weights (see below for setting event-wise weights) backgroundWeight = xSECxEff * tarLumi / mcEntry if backgroundWeight <= 0: print "Encounter <=0 weight sample %s , skipped" % infname print "mc bkg ", runNum, mcEntry, xSECxEff factory.AddBackgroundTree(background, backgroundWeight) bkgList.close() # event-wise weights #factory.SetSignalWeightExpression( "weight" ) #factory.SetBackgroundWeightExpression( "weight" ) factory.SetSignalWeightExpression("ElSF*MuSF") factory.SetBackgroundWeightExpression( "(CFlipWeight0*FakeLepWeight0)!=1.0 ? CFlipWeight0*FakeLepWeight0 : !TMath::IsNaN(weight)? ElSF*MuSF*weight: 0.0" ) # Apply additional cuts on the signal and background sample. # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" ) # trigCut = "sig.trigCode!=0" #"HLT_mu24_iloose_L1MU15" for mumu emu, "HLT_e24_lhmedium_iloose_L1EM20VH" for ee trigCut = "((nMu>0) && (sig.trigCode & (1<<2))) || ((nMu==0) && (sig.trigCode & (1<<26)))" grlCut = "evtInfo.passGRL==1" wCut = "weight>0 && weight<1e9" tauCut = "1" # "nTau==0" FIXME nTau not properly filled in NTUP yet.. 
bjetCut = "Sum$(jets.isBJet)==0" cosmicCut = "Sum$(leps.isCosmic)==0" htCut = "(Sum$(jets.pt) + Sum$(leps.pt))>40" posWCut = "FakeLepWeight0>0" isrCut = "Sum$(jets.pt>20 && abs(jets.eta)<2.4) %s" % ( ">0" if anaType == "doISR" else "==0") #nCentralJets>0 or ==0 zMassCut = "!(int(abs(leps.ID[0])) == int(abs(leps.ID[1])) && fabs(l12.m - 91.1876)<=5)" #commonCut = "&&".join(["(%s)"%cut for cut in [trigCut , grlCut , bjetCut, cosmicCut]]) commonCut = "&&".join([ "(%s)" % cut for cut in [trigCut, grlCut, wCut, zMassCut, isrCut, tauCut, bjetCut, cosmicCut] ]) commonCut = TCut(commonCut) sigCut = "&&".join([ "(%s)" % cut for cut in [trigCut, grlCut, zMassCut, isrCut, tauCut, bjetCut, cosmicCut] ]) sigCut = TCut(sigCut) bkgCut = "&&".join([ "(%s)" % cut for cut in [ trigCut, grlCut, wCut, zMassCut, isrCut, tauCut, bjetCut, cosmicCut, posWCut ] ]) bkgCut = TCut(bkgCut) # Here, the relevant variables are copied over in new, slim trees that are # used for TMVA training and testing # "SplitMode=Random" means that the input events are randomly shuffled before # splitting them into training and test samples factory.PrepareTrainingAndTestTree( sigCut, bkgCut, "nTrain_Signal=0:nTrain_Background=0:nTest_Background=0:SplitMode=Random:NormMode=EqualNumEvents:!V" ) #"nTrain_Signal=0:nTrain_Background=2000:SplitMode=Random:NormMode=EqualNumEvents:!V" ) # -------------------------------------------------------------------------------------------------- # ---- Book MVA methods # # please lookup the various method configuration options in the corresponding cxx files, eg: # src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html # it is possible to preset ranges in the option string in which the cut optimisation should be done: # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable # Cut optimisation if "Cuts" in mlist: factory.BookMethod( TMVA.Types.kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart") if "CutsD" in 
mlist: factory.BookMethod( TMVA.Types.kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ) if "CutsPCA" in mlist: factory.BookMethod( TMVA.Types.kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ) if "CutsGA" in mlist: factory.BookMethod( TMVA.Types.kCuts, "CutsGA", "H:!V:FitMethod=GA:VarProp=FSmart:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ) if "CutsSA" in mlist: factory.BookMethod( TMVA.Types.kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ) # Likelihood ("naive Bayes estimator") if "Likelihood" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "Likelihood", "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ) # Decorrelated likelihood if "LikelihoodD" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ) # PCA-transformed likelihood if "LikelihoodPCA" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ) # Use a kernel density estimator to approximate the PDFs if "LikelihoodKDE" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ) # Use a variable-dependent mix of splines and kernel density estimator if "LikelihoodMIX" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodMIX", 
"!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ) # Test the multi-dimensional probability density estimator # here are the options strings for the MinMax and RMS methods, respectively: # "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); # "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if "PDERS" in mlist: factory.BookMethod( TMVA.Types.kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ) if "PDERSD" in mlist: factory.BookMethod( TMVA.Types.kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ) if "PDERSPCA" in mlist: factory.BookMethod( TMVA.Types.kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ) # Multi-dimensional likelihood estimator using self-adapting phase-space binning if "PDEFoam" in mlist: factory.BookMethod( TMVA.Types.kPDEFoam, "PDEFoam", "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ) if "PDEFoamBoost" in mlist: factory.BookMethod( TMVA.Types.kPDEFoam, "PDEFoamBoost", "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" ) # K-Nearest Neighbour classifier (KNN) if "KNN" in mlist: factory.BookMethod( TMVA.Types.kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ) # H-Matrix (chi2-squared) method if "HMatrix" in mlist: 
factory.BookMethod(TMVA.Types.kHMatrix, "HMatrix", "!H:!V") # Linear discriminant (same as Fisher discriminant) if "LD" in mlist: factory.BookMethod( TMVA.Types.kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ) # Fisher discriminant (same as LD) if "Fisher" in mlist: factory.BookMethod( TMVA.Types.kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ) # Fisher with Gauss-transformed input variables if "FisherG" in mlist: factory.BookMethod(TMVA.Types.kFisher, "FisherG", "H:!V:VarTransform=Gauss") # Composite classifier: ensemble (tree) of boosted Fisher classifiers if "BoostedFisher" in mlist: factory.BookMethod( TMVA.Types.kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" ) # Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if "FDA_MC" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ) if "FDA_GA" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ) if "FDA_SA" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ) if "FDA_MT" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ) if "FDA_GAMT" in mlist: factory.BookMethod( 
TMVA.Types.kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ) if "FDA_MCMT" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ) # TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if "MLP" in mlist: factory.BookMethod( TMVA.Types.kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ) if "MLPBFGS" in mlist: factory.BookMethod( TMVA.Types.kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ) if "MLPBNN" in mlist: factory.BookMethod( TMVA.Types.kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ) # BFGS training with bayesian regulators # CF(Clermont-Ferrand)ANN if "CFMlpANN" in mlist: factory.BookMethod(TMVA.Types.kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ) # n_cycles:#nodes:#nodes:... # Tmlp(Root)ANN if "TMlpANN" in mlist: factory.BookMethod( TMVA.Types.kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ) # n_cycles:#nodes:#nodes:... 
# Support Vector Machine if "SVM" in mlist: factory.BookMethod(TMVA.Types.kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm") # Boosted Decision Trees if "BDTG" in mlist: factory.BookMethod( TMVA.Types.kBDT, "BDTG", "!H:!V:NTrees=1000:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" ) if "BDT" in mlist: factory.BookMethod( TMVA.Types.kBDT, "BDT", "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" ) if "BDTB" in mlist: factory.BookMethod( TMVA.Types.kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" ) if "BDTD" in mlist: factory.BookMethod( TMVA.Types.kBDT, "BDTD", "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=2:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" ) # RuleFit -- TMVA implementation of Friedman's method if "RuleFit" in mlist: factory.BookMethod( TMVA.Types.kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ) # -------------------------------------------------------------------------------------------------- # ---- Now you can tell the factory to train, test, and evaluate the MVAs. # Train MVAs factory.TrainAllMethods() # Test MVAs factory.TestAllMethods() # Evaluate MVAs factory.EvaluateAllMethods() # Save the output. outputFile.Close() print "=== wrote root file %s\n" % outfname print "=== TMVAClassification is done!\n"
def main():
    """TMVA classification driver for the di-tau (svMass) BDT training.

    Parses command-line options, boots ROOT/TMVA via the local logon macros,
    registers the signal and background event trees from hard-coded scratch
    paths, and trains/tests/evaluates a BDT.  Finally renames the produced
    weight file via a shell `mv` (paths hard-coded to one user's area).
    """
    try:
        # retrieve command line options
        shortopts = "m:i:t:o:vh?"
        longopts = [
            "methods=", "inputfile=", "inputtrees=", "outputfile=", "verbose",
            "help", "usage"
        ]
        opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts)
    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    # defaults, possibly overridden by the command-line options below
    infname = DEFAULT_INFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname = DEFAULT_OUTFNAME
    methods = DEFAULT_METHODS
    verbose = False

    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            # expects exactly two whitespace-separated tree names
            a.strip()
            trees = a.rsplit(' ')
            trees.sort()
            trees.reverse()
            if len(trees) - trees.count('') != 2:
                print "ERROR: need to give two trees (each one for signal and background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True

    # Print methods
    mlist = methods.replace(' ', ',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut

    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)

    # Logon not automatically loaded through PyROOT (logon loads TMVA
    # library); load also GUI
    gROOT.SetMacroPath("./")
    gROOT.Macro("./TMVAlogon.C")
    gROOT.LoadMacro("./TMVAGui.C")

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile(outfname, 'RECREATE')

    # Create instance of TMVA factory.  All TMVA output can be suppressed by
    # removing the "!" (not) in front of the "Silent" option.
    factory = TMVA.Factory(
        "TMVAClassification", outputFile,
        "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification"
    )

    # Set verbosity
    factory.SetVerbose(verbose)

    # Define the input variables used for classifier training; expressions
    # are parsed like TTree::Draw("expression").  Earlier (smaller) variable
    # sets kept below for reference:
    #varList = ['svMass', 'mJJ', 'met/fMass','pZ - pZV']
    # varList = ['svMass', 'fMass', 'dRTauTau', 'dRJJ', 'svPt', 'dRhh', 'met', 'mJJReg',
    #            'metTau1DPhi', 'metTau2DPhi', 'metJ1DPhi', 'metJ2DPhi', 'metTauPairDPhi', 'metSvTauPairDPhi', 'metJetPairDPhi','CSVJ1', 'CSVJ2']
    # varList = ['svMass', 'dRTauTau', 'svPt', 'dRhh', 'met', 'mJJReg',
    #            'metTau1DPhi', 'metTau2DPhi', 'metJ2DPhi', 'metJetPairDPhi','CSVJ1', 'CSVJ2']
    varList = [
        'svMass', 'dRTauTau', 'dRJJ', 'svPt', 'dRhh', 'met', 'mJJReg',
        'metTau1DPhi', 'metTau2DPhi', 'metJ1DPhi', 'metJ2DPhi',
        'metTauPairDPhi', 'metSvTauPairDPhi', 'metJetPairDPhi', 'CSVJ1',
        'CSVJ2'
    ]
    for iVar in varList:
        factory.AddVariable(iVar, 'F')
    #factory.AddVariable( "NBTags",'I' )

    # You can add so-called "Spectator variables", which are not used in the
    # MVA training but appear in the final "TestTree" produced by TMVA.
    # factory.AddSpectator( "fMass")
    # factory.AddSpectator( "spec2:=var1*3", "Spectator 2", "units", 'F' )

    # Read input data.
    # NOTE(review): input paths are hard-coded to one user's scratch area;
    # only the signal file name is configurable via -i.
    iFileSig = TFile.Open("/scratch/zmao/relaxed_regression/%s" % (infname))
    iFileBkg = TFile.Open(
        "/scratch/zmao/relaxed_regression/trainSample_relaxedsamebTag.root")
    sigChain = iFileSig.Get("eventTree")
    bkgChain = iFileBkg.Get("eventTree")

    # Global event weights (see below for setting event-wise weights)
    signalWeight = 1  #0.0159/sigChain.GetEntries() #xs (pb)

    # ====== register trees ====================================================
    factory.AddSignalTree(sigChain, signalWeight)
    factory.AddBackgroundTree(bkgChain, 1)
    # event-wise signal weight from the trigger-efficiency branch
    factory.SetSignalWeightExpression('triggerEff')

    # To give different trees for training and testing, do as follows:
    # factory.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" )
    # factory.AddSignalTree( signalTestTree, signalTestWeight, "Test" )

    # Apply additional cuts on the signal and background sample.
    # NOTE(review): only the signal sample is cut; background uses no cut.
    mycutSig = TCut(
        "iso1<1.5 && iso2<1.5 && CSVJ1 > 0.679 && CSVJ2 > 0.244 && abs(eta1)<2.1 && abs(eta2)<2.1 && charge1 + charge2 == 0"
    )
    mycutBkg = TCut("")

    # "SplitMode=Random" shuffles the input events before splitting them
    # into training and test samples
    factory.PrepareTrainingAndTestTree(
        mycutSig, mycutBkg,
        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V"
    )

    # ---- Book MVA methods (only BDT is active; Fisher kept for reference)
    # if "Fisher" in mlist:
    #factory.BookMethod( TMVA.Types.kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" )
    # factory.BookMethod( TMVA.Types.kFisher, "Fisher")
    if "BDT" in mlist:
        factory.BookMethod(
            TMVA.Types.kBDT, "BDT",
            "!H:!V:NTrees=150:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=-1"
        )

    # ---- train, test, and evaluate the booked MVAs
    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()

    # Save the output.
    outputFile.Close()
    print "=== wrote root file %s\n" % outfname
    print "=== TMVAClassification is done!\n"

    # open the GUI for the result macros
    # gROOT.ProcessLine( "TMVAGui(\"%s\")" % outfname )

    # rename the produced weight file, tagging it with the variable count
    # NOTE(review): absolute paths hard-coded to one user's AFS area
    ChangeWeightName = 'mv /afs/hep.wisc.edu/home/zmao/CMSSW_5_3_15/src/TMVA-v4.2.0/test/weights/TMVAClassification_BDT.weights.xml /afs/hep.wisc.edu/home/zmao/CMSSW_5_3_15/src/TMVA-v4.2.0/test/weights/TMVAClassification_BDT.weights_QCD_%i.xml' % len(
        varList)
    os.system(ChangeWeightName)
def main(): try: # retrive command line options shortopts = "m:p:M:C:B:i:t:T:o:vh?" opts, args = getopt.getopt(sys.argv[1:], shortopts) except getopt.GetoptError: # print help information and exit: print "ERROR: unknown options in argument %s" % sys.argv[1:] usage() sys.exit(1) infname = DEFAULT_INFNAME methods = DEFAULT_METHODS mass = DEFAULT_MASS cat = DEFAULT_CAT phil = DEFAULT_PHIL outfname = DEFAULT_OUTFNAME treeNameSig = DEFAULT_TREESIG treeNameBkg = DEFAULT_TREEBKG bkg_method = DEFAULT_BACKGROUND width = DEFAULT_WIDTH verbose = False test = False testType = DEFAULT_TEST_TYPE methTest = False testMethod = DEFAULT_TEST_METHOD for o, a in opts: if o in ("-?", "-h", "--help", "--usage"): usage() sys.exit(0) elif o in ("-m", "--methods"): methods = a elif o in ("-M", "--mass"): mass = int(a) elif o in ("-C", "--cat"): cat = int(a) elif o in ("-p", "--philosophy"): phil = a elif o in ("-B", "--background"): bkg_method = int(a) elif o in ("-i", "--inputfile"): infname = a elif o in ("-o", "--outputfile"): outfname = a elif o in ("-T", "--test"): test = True temp = a.split('_') if len(temp) == 1: testType = temp[0] if testType != "ada" or testType != "grad": print "ERROR: testType must be ada or grad not", testType elif len(temp) - temp.count('') == 2: methTest = True testType = temp[0] testMethod = temp[1] checkTestType(testType, testMethod) else: print "ERROR: need to give one or two test options" print temp sys.exit(1) elif o in ("-t", "--inputtrees"): a.strip() trees = a.rsplit(' ') trees.sort() trees.reverse() if len(trees) - trees.count('') != 2: print "ERROR: need to give two trees (each one for signal and background)" print trees sys.exit(1) treeNameSig = trees[0] treeNameBkg = trees[1] elif o in ("-v", "--verbose"): verbose = True if (width == 0.02): width_str = "_2pt" elif (width == 0.07): width_str = "_7pt" mass_str = "_" + str("%3.1f" % mass) cat_str = "_" + str(cat) if cat < 0: cat_str = "_all" if test: if methTest: outfname = "TMVAStuff/" + outfname + 
"_" + phil + cat_str + "_test_" + testType + "_" + testMethod + ".root" else: outfname = "TMVAStuff/" + outfname + "_" + phil + cat_str + "_test_" + testType + ".root" else: outfname = "TMVAStuff/" + outfname + "_" + phil + cat_str + ".root" #treeNameSig = treeNameSig + mass_str #treeNameBkg = treeNameBkg + mass_str # Print methods mlist = methods.replace(' ', ',').split(',') print "=== TMVAClassification: use method(s)..." for m in mlist: if m.strip() != '': print "=== - <%s>" % m.strip() # Import ROOT classes from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut # check ROOT version, give alarm if 5.18 if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544: print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA" print "*** does not run properly (function calls with enums in the argument are ignored)." print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples)," print "*** or use another ROOT version (e.g., ROOT 5.19)." sys.exit(1) # Logon not automatically loaded through PyROOT (logon loads TMVA library) # load also GUI gROOT.SetMacroPath("/vols/cms03/mk1009/h2g/MVA/tmvaMacros/") gROOT.Macro("/vols/cms03/mk1009/h2g/MVA/tmvaMacros/TMVAlogon.C") gROOT.LoadMacro("/vols/cms03/mk1009/h2g/MVA/tmvaMacros/TMVAGui.C") # Import TMVA classes from ROOT from ROOT import TMVA # Output file outputFile = TFile(outfname, 'RECREATE') # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for # more factory options) # All TMVA output can be suppressed by removing the "!" 
(not) in # front of the "Silent" argument in the option string factory = TMVA.Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ) # Set verbosity factory.SetVerbose(verbose) factory.AddVariable("bdtoutput", "BDT Output", 'F') factory.AddVariable("deltaMOverM", "#DeltaM / M_{Hypth}.", 'F') input = TFile.Open(infname) # Get the signal and background trees for training signal_train = input.Get(treeNameSig + "_train" + mass_str) signal_test = input.Get(treeNameSig + "_test" + mass_str) background_train = input.Get(treeNameBkg + "_train" + width_str + mass_str) background_test = input.Get(treeNameBkg + "_test" + width_str + mass_str) # Global event weights (see below for setting event-wise weights) signalWeight = 1.0 backgroundWeight = 1.0 # ====== register trees ==================================================== factory.AddSignalTree(signal_train, signalWeight, "train") factory.AddBackgroundTree(background_train, backgroundWeight, "train") factory.AddSignalTree(signal_test, signalWeight, "test") factory.AddBackgroundTree(background_test, backgroundWeight, "test") # Set individual event weights (the variables must exist in the original # TTree) factory.SetBackgroundWeightExpression("wt") factory.SetSignalWeightExpression("wt") # Apply additional cuts on the signal and background sample. 
# example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" ) mycut = TCut("fabs(deltaMOverM)<=" + str(width) + " && bdtoutput > -0.5") # # Here, the relevant variables are copied over in new, slim trees that are # used for TMVA training and testing factory.PrepareTrainingAndTestTree( mycut, mycut, "nTrain_Signal=0:nTrain_Background=0:NormMode=NumEvents:!V") # Boosted Decision Trees # NEW PARAMETERS if (not test): # Likelihood factory.BookMethod( TMVA.Types.kLikelihood, "Likelihood" + phil, "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ) factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodD" + phil, "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ) #factory.BookMethod( TMVA.Types.kPDERS, "MultiLikelihood"+phil,"!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); # BDT factory.BookMethod( TMVA.Types.kBDT, "BDTada" + phil, "!H:!V:NTrees=200:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=1.0:SeparationType=GiniIndex:nCuts=50:PruneMethod=NoPruning" ) factory.BookMethod( TMVA.Types.kBDT, "BDTgrad" + phil, "!H:!V:NTrees=200:MaxDepth=3:BoostType=Grad:Shrinkage=0.5:UseBaggedGrad:GradBaggingFraction=1.0:SeparationType=GiniIndex:nCuts=50:NNodesMax=10" ) else: #test # BDT ada if testType == "ada": #if testMethod=="nTrees": for nTrees in [10, 50, 100, 200, 500]: for depth in [2, 3]: factory.BookMethod( TMVA.Types.kBDT, "BDT_ada" + str(phil) + "_" + str(nTrees) + "t_" + str(depth) + "d", "!H:!V:NTrees=" + str(nTrees) + ":nEventsMin=150:MaxDepth=" + str(depth) + ":BoostType=AdaBoost:AdaBoostBeta=1:SeparationType=GiniIndex:nCuts=50:PruneMethod=NoPruning" ) # if testMethod=="depth": # for depth in [2,3]: # factory.BookMethod( TMVA.Types.kBDT, 
"BDT_ada"+str(phil)+"_200t_"+str(depth)+"d_0.05b_50c","!H:!V:NTrees=200:nEventsMin=150:MaxDepth="+str(depth)+":BoostType=AdaBoost:AdaBoostBeta=0.05:SeparationType=GiniIndex:nCuts=50:PruneMethod=NoPruning") #if testMethod=="nCuts": # for nCuts in [5,10,20,50,100,200]: # factory.BookMethod( TMVA.Types.kBDT, "BDT_ada"+str(phil)+"_200t_50d_0.05b_"+str(nCuts)+"c","!H:!V:NTrees=200:nEventsMin=150:MaxDepth=50:BoostType=AdaBoost:AdaBoostBeta=0.05:SeparationType=GiniIndex:nCuts="+str(nCuts)+":PruneMethod=NoPruning") #if testMethod=="beta": # for beta in [0.05,0.5,1.]: # factory.BookMethod( TMVA.Types.kBDT, "BDT_ada"+str(phil)+"_200t_50d_"+str(beta)+"b_50c","!H:!V:NTrees=200:nEventsMin=150:MaxDepth=50:BoostType=AdaBoost:AdaBoostBeta="+str(beta)+":SeparationType=GiniIndex:nCuts=50:PruneMethod=NoPruning") # BDT grad if testType == "grad": if testMethod == "nTrees": for nTrees in [10, 50, 100, 200, 500]: for depth in [2, 3]: for shrinkage in [0.05, 0.5, 1.]: factory.BookMethod( TMVA.Types.kBDT, "BDT_grad" + str(phil) + "_" + str(nTrees) + "t_" + str(depth) + "d_" + str(shrinkage) + "s", "!H:!V:NTrees=" + str(nTrees) + ":MaxDepth=" + str(depth) + ":BoostType=Grad:Shrinkage=" + str(shrinkage) + ":UseBaggedGrad:GradBaggingFraction=1:SeparationType=GiniIndex:nCuts=50:NNodesMax=10" ) #if testMethod=="depth": # for depth in [2,3]: # factory.BookMethod( TMVA.Types.kBDT, "BDT_ada"+str(phil)+"_200t_"+str(depth)+"d_0.05b_50c","!H:!V:NTrees=200:nEventsMin=150:MaxDepth="+str(depth)+":BoostType=AdaBoost:AdaBoostBeta=0.05:SeparationType=GiniIndex:nCuts=50:PruneMethod=NoPruning") #if testMethod=="shrinkage": # for shrinkage in [0.05,0.1,0.5,1.]: # factory.BookMethod( TMVA.Types.kBDT, "BDT_grad"+str(phil)+"_200t_"+str(shrinkage)+"s_1gb_50c_10nm","!H:!V:NTrees=200:BoostType=Grad:Shrinkage="+str(shrinkage)+":UseBaggedGrad:GradBaggingFraction=1:SeparationType=GiniIndex:nCuts=50:NNodesMax=10") #if testMethod=="bagFrac": # for bagFrac in [0.05,0.1,0.5,1.]: # factory.BookMethod( TMVA.Types.kBDT, 
"BDT_grad"+str(phil)+"_200t_1s_"+str(bagFrac)+"gb_50c_10nm","!H:!V:NTrees=200:BoostType=Grad:Shrinkage=1:UseBaggedGrad:GradBaggingFraction="+str(bagFrac)+":SeparationType=GiniIndex:nCuts=50:NNodesMax=10") #if testMethod=="nCuts": # for nCuts in [5,10,20,50,100,200]: # factory.BookMethod( TMVA.Types.kBDT, "BDT_grad"+str(phil)+"_200t_1s_1gb_"+str(nCuts)+"c_10nm","!H:!V:NTrees=200:BoostType=Grad:Shrinkage=1:UseBaggedGrad:GradBaggingFraction=1:SeparationType=GiniIndex:nCuts="+str(nCuts)+":NNodesMax=10") #if testMethod=="nNM": # for nNM in [10,100,500,1000,10000]: # factory.BookMethod( TMVA.Types.kBDT, "BDT_grad"+str(phil)+"_200t_1s_1gb_50c_"+str(nNM)+"nm","!H:!V:NTrees=200:BoostType=Grad:Shrinkage=1:UseBaggedGrad:GradBaggingFraction=1:SeparationType=GiniIndex:nCuts=50:NNodesMax"+str(nNM)) # -------------------------------------------------------------------------------------------------- # ---- Now you can tell the factory to train, test, and evaluate the MVAs. # Train MVAs #factory.OptimizeAllMethods() factory.TrainAllMethods() # Test MVAs factory.TestAllMethods() # Evaluate MVAs factory.EvaluateAllMethods() # Save the output. outputFile.Close() print "=== wrote root file %s\n" % outfname print "=== TMVAClassification is done!\n"