def runJob():
    """Initialise TMVA and set up a classification factory plus data loader.

    NOTE(review): only the setup steps are performed here -- no methods are
    booked or trained, and nothing is returned.
    """
    # Boot the TMVA tool chain and its Python bindings.
    TMVA.Tools.Instance()
    TMVA.PyMethodBase.PyInitialize()

    # All factory output goes into this file.
    out_file = TFile.Open('TMVA.root', 'RECREATE')

    classification_factory = TMVA.Factory(
        'TMVAClassification',
        out_file,
        '!V:!Silent:Color:DrawProgressBar:Transformations=D,G:AnalysisType=Classification')

    event_loader = TMVA.DataLoader('dataset')
def get_correlation_matrix(year, variables):
    """Return the signal correlation matrix of `variables` as a numpy array.

    NOTE(review): relies on module-level `weight_string` and `cut_string`
    (defined outside this excerpt) and on the `varsList` configuration module.
    """
    # Select the step-2 sample directory and file names for the chosen year.
    if year == 2017:
        sample_dir = varsList.step2Sample2017
        sig_name = varsList.sig2017_0[0]
        bkg_name = varsList.bkg2017_0[0]
    else:
        sample_dir = varsList.step2Sample2018
        sig_name = varsList.sig2018_0[0]
        bkg_name = varsList.bkg2018_0[0]
    signal_path = os.path.join(os.getcwd(), sample_dir, sig_name)
    bkgrnd_path = os.path.join(os.getcwd(), sample_dir, bkg_name)

    # Create TMVA object
    loader = TMVA.DataLoader("tmva_data")

    # Register each requested variable that exists in the DNN variable list;
    # list.index raises ValueError for unknown names, which we report and skip.
    dnn_entries = varsList.varList["DNN"]
    dnn_names = [entry[0] for entry in dnn_entries]
    for var in variables:
        try:
            entry = dnn_entries[dnn_names.index(var)]
            loader.AddVariable(entry[0], entry[1], entry[2], "F")
        except ValueError:
            print("[WARN] The variable {} was not found. Omitting.".format(var))

    # Open the ROOT files and fetch the ljmet trees.
    signal_f = TFile.Open(signal_path)
    signal = signal_f.Get("ljmet")
    bkgrnd_f = TFile.Open(bkgrnd_path)
    bkgrnd = bkgrnd_f.Get("ljmet")

    # Attach the trees; fTreeS/fTreeB are also set directly so the loader's
    # correlation machinery sees them.
    loader.AddSignalTree(signal)
    loader.fTreeS = signal
    loader.AddBackgroundTree(bkgrnd)
    loader.fTreeB = bkgrnd

    # Identical weight expression and cut for signal and background.
    loader.SetSignalWeightExpression(weight_string)
    loader.SetBackgroundWeightExpression(weight_string)
    loader.PrepareTrainingAndTestTree(
        cut_string, cut_string,
        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V:VerboseLevel=Info"
    )

    # Force the dataset build so the correlation histogram gets filled.
    loader.GetDefaultDataSetInfo().GetDataSet().GetEventCollection()

    # Copy the TH2 (1-indexed bins) into a numpy array.
    sig_th2 = loader.GetCorrelationMatrix("Signal")
    n_bins = sig_th2.GetNbinsX()
    sig_corr = np.zeros((n_bins, n_bins))
    for row in range(n_bins):
        for col in range(n_bins):
            sig_corr[row, col] = sig_th2.GetBinContent(row + 1, col + 1)
    return sig_corr
def runJob():
    """Train gradient-boosted BDTs (scanning the nCuts setting) on the
    samples declared in `config`, writing results to TMVA_SSSF.root."""
    TMVA.Tools.Instance()
    TMVA.PyMethodBase.PyInitialize()

    out_file = TFile.Open('TMVA_SSSF.root', 'RECREATE')
    factory = TMVA.Factory(
        'TMVAClassification', out_file,
        '!V:!Silent:Color:DrawProgressBar:AnalysisType=Classification')
    loader = TMVA.DataLoader('datasetSSSF04Feb')

    # Training variables come straight from the shared configuration.
    for branch in config.mvaVariables:
        loader.AddVariable(branch)

    # Chain every MC sample; real data is skipped, and signal vs background
    # is decided by the isSignal flag in config.structure.
    for sampleName, sample in config.samples.items():
        if config.structure[sampleName]['isData'] == 1:
            continue
        print(sampleName)
        sample['tree'] = TChain("Events")
        for fname in sample['name']:
            sample['tree'].Add(fname)
        if config.structure[sampleName]['isSignal'] == 1:
            loader.AddSignalTree(sample['tree'], 1.0)
        else:
            loader.AddBackgroundTree(sample['tree'], 1.0)

    loader.PrepareTrainingAndTestTree(
        TCut(config.cut), 'SplitMode=Random:NormMode=NumEvents:!V')

    # Same gradient-boosted BDT configuration, varied only in nCuts.
    for method_name, options in (
        ("BDTG4C1",  "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=100:MaxDepth=2"),
        ("BDTG4C05", "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=50:MaxDepth=2"),
        ("BDTG4500", "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=500:MaxDepth=2"),
        ("BDTG4750", "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=750:MaxDepth=2"),
    ):
        factory.BookMethod(loader, TMVA.Types.kBDT, method_name, options)

    # Run training, test and evaluation.
    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()
    out_file.Close()
def TMVANN(filenameSTRING, outputDOTrootSTRING, sigtreeSTRINGS, bkgtreeSTRING, variablesSTRING):
    """Train one low-level TMVA MLP per signal tree against a common
    background tree, accumulating results in one output ROOT file.

    Parameters:
        filenameSTRING      -- path of the input ROOT file holding the trees
        outputDOTrootSTRING -- path of the .root file the factory writes to
        sigtreeSTRINGS      -- iterable of signal tree names (one training each)
        bkgtreeSTRING       -- name of the background tree
        variablesSTRING     -- iterable of training variable names

    NOTE(review): this function reads a module-level name `weights` that is
    not defined anywhere in this excerpt -- confirm it exists at module scope.
    """
    # Truncate the output file once; each loop iteration then reopens it in
    # "update" mode so every trained method lands in the same file.
    NNfile = R.TFile(outputDOTrootSTRING, "recreate")
    NNfile.Close()
    for sigtreeSTRING in sigtreeSTRINGS:
        from ROOT import TMVA
        # NOTE(review): `file` shadows the Python 2 builtin and the handle
        # is never closed inside the loop.
        file = R.TFile(filenameSTRING)  # open the input dataset
        signaltree = file.Get(sigtreeSTRING)  # signal tree for this iteration
        backgroundtree = file.Get(bkgtreeSTRING)  # common background tree
        # Objects named "<tree name>" + weights (see NOTE above about `weights`).
        sigweights = file.Get(sigtreeSTRING + weights)
        bkgweights = file.Get(bkgtreeSTRING + weights)
        TMVA.Tools.Instance()
        NNfile = R.TFile(
            outputDOTrootSTRING,
            "update")  # reopen the output file for this method's results
        TMVAfactory = TMVA.Factory(
            "TMVANN", NNfile,
            "V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification"
        )
        TMVAfactory.SetVerbose(False)  # suppress extra factory chatter
        datasetsignalslowlevel = TMVA.DataLoader(
            "datasetsignalslowlevel")  # loader for the low-level variables
        datasetsignalslowlevel.AddSignalTree(signaltree, 1.)  # weight 1.0
        datasetsignalslowlevel.AddBackgroundTree(backgroundtree, 1.)  # weight 1.0
        print(sigweights)
        datasetsignalslowlevel.SetSignalWeightExpression(weights)
        datasetsignalslowlevel.SetBackgroundWeightExpression(weights)
        for i in variablesSTRING:  # register each training variable
            datasetsignalslowlevel.AddVariable(i)
        signalcut = R.TCut("")  # variables are already cut upstream
        backgroundcut = R.TCut("")
        # Use all events, random split, normalise to event counts.
        # NOTE(review): "Splitmode" is spelled with a lowercase m; presumably
        # TMVA matches option keys case-insensitively -- confirm.
        datasetsignalslowlevel.PrepareTrainingAndTestTree(
            signalcut, backgroundcut,
            "nTrain_Signal= 0:nTrain_Background=0:Splitmode=Random:NormMode=NumEvents:!V"
        )
        # One tanh MLP with 25/20/10 hidden nodes, 100 training cycles.
        TMVAfactory.BookMethod(
            datasetsignalslowlevel, TMVA.Types.kMLP,
            "LowLevelNN_3layer25,20,10_100Epoch_tanhNeuron" + sigtreeSTRING,
            "H:!V:NeuronType=tanh:VarTransform=N:NCycles=100:HiddenLayers=25,20,10:TestRate=5"
        )
        TMVAfactory.TrainAllMethods()
        TMVAfactory.TestAllMethods()
        TMVAfactory.EvaluateAllMethods()
        NNfile.Close()
    # NOTE(review): the output file is reopened here but the handle is never
    # used or closed; the indentation in the mangled source is ambiguous
    # (this statement may have been intended inside the loop).
    NNfile = R.TFile(outputDOTrootSTRING, "update")
def runJob():
    """Train a single AdaBoost BDT on the samples declared in `config`,
    writing the TMVA output to TMVA16.root."""
    TMVA.Tools.Instance()
    TMVA.PyMethodBase.PyInitialize()

    loader = TMVA.DataLoader('dataset_8Feb')
    out_file = TFile.Open('TMVA16.root', 'RECREATE')
    factory = TMVA.Factory(
        'TMVAClassification', out_file,
        '!V:!Silent:Color:DrawProgressBar:AnalysisType=Classification')

    # Training variables come straight from the shared configuration.
    for branch in config.mvaVariables:
        loader.AddVariable(branch)

    # Chain every MC sample; real data never enters the training set.
    for sample_name, sample in config.samples.items():
        if config.structure[sample_name]['isData'] == 1:
            continue
        sample['tree'] = TChain("Events")
        for fname in sample['name']:
            sample['tree'].Add(fname)
        if config.structure[sample_name]['isSignal'] == 1:
            loader.AddSignalTree(sample['tree'], 1.0)
        else:
            loader.AddBackgroundTree(sample['tree'], 1.0)

    # NOTE(review): the double colon in 'Random::SplitSeed' is kept verbatim
    # from the original option string; confirm TMVA's parser accepts it.
    loader.PrepareTrainingAndTestTree(
        TCut(config.cut),
        'SplitMode=Random::SplitSeed=10:NormMode=EqualNumEvents')

    factory.BookMethod(
        loader, TMVA.Types.kBDT, "BDT",
        "!H:!V:NTrees=500:MinNodeSize=0.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.1:SeparationType=GiniIndex:nCuts=500"
    )
    # (Several alternative BDT/BDTG configurations were present here as
    # commented-out code in the original; removed as dead code.)

    # Run training, test and evaluation.
    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()
    out_file.Close()
def getCorrelationMatrix(sigFile, bkgFile, weightStr, cutStr, varList):
    """Return (signal correlation matrix as an NxN numpy array, list of
    variable names) for the variables in `varList`.

    Parameters:
        sigFile, bkgFile -- paths to the signal / background ROOT files
        weightStr        -- per-event weight expression (applied to both)
        cutStr           -- selection applied to both samples
        varList          -- sequence of (name, title, unit) variable entries
    """
    varNames = []
    loader = TMVA.DataLoader("dataset")
    for var in varList:
        # FIX: the original tested `var[0] in "NJets_MultiLepCalc"`, a
        # substring check (so e.g. "Jets" or "N" would match too), and then
        # called the misspelled `loader.Addvariable`, which raises
        # AttributeError at runtime. The integer jet-count branch gets type
        # "I"; everything else is registered as a float.
        if var[0] == "NJets_MultiLepCalc":
            loader.AddVariable(var[0], var[1], var[2], "I")
        else:
            loader.AddVariable(var[0], var[1], var[2], "F")
        varNames.append(var[0])
    # open the root files
    input_sig = TFile.Open(sigFile)
    signal = input_sig.Get("ljmet")
    input_bkg = TFile.Open(bkgFile)
    background = input_bkg.Get("ljmet")
    # load in the trees; fTreeS/fTreeB are set directly so the correlation
    # machinery below sees the right trees
    loader.AddSignalTree(signal)
    loader.fTreeS = signal
    loader.AddBackgroundTree(background)
    loader.fTreeB = background
    # identical weight expression for both samples
    loader.SetSignalWeightExpression(weightStr)
    loader.SetBackgroundWeightExpression(weightStr)
    # identical cut for both samples
    loader.PrepareTrainingAndTestTree(
        cutStr, cutStr,
        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V"
    )
    # force the dataset build so the correlation histogram is filled
    loader.GetDefaultDataSetInfo().GetDataSet().GetEventCollection()
    # retrieve the signal correlation matrix
    sig_th2 = loader.GetCorrelationMatrix("Signal")
    # bkg_th2 = loader.GetCorrelationMatrix("Background")
    # convert the TH2 (1-indexed bins) to a numpy array
    n_bins = sig_th2.GetNbinsX()
    sig_corr = np.zeros((n_bins, n_bins))
    for x in range(n_bins):
        for y in range(n_bins):
            sig_corr[x, y] = sig_th2.GetBinContent(x + 1, y + 1)
    return sig_corr, varNames
def TMVANN (filenameSTRING,outputDOTrootSTRING,sigtreeSTRINGS,bkgtreeSTRING,variablesSTRING):
    """Train Fisher, likelihood and several MLP classifiers per signal tree
    against a common background tree.

    Parameters:
        filenameSTRING      -- path of the input ROOT file holding the trees
        outputDOTrootSTRING -- path of the .root file the factory writes to
        sigtreeSTRINGS      -- iterable of signal tree names (one training each)
        bkgtreeSTRING       -- name of the background tree
        variablesSTRING     -- iterable of training variable names

    NOTE(review): reads a module-level name `weights` that is not defined in
    this excerpt -- confirm it exists at module scope.
    """
    for sigtreeSTRING in sigtreeSTRINGS:
        from ROOT import TMVA
        # NOTE(review): `file` shadows the Python 2 builtin and the handle
        # is never closed.
        file = R.TFile(filenameSTRING)  # open the input dataset
        signaltree = file.Get(sigtreeSTRING)  # signal tree for this iteration
        backgroundtree = file.Get(bkgtreeSTRING)  # common background tree
        # Objects named "<tree name>" + weights (see NOTE above about `weights`).
        sigweights = file.Get(sigtreeSTRING+weights)
        bkgweights = file.Get(bkgtreeSTRING+weights)
        TMVA.Tools.Instance()
        # NOTE(review): "recreate" INSIDE the loop truncates the output file
        # on every iteration, so only the last signal tree's results survive
        # -- compare the sibling TMVANN variant, which recreates once and
        # then updates. Confirm which behaviour is intended.
        NNfile = R.TFile(outputDOTrootSTRING,"recreate") #Writing the root file required for the TMVA factory
        TMVAfactory = TMVA.Factory("TMVANN",NNfile,"V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification")
        TMVAfactory.SetVerbose(False)  # suppress extra factory chatter
        dataset = TMVA.DataLoader("dataset")
        dataset.AddSignalTree(signaltree,1.)  # signal, weight 1.0
        dataset.AddBackgroundTree(backgroundtree ,1.)  # background, weight 1.0
        print(sigweights)
        #dataset.SetSignalWeightExpression(weights)
        #dataset.SetBackgroundWeightExpression(weights)
        for i in variablesSTRING:  # register each training variable
            dataset.AddVariable(i)
        signalcut = R.TCut("")  # variables are already cut upstream
        backgroundcut = R.TCut("")
        # Use all events for training/testing, random split, normalised to
        # summed event weights = number of events for each tree, no verbose.
        dataset.PrepareTrainingAndTestTree(signalcut,backgroundcut,"nTrain_Signal= 0:nTrain_Background=0:Splitmode=Random:NormMode=NumEvents:!V")
        # Booking the methods.
        #TMVAfactory.BookMethod(dataset,TMVA.Types.kMLP,"ArtificialNeuralNetwork_1Layer_500Epoch_tanhNeuron","H:!V:NeuronType=tanh:VarTransform=N:NCycles=500:HiddenLayers=8:TestRate=5")
        TMVAfactory.BookMethod(dataset,TMVA.Types.kFisher, "FisherMethod","H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=60:NsmoothMVAPdf=10")  # Fisher discriminant
        TMVAfactory.BookMethod(dataset,TMVA.Types.kLikelihood, "BayesLikelihood","H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=60")  # likelihood ("naive Bayes")
        # NOTE(review): the "reluNeuron" method names below are misleading --
        # every booking here uses NeuronType=tanh.
        TMVAfactory.BookMethod(dataset,TMVA.Types.kMLP,"ArtificialNeuralNetwork_4Layer8Node_500Epoch_tanhNeuron","H:!V:NeuronType=tanh:VarTransform=N:NCycles=500:HiddenLayers=8,8,8,8:TestRate=5")
        TMVAfactory.BookMethod(dataset,TMVA.Types.kMLP,"ArtificialNeuralNetwork_1Layer6Node_500Epoch_reluNeuron","H:!V:NeuronType=tanh:VarTransform=N:NCycles=500:HiddenLayers=6:TestRate=5")
        TMVAfactory.BookMethod(dataset,TMVA.Types.kMLP,"ArtificialNeuralNetwork_1Layer3Node_100Epoch_reluNeuron","H:!V:NeuronType=tanh:VarTransform=N:NCycles=100:HiddenLayers=3:TestRate=5")
        TMVAfactory.BookMethod(dataset,TMVA.Types.kMLP,"ArtificialNeuralNetwork_2Layer5Node_500Epoch_tanhNeuron","H:!V:NeuronType=tanh:VarTransform=N:NCycles=500:HiddenLayers=5,5:TestRate=5")
        TMVAfactory.TrainAllMethods()
        TMVAfactory.TestAllMethods()
        TMVAfactory.EvaluateAllMethods()
        NNfile.Close()
        print ('TMVANN Ran & made ROOT file ' + outputDOTrootSTRING+sigtreeSTRING)
def _dataLoader(self, sigTreeNames, bkgTreeNames):
    """Create and fill the TMVA DataLoader: variables, pre-split
    signal/background train+test trees, event weights, and the
    train/test preparation options."""
    self._data_loader = TMVA.DataLoader(self._options['factory']['name'])

    # Declare every training variable with its configured type.
    for spec in self._variables.values():
        self._data_loader.AddVariable(spec['name'], spec['type'])

    # Each sample name provides a dedicated "_Train" and "_Test" tree.
    for tree_name in sigTreeNames:
        self._data_loader.AddSignalTree(
            self._trees[tree_name + "_Train"], 1.0, "train")
        self._data_loader.AddSignalTree(
            self._trees[tree_name + "_Test"], 1.0, "test")
    for tree_name in bkgTreeNames:
        self._data_loader.AddBackgroundTree(
            self._trees[tree_name + "_Train"], 1.0, "train")
        self._data_loader.AddBackgroundTree(
            self._trees[tree_name + "_Test"], 1.0, "test")

    # One common per-event weight expression for both classes.
    weight_expr = self._options['factory']['weight']
    self._data_loader.SetSignalWeightExpression(weight_expr)
    self._data_loader.SetBackgroundWeightExpression(weight_expr)

    self._data_loader.PrepareTrainingAndTestTree(
        TCut(self._cuts['sig']),
        TCut(self._cuts['bkg']),
        self._options['prepareTrees'])
) # closes a call whose opening parenthesis lies before this excerpt

# Locate and add data files: VBF HH samples feed the signal chain, GGF HH
# samples feed the background chain, with 2016 and 2017 merged per chain.
file_VBF_HH_2016 = "../inputsamples/2016/SKIM_VBFHHTo4B_CV_1_C2V_1_C3_1_13TeV-madgraph.root"
file_VBF_HH_2017 = "../inputsamples/2017/SKIM_VBFHHTo4B_CV_1_C2V_1_C3_1_13TeV-madgraph.root"
file_GGF_HH_2016 = "../inputsamples/2016/SKIM_GluGluToHHTo4B_node_SM_13TeV-madgraph.root"
file_GGF_HH_2017 = "../inputsamples/2017/SKIM_GluGluToHHTo4B_node_SM_13TeV-madgraph_correctedcfg.root"
ch_sig = TChain("bbbbTree")
ch_bkg = TChain("bbbbTree")
ch_sig.AddFile(file_VBF_HH_2016)
ch_sig.AddFile(file_VBF_HH_2017)
#ch_sig.AddFile(file_VBF_HH_BSM_2016)
#ch_sig.AddFile(file_VBF_HH_BSM_2017)
ch_bkg.AddFile(file_GGF_HH_2016)
ch_bkg.AddFile(file_GGF_HH_2017)
# Load data to TMVA. Entries of the form "name:=expr" declare derived
# variables that TMVA evaluates on the fly from tree branches.
dataloader = TMVA.DataLoader('GGFKiller')
dataloader.AddVariable("abs_H1_eta:=abs(H1_eta)")
dataloader.AddVariable("abs_H2_eta:=abs(H2_eta)")
dataloader.AddVariable("H1_pt")
dataloader.AddVariable("H2_pt")
dataloader.AddVariable("JJ_j1_pt")
dataloader.AddVariable("JJ_j2_pt")
dataloader.AddVariable("abs_JJ_eta:=abs(JJ_eta)")
dataloader.AddVariable("h1h2_deltaEta")
dataloader.AddVariable("h1j1_deltaR")
dataloader.AddVariable("h1j2_deltaR")
dataloader.AddVariable("h2j1_deltaR")
dataloader.AddVariable("h2j2_deltaR")
dataloader.AddVariable("abs_j1etaj2eta:=abs(j1etaj2eta)")
dataloader.AddVariable("abs_costh_HH_b1_cm:=abs(costh_HH_b1_cm)")
dataloader.AddVariable("abs_costh_HH_b2_cm:=abs(costh_HH_b2_cm)")
def main():
    """TMVA classification driver for jet-substructure ntuples.

    Reads five command-line arguments (alg, spectators, cuts, vars, methods),
    builds signal (ttbar) and background (dijet) samples, and books whichever
    TMVA methods are listed in `methods`, then trains/tests/evaluates them
    and writes everything to testout.root.
    """
    # Defaults, overridden below by the command line.
    NTupName = "JetTree"
    alg = "TruthRawTrim"
    cuts = ["eta>-1.2","eta<1.2","pt>300","pt<5000","m>61","m<85"]
    vars = ["Tau21","T2jet"]
    #NTupName = "varTree"
    verbose = True
    #alg = "AK10LCTRIMF5R20"
    spectators = ["m"]
    #cuts = ["eta>-1.2","eta<1.2","pt>200","pt<350","m>61","m<85","TruthRecoMatch==1"]
    #vars = ["TauWTA2TauWTA1","ZCUT12","Dip23","TJetVol","ActiveArea","PullC10","Angularity"]
    methods = "Likelihood"
    print "Starting and getting arguments:"
    allargs = sys.argv[1:]
    if len(allargs)<5:
        print "You input these args"
        print allargs
        print "Not enough args, please try again"
        return 1
    else:
        alg = allargs[0]
        spectators = allargs[1].split(",")
        cuts = allargs[2].split(",")
        vars = allargs[3].split(",")
        methods = allargs[4]
    print "Running with args:"
    print " alg = ",alg
    print " spectators = ",spectators
    print " cuts = ",cuts
    print " vars = ",vars
    print " methods = ",methods
    # Print methods (accept space- or comma-separated lists).
    mlist = methods.replace(' ',',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()
    #===============================
    #Read training and test data
    #===============================
    #InputDir = "../gen_20170529/"
    InputDir="../Ana_EventGeneration/"
    #InputDir="~/Downloads/"
    print "Getting inputs from: ",InputDir
    #s1 = TFile(InputDir+"ntuple_ttbar_2000.root");
    #b1 = TFile(InputDir+"ntuple_dijet_800_1400.root");
    s1 = TFile(InputDir+"ntuple_tt_test10000.root");
    b1 = TFile(InputDir+"ntuple_dijet_test10000.root");
    # Output file
    OutFileName="testout.root"
    outputFile = TFile( OutFileName, 'RECREATE' )
    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" )
    # Set verbosity
    factory.SetVerbose( verbose )
    # data loader
    dataloader=TMVA.DataLoader("dataset")
    # weight=""
    # weight+="pass_selection*EventWeight*CrossSection*("
    # weight+=alg+"_pt>"+pt1+" && "
    # weight+=alg+"_pt<"+pt2
    # if m1!="0":
    #     weight+=" && "+alg+"_m>"+m1+" && "
    #     weight+=alg+"_m<"+m2
    # weight+=")"
    #
    # #Get signal and background histograms
    # if variable=="mass":
    #     histname = "akt10"+alg+"_trim_"+variable
    # else:
    #     histname = alg+"_"+variable
    #======================================
    #Predefined cuts - for isntance on M(j1)
    #======================================
    # Cut strings are multiplied together ("1.0 * (cut1) * (cut2) ...") so
    # they can be used directly as a weight-style selection expression.
    mycuts = "1.0"
    mycutb = "1.0"
    for cut in cuts:
        placecut=cut
        # Kinematic cuts get the algorithm prefix (e.g. "TruthRawTrim_pt>300").
        # NOTE(review): cut[:4]=="m" is only true when the cut string is
        # exactly "m"; mass cuts like "m>61" therefore do NOT receive the
        # alg prefix -- possibly cut[:1]=="m" was intended. Confirm.
        if cut[:2]=="pt" or cut[:3]=="eta" or cut[:4]=="m":
            placecut = "* ("+alg+"_"+cut+")"
        else:
            placecut="* ("+cut+") "
        mycuts += placecut
        mycutb += placecut
    print "MyCutsSig: ",mycuts
    print "MyCutsBkg: ",mycutb
    #===================================
    #Spectator variables from tree
    #=====================================
    for spec in spectators:
        dataloader.AddSpectator( spec, 'F' )
    #===================================
    #MVA variables from tree
    #=====================================
    for var in vars:
        dataloader.AddVariable( var , 'F' )
    #===============================
    #Read training and test data
    #===============================
    print "Getting trees ... ",NTupName
    st1 = s1.Get(NTupName)
    bt1 = b1.Get(NTupName)
    #=========================================
    # global event weights per tree (see below for setting event-wise weights)
    #=========================================
    ws1 = 1.0
    wb1 = 1.0
    #=========================================
    # You can add an arbitrary number of signal or background trees
    #=========================================
    dataloader.AddSignalTree ( st1, ws1 );
    #SCHSU
    #dataloader.SetSignalWeightExpression("EventWeight*CrossSection");
    dataloader.AddBackgroundTree( bt1, wb1 );
    #dataloader.SetBackgroundWeightExpression("EventWeight*CrossSection");
    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
    mycutSig = TCut(mycuts)
    mycutBkg = TCut(mycutb)
    dataloader.PrepareTrainingAndTestTree( mycutSig, mycutBkg, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" )
    # --------------------------------------------------------------------------------------------------
    # ---- Book MVA methods
    #
    # please lookup the various method configuration options in the corresponding cxx files, eg:
    # src/MethoCuts.cxx, etc, or here: http:#tmva.sourceforge.net/optionRef.html
    # it is possible to preset ranges in the option string in which the cut optimisation should be done:
    # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable
    # Cut optimisation
    if "Cuts" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" )
    if "CutsD" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" )
    if "CutsPCA" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" )
    if "CutsGA" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" )
    if "CutsSA" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" )
    # Likelihood ("naive Bayes estimator")
    if "Likelihood" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "Likelihood", "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" )
    # Decorrelated likelihood
    if "LikelihoodD" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" )
    # PCA-transformed likelihood
    if "LikelihoodPCA" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" )
    # Use a kernel density estimator to approximate the PDFs
    if "LikelihoodKDE" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" )
    # Use a variable-dependent mix of splines and kernel density estimator
    if "LikelihoodMIX" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" )
    # Test the multi-dimensional probability density estimator
    # here are the options strings for the MinMax and RMS methods, respectively:
    # "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
    # "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
    if "PDERS" in mlist:
        # DEFAULT
        # factory.BookMethod( dataloader, TMVA.Types.kPDERS, "PDERS",
        # "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" )
        # CHOOSE RIGID VOLUME SO IT DOESNT TAKE SO LONG
        factory.BookMethod( dataloader, TMVA.Types.kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Unscaled:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" )
    if "PDERSD" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" )
    if "PDERSPCA" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" )
    # Multi-dimensional likelihood estimator using self-adapting phase-space binning
    if "PDEFoam" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kPDEFoam, "PDEFoam", "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" )
    if "PDEFoamBoost" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kPDEFoam, "PDEFoamBoost", "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" )
    # K-Nearest Neighbour classifier (KNN)
    if "KNN" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" )
    # H-Matrix (chi2-squared) method
    if "HMatrix" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kHMatrix, "HMatrix", "!H:!V" )
    # Linear discriminant (same as Fisher discriminant)
    if "LD" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" )
    # Fisher discriminant (same as LD)
    if "Fisher" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" )
    # Fisher with Gauss-transformed input variables
    if "FisherG" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFisher, "FisherG", "H:!V:VarTransform=Gauss" )
    # Composite classifier: ensemble (tree) of boosted Fisher classifiers
    if "BoostedFisher" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" )
    # Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
    if "FDA_MC" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );
    if "FDA_GA" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );
    if "FDA_SA" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );
    if "FDA_MT" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );
    if "FDA_GAMT" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );
    if "FDA_MCMT" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );
    # TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
    if "MLP" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" )
    if "MLPBFGS" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" )
    if "MLPBNN" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ) # BFGS training with bayesian regulators
    # CF(Clermont-Ferrand)ANN
    if "CFMlpANN" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ) # n_cycles:#nodes:#nodes:...
    # Tmlp(Root)ANN
    if "TMlpANN" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ) # n_cycles:#nodes:#nodes:...
    # Support Vector Machine
    if "SVM" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" )
    # Boosted Decision Trees
    if "BDTG" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDTG", "!H:!V:NTrees=1000:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" )
    if "BDT" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDT", "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" )
    if "BDTB" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" )
    if "BDTD" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDTD", "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" )
    # RuleFit -- TMVA implementation of Friedman's method
    if "RuleFit" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" )
    # --------------------------------------------------------------------------------------------------
    # ---- Now you can tell the factory to train, test, and evaluate the MVAs.
    # Train MVAs
    factory.TrainAllMethods()
    # Test MVAs
    factory.TestAllMethods()
    # Evaluate MVAs
    factory.EvaluateAllMethods()
    # Save the output.
    outputFile.Close()
    print "=== wrote root file %s\n" % OutFileName
    print "=== TMVAClassification is done!\n"
# Output file and factory for a binary-classification training.
output = TFile.Open('BinaryClassificationKeras.root', 'RECREATE')
factory = TMVA.Factory(
    'TMVAClassification', output,
    '!V:!Silent:Color:DrawProgressBar:Transformations=G:AnalysisType=Classification'
)

# Load data: fetch the standard TMVA example file if it is not cached locally.
if not isfile('tmva_class_example.root'):
    call(['curl', '-O', 'http://root.cern.ch/files/tmva_class_example.root'])
data = TFile.Open('tmva_class_example.root')
signal = data.Get('TreeS')
background = data.Get('TreeB')

dataloader = TMVA.DataLoader('BinaryClassificationKeras')
# Every branch of the signal tree becomes a training variable.
for branch in signal.GetListOfBranches():
    dataloader.AddVariable(branch.GetName())
dataloader.AddSignalTree(signal, 1.0)
dataloader.AddBackgroundTree(background, 1.0)
# No cut; fixed 4000-event training samples per class, random split.
dataloader.PrepareTrainingAndTestTree(
    TCut(''),
    'nTrain_Signal=4000:nTrain_Background=4000:SplitMode=Random:NormMode=NumEvents:!V'
)

# Generate model
# Define model
# NOTE(review): `init=` is the Keras 1.x spelling of the initializer argument
# (renamed `kernel_initializer` in Keras 2) -- confirm the installed version.
model = Sequential()
model.add(Dense(64, init='glorot_normal', activation='relu', input_dim=4))
def add_variables(data_loader, good_variables):
    """Register every variable except "Bc_M" with the given TMVA DataLoader.

    "Bc_M" is skipped — the signal/background cuts below are defined on it,
    so it must not be offered to the classifier as an input.  Each remaining
    variable is added as a float ("F").  Returns the same loader for chaining.
    """
    for name in good_variables:
        if name == "Bc_M":
            continue  # selection observable, not a training input
        data_loader.AddVariable(name, "F")
    return data_loader


if __name__ == '__main__':
    decay_name = decay_names[decay_nb]
    # NOTE(review): "~" is passed verbatim to TFile — confirm ROOT expands it.
    output_file = TFile("~/TMVA/TMVAoutput" + decay_name + str(run) + ".root", "RECREATE")
    factory = TMVA.Factory("TMVA_" + decay_name, output_file, "DrawProgressBar=True")
    data_loader = TMVA.DataLoader("dataloader")

    # Build the MC (signal-like) and data (background-like) trees, keep only
    # the selected branches on both, then register them as training inputs.
    moca_tree = dhand.combine_trees(run, decay_name, True)
    data_tree = dhand.combine_trees(run, decay_name, False)
    branches_to_keep = dhand.branch_selection(data_tree, branches, [])
    moca_tree = dhand.activate_branches(moca_tree, branches_to_keep)
    data_tree = dhand.activate_branches(data_tree, branches_to_keep)
    add_variables(data_loader, branches_to_keep)

    # k-fold split on runNumber; the Bc_M window picks signal region vs sideband.
    sgcut_test = TCut("runNumber%5==" + kfold + "&& (Bc_M > 5200 && Bc_M < 5400)")
    sgcut_train = TCut("runNumber%5!=" + kfold + "&& (Bc_M > 5200 && Bc_M < 5400)")
    bgcut_test = TCut("runNumber%5==" + kfold + "&& Bc_M > 5400")
    bgcut_train = TCut("runNumber%5!=" + kfold + "&& Bc_M > 5400")
# --- TMVA setup for the proctrain CSV-derived ntuple -----------------------
trainfilename = "proctrain.csv.root"
trainfile = TFile.Open(trainfilename, "read")
# traintree_name is defined elsewhere in this script (not visible here).
traintree = trainfile.Get(traintree_name)
TMVA.Tools.Instance()
# create the tmva output file, which will be full of details about the training
fout = TFile("tmvatest.root", "RECREATE")
# use the default factory
factory = TMVA.Factory("TMVAClassification", fout)
dataloader = TMVA.DataLoader("dataset")
# build the list of variables from the branches of the training tree
al = traintree.GetListOfBranches()
varlist = []
for i in range(al.GetEntries()):
    varlist += [al[i].GetName()]
if debug:
    print "all variables of ", trainfile, " ", varlist
    print "now stripping EventId Weight and Label "
# these three variables should not be used for training
mva_input_list = [e for e in varlist if not e in ['EventId', 'Weight', 'Label']]
def main():  # runs the program
    """Drive a TMVA/PyKeras classification training on ljmet ntuples.

    Parses the command line, builds the variable list (the default "DNN"
    list for option 0, or one read back from a previous HPO run for
    option 1), wires the signal/background trees into a TMVA DataLoader,
    books a PyKeras DNN and runs the train/test/evaluate cycle.  The ROC
    integral is printed and, for option 1, appended to varsListHPO.txt.
    """
    checkRootVer()  # check that ROOT version is correct
    try:  # retrieve command line options
        shortopts = "d:o:v:w:y:h?"  # possible command line options
        longopts = ["dataset=", "option=", "where=", "year=", "verbose", "help", "usage"]
        opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts)
    except getopt.GetoptError:  # output error if command line argument invalid
        print("ERROR: unknown options in argument %s" % sys.argv[1:])
        usage()
        sys.exit(1)

    # Table of [short flag, long flag, internal name, default value].
    myArgs = np.array([
        ['-d', '--dataset', 'dataset', 'dataset'],
        ['-w', '--where', 'where', 'lpc'],
        ['-y', '--year', 'year', 2017],
        ['-o', '--option', 'option', 0],
        ['-v', '--verbose', 'verbose', True]
    ], dtype="object")

    for opt, arg in opts:
        if opt in myArgs[:, 0]:
            index = np.where(myArgs[:, 0] == opt)[0][0]  # np.where returns a tuple of arrays
            myArgs[index, 3] = str(arg)  # override the default with the command line argument
        elif opt in myArgs[:, 1]:
            index = np.where(myArgs[:, 1] == opt)[0][0]
            myArgs[index, 3] = arg
        if opt in ("-?", "-h", "--help", "--usage"):  # provides command line help
            usage()
            sys.exit(0)

    # Initialize some containers
    bkg_list = []
    bkg_trees_list = []
    sig_list = []
    sig_trees_list = []

    # Initialize some variables after reading in arguments
    option_index = np.where(myArgs[:, 2] == 'option')[0][0]
    dataset_index = np.where(myArgs[:, 2] == 'dataset')[0][0]
    verbose_index = np.where(myArgs[:, 2] == 'verbose')[0][0]
    where_index = np.where(myArgs[:, 2] == 'where')[0][0]
    year_index = np.where(myArgs[:, 2] == 'year')[0][0]
    DATASETPATH = myArgs[dataset_index][3]
    DATASET = DATASETPATH.split("/")[0]
    # Normalize types: command-line values arrive as strings while the table
    # defaults are int (0, 2017).  Without the coercions the comparisons
    # below (OPTION == "0", YEAR == 2017) silently failed for one of the
    # two sources (default int 0 never matched "0"; CLI "2017" never
    # matched 2017), leaving varList/inputDir undefined.
    OPTION = str(myArgs[option_index][3])
    VERBOSE = myArgs[verbose_index][3]
    WHERE = myArgs[where_index][3]
    YEAR = int(myArgs[year_index][3])

    # Pick the input directory from the host site and the data-taking year.
    if WHERE == "lpc":
        if YEAR == 2017:
            inputDir = varsList.inputDirLPC2017
        elif YEAR == 2018:
            inputDir = varsList.inputDirLPC2018
    else:
        if YEAR == 2017:
            inputDir = varsList.inputDirBRUX2017
        elif YEAR == 2018:
            inputDir = varsList.inputDirBRUX2018

    if OPTION == "0":
        print("Using Option 0: default varList")
        varList = varsList.varList["DNN"]
    elif OPTION == "1":
        print("Using Option 1: selected data from {}".format(DATASETPATH))
        # The variable names follow the "Variable List:" marker line.
        with open(DATASETPATH + "/varsListHPO.txt", "r") as hpo_file:
            varsListHPO = hpo_file.readlines()
        varList = []
        START = False
        for line in varsListHPO:
            if START == True:
                varList.append(str(line.strip()))
            if "Variable List:" in line:
                START = True

    numVars = len(varList)
    outf_key = str("Keras_" + str(numVars) + "vars")
    OUTF_NAME = DATASET + "/weights/TMVA_" + outf_key + ".root"
    outputfile = TFile(OUTF_NAME, "RECREATE")

    # initialize and set-up TMVA factory
    factory = TMVA.Factory(
        "Training", outputfile,
        "!V:!ROC:Silent:Color:!DrawProgressBar:Transformations=I;:AnalysisType=Classification")
    factory.SetVerbose(bool(myArgs[verbose_index, 3]))
    (TMVA.gConfig().GetIONames()).fWeightFileDir = "weights/" + outf_key

    # initialize and set-up TMVA loader; jet multiplicity is integer-typed.
    loader = TMVA.DataLoader(DATASET)
    if OPTION == "0":
        # option-0 entries are (expression, title, unit) tuples
        for var in varList:
            if var[0] == "NJets_MultiLepCalc":
                loader.AddVariable(var[0], var[1], var[2], 'I')
            else:
                loader.AddVariable(var[0], var[1], var[2], "F")
    if OPTION == "1":
        # option-1 entries are bare expression strings
        for var in varList:
            if var == "NJets_MultiLepCalc":
                loader.AddVariable(var, "", "", "I")
            else:
                loader.AddVariable(var, "", "", "F")

    # add signal files
    if YEAR == 2017:
        for i in range(len(varsList.sig2017_2)):
            sig_list.append(TFile.Open(inputDir + varsList.sig2017_2[i]))
            sig_trees_list.append(sig_list[i].Get("ljmet"))
            sig_trees_list[i].GetEntry(0)
            loader.AddSignalTree(sig_trees_list[i])
    elif YEAR == 2018:
        for i in range(len(varsList.sig2018_2)):
            sig_list.append(TFile.Open(inputDir + varsList.sig2018_2[i]))
            sig_trees_list.append(sig_list[i].Get("ljmet"))
            sig_trees_list[i].GetEntry(0)
            loader.AddSignalTree(sig_trees_list[i])

    # add background files, skipping empty samples
    if YEAR == 2017:
        for i in range(len(varsList.bkg2017_2)):
            bkg_list.append(TFile.Open(inputDir + varsList.bkg2017_2[i]))
            bkg_trees_list.append(bkg_list[i].Get("ljmet"))
            bkg_trees_list[i].GetEntry(0)
            if bkg_trees_list[i].GetEntries() == 0:
                continue
            loader.AddBackgroundTree(bkg_trees_list[i])
    elif YEAR == 2018:
        for i in range(len(varsList.bkg2018_2)):
            bkg_list.append(TFile.Open(inputDir + varsList.bkg2018_2[i]))
            bkg_trees_list.append(bkg_list[i].Get("ljmet"))
            bkg_trees_list[i].GetEntry(0)
            if bkg_trees_list[i].GetEntries() == 0:
                continue
            loader.AddBackgroundTree(bkg_trees_list[i])

    loader.SetSignalWeightExpression(weightStrS)
    loader.SetBackgroundWeightExpression(weightStrB)
    mycutSig = TCut(cutStrS)
    mycutBkg = TCut(cutStrB)
    loader.PrepareTrainingAndTestTree(
        mycutSig, mycutBkg,
        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V")

    ######################################################
    ######################################################
    ######                                          ######
    ######            K E R A S   D N N             ######
    ######                                          ######
    ######################################################
    ######################################################

    # Hyper parameters, filled below per option.
    HIDDEN = 0
    NODES = 0
    LRATE = 0.
    PATTERN = ""
    REGULATOR = ""
    ACTIVATION = ""
    BATCH_SIZE = 0
    # modify this when implementing hyper parameter optimization:
    model_name = 'TTTT_' + str(numVars) + 'vars_model.h5'
    EPOCHS = 100
    PATIENCE = 20

    # edit these based on hyper parameter optimization results
    if OPTION == "0":
        HIDDEN = 3
        NODES = 100
        LRATE = 0.01
        PATTERN = 'static'
        REGULATOR = 'none'
        ACTIVATION = 'relu'
        BATCH_SIZE = 256
    if OPTION == "1":
        # Parse the optimized parameters written by the HPO run; values follow
        # the "Optimized Parameters:" marker line, one "Key: value" per line.
        datasetDir = os.listdir(DATASETPATH)
        for file in datasetDir:
            if "params" in file:
                optFileName = file
        with open(DATASETPATH + "/" + optFileName, "r") as opt_handle:
            optFile = opt_handle.readlines()
        START = False
        for line in optFile:
            if START == True:
                if "Hidden" in line: HIDDEN = int(line.split(":")[1].strip())
                if "Initial" in line: NODES = int(line.split(":")[1].strip())
                if "Batch" in line: BATCH_SIZE = 2**int(line.split(":")[1].strip())
                if "Learning" in line: LRATE = float(line.split(":")[1].strip())
                if "Pattern" in line: PATTERN = str(line.split(":")[1].strip())
                if "Regulator" in line: REGULATOR = str(line.split(":")[1].strip())
                if "Activation" in line: ACTIVATION = str(line.split(":")[1].strip())
            if "Optimized Parameters:" in line:
                START = True

    kerasSetting = '!H:!V:VarTransform=G:FilenameModel=' + model_name + \
        ':SaveBestOnly=true' + \
        ':NumEpochs=' + str(EPOCHS) + \
        ':BatchSize=' + str(BATCH_SIZE) + \
        ':TriesEarlyStopping=' + str(PATIENCE)

    # Build, persist and book the Keras model, then run the TMVA cycle.
    model = build_model(HIDDEN, NODES, LRATE, REGULATOR, PATTERN, ACTIVATION, numVars)
    model.save(model_name)
    model.summary()
    factory.BookMethod(loader, TMVA.Types.kPyKeras, 'PyKeras', kerasSetting)
    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()
    outputfile.Close()

    print("Finished training in " + str((time.time() - START_TIME) / 60.0) + " minutes.")
    ROC = factory.GetROCIntegral(DATASET, 'PyKeras')
    print('ROC value is: {}'.format(ROC))
    if OPTION == "1":
        # Append the achieved ROC next to the variable list of this HPO run.
        # (was: DATASETPATH + "varsListHPO.txt" — missing the "/" separator,
        # so the value landed in a mis-named file beside the dataset dir
        # instead of the varsListHPO.txt read at the top of this function)
        with open(DATASETPATH + "/varsListHPO.txt", "a") as varsListHPOtxt:
            varsListHPOtxt.write("ROC Value: {}".format(ROC))
def main(): try: # retrive command line options shortopts = "m:i:n:d:k:l:t:o:vh?" longopts = ["methods=", "inputfile=", "nTrees=", "maxDepth=", "mass=", "varListKey=", "inputtrees=", "outputfile=", "verbose", "help", "usage"] opts, args = getopt.getopt( sys.argv[1:], shortopts, longopts ) except getopt.GetoptError: # print help information and exit: print "ERROR: unknown options in argument %s" % sys.argv[1:] usage() sys.exit(1) infname = DEFAULT_INFNAME treeNameSig = DEFAULT_TREESIG treeNameBkg = DEFAULT_TREEBKG outfname = DEFAULT_OUTFNAME methods = DEFAULT_METHODS nTrees = DEFAULT_NTREES mDepth = DEFAULT_MDEPTH varListKey = DEFAULT_VARLISTKEY verbose = True for o, a in opts: if o in ("-?", "-h", "--help", "--usage"): usage() sys.exit(0) elif o in ("-m", "--methods"): methods = a elif o in ("-d", "--maxDepth"): mDepth = a elif o in ("-l", "--varListKey"): varListKey = a elif o in ("-i", "--inputfile"): infname = a elif o in ("-n", "--nTrees"): nTrees = a elif o in ("-o", "--outputfile"): outfname = a elif o in ("-t", "--inputtrees"): a.strip() trees = a.rsplit( ' ' ) trees.sort() trees.reverse() if len(trees)-trees.count('') != 2: print "ERROR: need to give two trees (each one for signal and background)" print trees sys.exit(1) treeNameSig = trees[0] treeNameBkg = trees[1] elif o in ("-v", "--verbose"): verbose = True varList = varsList.varList[varListKey] nVars = str(len(varList))+'vars' Note=methods+'_'+varListKey+'_'+nVars+'_mDepth'+mDepth outfname = "dataset/weights/TMVA_"+Note+".root" # Print methods mlist = methods.replace(' ',',').split(',') print "=== TMVAClassification: use method(s)..." 
for m in mlist: if m.strip() != '': print "=== - <%s>" % m.strip() # Import ROOT classes from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut # check ROOT version, give alarm if 5.18 if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544: print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA" print "*** does not run properly (function calls with enums in the argument are ignored)." print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples)," print "*** or use another ROOT version (e.g., ROOT 5.19)." sys.exit(1) # Import TMVA classes from ROOT from ROOT import TMVA # Output file outputFile = TFile( outfname, 'RECREATE' ) # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options) # All TMVA output can be suppressed by removing the "!" (not) in # front of the "Silent" argument in the option string factory = TMVA.Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;:AnalysisType=Classification" ) loader = TMVA.DataLoader("dataset") # Set verbosity # factory.SetVerbose( verbose ) # If you wish to modify default settings # (please check "src/Config.h" to see all available global options) # gConfig().GetVariablePlotting()).fTimesRMS = 8.0 (TMVA.gConfig().GetIONames()).fWeightFileDir = "weights/"+Note # Define the input variables that shall be used for the classifier training # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" # [all types of expressions that can also be parsed by TTree::Draw( "expression" )] for iVar in varList: if iVar[0]=='NJets_JetSubCalc': loader.AddVariable(iVar[0],iVar[1],iVar[2],'I') else: loader.AddVariable(iVar[0],iVar[1],iVar[2],'F') # You can add so-called "Spectator variables", which are not used in the MVA training, # but will appear in the final "TestTree" produced by TMVA. 
This TestTree will contain the # input variables, the response values of all trained MVAs, and the spectator variables inputDir = varsList.inputDir infname = "TTTT_TuneCP5_13TeV-amcatnlo-pythia8_hadd.root" iFileSig = TFile.Open(inputDir+infname) sigChain = iFileSig.Get("ljmet") loader.AddSignalTree(sigChain) bkg_list = [] bkg_trees_list = [] hist_list = [] weightsList = [] bkgList = varsList.bkg for i in range(len(bkgList)): bkg_list.append(TFile.Open(inputDir+bkgList[i])) print inputDir+bkgList[i] bkg_trees_list.append(bkg_list[i].Get("ljmet")) bkg_trees_list[i].GetEntry(0) if bkg_trees_list[i].GetEntries() == 0: continue loader.AddBackgroundTree( bkg_trees_list[i], 1) signalWeight = 1 #0.0159/sigChain.GetEntries() #xs (pb) # ====== register trees ==================================================== # To give different trees for training and testing, do as follows: # loader.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ) # loader.AddSignalTree( signalTestTree, signalTestWeight, "Test" ) # Use the following code instead of the above two or four lines to add signal and background # training and test events "by hand" # NOTE that in this case one should not give expressions (such as "var1+var2") in the input # variable definition, but simply compute the expression before adding the event # # # --- begin ---------------------------------------------------------- # # ... 
*** please lookup code in TMVA/macros/TMVAClassification.C *** # # # --- end ------------------------------------------------------------ # # ====== end of register trees ============================================== # Set individual event weights (the variables must exist in the original TTree) # for signal : loader.SetSignalWeightExpression ("weight1*weight2"); # for background: loader.SetBackgroundWeightExpression("weight1*weight2"); loader.SetSignalWeightExpression( weightStrS ) loader.SetBackgroundWeightExpression( weightStrB ) # Apply additional cuts on the signal and background sample. # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" ) mycutSig = TCut( cutStrS ) mycutBkg = TCut( cutStrB ) # Here, the relevant variables are copied over in new, slim trees that are # used for TMVA training and testing # "SplitMode=Random" means that the input events are randomly shuffled before # splitting them into training and test samples loader.PrepareTrainingAndTestTree( mycutSig, mycutBkg, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ) # -------------------------------------------------------------------------------------------------- # ---- Book MVA methods # # please lookup the various method configuration options in the corresponding cxx files, eg: # src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html # it is possible to preset ranges in the option string in which the cut optimisation should be done: # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable # Cut optimisation # bdtSetting for "BDT" bdtSetting = '!H:!V:NTrees=%s:MaxDepth=%s' %(nTrees,mDepth) bdtSetting += ':MinNodeSize=2.5%:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20' bdtSetting += ':IgnoreNegWeightsInTraining=True' # bdtSetting for "BDTMitFisher" bdtFSetting = '!H:!V:NTrees=%s' %nTrees bdtFSetting += 
':MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20' bdtFSetting += ':IgnoreNegWeightsInTraining=True' # bdtSetting for "BDTG" bdtGSetting = '!H:!V:NTrees=%s:MaxDepth=%s' %(nTrees,mDepth) bdtGSetting += ':MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20' bdtGSetting += ':Pray' #Pray takes into account the effect of negative bins in BDTG #bdtGSetting += ':IgnoreNegWeightsInTraining=True' # bdtSetting for "BDTB" bdtBSetting = '!H:!V:NTrees=%s' %nTrees bdtBSetting += ':MinNodeSize=2.5%:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20' bdtBSetting += ':IgnoreNegWeightsInTraining=True' # bdtSetting for "BDTD" bdtDSetting = '!H:!V:NTrees=%s' %nTrees bdtDSetting += ':MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate' bdtDSetting += ':IgnoreNegWeightsInTraining=True' #Note also that explicitly setting *nEventsMin* so far OVERWRITES the option recomeded ^[[0m #BOOKING AN ALGORITHM # if methods=="BDT": factory.BookMethod( TMVA.Types.kBDT, "BDT",bdtSetting) if methods=="BDT": factory.BookMethod( loader, TMVA.Types.kBDT, "BDT",bdtSetting) if methods=="BDTG": factory.BookMethod( TMVA.Types.kBDT, "BDTG",bdtGSetting) if methods=="BDTMitFisher": factory.BookMethod( TMVA.Types.kBDT, "BDTMitFisher",bdtFSetting) if methods=="BDTB": factory.BookMethod( TMVA.Types.kBDT, "BDTB",bdtBSetting) if methods=="BDTD": factory.BookMethod( TMVA.Types.kBDT, "BDTD",bdtDSetting) # -------------------------------------------------------------------------------------------------- # ---- Now you can tell the loader to train, test, and evaluate the MVAs. # Train MVAs print "train all method" factory.TrainAllMethods() print "test all method" # Test MVAs factory.TestAllMethods() # Evaluate MVAs factory.EvaluateAllMethods() # Save the output. 
outputFile.Close() # save plots: os.chdir('dataset/weights/'+Note) if not gROOT.IsBatch(): TMVA.TMVAGui( outfname ) print "DONE"
plt.style.use('ggplot') RNG = np.random.RandomState(1) # Create an example regression dataset X = np.linspace(0, 6, 100)[:, np.newaxis] y = np.sin(X).ravel() + \ np.sin(6 * X).ravel() + \ RNG.normal(0, 0.1, X.shape[0]) # Fit a regression model output = TFile('tmva_output.root', 'recreate') factory = TMVA.Factory('regressor', output, 'AnalysisType=Regression:' '!V:Silent:!DrawProgressBar') if ROOT_VERSION >= '6.07/04': data = TMVA.DataLoader('.') else: data = factory data.AddVariable('x', 'F') data.AddTarget('y', 'F') add_regression_events(data, X, y) add_regression_events(data, X, y, test=True) # The following line is necessary if events have been added individually: data.PrepareTrainingAndTestTree(TCut('1'), '') if ROOT_VERSION >= '6.07/04': BookMethod = factory.BookMethod else: BookMethod = TMVA.Factory.BookMethod BookMethod(
def main():
    """Train Fisher, BDT and MLP classifiers on the HZZ4L vs ZZ4L ntuples."""
    print "\n", "=" * 80
    print "\tDESY 2017 - classification with TMVA"
    print "=" * 80

    # summary root file
    summaryFilename = 'TMVA.root'
    # results directory (also used as the DataLoader name below)
    resultsDir = 'results'
    os.system('mkdir -p %s' % resultsDir)

    #------------------------------------------------------------------
    # get signal file and associated Root tree
    sigFilename = "../public/ntuple_HZZ4L.root"
    sigFile, sigTree = getTree(sigFilename)

    # get background file and associated Root tree
    bkgFilename = "../public/ntuple_ZZ4L.root"
    bkgFile, bkgTree = getTree(bkgFilename)

    #------------------------------------------------------------------
    # create a factory for booking machine learning methods
    #------------------------------------------------------------------
    outputFile = TFile("TMVA.root", "recreate")
    options = ''' !V Color !Silent DrawProgressBar AnalysisType=Classification Transformations=I;D '''
    factory = TMVA.Factory("Z1massZ2mass", outputFile, formatOptions(options))

    #------------------------------------------------------------------
    # set up data set for training and testing
    #------------------------------------------------------------------
    dataLoader = TMVA.DataLoader(resultsDir)

    # define variables to be used
    dataLoader.AddVariable("Z1mass", 'D')
    dataLoader.AddVariable("Z2mass", 'D')

    # define from which trees data are to be taken from and the global
    # weights to be assigned to the training data; per-event weights come
    # from the "weight" branch.
    sigWeight = 1.0
    dataLoader.AddSignalTree(sigTree, sigWeight)
    dataLoader.SetSignalWeightExpression("weight")
    bkgWeight = 1.0
    dataLoader.AddBackgroundTree(bkgTree, bkgWeight)
    dataLoader.SetBackgroundWeightExpression("weight")

    # you can apply cuts, if needed (empty cut = keep everything)
    cut = TCut("")
    options = ''' SplitMode=Random NormMode=EqualNumEvents nTrain_Signal=2500 nTest_Signal=2500 nTrain_Background=2500 nTest_Background=2500 !V '''
    dataLoader.PrepareTrainingAndTestTree(
        cut,   # signal cut
        cut,   # background cut
        formatOptions(options))

    #------------------------------------------------------------------
    # ok, almost done, define machine learning methods to be run
    #------------------------------------------------------------------
    # Fisher discriminant
    options = ''' !H !V VarTransform=None '''
    factory.BookMethod(dataLoader, TMVA.Types.kFisher, "Fisher",
                       formatOptions(options))
    # boosted decision trees
    options = ''' !H !V BoostType=AdaBoost NTrees=200 nEventsMin=100 nCuts=50 '''
    factory.BookMethod(dataLoader, TMVA.Types.kBDT, "BDT",
                       formatOptions(options))
    # multi-layer perceptron
    options = ''' !H !V NCycles=500 VarTransform=N HiddenLayers=5 TrainingMethod=BFGS '''
    factory.BookMethod(dataLoader, TMVA.Types.kMLP, "MLP",
                       formatOptions(options))

    #------------------------------------------------------------------
    # ok, let's go!
    #------------------------------------------------------------------
    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()
    outputFile.Close()
'BDTF': 0, 'MLP': 0, 'MLPBFGS': 0, 'MLPBNN': 0, 'CFMlpANN': 0, 'TMlpANN': 0 } factory = t.Factory( 'vbf_bdt_combined_james_current', outputFile, '!V:!Silent:Color:DrawProgressBar:Transformations=I;D;G,D:AnalysisType=Classification' ) factory.Print() t.gConfig().GetIONames().fWeightFileDir = outputWeightsDir dataloader = t.DataLoader(".") dataloader.AddVariable('dijetDEta', 'dijetDEta', 'dijetDEta', 'F') dataloader.AddVariable('dijetDPhi', 'dijetDPhi', 'dijetDPhi', 'F') dataloader.AddVariable('llgJJDPhi', 'llgJJDPhi', 'llgJJDPhi', 'F') dataloader.AddVariable('jPhotonDRMin', 'jPhotonDRMin', 'jPhotonDRMin', 'F') dataloader.AddVariable('ptt', 'ptt', 'ptt', 'F') dataloader.AddVariable('jetOnePt', 'jetOnePt', 'jetOnePt', 'F') dataloader.AddVariable('jetTwoPt', 'jetTwoPt', 'jetTwoPt', 'F') dataloader.AddVariable('kin_bdt_james', 'kin_bdt_james', 'kin_bdt_james', 'F') dataloader.AddVariable('vbfPtBalance', 'vbfPtBalance', 'vbfPtBalance', 'F') dataloader.AddVariable('photonZepp', 'photonZepp', 'photonZepp', 'F') # trees for training inputFile = r.TFile('{0}/output_combined.root'.format(inputFilesDir))
#print("Input file: {}".format(INPUTFILE)) READ = False with open(dataset + "/optimize_" + outf_key + "/varsListHPO.txt") as file: for line in file.readlines(): if READ == True: varList.append(str(line).strip()) if "Variable List:" in line: READ = True numVars = len(varList) outputfile = TFile( dataset + "/weights/TMVAOptimization_" + str(numVars) + "vars.root", "RECREATE") loader = TMVA.DataLoader(dataset + "/optimize_" + outf_key) for var in varList: loader.AddVariable(var, "", "", "F") # add signal to loader if year == 2017: for i in range(len(varsList.sig2017_1)): sig_list.append(TFile.Open(inputDir + varsList.sig2017_1[i])) sig_trees_list.append(sig_list[i].Get("ljmet")) sig_trees_list[i].GetEntry(0) loader.AddSignalTree(sig_trees_list[i], 1) elif year == 2018: for i in range(len(varsList.sig2018_1)): sig_list.append(TFile.Open(inputDir + varsList.sig2018_1[i])) sig_trees_list.append(sig_list[i].Get("ljmet"))
# --- TMVA cross-validation dataset setup (notebook export) -----------------
import ROOT
from ROOT import TMVA, TFile, TTree, TCut, TString

# In[2]:
outputFile = TFile("TMVA.root", 'RECREATE')
ROOT.TMVA.Tools.Instance()
factory = TMVA.Factory(
    'TMVAClassification', outputFile,
    '!V:!Silent:Color:DrawProgressBar:Transformations=G:AnalysisType=Classification')

# In[3]:
loader = TMVA.DataLoader("dataset_cv")
# AddVariable(expression, title, unit, type)
loader.AddVariable("Pt", "P_{T}", "GeV", 'D')
loader.AddVariable("metE", "E^{miss}_{T}", "GeV", 'D')
# NOTE(review): only three arguments here, so 'D' lands in the *unit* slot
# and the variable keeps the default type — confirm whether a unit string
# was dropped or type 'D' was intended, as in the sibling calls.
loader.AddVariable("dPhi", "d#phi", 'D')
loader.AddVariable("Mt", "M_{T}", "GeV", 'D')

# In[4]:
f1 = ROOT.TFile.Open('/eos/user/g/gtolkach/signal_event_with_cuts_plus.root')
f2 = ROOT.TFile.Open(
    '/eos/user/g/gtolkach/beackground_event_with_cuts_plus.root')
signal = f1.Get('NOMINAL')
background = f2.Get('NOMINAL')

# In[5]:
) print ' ' print 'Copying data DecayTree to TreeB ...' TreeB = tdata.CopyTree("") print 'Data DecayTree copied to TreeB' print ' ' print ' ' print 'Copying MC DecayTree to TreeS ...' TreeS = tmc.CopyTree("") print 'MC DecayTree copied to TreeS' print ' ' dataloader = TMVA.DataLoader( "dataset1" ) # xml weights in dataset1/weights/TMVAClassification_BDT.weights.xml dataloader.AddVariable("Bs_PT", "D") dataloader.AddVariable("Kst_PT", "D") dataloader.AddVariable("Kstb_PT", "D") dataloader.AddVariable("max_Kp_Km_PT := max(Kp_PT,Km_PT)", "D") dataloader.AddVariable("min_Kp_Km_PT := min(Kp_PT,Km_PT)", "D") dataloader.AddVariable("max_pip_pim_PT := max(pip_PT,pim_PT)", "D") dataloader.AddVariable("min_pip_pim_PT := min(pip_PT,pim_PT)", "D") dataloader.AddVariable("Bs_DIRA_OWNPV", "D") dataloader.AddVariable("Bs_ENDVERTEX_CHI2", "D") dataloader.AddVariable("Bs_LOKI_ETA", "D") dataloader.AddVariable("Kst_LOKI_ETA", "D") dataloader.AddVariable("Kstb_LOKI_ETA", "D") dataloader.AddVariable("max_Kp_Km_ETA := max(Kp_LOKI_ETA,Km_LOKI_ETA)", "D")
def main(): usage = 'usage: %prog [options]' parser = optparse.OptionParser(usage) parser.add_option( '-s', '--signal_sample', dest='input_file_name_signal', help='signal sample path', default= 'samples/samples-NJet_geq_3/MVATraining/ttHnobb_TrainMVANoCutJetN.root', type='string') parser.add_option( '-x', '--bckg1_sample', dest='input_file_name_ttJets', help='background sample 1 path', default= 'samples/samples-NJet_geq_3/MVATraining/ttJets_TrainMVANoCutJetN.root', type='string') parser.add_option( '-y', '--bckg2_sample', dest='input_file_name_ttW', help='background sample 2 path', default= 'samples/samples-NJet_geq_3/MVATraining/ttWJets_TrainMVANoCutJetN.root', type='string') parser.add_option( '-z', '--bckg3_sample', dest='input_file_name_ttZ', help='background sample 3 path', default= 'samples/samples-NJet_geq_3/MVATraining/ttZJets_TrainMVANoCutJetN.root', type='string') parser.add_option('-a', '--activation', dest='activation_function', help='activation function', default='relu', type='string') parser.add_option('-l', '--hidden_layers', dest='number_of_hidden_layers', help='number of hidden layers', default='2', type='int') parser.add_option('-t', '--var_transform', dest='var_transform_name', help='transformation used on input variables', default='None', type='string') parser.add_option('-j', '--json', dest='json', help='json file with list of variables', default=None, type='string') parser.add_option('-r', '--learning_rate', dest='learning_rate', help='learning rate', default=0.008, type='float') parser.add_option('-n', '--num_epochs', dest='num_epochs', help='number of epochs', default=10, type='string') (opt, args) = parser.parse_args() number_of_hidden_layers = opt.number_of_hidden_layers activation_function = opt.activation_function var_transform_name = opt.var_transform_name num_epochs = opt.num_epochs jsonFile = open(opt.json, 'r') new_variable_list = json.load(jsonFile, encoding='utf-8').items() learning_rate = opt.learning_rate layer_nodes = 40 # Setup 
TMVA interface to use Keras TMVA.Tools.Instance() TMVA.PyMethodBase.PyInitialize() if ',' in var_transform_name: var_transform_name_list = var_transform_name.split(',') new_var_transform_name = '+'.join(var_transform_name_list) print 'new_var_transform_name: ', new_var_transform_name else: print 'var_transform_name = ', var_transform_name new_var_transform_name = var_transform_name print 'new_var_transform_name: ', new_var_transform_name num_inputs = 0 for key, value in new_variable_list: num_inputs = num_inputs + 1 print 'num inputs = ', str(num_inputs) classifier_parent_dir = 'DNN_noCutJetN_%sVars_%sHLs_%s_%s-VarTrans_%s-learnRate_%s-epochs-%s-nodes' % ( str(num_inputs), str(number_of_hidden_layers), activation_function, new_var_transform_name, str(learning_rate), num_epochs, str(layer_nodes)) classifier_samples_dir = classifier_parent_dir + "/outputs" if not os.path.exists(classifier_samples_dir): os.makedirs(classifier_samples_dir) output_file_name = '%s/%s.root' % (classifier_samples_dir, classifier_parent_dir) output_file = TFile.Open(output_file_name, 'RECREATE') # 'AnalysisType' is where one defines what kind of analysis you're doing e.g. multiclass, Classification .... # VarTransform: Decorrelation, PCA-transformation, Gaussianisation, Normalisation (for all classes if none is specified). # When transformation is specified in factory object, the transformation is only used for informative purposes (not used for classifier inputs). # Distributions can be found in output to see how variables would look if transformed. 
factory_name = 'Factory_%s' % (classifier_parent_dir) factory_string = '!V:!Silent:Color:DrawProgressBar:Transformations=%s:AnalysisType=multiclass' % var_transform_name factory = TMVA.Factory(factory_name, output_file, factory_string) #Load data input_file_name_signal = opt.input_file_name_signal data_signal = TFile.Open(input_file_name_signal) signal = data_signal.Get('syncTree') input_file_name_ttJets = opt.input_file_name_ttJets data_bckg_ttJets = TFile.Open(input_file_name_ttJets) background_ttJets = data_bckg_ttJets.Get('syncTree') input_file_name_ttW = opt.input_file_name_ttW data_bckg_ttW = TFile.Open(input_file_name_ttW) background_ttW = data_bckg_ttW.Get('syncTree') input_file_name_ttZ = opt.input_file_name_ttZ data_bckg_ttZ = TFile.Open(input_file_name_ttZ) background_ttZ = data_bckg_ttZ.Get('syncTree') # Declare a dataloader interface dataloader_name = classifier_parent_dir dataloader = TMVA.DataLoader(dataloader_name) # Can add selection cuts via: # dataloader.AddTree(background_ttJets, 'Background_1', 'myvar > cutBarrelOnly && myEventTypeVar=1', backgroundWeight) ### Global event weights ### signalWeight = 1. backgroundWeight0 = 1. backgroundWeight1 = 1. backgroundWeight2 = 1. 
dataloader.AddTree(signal, 'ttH', signalWeight) dataloader.AddTree(background_ttW, 'ttW', backgroundWeight0) dataloader.AddTree(background_ttZ, 'ttZ', backgroundWeight1) dataloader.AddTree(background_ttJets, 'ttJets', backgroundWeight2) branches = {} for key, value in new_variable_list: dataloader.AddVariable(str(key)) branches[key] = array('f', [-999]) print 'variable: ', key branchName = '' branchName = key dataloader.AddSpectator('nEvent', 'F') # Nominal event weight: # event weight = puWgtNom * trigWgtNom * lepSelEffNom * genWgt * xsecWgt (* 0 or 1 depending on if it passes event selection) dataloader.SetWeightExpression("EventWeight", "ttH") dataloader.SetWeightExpression("EventWeight", "ttW") dataloader.SetWeightExpression("EventWeight", "ttZ") dataloader.SetWeightExpression("EventWeight", "ttJets") # NormMode: Overall renormalisation of event-by-event weights used in training. # "NumEvents" = average weight of 1 per event, independantly renormalised for signal and background. # "EqualNumEvents" = average weight of 1 per signal event, sum of weights in background equal to sum of weights for signal. #dataloader.PrepareTrainingAndTestTree(TCut(''), 'V:NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:NormMode=EqualNumEvents') dataloader.PrepareTrainingAndTestTree( TCut(''), 'V:SplitMode=Random:NormMode=EqualNumEvents') # Generate model: model = Sequential() # Add layers to DNN ''' Dense: # Number of nodes init= # Initialisation activation= # Activation input_dim= # Shape of inputs (Number of inputs). Argument only needed for first layer. ''' # first hidden layer model.add( Dense(layer_nodes, init='glorot_normal', activation=activation_function, input_dim=len(new_variable_list))) # Randomly set a fraction rate of input units (defined by argument) to 0 at each update during training (helps prevent overfitting). 
#model.add(Dropout(0.2))

# Hidden layers
for x in xrange(number_of_hidden_layers):
    model.add(Dense(layer_nodes, activation=activation_function))

# Output layer: 'softmax' normalises the four outputs to 1 so they can be read
# as per-class probabilities.
model.add(Dense(4, activation='softmax'))

# Set loss and optimizer
# categorical_crossentropy = optimisation algorithm with logarithmic loss function
# binary_crossentropy
model.compile(loss='categorical_crossentropy',
              optimizer=SGD(lr=learning_rate),
              metrics=['accuracy', ])

# Store model in file
model.save('model.h5')
model.summary()

# Book methods
# Choose classifier and define hyperparameters e.g number of epochs, model filename (as chosen above) etc.
# VarTransform: Decorrelate, PCA, Gauss, Norm, None.
# Transformations used in booking are used for actual training.
logs_dir = classifier_parent_dir + '/logs'
#factory_string_bookMethod = 'H:!V:VarTransform=%s:FilenameModel=model.h5:NumEpochs=%s:BatchSize=100:Tensorboard=%s' % (var_transform_name, num_epochs, logs_dir)
factory_string_bookMethod = 'H:!V:VarTransform=%s:FilenameModel=model.h5:NumEpochs=%s:BatchSize=100' % (
    var_transform_name, num_epochs)
factory.BookMethod(dataloader, TMVA.Types.kPyKeras, "DNN",
                   factory_string_bookMethod)

# Run training, testing and evaluation
factory.TrainAllMethods()
factory.TestAllMethods()
factory.EvaluateAllMethods()
# --- Classification factory for the angular analysis samples -----------------
output = TFile.Open('./outputMVA/' + signalID + out + '.root', 'RECREATE')
factory = TMVA.Factory(
    'TMVAClassification', output,
    '!V:!Silent:Color:DrawProgressBar:Transformations=G:AnalysisType=Classification')

# Load data
path = '/home/myamatan/data3/angularAna/MVA_workspace/sampleMake/output/'
dataS = TFile.Open(path + signalName + '.root')
signal = dataS.Get('coll')
dataB = TFile.Open(path + bkgName + '.root')
background = dataB.Get('coll')

#dataloader = TMVA.DataLoader('weights')
dataloader = TMVA.DataLoader('test')

nSF = 1
nTrainSig = 40000 * nSF
nTrainBkg = 40000 * nSF
nInput = 10

# VBF-tagged selections use the dijet system variables.
if gp in {'VBFH', 'VBFHVT'}:
    if regime == "Resolved":
        dataloader.AddVariable('Mvbfjj')
        dataloader.AddVariable('Ptvbfjj')
        dataloader.AddVariable('dEtavbfjj')
        dataloader.AddVariable('PtBalance')
        dataloader.AddVariable('MaxEta')
        dataloader.AddVariable('EtaStar')
        #dataloader.AddVariable('dEta_vbfjet1_Vqq')
        #dataloader.AddVariable('dEta_vbfjet2_Vqq')
def __init__(self, options):
    """Set up the TMVA factory/dataloader, build the trees, and book methods.

    When options.new is set, sig.root/bkg.root are (re)generated from the
    input file by splitting on the LV flag; otherwise the previously written
    files are reused.
    """
    TMVA.Tools.Instance()
    TMVA.PyMethodBase.PyInitialize()
    gROOT.LoadMacro("./TMVAGui.C")

    self._lOutput = TFile.Open('TMVA.root', 'RECREATE')
    self._lFactory = TMVA.Factory(
        'TMVAClassification', self._lOutput,
        '!V:!Silent:Color:DrawProgressBar:AnalysisType=Classification')
    self._lDataLoader = TMVA.DataLoader("dataset")
    for i0 in options.lVars:
        self._lDataLoader.AddVariable(i0, 'F')
    self._lDataLoader.Print("all")

    # define signal and background tree based on LV flag
    if options.new:
        self._lInputFile = TFile.Open(options.infile + '.root')
        self._lInputTree = self._lInputFile.Get("Events")
        self._lSigFile = TFile.Open("sig.root", "RECREATE")
        self._lBkgFile = TFile.Open("bkg.root", "RECREATE")
        self._lSigFile.cd()
        self._lSigTree = self._lInputTree.CopyTree("LV && std::abs(eta) < 3.0 && std::abs(eta) > 1.7 && pt > 5")
        self._lSigTree.Show(53)
        self._lSigTree.Write()
        self._lSigFile.Close()
        self._lBkgFile.cd()
        self._lBkgTree = self._lInputTree.CopyTree("!LV")# && std::abs(eta) < 3.0 && std::abs(eta) > 1.7 && pt > 5")
        self._lBkgTree.Write()
        self._lBkgFile.Close()

    # Re-open the (possibly pre-existing) split files read-only.
    self._lSigFile = TFile.Open("sig.root", "READ")
    self._lBkgFile = TFile.Open("bkg.root", "READ")
    self._lSigTree = self._lSigFile.Get("Events")
    self._lBkgTree = self._lBkgFile.Get("Events")
    self._lDataLoader.AddSignalTree(self._lSigTree, 1.0)
    self._lDataLoader.AddBackgroundTree(self._lBkgTree, 1.0)

    # NOTE(review): nBkgTrain is derived from nSig (not nBkg) — looks
    # intentional (1.2x the signal training size) but worth confirming.
    nSig = self._lSigTree.GetEntries()
    nBkg = self._lBkgTree.GetEntries()
    nSigTrain = nSig * 0.8
    nBkgTrain = nSig * 0.8 * 1.2
    self._lDataLoader.PrepareTrainingAndTestTree(
        TCut(""), TCut(""),
        "nTrain_Signal=%i:nTrain_Background=%i:nTest_Signal=%i:nTest_Background=%i:SplitMode=Random:NormMode=NumEvents:!V" % (nSigTrain, nBkgTrain, nSigTrain, nBkgTrain))

    # Methods booked with default option strings; most are kept disabled here.
    Methods = {
        #'Variable': TMVA.Types.kVariable,
        #'Cuts': TMVA.Types.kCuts,
        'Likelihood': TMVA.Types.kLikelihood,
        #'BDT': TMVA.Types.kBDT
        #'PyRandomForest': TMVA.Types.kPyRandomForest,
        #'MaxMethod': TMVA.Types.kMaxMethod
    }
    '''
    'PDERS': TMVA.Types.kPDERS,
    'HMatrix': TMVA.Types.kHMatrix,
    'Fisher': TMVA.Types.kFisher,
    'KNN': TMVA.Types.kKNN,
    'CFMlpANN': TMVA.Types.kCFMlpANN,
    'TMlpANN': TMVA.Types.kTMlpANN,
    'BDT': TMVA.Types.kBDT,
    'DT': TMVA.Types.kDT,
    'RuleFit': TMVA.Types.kRuleFit,
    'SVM': TMVA.Types.kSVM,
    'MLP': TMVA.Types.kMLP,
    'BayesClassifier': TMVA.Types.kBayesClassifier,
    'FDA': TMVA.Types.kFDA,
    'Boost': TMVA.Types.kBoost,
    'PDEFoam': TMVA.Types.kPDEFoam,
    'LD': TMVA.Types.kLD,
    'Plugins': TMVA.Types.kPlugins,
    'Category': TMVA.Types.kCategory,
    'DNN': TMVA.Types.kDNN,
    'PyRandomForest': TMVA.Types.kPyRandomForest,
    'PyAdaBoost': TMVA.Types.kPyAdaBoost,
    'PyGTB': TMVA.Types.kPyGTB,
    'PyKeras': TMVA.Types.kPyKeras,
    'C50': TMVA.Types.kC50,
    'RSNNS': TMVA.Types.kRSNNS,
    'RSVM': TMVA.Types.kRSVM,
    'RXGB': TMVA.Types.kRXGB,
    'MaxMethod': TMVA.Types.kMaxMethod
    '''
    for m, t in Methods.iteritems():
        self._lFactory.BookMethod(self._lDataLoader, t, m, "")

    self._lFactory.BookMethod(
        self._lDataLoader, TMVA.Types.kBDT, 'BDT',
        '!H:!V:NTrees=300:MinNodeSize=2.5%:MaxDepth=3:BoostType=Grad:SeparationType=GiniIndex:nCuts=100:UseBaggedBoost=True:PruneMethod=NoPruning')
    #self._lfactory.bookmethod(self._ldataloader, tmva.types.kbdt, 'BDT2', '!h:!v:ntrees=300:minnodesize=2.5%:maxdepth=4:boosttype=AdaBoost:separationtype=crossentropy:ncuts=100:prunemethod=nopruning')
    #self._lfactory.bookmethod(self._ldataloader, tmva.types.kbdt, 'BDT3', '!h:!v:ntrees=300:minnodesize=2.5%:maxdepth=4:boosttype=AdaBoost:separationtype=GiniIndex:ncuts=100:prunemethod=nopruning')
    #self._lFactory.BookMethod( self._lDataLoader, TMVA.Types.kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" )

    # Keras model booked through PyKeras: shrinking tanh/relu stack with a
    # 2-node sigmoid output.
    model = Sequential()
    model.add(Dense(len(options.lVars), input_dim=len(options.lVars), activation='tanh'))
    model.add(Dense(30, activation='tanh'))
    model.add(Dense(20, activation='relu'))
    model.add(Dense(10, activation='tanh'))
    model.add(Dense(5, activation='relu'))
    model.add(Dense(2, activation='sigmoid'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy', ])
    model.save('model.h5')
    model.summary()
    self._lFactory.BookMethod(
        self._lDataLoader, TMVA.Types.kPyKeras, 'PyKeras',
        'H:!V:FilenameModel=model.h5:NumEpochs=10:BatchSize=500')

    # Export a frozen TensorFlow graph alongside the Keras model.
    frozen_graph = freeze_session(
        K.get_session(),
        output_names=[out.op.name for out in model.outputs])
    tf.train.write_graph(frozen_graph, "h5_files", "tf_model.pb", as_text=False)
    self._lFactory.Print("v")
'!V:!Silent:Color:DrawProgressBar:AnalysisType=Classification') # Load data dataBs = TFile.Open('bankBsJpsiPhi16.root') dataBsD0 = TFile.Open('bankBsJpsiPhiDGamma016.root') dataBu = TFile.Open('bankBuJpsiK16.root') dataBd = TFile.Open('bankBdJpsiKx16.root') dataBdNR = TFile.Open('bankBdKxMuMu16.root') treeBs = dataBs.Get('PDsecondTree') treeBsD0 = dataBsD0.Get('PDsecondTree') treeBu = dataBu.Get('PDsecondTree') treeBd = dataBd.Get('PDsecondTree') treeBdNR = dataBdNR.Get('PDsecondTree') dataloader = TMVA.DataLoader('dataset') # add variables dataloader.AddVariable('muoPt', 'F') dataloader.AddVariable('abs(muoEta)', 'F') dataloader.AddVariable('muoSegmComp', 'F') dataloader.AddVariable('muoChi2LM', 'F') dataloader.AddVariable('muoChi2LP', 'F') dataloader.AddVariable('muoGlbTrackTailProb', 'F') dataloader.AddVariable('muoIValFrac', 'F') dataloader.AddVariable('muoLWH', 'I') dataloader.AddVariable('muoTrkKink', 'F') dataloader.AddVariable('muoGlbKinkFinderLOG', 'F') dataloader.AddVariable('muoTimeAtIpInOutErr', 'F') dataloader.AddVariable('muoOuterChi2', 'F') dataloader.AddVariable('muoInnerChi2', 'F')
def _make_dataloader(self): '''Make the DataLoader for training.''' # Load the data. self.dataloader = TMVA.DataLoader(self.name) # Add training variables. for var in self.variables: if not isinstance(var, (tuple, list)): var = (var, ) try: self.dataloader.AddVariable(*var) except: print 'Failed to call dataloader.AddVariable with args', var raise # Add spectator variables. for var in self.spectators: if not isinstance(var, (tuple, list)): var = (var, ) try: self.dataloader.AddSpectator(*var) except: print 'Failed to call dataloader.AddSpectator with args', var raise # Register trees. # If we have explicit cuts for training and testing, we need to copy the TTrees first, # applying these cuts. if self.trainingcut: pwd = ROOT.gROOT.CurrentDirectory() self.tmpfile = ROOT.TFile.Open( os.path.abspath('DataLoader_' + random_string() + '.root'), 'recreate') self.tmpfile.cd() signal_usedleaves, background_usedleaves = self.used_leaves() usedleaves = { 'Signal': signal_usedleaves, 'Background': background_usedleaves } aliases = { 'Signal': get_aliases(self.signaltree), 'Background': get_aliases(self.backgroundtree) } addtreeargs = [] for name in 'Signal', 'Background': lname = name.lower() namecut = getattr(self, lname + 'cut') for tname, ttype, cut in ('Training', TMVA.Types.kTraining, self.trainingcut), ( 'Testing', TMVA.Types.kTesting, self.testingcut): classname = self.name + '_' + name + '_' + tname + '_' cut = AND(*filter(None, [namecut, cut])) tree = getattr(self, lname + 'tree') seltree, copyfriends = copy_tree( tree, selection=cut, keepbranches=usedleaves[name], rename=( lambda name: classname + name.replace('/', '_')), write=True, returnfriends=True) addtreeargs.append((seltree.GetName(), name, getattr(self, lname + 'globalweight'), ROOT.TCut(''), ttype)) weight = getattr(self, lname + 'weight') if weight: self.dataloader.SetWeightExpression(weight, name) fname = self.tmpfile.GetName() self.tmpfile.Close() self.tmpfile = ROOT.TFile.Open(fname) for args in 
addtreeargs: tree = self.tmpfile.Get(args[0]) _aliases = aliases['Signal'] if 'Signal' in args else aliases[ 'Background'] for name, alias in _aliases.items(): tree.SetAlias(name, alias) self.dataloader.AddTree(tree, *args[1:]) self.dataloader.GetDataSetInfo().SetSplitOptions( str(self.splitoptions)) if pwd: pwd.cd() else: self.dataloader.AddSignalTree(self.signaltree, self.signalglobalweight) self.dataloader.AddBackgroundTree(self.backgroundtree, self.backgroundglobalweight) # Set weight expressions. if self.signalweight: self.dataloader.SetSignalWeightExpression(self.signalweight) if self.backgroundweight: self.dataloader.SetBackgroundWeightExpression( self.backgroundweight) # Prepare the training. self.dataloader.PrepareTrainingAndTestTree( ROOT.TCut(self.signalcut), ROOT.TCut(self.backgroundcut), str(self.splitoptions)) return True
def main(): try: # retrive command line options shortopts = "m:i:n:d:k:l:t:o:vh?" longopts = ["methods=", "inputfile=", "nTrees=", "maxDepth=", "mass=", "varListKey=", "inputtrees=", "outputfile=", "verbose", "help", "usage"] opts, args = getopt.getopt( sys.argv[1:], shortopts, longopts ) except getopt.GetoptError: print "ERROR: unknown options in argument %s" % sys.argv[1:] usage() sys.exit(1) infname = DEFAULT_INFNAME treeNameSig = DEFAULT_TREESIG treeNameBkg = DEFAULT_TREEBKG outfname = DEFAULT_OUTFNAME methods = DEFAULT_METHODS nTrees = DEFAULT_NTREES mDepth = DEFAULT_MDEPTH mass = DEFAULT_MASS varListKey = DEFAULT_VARLISTKEY verbose = True for o, a in opts: if o in ("-?", "-h", "--help", "--usage"): usage() sys.exit(0) elif o in ("-m", "--methods"): methods = a elif o in ("-d", "--maxDepth"): mDepth = a elif o in ("-k", "--mass"): mass = a elif o in ("-l", "--varListKey"): varListKey = a elif o in ("-i", "--inputfile"): infname = a elif o in ("-n", "--nTrees"): nTrees = a elif o in ("-o", "--outputfile"): outfname = a elif o in ("-t", "--inputtrees"): a.strip() trees = a.rsplit( ' ' ) trees.sort() trees.reverse() if len(trees)-trees.count('') != 2: print "ERROR: need to give two trees (each one for signal and background)" print trees sys.exit(1) treeNameSig = trees[0] treeNameBkg = trees[1] elif o in ("-v", "--verbose"): verbose = True varList = varsList.varList[varListKey] nVars = str(len(varList))+'vars' Note=''+methods+'_'+varListKey+'_'+nVars+'_mDepth'+mDepth outfname = "dataset/weights/TMVA_"+Note+".root" # Print methods mlist = methods.replace(' ',',').split(',') print "=== TMVAClassification: use method(s)..." 
for m in mlist: if m.strip() != '': print "=== - <%s>" % m.strip() # Import ROOT classes # check ROOT version, give alarm if 5.18 if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544: print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA" print "*** does not run properly (function calls with enums in the argument are ignored)." print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples)," print "*** or use another ROOT version (e.g., ROOT 5.19)." sys.exit(1) # Output file outputFile = TFile( outfname, 'RECREATE' ) factory = TMVA.Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;:AnalysisType=Classification" ) factory.SetVerbose( verbose ) (TMVA.gConfig().GetIONames()).fWeightFileDir = "weights/"+Note dataloader = TMVA.DataLoader('dataset') for iVar in varList: if iVar[0]=='NJets_singleLepCalc': dataloader.AddVariable(iVar[0],iVar[1],iVar[2],'I') else: dataloader.AddVariable(iVar[0],iVar[1],iVar[2],'F') inputDir = varsList.inputDir infname = "TTTT_TuneCP5_PSweights_13TeV-amcatnlo-pythia8_hadd.root" iFileSig = TFile.Open(inputDir+infname) sigChain = iFileSig.Get("ljmet") dataloader.AddSignalTree(sigChain) bkg_list = [] bkg_trees_list = [] hist_list = [] weightsList = [] for i in range(len(varsList.bkg)): bkg_list.append(TFile.Open(inputDir+varsList.bkg[i])) print inputDir+varsList.bkg[i] bkg_trees_list.append(bkg_list[i].Get("ljmet")) bkg_trees_list[i].GetEntry(0) if bkg_trees_list[i].GetEntries() == 0: continue dataloader.AddBackgroundTree( bkg_trees_list[i], 1) signalWeight = 1 dataloader.SetSignalWeightExpression( weightStrS ) dataloader.SetBackgroundWeightExpression( weightStrB ) mycutSig = TCut( cutStrS ) mycutBkg = TCut( cutStrB ) dataloader.PrepareTrainingAndTestTree( mycutSig, mycutBkg, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ) kerasSetting = 
'H:!V:VarTransform=G:FilenameModel=model.h5:NumEpochs=10:BatchSize=1028' model = Sequential() model.add(Dense(100, activation='relu', input_dim=53)) model.add((Dense(100, activation="relu"))) model.add((Dense(100, activation="relu"))) model.add((Dense(100, activation="relu"))) model.add((Dense(2, activation="sigmoid"))) # Set loss and optimizer model.compile(loss='categorical_crossentropy', optimizer=Adam(), metrics=['accuracy',]) # Store model to file model.save('model.h5') model.summary() if methods=="Keras": factory.BookMethod(dataloader, TMVA.Types.kPyKeras, "PyKeras",kerasSetting) factory.TrainAllMethods() factory.TestAllMethods() factory.EvaluateAllMethods() outputFile.Close() # save plots: print "DONE"
def main(): try: # retrive command line options shortopts = "m:i:t:o:vh?" longopts = ["methods=", "inputfile=", "inputtrees=", "outputfile=", "verbose", "help", "usage"] opts, args = getopt.getopt( sys.argv[1:], shortopts, longopts ) except getopt.GetoptError: # print help information and exit: print "ERROR: unknown options in argument %s" % sys.argv[1:] usage() sys.exit(1) infname = DEFAULT_INFNAME treeNameSig = DEFAULT_TREESIG treeNameBkg = DEFAULT_TREEBKG outfname = DEFAULT_OUTFNAME methods = DEFAULT_METHODS verbose = False for o, a in opts: if o in ("-?", "-h", "--help", "--usage"): usage() sys.exit(0) elif o in ("-m", "--methods"): methods = a elif o in ("-i", "--inputfile"): infname = a elif o in ("-o", "--outputfile"): outfname = a elif o in ("-t", "--inputtrees"): a.strip() trees = a.rsplit( ' ' ) trees.sort() trees.reverse() if len(trees)-trees.count('') != 2: print "ERROR: need to give two trees (each one for signal and background)" print trees sys.exit(1) treeNameSig = trees[0] treeNameBkg = trees[1] elif o in ("-v", "--verbose"): verbose = True # Print methods mlist = methods.replace(' ',',').split(',') print "=== TMVAClassification: use method(s)..." for m in mlist: if m.strip() != '': print "=== - <%s>" % m.strip() # Import ROOT classes from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut # check ROOT version, give alarm if 5.18 if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544: print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA" print "*** does not run properly (function calls with enums in the argument are ignored)." print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples)," print "*** or use another ROOT version (e.g., ROOT 5.19)." 
sys.exit(1) # Import TMVA classes from ROOT from ROOT import TMVA TMVA.Tools.Instance() # Output file outputFile = TFile( outfname, 'RECREATE' ) # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options) # All TMVA output can be suppressed by removing the "!" (not) in # front of the "Silent" argument in the option string factory = TMVA.Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ) # Set verbosity factory.SetVerbose( verbose ) dataloader = TMVA.DataLoader("dataset") # If you wish to modify default settings # (please check "src/Config.h" to see all available global options) # gConfig().GetVariablePlotting()).fTimesRMS = 8.0 # gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory" # Define the input variables that shall be used for the classifier training # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" # [all types of expressions that can also be parsed by TTree::Draw( "expression" )] dataloader.AddVariable( "myvar1 := var1+var2", 'F' ) dataloader.AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' ) dataloader.AddVariable( "var3", "Variable 3", "units", 'F' ) dataloader.AddVariable( "var4", "Variable 4", "units", 'F' ) # You can add so-called "Spectator variables", which are not used in the MVA training, # but will appear in the final "TestTree" produced by TMVA. 
This TestTree will contain the # input variables, the response values of all trained MVAs, and the spectator variables #dataloader.AddSpectator( "spec1 := var1*2", "Spectator 1", "units", 'F' ); #dataloader.AddSpectator( "spec2 := var1*3", "Spectator 2", "units", 'F' ); # Read input data if gSystem.AccessPathName( infname ) != 0: gSystem.Exec( "wget http://root.cern.ch/files/" + infname ) input = TFile.Open( infname ) # Get the signal and background trees for training signal = input.Get( treeNameSig ) background = input.Get( treeNameBkg ) # Global event weights (see below for setting event-wise weights) signalWeight = 1.0 backgroundWeight = 1.0 # ====== register trees ==================================================== # # the following method is the prefered one: # you can add an arbitrary number of signal or background trees dataloader.AddSignalTree ( signal, signalWeight ) dataloader.AddBackgroundTree( background, backgroundWeight ) # To give different trees for training and testing, do as follows: # factory.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ) # factory.AddSignalTree( signalTestTree, signalTestWeight, "Test" ) # Use the following code instead of the above two or four lines to add signal and background # training and test events "by hand" # NOTE that in this case one should not give expressions (such as "var1+var2") in the input # variable definition, but simply compute the expression before adding the event # # # --- begin ---------------------------------------------------------- # # ... 
*** please lookup code in TMVA/macros/TMVAClassification.C *** # # # --- end ------------------------------------------------------------ # # ====== end of register trees ============================================== # Set individual event weights (the variables must exist in the original TTree) # for signal : factory.SetSignalWeightExpression ("weight1*weight2"); # for background: factory.SetBackgroundWeightExpression("weight1*weight2"); dataloader.SetBackgroundWeightExpression( "weight" ) # Apply additional cuts on the signal and background sample. # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" ) mycutSig = TCut( "" ) mycutBkg = TCut( "" ) # Here, the relevant variables are copied over in new, slim trees that are # used for TMVA training and testing # "SplitMode=Random" means that the input events are randomly shuffled before # splitting them into training and test samples dataloader.PrepareTrainingAndTestTree( mycutSig, mycutBkg, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ) # -------------------------------------------------------------------------------------------------- # ---- Book MVA methods # # please lookup the various method configuration options in the corresponding cxx files, eg: # src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html # it is possible to preset ranges in the option string in which the cut optimisation should be done: # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable # Cut optimisation if "Cuts" in mlist: factory.BookMethod( dataloader, TMVA.Types.kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" ) if "CutsD" in mlist: factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ) if "CutsPCA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsPCA", 
"!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ) if "CutsGA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ) if "CutsSA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ) # Likelihood ("naive Bayes estimator") if "Likelihood" in mlist: factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "Likelihood", "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ) # Decorrelated likelihood if "LikelihoodD" in mlist: factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ) # PCA-transformed likelihood if "LikelihoodPCA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ) # Use a kernel density estimator to approximate the PDFs if "LikelihoodKDE" in mlist: factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ) # Use a variable-dependent mix of splines and kernel density estimator if "LikelihoodMIX" in mlist: factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodMIX", 
"!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ) # Test the multi-dimensional probability density estimator # here are the options strings for the MinMax and RMS methods, respectively: # "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); # "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if "PDERS" in mlist: factory.BookMethod( dataloader, TMVA.Types.kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ) if "PDERSD" in mlist: factory.BookMethod( dataloader, TMVA.Types.kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ) if "PDERSPCA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ) # Multi-dimensional likelihood estimator using self-adapting phase-space binning if "PDEFoam" in mlist: factory.BookMethod( dataloader, TMVA.Types.kPDEFoam, "PDEFoam", "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ) if "PDEFoamBoost" in mlist: factory.BookMethod( dataloader, TMVA.Types.kPDEFoam, "PDEFoamBoost", "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" ) # K-Nearest Neighbour classifier (KNN) if "KNN" in mlist: factory.BookMethod( dataloader, TMVA.Types.kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ) # 
H-Matrix (chi2-squared) method if "HMatrix" in mlist: factory.BookMethod( dataloader, TMVA.Types.kHMatrix, "HMatrix", "!H:!V" ) # Linear discriminant (same as Fisher discriminant) if "LD" in mlist: factory.BookMethod( dataloader, TMVA.Types.kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ) # Fisher discriminant (same as LD) if "Fisher" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ) # Fisher with Gauss-transformed input variables if "FisherG" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFisher, "FisherG", "H:!V:VarTransform=Gauss" ) # Composite classifier: ensemble (tree) of boosted Fisher classifiers if "BoostedFisher" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" ) # Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if "FDA_MC" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ); if "FDA_GA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if "FDA_SA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if "FDA_MT" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_MT", 
"H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if "FDA_GAMT" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if "FDA_MCMT" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); # TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if "MLP" in mlist: factory.BookMethod( dataloader, TMVA.Types.kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ) if "MLPBFGS" in mlist: factory.BookMethod( dataloader, TMVA.Types.kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ) if "MLPBNN" in mlist: factory.BookMethod( dataloader, TMVA.Types.kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ) # BFGS training with bayesian regulators # CF(Clermont-Ferrand)ANN if "CFMlpANN" in mlist: factory.BookMethod( dataloader, TMVA.Types.kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ) # n_cycles:#nodes:#nodes:... # Tmlp(Root)ANN if "TMlpANN" in mlist: factory.BookMethod( dataloader, TMVA.Types.kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ) # n_cycles:#nodes:#nodes:... 
# Support Vector Machine if "SVM" in mlist: factory.BookMethod( dataloader, TMVA.Types.kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ) # Boosted Decision Trees if "BDTG" in mlist: factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDTG", "!H:!V:NTrees=1000:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" ) if "BDT" in mlist: factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDT", "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" ) if "BDTB" in mlist: factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" ) if "BDTD" in mlist: factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDTD", "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" ) # RuleFit -- TMVA implementation of Friedman's method if "RuleFit" in mlist: factory.BookMethod( dataloader, TMVA.Types.kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ) # -------------------------------------------------------------------------------------------------- # ---- Now you can tell the factory to train, test, and evaluate the MVAs. # Train MVAs factory.TrainAllMethods() # Test MVAs factory.TestAllMethods() # Evaluate MVAs factory.EvaluateAllMethods() # Save the output. outputFile.Close() print "=== wrote root file %s\n" % outfname print "=== TMVAClassification is done!\n" # open the GUI for the result macros TMVA.TMVAGui(outfname) # keep the ROOT thread running gApplication.Run()
# --- CNN PyKeras classification on 32x32 image trees -------------------------
TMVA.Tools.Instance()
TMVA.PyMethodBase.PyInitialize()

output = TFile.Open('TMVA_CNN_PyKeras.root', 'RECREATE')
factory = TMVA.Factory(
    'TMVAClassification', output,
    '!V:!Silent:Color:DrawProgressBar:Transformations=None:AnalysisType=Classification')

############################Loading the data file
data = TFile.Open("/home/jui/Desktop/tmva/sample_images_32x32.root")
# print(data.ls())
signal = data.Get('sig_tree;2')
background = data.Get('bkg_tree;2')

dataloader = TMVA.DataLoader('dataset_evaltest')

# 32x32 pixels flattened into one array branch of length 1024.
imgSize = 1024
dataloader.AddVariablesArray("vars", imgSize)
dataloader.AddSignalTree(signal, 1.0)
dataloader.AddBackgroundTree(background, 1.0)
dataloader.PrepareTrainingAndTestTree(
    TCut(''),
    'nTrain_Signal=8000:nTrain_Background=8000:SplitMode=Random:NormMode=NumEvents:!CalcCorrelations:!V')

# Generate model
# Define model
argv = []

# Signal and background simulations for the requested chamber efficiency.
inputFile_sig = TFile.Open(
    "/home/net3/afortman/projects/hotpot/oct_sim/efftesting/hazel_both_smearf_1M_35ns_e"
    + str(options.eff) + ".root")
inputFile_bkg = TFile.Open(
    "/home/net3/afortman/projects/hotpot/oct_sim/efftesting/hazel_bkg_smearf_1M_35ns_e"
    + str(options.eff) + ".root")
outputFile = TFile.Open(
    "TMVAOutput_sigbkg_e" + str(options.eff) + ".root", "RECREATE")

factory = TMVA.Factory(
    "TMVAClassification", outputFile,
    "!V:!Silent:Color:!DrawProgressBar:AnalysisType=Classification")
loader = TMVA.DataLoader("dataset_e" + str(options.eff))

#loader.AddVariable("EventNumHazel",'I')
#loader.AddVariable("EventNumGingko",'I')
#loader.AddVariable("trigger_gingko",'I')
#loader.AddVariable("iroad_x",'I')
#loader.AddVariable("iroad_u",'I')
#loader.AddVariable("iroad_v",'I')

# Per-plane hit counts used as integer training variables.
loader.AddVariable("Hit_plane0", 'I')
loader.AddVariable("Hit_plane1", 'I')
loader.AddVariable("Hit_plane2", 'I')
loader.AddVariable("Hit_plane3", 'I')
loader.AddVariable("Hit_plane4", 'I')
loader.AddVariable("Hit_plane5", 'I')
loader.AddVariable("Hit_plane6", 'I')
loader.AddVariable("Hit_plane7", 'I')