Esempio n. 1
0
# A Sample is built from a name, a color, a path and a selection
# (the selection is not implemented yet for training).
# Only the path is really relevant at the moment.
cat = '6j4t'

# All trees live in one directory; file names are <process>_<cat>_<even|odd>.root.
_tree_dir = '/nfs/dust/cms/user/hmildner/mva-training/trees/'
_signal_prefix = _tree_dir + 'tthbb_fast_' + cat
_background_prefix = _tree_dir + 'ttbar_' + cat

# The "_even" files are used for the test samples, the "_odd" files for training.
signal_test = Sample(
    't#bar{t}H test', ROOT.kBlue, _signal_prefix + '_even.root', '')
signal_train = Sample(
    't#bar{t}H training', ROOT.kGreen, _signal_prefix + '_odd.root', '')
background_test = Sample(
    't#bar{t} test', ROOT.kRed + 1, _background_prefix + '_even.root', '')
background_train = Sample(
    't#bar{t} training', ROOT.kRed - 1, _background_prefix + '_odd.root', '')
# `variables` and `addtional_variables` (spelled as defined elsewhere in the
# file) are not visible in this section.
trainer = Trainer(variables, addtional_variables)

# Register the samples defined above.
trainer.addSamples(signal_train, background_train, signal_test, background_test)

# General configuration; the call order is kept exactly as in the original
# script.
trainer.setTreeName('MVATree')  # name of the tree in the files
trainer.setReasonableDefaults()  # set some configurations to reasonable values
# Reweight events so that the integral in training and test sample is the same.
trainer.setEqualNumEvents(True)
trainer.useTransformations(False)  # faster this way
trainer.setVerbose(False)  # no output during BDT training and testing
trainer.setWeightExpression('Weight')
# Selection for the category (not necessary if the trees are already split).
trainer.setSelection('N_Jets>=6&&N_BTagsM>=4')

# Variable selection. Each step changes the set of variables and is followed
# by a new optimisation of the number of trees, because NTrees has to be
# re-optimised depending on the variables and on other parameters.
_selection_steps = (
    (trainer.removeWorstUntil, 10),  # remove worst variable until 10 are left
    (trainer.addBestUntil, 12),      # add best variables until 12 are used
    (trainer.removeWorstUntil, 10),  # back down to 10
    (trainer.removeWorstUntil, 8),   # final set of 8
)
for _change_variables, _n_variables in _selection_steps:
    _change_variables(_n_variables)
    # Optimises the number of trees by trying more and fewer trees.
    trainer.optimizeOption('NTrees')
# Report the result of the selection above: the remaining variables and the
# BDT / factory options stored on the trainer.
# print(...) with a single argument produces the same output under Python 2
# (parenthesised expression) and also runs under Python 3, where the bare
# print statement is a SyntaxError.
print("these are found to be the 8 best variables and best bdt and factory options")
print(trainer.best_variables)
print(trainer.bdtoptions)
print(trainer.factoryoptions)