Example #1
0
    def train_factory(self, outputfile):
        '''Train using TMVA::Factory.'''

        # Make the DataLoader.
        self.prepare()

        # Make the factory.
        factory = TMVA.Factory(self.name, outputfile, str(self.factoryoptions))
        factory.SetVerbose(self.verbose)

        methods = self.book_methods(factory)

        # Train MVAs
        factory.TrainAllMethods()

        # Test MVAs
        factory.TestAllMethods()

        # Evaluate MVAs
        factory.EvaluateAllMethods()

        # Save the output.
        outputfile.Close()

        print '=== wrote root file {0}\n'.format(outputfile.GetName())
        print '=== TMVAClassification is done!\n'

        weightsfiles = dict((m, self.weights_file(m)) for m in methods)
        classfiles = dict(
            (m, self.weights_file(m, '.class.C')) for m in methods)
        return weightsfiles, classfiles
def runJob():
    TMVA.Tools.Instance()
    TMVA.PyMethodBase.PyInitialize()

    output = TFile.Open('TMVA.root', 'RECREATE')
    factory = TMVA.Factory('TMVAClassification', output,
            '!V:!Silent:Color:DrawProgressBar:Transformations=D,G:AnalysisType=Classification')

    dataloader = TMVA.DataLoader('dataset')
Example #3
0
def runJob():
    '''Train four gradient-boosted BDTs (an nCuts scan) with TMVA.

    Variables, samples and the preselection cut come from the module-level
    ``config`` object; TMVA output goes to TMVA_SSSF.root with dataset
    directory datasetSSSF04Feb.
    '''
    TMVA.Tools.Instance()
    TMVA.PyMethodBase.PyInitialize()

    output = TFile.Open('TMVA_SSSF.root', 'RECREATE')
    factory = TMVA.Factory(
        'TMVAClassification', output,
        '!V:!Silent:Color:DrawProgressBar:AnalysisType=Classification')
    # factory = TMVA.Factory('TMVAClassification', output,   '!V:!Silent:Color:DrawProgressBar:Transformations=D,G:AnalysisType=Classification')

    dataloader = TMVA.DataLoader('datasetSSSF04Feb')
    # Register every training variable with the loader.
    for br in config.mvaVariables:
        dataloader.AddVariable(br)

    # Build one TChain per simulated sample and register it as signal or
    # background; real data ('isData' == 1) is excluded from training.
    for sampleName, sample in config.samples.items():
        if config.structure[sampleName]['isData'] == 1:
            continue

        print sampleName
        sample['tree'] = TChain("Events")
        for f in sample['name']:
            sample['tree'].Add(f)

        if config.structure[sampleName]['isSignal'] == 1:
            dataloader.AddSignalTree(sample['tree'], 1.0)
        else:
            dataloader.AddBackgroundTree(sample['tree'], 1.0)
        # output_dim += 1
    dataloader.PrepareTrainingAndTestTree(
        TCut(config.cut), 'SplitMode=Random:NormMode=NumEvents:!V')

    # Four BDTG configurations that differ only in nCuts (100/50/500/750).
    # factory.BookMethod(dataloader, TMVA.Types.kBDT, "BDTG4C2",   "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=200:MaxDepth=2" );
    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDTG4C1",
        "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=100:MaxDepth=2"
    )
    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDTG4C05",
        "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=50:MaxDepth=2"
    )
    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDTG4500",
        "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=500:MaxDepth=2"
    )
    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDTG4750",
        "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=750:MaxDepth=2"
    )

    # Run training, test and evaluation
    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()

    output.Close()
Example #4
0
def TMVANN(filenameSTRING, outputDOTrootSTRING, sigtreeSTRINGS, bkgtreeSTRING,
           variablesSTRING):
    NNfile = R.TFile(outputDOTrootSTRING, "recreate")
    NNfile.Close()
    for sigtreeSTRING in sigtreeSTRINGS:
        from ROOT import TMVA
        file = R.TFile(filenameSTRING)  #importing the datasetsignalslowlevel
        signaltree = file.Get(sigtreeSTRING)  #setting signaltree
        backgroundtree = file.Get(bkgtreeSTRING)  #setting backgroundtree)
        sigweights = file.Get(sigtreeSTRING + weights)
        bkgweights = file.Get(bkgtreeSTRING + weights)
        TMVA.Tools.Instance()

        NNfile = R.TFile(
            outputDOTrootSTRING,
            "update")  #Writing the root file required for the TMVA factory

        TMVAfactory = TMVA.Factory(
            "TMVANN", NNfile,
            "V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification"
        )
        TMVAfactory.SetVerbose(False)  #Setting extra info (verbose) to false

        datasetsignalslowlevel = TMVA.DataLoader(
            "datasetsignalslowlevel")  #Instantiating a datasetsignalslowlevel
        datasetsignalslowlevel.AddSignalTree(signaltree, 1.)  #adding signal
        datasetsignalslowlevel.AddBackgroundTree(backgroundtree,
                                                 1.)  #adding background
        print(sigweights)
        datasetsignalslowlevel.SetSignalWeightExpression(weights)
        datasetsignalslowlevel.SetBackgroundWeightExpression(weights)

        for i in variablesSTRING:  #adding our training variables to the TMVA
            datasetsignalslowlevel.AddVariable(i)

        signalcut = R.TCut("")  #Variables are already cut
        backgroundcut = R.TCut("")
        datasetsignalslowlevel.PrepareTrainingAndTestTree(
            signalcut, backgroundcut,
            "nTrain_Signal= 0:nTrain_Background=0:Splitmode=Random:NormMode=NumEvents:!V"
        )
        TMVAfactory.BookMethod(
            datasetsignalslowlevel, TMVA.Types.kMLP,
            "LowLevelNN_3layer25,20,10_100Epoch_tanhNeuron" + sigtreeSTRING,
            "H:!V:NeuronType=tanh:VarTransform=N:NCycles=100:HiddenLayers=25,20,10:TestRate=5"
        )

        TMVAfactory.TrainAllMethods()
        TMVAfactory.TestAllMethods()
        TMVAfactory.EvaluateAllMethods()
        NNfile.Close()
    NNfile = R.TFile(outputDOTrootSTRING, "update")
Example #5
0
def runJob():
    '''Train a single AdaBoost BDT classifier with TMVA.

    Variables, samples and the preselection cut come from the module-level
    ``config`` object; results are written to TMVA16.root with dataset
    directory dataset_8Feb.
    '''
    TMVA.Tools.Instance()
    TMVA.PyMethodBase.PyInitialize()

    dataloader = TMVA.DataLoader('dataset_8Feb')
    output = TFile.Open('TMVA16.root', 'RECREATE')
    factory = TMVA.Factory(
        'TMVAClassification', output,
        '!V:!Silent:Color:DrawProgressBar:AnalysisType=Classification')

    # Register every training variable with the loader.
    for variable in config.mvaVariables:
        dataloader.AddVariable(variable)

    # Chain each simulated sample's files and register the chain as signal
    # or background; real data ('isData' == 1) is excluded from training.
    for sampleName, sample in config.samples.items():
        if config.structure[sampleName]['isData'] == 1:
            continue

        chain = TChain("Events")
        for path in sample['name']:
            chain.Add(path)
        sample['tree'] = chain

        if config.structure[sampleName]['isSignal'] == 1:
            dataloader.AddSignalTree(chain, 1.0)
        else:
            dataloader.AddBackgroundTree(chain, 1.0)

    dataloader.PrepareTrainingAndTestTree(
        TCut(config.cut),
        'SplitMode=Random::SplitSeed=10:NormMode=EqualNumEvents')

    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDT",
        "!H:!V:NTrees=500:MinNodeSize=0.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.1:SeparationType=GiniIndex:nCuts=500"
    )

    # Run training, test and evaluation for the booked method.
    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()

    output.Close()
Example #6
0
def TMVANN (filenameSTRING,outputDOTrootSTRING,sigtreeSTRINGS,bkgtreeSTRING,variablesSTRING):
    '''Train several TMVA classifiers (Fisher, Bayes likelihood, MLPs) for
    each signal tree against a common background tree.

    Parameters:
        filenameSTRING: path of the input ROOT file holding the trees.
        outputDOTrootSTRING: path of the output ROOT file TMVA writes.
            NOTE(review): it is opened with "recreate" inside the loop, so
            each signal tree's results overwrite the previous iteration's
            file -- confirm this is intended.
        sigtreeSTRINGS: iterable of signal tree names (one training per name).
        bkgtreeSTRING: name of the background tree.
        variablesSTRING: iterable of training-variable expressions.
    '''
    for sigtreeSTRING in sigtreeSTRINGS:
        from ROOT import TMVA
        file = R.TFile(filenameSTRING)          #importing the dataset
        signaltree = file.Get(sigtreeSTRING)    #setting signaltree
        backgroundtree = file.Get(bkgtreeSTRING) #setting backgroundtree)
        # NOTE(review): `weights` is not defined in this function --
        # presumably a module-level tree-name suffix string; confirm.
        sigweights = file.Get(sigtreeSTRING+weights)
        bkgweights = file.Get(bkgtreeSTRING+weights)
        TMVA.Tools.Instance()

        NNfile = R.TFile(outputDOTrootSTRING,"recreate")      #Writing the root file required for the TMVA factory

        TMVAfactory = TMVA.Factory("TMVANN",NNfile,"V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification")
        TMVAfactory.SetVerbose(False)  #Setting extra info (verbose) to false

        dataset = TMVA.DataLoader("dataset")     #Instantiating a dataset
        dataset.AddSignalTree(signaltree,1.)     #adding signal
        dataset.AddBackgroundTree(backgroundtree ,1.) #adding background
        print(sigweights)
        # Per-event weight expressions are currently disabled.
        #dataset.SetSignalWeightExpression(weights)
        #dataset.SetBackgroundWeightExpression(weights)

        for i in variablesSTRING:                #adding our training variables to the TMVA
            dataset.AddVariable(i)

        signalcut = R.TCut("")  #Variables are already cut
        backgroundcut = R.TCut("")
        dataset.PrepareTrainingAndTestTree(signalcut,backgroundcut,"nTrain_Signal= 0:nTrain_Background=0:Splitmode=Random:NormMode=NumEvents:!V")
        #using all signal and background points to train, random selection, normalised to summed event weights = number of events for each tree, no verbose

        #Booking some methods
        #TMVAfactory.BookMethod(dataset,TMVA.Types.kMLP,"ArtificialNeuralNetwork_1Layer_500Epoch_tanhNeuron","H:!V:NeuronType=tanh:VarTransform=N:NCycles=500:HiddenLayers=8:TestRate=5")         #Artifical Neural Network 1 layers 500 epoch

        TMVAfactory.BookMethod(dataset,TMVA.Types.kFisher, "FisherMethod","H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=60:NsmoothMVAPdf=10")    #Fisher Method
        TMVAfactory.BookMethod(dataset,TMVA.Types.kLikelihood, "BayesLikelihood","H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=60")    #Bayes likleihood
        TMVAfactory.BookMethod(dataset,TMVA.Types.kMLP,"ArtificialNeuralNetwork_4Layer8Node_500Epoch_tanhNeuron","H:!V:NeuronType=tanh:VarTransform=N:NCycles=500:HiddenLayers=8,8,8,8:TestRate=5")         #Artifical Neural Network 8 layers 500 epoch
        # NOTE(review): the two method names below say "reluNeuron" but the
        # option string books NeuronType=tanh -- name and options disagree.
        TMVAfactory.BookMethod(dataset,TMVA.Types.kMLP,"ArtificialNeuralNetwork_1Layer6Node_500Epoch_reluNeuron","H:!V:NeuronType=tanh:VarTransform=N:NCycles=500:HiddenLayers=6:TestRate=5")         #Artifical Neural Network 8 layers 500 epoch
        TMVAfactory.BookMethod(dataset,TMVA.Types.kMLP,"ArtificialNeuralNetwork_1Layer3Node_100Epoch_reluNeuron","H:!V:NeuronType=tanh:VarTransform=N:NCycles=100:HiddenLayers=3:TestRate=5")         #Artifical Neural Network 8 layers 500 epoch
        TMVAfactory.BookMethod(dataset,TMVA.Types.kMLP,"ArtificialNeuralNetwork_2Layer5Node_500Epoch_tanhNeuron","H:!V:NeuronType=tanh:VarTransform=N:NCycles=500:HiddenLayers=5,5:TestRate=5")         #Artifical Neural Network 8 layers 500 epoch

        # Train, test and evaluate all booked methods, then close the output.
        TMVAfactory.TrainAllMethods()
        TMVAfactory.TestAllMethods()
        TMVAfactory.EvaluateAllMethods()
        NNfile.Close()
        print ('TMVANN Ran & made ROOT file ' + outputDOTrootSTRING+sigtreeSTRING)
Example #7
0
def Evaluate(outdir):
  '''Run a TMVA RuleFit classification training inside *outdir*.

  Expects these files to exist in *outdir*:
    - inputvars.txt: one training-variable expression per line
    - signals.txt / backgrounds.txt: input tree descriptions for the factory
  Writes tmva.root (TMVA output), tmva.log (captured stdout) and weight
  files under <outdir>/weights.
  '''

  # NOTE(review): stdout is redirected for the rest of the process and never
  # restored -- anything printed after Evaluate() goes to tmva.log.
  sys.stdout = open(outdir + '/tmva.log', 'w')

  # Output file
  output = TFile(outdir + '/tmva.root', 'RECREATE')

  # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
  # All TMVA output can be suppressed by removing the "!" (not) in
  # front of the "Silent" argument in the option string
  factory = TMVA.Factory("TMVARuleFit", output, "!V:!Silent:Color" )

  # Set the variables used for the analysis, one expression per line.
  # Fix: the file handle was previously never closed and its name shadowed
  # the `input` builtin; rstrip('\n') also avoids chopping a real character
  # when the last line has no trailing newline (variable[:-1] did).
  with open(outdir + '/inputvars.txt') as varfile:
    for variable in varfile.readlines():
      factory.AddVariable(variable.rstrip('\n'), 'F')

  # Set the weight directory
  TMVA.gConfig().GetIONames().fWeightFileDir = outdir + "/weights"

  # Limit the creation of correlation plots
  TMVA.gConfig().GetVariablePlotting().fMaxNumOfAllowedVariablesForScatterPlots = 20

  # Set the input file with signal and background events
  factory.SetInputTrees(
    outdir + '/signals.txt',
    outdir + '/backgrounds.txt'
  )

  # No additional preselection on either class.
  cutsig = TCut('')
  cutbkg = TCut('')

  factory.PrepareTrainingAndTestTree( cutsig, cutbkg, "SplitMode=Random:NormMode=NumEvents:!V" )

  factory.BookMethod( TMVA.Types.kRuleFit, "RuleFit",
    "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.00001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" )

  # Train MVAs
  factory.TrainAllMethods()

  # Test MVAs
  factory.TestAllMethods()

  # Evaluate MVAs
  factory.EvaluateAllMethods()

  # Save the output.
  output.Close()
Example #8
0
 def __init__(self, name, n_vars, n_targets=1,
              method='BDT', task='Classification'):
     """Create a TMVA factory writing into a private temporary directory.

     Parameters:
         name: name passed to ``TMVA.Factory``.
         n_vars: number of float input variables, registered as X_0..X_{n-1}.
         n_targets: number of regression targets, registered as y_0..y_{m-1}
             (only used when ``task == 'Regression'``).
         method: TMVA method identifier, stored for later use.
         task: analysis type forwarded to the factory as ``AnalysisType``
             (e.g. 'Classification' or 'Regression').
     """
     self.name = name
     self.n_vars = n_vars
     self.n_targets = n_targets
     self.method = method
     self.task = task
     # Each instance gets its own scratch directory for the ROOT output.
     # NOTE(review): neither the tmpdir nor the output file is cleaned up
     # here -- presumably a teardown method elsewhere handles it; confirm.
     self.tmpdir = tempfile.mkdtemp()
     self.output = TFile(os.path.join(self.tmpdir, 'tmva_output.root'),
                         'recreate')
     self.factory = TMVA.Factory(name, self.output,
                                 'AnalysisType={0}:Silent'.format(task))
     for n in range(n_vars):
         self.factory.AddVariable('X_{0}'.format(n), 'F')
     if task == 'Regression':
         for n in range(n_targets):
             self.factory.AddTarget('y_{0}'.format(n), 'F')
Example #9
0
def train_and_test_MVA(name, signal_tree, background_tree, output_file_name,
                       n_sig, n_bgk):
    '''Train and evaluate a TMVA Likelihood classifier.

    Parameters:
        name: factory/job name passed to ``TMVA.Factory``.
        signal_tree: ROOT tree with signal events.
        background_tree: ROOT tree with background events.
        output_file_name: path of the ROOT file to (re)create for output.
        n_sig / n_bgk: number of signal/background events used for both
            training and testing.

    Uses the module-level ``variables`` list for the input variables.
    '''
    outfile = root_open(output_file_name, 'recreate')
    factory = TMVA.Factory(name, outfile, "!V:!Silent:Color:DrawProgressBar")
    # Register every training variable as a float.
    for var in variables:
        factory.AddVariable(var, 'F')
    factory.AddSignalTree(signal_tree)
    # Fix: previously this added the undefined/module-level `bkg_tree`
    # instead of the `background_tree` argument, which was never used.
    factory.AddBackgroundTree(background_tree)
    # passes selection (currently marked as all variables are defined.
    cut1 = Cut('absolute_eta > 0')
    cut2 = Cut('angle_bl > 0')
    cut3 = Cut('M3 > 0')
    cut = cut1 & cut2 & cut3

    training_options = "nTrain_Signal=%d:nTrain_Background=%d:nTest_Signal=%d:nTest_Background=%d:!V" % (
        n_sig, n_bgk, n_sig, n_bgk)
    factory.PrepareTrainingAndTestTree(cut, cut, training_options)

    # Likelihood classifier; other candidates (PDERS, KNN, LD, MLP, BDT)
    # are noted in the original but not booked.
    factory.BookMethod(TMVA.Types.kLikelihood, "Likelihood",
                       "!V:NAvEvtPerBin=50")

    # Train MVAs using the set of training events
    factory.TrainAllMethods()
    # ---- Evaluate all MVAs using the set of test events
    factory.TestAllMethods()
    # ----- Evaluate and compare performance of all configured MVAs
    factory.EvaluateAllMethods()

    outfile.close()
    # NOTE(review): `bkg_file` is not defined in this function -- presumably
    # a module-level file handle; confirm before relying on this close().
    bkg_file.close()
Example #10
0
File: train.py Project: bb511/B2DD

def add_variables(data_loader, good_variables, excluded=("Bc_M",)):
    '''Register each variable in *good_variables* on *data_loader* as a
    float ('F'), skipping any name listed in *excluded*.

    The default exclusion of the fit variable "Bc_M" preserves the original
    behaviour; callers may pass a different *excluded* collection to
    generalize. Returns the (mutated) loader for chaining.
    '''
    for variable in good_variables:
        if variable not in excluded:
            data_loader.AddVariable(variable, "F")

    return data_loader


if __name__ == '__main__':
    decay_name = decay_names[decay_nb]
    output_file = TFile("~/TMVA/TMVAoutput" + decay_name + str(run) + ".root",
                        "RECREATE")
    factory = TMVA.Factory("TMVA_" + decay_name, output_file,
                           "DrawProgressBar=True")
    data_loader = TMVA.DataLoader("dataloader")

    moca_tree = dhand.combine_trees(run, decay_name, True)
    data_tree = dhand.combine_trees(run, decay_name, False)
    branches_to_keep = dhand.branch_selection(data_tree, branches, [])
    moca_tree = dhand.activate_branches(moca_tree, branches_to_keep)
    data_tree = dhand.activate_branches(data_tree, branches_to_keep)

    add_variables(data_loader, branches_to_keep)
    sgcut_test = TCut("runNumber%5==" + kfold +
                      "&& (Bc_M > 5200 && Bc_M < 5400)")
    sgcut_train = TCut("runNumber%5!=" + kfold +
                       "&& (Bc_M > 5200 && Bc_M < 5400)")
    bgcut_test = TCut("runNumber%5==" + kfold + "&& Bc_M > 5400")
    bgcut_train = TCut("runNumber%5!=" + kfold + "&& Bc_M > 5400")
from ROOT import TMVA, TFile, TTree, TCut, ObjectProxy
from subprocess import call
from os.path import isfile
import ROOT
 
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.regularizers import l2
from keras.optimizers import SGD
 
# Setup TMVA
TMVA.Tools.Instance()
TMVA.PyMethodBase.PyInitialize()
 
output = TFile.Open('TMVA.root', 'RECREATE')
factory = TMVA.Factory('TMVAMulticlass', output,
                       '!V:!Silent:Color:DrawProgressBar:Transformations=D,G:AnalysisType=multiclass')
 
# Load data
data = TFile.Open("/net/data_cms1b/user/dmeuser/top_analysis/output/ttbar_res100.0_new.root")
t_all = data.Get("ttbar_res100.0/ttbar_res_dilepton_CP5")
 
dataloader = TMVA.DataLoader('dataset')

#  ~for var in ["PuppiMET","METunc_Puppi","MET","HT","nJets","n_Interactions","Lep1_pt","Lep1_phi","Lep1_eta","Lep1_E","Lep1_flavor","Lep2_pt","Lep2_phi","Lep2_eta","Lep2_E","Lep2_flavor","Jet1_pt","Jet1_phi","Jet1_eta","Jet1_E","Jet2_pt","Jet2_phi","Jet2_eta","Jet2_E"]:
for var in ["PuppiMET","METunc_Puppi","HT","nJets","n_Interactions","Lep1_pt","Lep1_phi","Lep1_eta","Lep1_E","Lep1_flavor","Lep2_pt","Lep2_phi","Lep2_eta","Lep2_E","Lep2_flavor","Jet1_pt","Jet1_phi","Jet1_eta","Jet1_E","Jet2_pt","Jet2_phi","Jet2_eta","Jet2_E"]:
  dataloader.AddVariable(var)

dataloader.AddTree(t_all, 'Bin1')
dataloader.AddTree(t_all, 'Bin2')
dataloader.AddTree(t_all, 'Bin3')
dataloader.AddTree(t_all, 'Bin4')
Example #12
0
# Build a three-class labelled dataset by stacking class_0/1/2 (defined
# earlier in the original script): labels start at 1, then the first block
# is scaled to 0 and the last to 2. Weights are random integers in [1, 10).
X = np.concatenate([class_0, class_1, class_2])
y = np.ones(X.shape[0])
w = RNG.randint(1, 10, n_events * 3)
y[:class_0.shape[0]] *= 0
y[-class_2.shape[0]:] *= 2
# Shuffle features and labels together.
# NOTE(review): the weights `w` are NOT permuted alongside X and y, so the
# per-event weight alignment is lost here -- confirm whether intended.
permute = RNG.permutation(y.shape[0])
X = X[permute]
y = y[permute]

# Split into training and test datasets
X_train, y_train, w_train = X[:n_events], y[:n_events], w[:n_events]
X_test, y_test, w_test = X[n_events:], y[n_events:], w[n_events:]

output = TFile('tmva_output.root', 'recreate')
factory = TMVA.Factory('classifier', output,
                       'AnalysisType=Multiclass:'
                       '!V:Silent:!DrawProgressBar')

# Newer ROOT uses a separate DataLoader; older versions take variables and
# events directly on the factory.
if ROOT_VERSION >= '6.07/04':
    data = TMVA.DataLoader('.')
else:
    data = factory
for n in range(2):
    data.AddVariable('f{0}'.format(n), 'F')

# Call root_numpy's utility functions to add events from the arrays
add_classification_events(data, X_train, y_train, weights=w_train)
add_classification_events(data, X_test, y_test, weights=w_test, test=True)
# The following line is necessary if events have been added individually:
data.PrepareTrainingAndTestTree(TCut('1'), 'NormMode=EqualNumEvents')
def main():
    '''Command-line driver for a TMVA H->gamma gamma style classification.

    Parses options for method list (-m), mass point (-M), category (-C),
    background mode (-B), input/output files (-i/-o) and tree names (-t),
    books two BDTs (AdaBoost and gradient boost) on photon kinematic
    variables, and runs the full TMVA train/test/evaluate cycle.
    '''

    try:
        # retrive command line options
        shortopts = "m:M:C:B:i:t:o:vh?"
        opts, args = getopt.getopt(sys.argv[1:], shortopts)

    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    # Defaults come from module-level DEFAULT_* constants.
    infname = DEFAULT_INFNAME
    methods = DEFAULT_METHODS
    mass = DEFAULT_MASS
    cat = DEFAULT_CAT
    outfname = DEFAULT_OUTFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    bkg_method = DEFAULT_BACKGROUND
    width = DEFAULT_WIDTH
    verbose = False
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-M", "--mass"):
            mass = int(a)
        elif o in ("-C", "--cat"):
            cat = int(a)
        elif o in ("-B", "--background"):
            bkg_method = int(a)
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            # Expect exactly two space-separated tree names; after the
            # reverse sort the signal tree comes first, background second.
            a.strip()
            trees = a.rsplit(' ')
            trees.sort()
            trees.reverse()
            if len(trees) - trees.count('') != 2:
                print "ERROR: need to give two trees (each one for signal and background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True

    # NOTE(review): width_str is only assigned for width 0.02 or 0.07; any
    # other DEFAULT_WIDTH would raise NameError when building tree names.
    if (width == 0.02): width_str = "_2pt"
    elif (width == 0.07): width_str = "_7pt"
    mass_str = "_" + str(mass)
    cat_str = "_" + str(cat)
    if cat < 0:
        cat_str = "_all"
    outfname = outfname + mass_str + cat_str + ".root"
    #treeNameSig = treeNameSig + mass_str
    #treeNameBkg = treeNameBkg + mass_str

    # Print methods
    mlist = methods.replace(' ', ',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut

    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)

    # Logon not automatically loaded through PyROOT (logon loads TMVA library)
    # load also GUI
    gROOT.SetMacroPath("./")
    #gROOT.Macro       ( "./TMVAlogon.C" )
    #gROOT.LoadMacro   ( "./TMVAGui.C" )

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile(outfname, 'RECREATE')

    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for
    # more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory(
        "TMVAClassification", outputFile,
        "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification"
    )

    # Set verbosity
    factory.SetVerbose(verbose)

    # Define the input variables that shall be used for the classifier training
    factory.AddVariable("pho1_ptOverM", "P_{T}^{lead} / M_{H}", "", 'F')
    factory.AddVariable("pho2_ptOverM", "P_{T}^{sublead} / M_{H}", "", 'F')
    factory.AddVariable("pho1_eta", "#eta^{lead}", "", 'F')
    factory.AddVariable("pho2_eta", "#eta^{sublead}", "", 'F')
    factory.AddVariable("d_phi", "#Delta #phi", "rad", 'F')
    #should this be cos delta Phi
    factory.AddVariable("H_ptOverM", "P_{T}^{Higgs}/M_{H}", "", 'F')
    factory.AddVariable("H_eta", "#eta^{Higgs}", "", 'F')

    factory.AddVariable("sigmaMOverM", "#sigmaM_{cor} / M", 'F')
    factory.AddVariable("sigmaMOverM_wrongVtx", "#sigmaM_{wrong} / M", 'F')

    factory.AddVariable("vtx_prob", "P_{vertex}", "", 'F')

    #factory.AddVariable( "cos_theta_star","cos(#theta)*", "", 'F' );
    #factory.AddVariable( "max_eta","max(#eta^{lead},#eta^{sub.})", "", 'F' );
    #factory.AddVariable( "min_r9","min(r9^{lead},r9^{sub.})", "", 'F' );

    factory.AddVariable("deltaMOverM", "#DeltaM / M_{Hypth}.", 'F')

    # NOTE(review): `input` shadows the builtin of the same name and the
    # file handle is never closed.
    input = TFile.Open(infname)

    # Get the signal and background trees for training
    signal_train = input.Get(treeNameSig + "_train" + mass_str + ".0")
    signal_test = input.Get(treeNameSig + "_test" + mass_str + ".0")

    background_train = input.Get(treeNameBkg + "_train" + width_str +
                                 mass_str + ".0")
    background_test = input.Get(treeNameBkg + "_test" + width_str + mass_str +
                                ".0")

    # Global event weights (see below for setting event-wise weights)
    signalWeight = 1.0
    backgroundWeight = 1.0

    # ====== register trees ====================================================
    factory.AddSignalTree(signal_train, signalWeight, "train")
    factory.AddBackgroundTree(background_train, backgroundWeight, "train")
    factory.AddSignalTree(signal_test, signalWeight, "test")
    factory.AddBackgroundTree(background_test, backgroundWeight, "test")

    # Set individual event weights (the variables must exist in the original
    # TTree)
    factory.SetBackgroundWeightExpression("wt")
    factory.SetSignalWeightExpression("wt")

    # Apply additional cuts on the signal and background sample.
    # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )
    mycut = TCut("fabs(deltaMOverM)<=" + str(width))  #
    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    factory.PrepareTrainingAndTestTree(
        mycut, mycut,
        "nTrain_Signal=0:nTrain_Background=0:NormMode=NumEvents:!V")
    # Boosted Decision Trees
    # NEW PARAMETERS
    factory.BookMethod(
        TMVA.Types.kBDT, "BDT_ada" + mass_str + cat_str,
        "!H:!V:NTrees=400:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.05:SeparationType=GiniIndex:nCuts=50:PruneMethod=NoPruning"
    )
    factory.BookMethod(
        TMVA.Types.kBDT, "BDT_grad" + mass_str + cat_str,
        "!H:!V:NTrees=500:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=50:NNodesMax=5"
    )
    #test

    # --------------------------------------------------------------------------------------------------
    # ---- Now you can tell the factory to train, test, and evaluate the MVAs.

    # Train MVAs
    #factory.OptimizeAllMethods()
    factory.TrainAllMethods()
    # Test MVAs
    factory.TestAllMethods()

    # Evaluate MVAs
    factory.EvaluateAllMethods()

    # Save the output.
    outputFile.Close()

    print "=== wrote root file %s\n" % outfname
    print "=== TMVAClassification is done!\n"
from os.path import isfile
import sys
import numpy as np

from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten, Activation, Reshape
from keras.regularizers import l2
from keras.optimizers import SGD

# Setup TMVA
TMVA.Tools.Instance()
TMVA.PyMethodBase.PyInitialize()

output = TFile.Open('TMVA_CNN_PyKeras.root', 'RECREATE')
factory = TMVA.Factory(
    'TMVAClassification', output,
    '!V:!Silent:Color:DrawProgressBar:Transformations=None:AnalysisType=Classification'
)

############################Loading the data file
data = TFile.Open("/home/jui/Desktop/tmva/sample_images_32x32.root")
# print(data.ls())
signal = data.Get('sig_tree;2')
background = data.Get('bkg_tree;2')

dataloader = TMVA.DataLoader('dataset_evaltest')

imgSize = 1024

dataloader.AddVariablesArray("vars", imgSize)

dataloader.AddSignalTree(signal, 1.0)
Example #15
0
from root_numpy import ROOT_VERSION
from ROOT import TMVA, TFile, TCut
from array import array

plt.style.use('ggplot')
RNG = np.random.RandomState(1)

# Create an example regression dataset
X = np.linspace(0, 6, 100)[:, np.newaxis]
y = np.sin(X).ravel() + \
    np.sin(6 * X).ravel() + \
    RNG.normal(0, 0.1, X.shape[0])

# Fit a regression model
output = TFile('tmva_output.root', 'recreate')
factory = TMVA.Factory('regressor', output, 'AnalysisType=Regression:'
                       '!V:Silent:!DrawProgressBar')

if ROOT_VERSION >= '6.07/04':
    data = TMVA.DataLoader('.')
else:
    data = factory
data.AddVariable('x', 'F')
data.AddTarget('y', 'F')

add_regression_events(data, X, y)
add_regression_events(data, X, y, test=True)
# The following line is necessary if events have been added individually:
data.PrepareTrainingAndTestTree(TCut('1'), '')

if ROOT_VERSION >= '6.07/04':
    BookMethod = factory.BookMethod
Example #16
0
#ROOT.gROOT.SetBatch(True)

# Setup TMVA: load the TMVA library and initialise the PyMVA Python bridge
# (required before Python-based methods can be booked).
TMVA.Tools.Instance()
TMVA.PyMethodBase.PyInitialize()
#r = ROOT.TRInterface.Instance()
#TMVA.RMethodBase.Initialize()
#MethodRXGB.Init()

# Output file that will hold the TMVA training/evaluation results.
output = TFile.Open(
    'Output_Classification_BsPhiJpsiEE_noPhiM_pTcuts_BOBest.root', 'RECREATE')
#factory = TMVA.Factory('TMVAClassification_BsPhiEE', output,
#        '!V:ROC:!Silent:Color:DrawProgressBar:Transformations=D,G:AnalysisType=Classification')

factory = TMVA.Factory(
    'TMVAClassification_BsPhiJpsiEE_noPhiM_pTcuts_BOBest', output,
    '!V:ROC:!Silent:Color:DrawProgressBar:AnalysisType=Classification')

# Input ROOT files holding the background and signal training samples.
bkg_name = "BsPhiJpsiEE_MVATraining_Bkg_pTcuts.root"
sig_name = "BsPhiJpsiEE_MVATraining_Sig_pTcuts.root"
#bkg_name = "BsPhiJpsiEE_MVATraining_Bkg.root"
#sig_name = "BsPhiJpsiEE_MVATraining_Sig.root"

#branches = ['elePtLead', 'elePtSublead', 'kaonPtLead', 'kaonPtSublead', 'jpsiPt', 'phiPt', 'bsPt', 'eledR', 'kaondR', 'jpsiPhidR', 'svProb', 'svCosine', 'svLxySig', 'eleD0Lead', 'eleD0Sublead', 'eleDzLead', 'eleDzSublead', 'kaonD0Lead', 'kaonD0Sublead', 'kaonDzLead', 'kaonDzSublead', 'kaonNormChi2Lead', 'kaonNormChi2Sublead']
#branches = ['elePtLead', 'elePtSublead', 'kaonPtLead', 'kaonPtSublead', 'bsPt', 'eledR', 'kaondR', 'jpsiPhidR', 'svProb', 'svCosine', 'svLxySig', 'eleD0Lead', 'eleD0Sublead', 'eleDzLead', 'eleDzSublead', 'kaonD0Lead', 'kaonD0Sublead', 'kaonDzLead', 'kaonDzSublead', 'kaonNormChi2Lead', 'kaonNormChi2Sublead']
# Branch names used as training variables (the commented lists above are
# earlier, larger variable selections).
branches = [
    'elePtLead', 'elePtSublead', 'kaonPtLead', 'kaonPtSublead', 'bsPt',
    'svProb', 'svCosine', 'svLxySig', 'eleD0Lead', 'eleD0Sublead', 'eleDzLead',
    'eleDzSublead', 'kaonD0Lead', 'kaonD0Sublead', 'kaonDzLead',
    'kaonDzSublead', 'kaonNormChi2Lead', 'kaonNormChi2Sublead'
]
Example #17
0
# Per-event background weight expression (weightStrB is defined elsewhere).
loader.SetBackgroundWeightExpression(weightStrB)

# Selection cuts for the signal and background trees (strings defined elsewhere).
mycutSig = TCut(cutStrS)
mycutBkg = TCut(cutStrB)

# Fixed-size random train/test split; NormMode=NumEvents normalises class
# weights by event counts.
loader.PrepareTrainingAndTestTree(
    mycutSig, mycutBkg,
    "nTrain_Signal=" + str(NSIG) +\
  ":nTrain_Background=" + str(NBKG) +\
  ":nTest_Signal=" + str(NSIG_TEST) +\
  ":nTest_Background=" + str(NBKG_TEST) +\
  ":SplitMode=Random:NormMode=NumEvents:!V"
)

# NOTE(review): no output TFile is passed here; this relies on the
# two-argument (jobname, options) Factory overload — confirm the ROOT
# version in use supports it.
factory = TMVA.Factory(
    "Optimization",
    '!V:!ROC:!Silent:Color:!DrawProgressBar:Transformations=I;:AnalysisType=Classification'
)

# Redirect TMVA weight-file output.
# NOTE(review): '/weights' is an absolute path; 'weights/' may have been
# intended — verify where the weight files actually land.
(TMVA.gConfig().GetIONames()).fWeightFileDir = '/weights'

# PyKeras options: Gaussianise inputs, load the externally saved model file,
# keep only the best checkpoint, and stop early after `patience` bad epochs.
kerasSetting = '!H:!V:VarTransform=G:FilenameModel=' + modelName +\
               ':SaveBestOnly=true' +\
               ':NumEpochs=' + str(epochs) +\
               ':BatchSize=' + str(batchSize) +\
               ':TriesEarlyStopping=' + str(patience)

factory.BookMethod(loader, TMVA.Types.kPyKeras, "PyKeras", kerasSetting)

# Run the full TMVA chain: training, testing, evaluation.
factory.TrainAllMethods()
factory.TestAllMethods()
factory.EvaluateAllMethods()
Example #18
0
    # Method on/off switches (1 = enabled); presumably consulted later when
    # booking methods — the booking code is outside this excerpt, confirm.
    Use = {
        'BDT': 1,
        'BDTG': 0,
        'BDTRT': 0,
        'BDTB': 0,
        'BDTD': 0,
        'BDTF': 0,
        'MLP': 0,
        'MLPBFGS': 0,
        'MLPBNN': 0,
        'CFMlpANN': 0,
        'TMlpANN': 0
    }

    factory = t.Factory(
        'vbf_bdt_combined_james_current', outputFile,
        '!V:!Silent:Color:DrawProgressBar:Transformations=I;D;G,D:AnalysisType=Classification'
    )
    factory.Print()

    # Write TMVA weight files to the caller-provided directory.
    t.gConfig().GetIONames().fWeightFileDir = outputWeightsDir
    dataloader = t.DataLoader(".")

    # Training variables; each registered with identical expression/title
    # strings and float type 'F'.
    dataloader.AddVariable('dijetDEta', 'dijetDEta', 'dijetDEta', 'F')
    dataloader.AddVariable('dijetDPhi', 'dijetDPhi', 'dijetDPhi', 'F')
    dataloader.AddVariable('llgJJDPhi', 'llgJJDPhi', 'llgJJDPhi', 'F')
    dataloader.AddVariable('jPhotonDRMin', 'jPhotonDRMin', 'jPhotonDRMin', 'F')
    dataloader.AddVariable('ptt', 'ptt', 'ptt', 'F')
    dataloader.AddVariable('jetOnePt', 'jetOnePt', 'jetOnePt', 'F')
    dataloader.AddVariable('jetTwoPt', 'jetTwoPt', 'jetTwoPt', 'F')
    dataloader.AddVariable('kin_bdt_james', 'kin_bdt_james', 'kin_bdt_james',
                           'F')
Example #19
0
 Authors: A. Ramirez-Morales and J. Salmon-Gamboa
 ---------------------------------------------------------------
'''

# tmva module

import ROOT
from ROOT import TFile, TMVA, TCut

# TMVA results file and the input dataset (breast-cancer sample).
m_outputFile = TFile("TMVA.root", 'RECREATE')
m_inputFile = TFile.Open("./data/cancer.root")
TMVA.Tools.Instance()

m_factory = TMVA.Factory(
    "TMVAClassification",
    m_outputFile  # the output file argument is optional
    ,
    "!V:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification"
)

m_loader = TMVA.DataLoader("dataset")

# Training variables, all registered as floats.
m_loader.AddVariable("age", 'F')
m_loader.AddVariable("menopause", 'F')
m_loader.AddVariable("tumorSize", 'F')
m_loader.AddVariable("invNodes", 'F')
m_loader.AddVariable("degMalig", 'F')
# Remaining candidate variables, currently excluded from training:
# m_loader.AddVariable("nodeCaps", 'F')
# m_loader.AddVariable("breast", 'I')
# m_loader.AddVariable("breastQuad")
# m_loader.AddVariable("irradiat")
Example #20
0
    # Training tree and its file (name suggests a CSV previously converted
    # to ROOT format — confirm upstream).
    traintree_name = treename
    trainfilename="proctrain.csv.root" 


    trainfile = TFile.Open(trainfilename,"read")
    traintree = trainfile.Get(traintree_name)
    
    TMVA.Tools.Instance()
    
    
    # create the tmva output file, which will be full of details about the training
    fout = TFile("tmvatest.root","RECREATE")


    # use the default factory
    factory = TMVA.Factory("TMVAClassification", fout)                                
    dataloader = TMVA.DataLoader("dataset")


    # build the list of variables: take every branch name found in the tree
    al=traintree.GetListOfBranches()
    varlist=[]
    for i in range(al.GetEntries()):
        varlist+=[al[i].GetName()]

        
    if debug:
        print "all variables of ",trainfile, " ", varlist
        print "now stripping EventId Weight and Label "

    # these three variables should not be used for training
Example #21
0
def main():

    NTupName   = "JetTree"
    alg        = "TruthRawTrim"
    cuts       =  ["eta>-1.2","eta<1.2","pt>300","pt<5000","m>61","m<85"]
    vars       = ["Tau21","T2jet"]

    #NTupName   = "varTree"    
    verbose    = True
    
    #alg        = "AK10LCTRIMF5R20"
    spectators = ["m"]
    #cuts       = ["eta>-1.2","eta<1.2","pt>200","pt<350","m>61","m<85","TruthRecoMatch==1"]
    #vars       = ["TauWTA2TauWTA1","ZCUT12","Dip23","TJetVol","ActiveArea","PullC10","Angularity"]
    methods    = "Likelihood"

    print "Starting and getting arguments:"
    allargs = sys.argv[1:]    
    if len(allargs)<5:
        print "You input these args"
        print allargs
        print "Not enough args, please try again"
        return 1
    else:
        alg        = allargs[0]
        spectators = allargs[1].split(",")
        cuts       = allargs[2].split(",")
        vars       = allargs[3].split(",")
        methods    = allargs[4]
    
    print "Running with args:"
    print "  alg        = ",alg        
    print "  spectators = ",spectators 
    print "  cuts       = ",cuts       
    print "  vars       = ",vars       
    print "  methods    = ",methods    
    

    # Print methods
    mlist = methods.replace(' ',',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()



    #===============================
    #Read training and test data
    #===============================
    #InputDir = "../gen_20170529/"
    InputDir="../Ana_EventGeneration/"
    #InputDir="~/Downloads/"
    print "Getting inputs from: ",InputDir
    #s1 = TFile(InputDir+"ntuple_ttbar_2000.root");
    #b1 = TFile(InputDir+"ntuple_dijet_800_1400.root");
    s1 = TFile(InputDir+"ntuple_tt_test10000.root");
    b1 = TFile(InputDir+"ntuple_dijet_test10000.root");

    # Output file
    OutFileName="testout.root"
    outputFile = TFile( OutFileName, 'RECREATE' )
    
    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in 
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory( "TMVAClassification", outputFile, 
                            "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" )

    # Set verbosity
    factory.SetVerbose( verbose )

    # data loader
    dataloader=TMVA.DataLoader("dataset")

#     weight=""
#     weight+="pass_selection*EventWeight*CrossSection*("
#     weight+=alg+"_pt>"+pt1+" && "
#     weight+=alg+"_pt<"+pt2
#     if m1!="0":
#         weight+=" && "+alg+"_m>"+m1+" && "
#         weight+=alg+"_m<"+m2
#     weight+=")"
#     
#     #Get signal and background histograms
#     if variable=="mass":
#         histname = "akt10"+alg+"_trim_"+variable
#     else:
#         histname = alg+"_"+variable
    
    #======================================
    #Predefined cuts - for isntance on M(j1)
    #======================================
    mycuts = "1.0"
    mycutb = "1.0"

    for cut in cuts:
        placecut=cut
        if cut[:2]=="pt" or cut[:3]=="eta" or cut[:4]=="m":
            placecut = "* ("+alg+"_"+cut+")"
        else:
            placecut="* ("+cut+") "
        mycuts += placecut
        mycutb += placecut

    
    print "MyCutsSig: ",mycuts
    print "MyCutsBkg: ",mycutb

    #===================================
    #Spectator variables from tree
    #=====================================
    for spec in spectators:
        dataloader.AddSpectator( spec, 'F' )
        
    #===================================
    #MVA variables from tree
    #=====================================
    for var in vars:
        dataloader.AddVariable( var , 'F' )

    #===============================
    #Read training and test data
    #===============================
    print "Getting trees ... ",NTupName
    st1 = s1.Get(NTupName)
    bt1 = b1.Get(NTupName)

    #=========================================
    # global event weights per tree (see below for setting event-wise weights)
    #=========================================
    ws1 = 1.0
    wb1 = 1.0

    #=========================================
    # You can add an arbitrary number of signal or background trees
    #=========================================
    dataloader.AddSignalTree    ( st1, ws1 );
    #SCHSU
    #dataloader.SetSignalWeightExpression("EventWeight*CrossSection");

    dataloader.AddBackgroundTree( bt1, wb1 );
    #dataloader.SetBackgroundWeightExpression("EventWeight*CrossSection");
    
    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
    mycutSig = TCut(mycuts)
    mycutBkg = TCut(mycutb)
    
    dataloader.PrepareTrainingAndTestTree( mycutSig, mycutBkg,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" )

    # --------------------------------------------------------------------------------------------------

    # ---- Book MVA methods
    #
    # please lookup the various method configuration options in the corresponding cxx files, eg:
    # src/MethoCuts.cxx, etc, or here: http:#tmva.sourceforge.net/optionRef.html
    # it is possible to preset ranges in the option string in which the cut optimisation should be done:
    # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

    # Cut optimisation
    if "Cuts" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kCuts, "Cuts",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" )

    if "CutsD" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsD",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" )

    if "CutsPCA" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsPCA",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" )

    if "CutsGA" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsGA",
                            "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" )

    if "CutsSA" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsSA",
                            "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" )

    # Likelihood ("naive Bayes estimator")
    if "Likelihood" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "Likelihood",
                            "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" )

    # Decorrelated likelihood
    if "LikelihoodD" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodD",
                            "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" )

    # PCA-transformed likelihood
    if "LikelihoodPCA" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodPCA",
                            "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ) 

    # Use a kernel density estimator to approximate the PDFs
    if "LikelihoodKDE" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodKDE",
                            "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ) 

    # Use a variable-dependent mix of splines and kernel density estimator
    if "LikelihoodMIX" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodMIX",
                            "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ) 

    # Test the multi-dimensional probability density estimator
    # here are the options strings for the MinMax and RMS methods, respectively:
    #      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
    #      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
    if "PDERS" in mlist:
# DEFAULT
#         factory.BookMethod( dataloader, TMVA.Types.kPDERS, "PDERS",
#                             "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" )
# CHOOSE RIGID VOLUME SO IT DOESNT TAKE SO LONG
        factory.BookMethod( dataloader, TMVA.Types.kPDERS, "PDERS",
                            "!H:!V:NormTree=T:VolumeRangeMode=Unscaled:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" )


    if "PDERSD" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kPDERS, "PDERSD",
                            "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" )

    if "PDERSPCA" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kPDERS, "PDERSPCA",
                             "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" )

   # Multi-dimensional likelihood estimator using self-adapting phase-space binning
    if "PDEFoam" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kPDEFoam, "PDEFoam",
                            "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" )

    if "PDEFoamBoost" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kPDEFoam, "PDEFoamBoost",
                            "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" )

    # K-Nearest Neighbour classifier (KNN)
    if "KNN" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kKNN, "KNN",
                            "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" )

    # H-Matrix (chi2-squared) method
    if "HMatrix" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kHMatrix, "HMatrix", "!H:!V" )

    # Linear discriminant (same as Fisher discriminant)
    if "LD" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" )

    # Fisher discriminant (same as LD)
    if "Fisher" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" )

    # Fisher with Gauss-transformed input variables
    if "FisherG" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFisher, "FisherG", "H:!V:VarTransform=Gauss" )

    # Composite classifier: ensemble (tree) of boosted Fisher classifiers
    if "BoostedFisher" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFisher, "BoostedFisher", 
                            "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" )

    # Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
    if "FDA_MC" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_MC",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );

    if "FDA_GA" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_GA",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

    if "FDA_SA" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_SA",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

    if "FDA_MT" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_MT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

    if "FDA_GAMT" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_GAMT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

    if "FDA_MCMT" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_MCMT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

    # TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
    if "MLP" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" )

    if "MLPBFGS" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" )

    if "MLPBNN" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ) # BFGS training with bayesian regulators

    # CF(Clermont-Ferrand)ANN
    if "CFMlpANN" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ) # n_cycles:#nodes:#nodes:...  

    # Tmlp(Root)ANN
    if "TMlpANN" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ) # n_cycles:#nodes:#nodes:...

    # Support Vector Machine
    if "SVM" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" )

    # Boosted Decision Trees
    if "BDTG" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDTG",
                            "!H:!V:NTrees=1000:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" )                        

    if "BDT" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDT",
                           "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" )

    if "BDTB" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDTB",
                           "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" )

    if "BDTD" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDTD",
                           "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" )

    # RuleFit -- TMVA implementation of Friedman's method
    if "RuleFit" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kRuleFit, "RuleFit",
                            "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" )

    # --------------------------------------------------------------------------------------------------
            
    # ---- Now you can tell the factory to train, test, and evaluate the MVAs. 

    # Train MVAs
    factory.TrainAllMethods()
    
    # Test MVAs
    factory.TestAllMethods()
    
    # Evaluate MVAs
    factory.EvaluateAllMethods()    
    
    # Save the output.
    outputFile.Close()
    
    print "=== wrote root file %s\n" % OutFileName
    print "=== TMVAClassification is done!\n"
def main():
    """Parse command-line options, then train a PyKeras classifier with TMVA.

    Recognised options: -m/--methods, -i/--inputfile, -n/--nTrees,
    -d/--maxDepth, -k/--mass, -l/--varListKey, -t/--inputtrees,
    -o/--outputfile, -v/--verbose, -h/--help/--usage.

    NOTE(review): weightStrS/weightStrB/cutStrS/cutStrB, varsList, gROOT and
    the Keras classes (Sequential, Dense, Adam) are not defined in this
    function — they must be provided at module level.

    Fixes applied: the option-parsing branches mixed tabs and spaces
    (TabError under Python 3); the unused local `signalWeight` was removed.
    """

    try:
        # retrieve command line options
        shortopts  = "m:i:n:d:k:l:t:o:vh?"
        longopts   = ["methods=", "inputfile=", "nTrees=", "maxDepth=", "mass=", "varListKey=", "inputtrees=", "outputfile=", "verbose", "help", "usage"]
        opts, args = getopt.getopt( sys.argv[1:], shortopts, longopts )

    except getopt.GetoptError:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    # Defaults, overridden by the options below.
    infname     = DEFAULT_INFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname    = DEFAULT_OUTFNAME
    methods     = DEFAULT_METHODS
    nTrees      = DEFAULT_NTREES
    mDepth      = DEFAULT_MDEPTH
    mass        = DEFAULT_MASS
    varListKey  = DEFAULT_VARLISTKEY
    verbose     = True
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-d", "--maxDepth"):
            mDepth = a
        elif o in ("-k", "--mass"):
            mass = a
        elif o in ("-l", "--varListKey"):
            varListKey = a
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-n", "--nTrees"):
            nTrees = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            # Expect exactly two non-empty tree names: signal and background.
            a.strip()
            trees = a.rsplit( ' ' )
            trees.sort()
            trees.reverse()
            if len(trees)-trees.count('') != 2:
                print "ERROR: need to give two trees (each one for signal and background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True

    varList = varsList.varList[varListKey]
    nVars = str(len(varList))+'vars'
    Note = methods+'_'+varListKey+'_'+nVars+'_mDepth'+mDepth
    # NOTE(review): any -o/--outputfile value is overridden here.
    outfname = "dataset/weights/TMVA_"+Note+".root"
    # Print the requested methods (space- or comma-separated).
    mlist = methods.replace(' ',',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # ROOT 5.18 has PyROOT problems (enum arguments are ignored) that break
    # TMVA, so refuse to run on that version.
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)

    # Output file
    outputFile = TFile( outfname, 'RECREATE' )
    factory = TMVA.Factory( "TMVAClassification", outputFile,
                            "!V:!Silent:Color:DrawProgressBar:Transformations=I;:AnalysisType=Classification" )

    factory.SetVerbose( verbose )
    # Weight files go to a per-configuration subdirectory.
    (TMVA.gConfig().GetIONames()).fWeightFileDir = "weights/"+Note

    dataloader = TMVA.DataLoader('dataset')

    # NJets is an integer count; every other variable is registered as float.
    for iVar in varList:
        if iVar[0]=='NJets_singleLepCalc': dataloader.AddVariable(iVar[0],iVar[1],iVar[2],'I')
        else: dataloader.AddVariable(iVar[0],iVar[1],iVar[2],'F')

    # Signal: the TTTT sample; backgrounds: every file listed in varsList.bkg.
    inputDir = varsList.inputDir
    infname = "TTTT_TuneCP5_PSweights_13TeV-amcatnlo-pythia8_hadd.root"
    iFileSig = TFile.Open(inputDir+infname)
    sigChain = iFileSig.Get("ljmet")
    dataloader.AddSignalTree(sigChain)
    bkg_list = []
    bkg_trees_list = []
    hist_list = []
    weightsList = []
    for i in range(len(varsList.bkg)):
        bkg_list.append(TFile.Open(inputDir+varsList.bkg[i]))
        print inputDir+varsList.bkg[i]
        bkg_trees_list.append(bkg_list[i].Get("ljmet"))
        bkg_trees_list[i].GetEntry(0)

        # Skip empty background trees.
        if bkg_trees_list[i].GetEntries() == 0:
            continue
        dataloader.AddBackgroundTree( bkg_trees_list[i], 1)

    dataloader.SetSignalWeightExpression( weightStrS )
    dataloader.SetBackgroundWeightExpression( weightStrB )

    mycutSig = TCut( cutStrS )
    mycutBkg = TCut( cutStrB )

    dataloader.PrepareTrainingAndTestTree( mycutSig, mycutBkg, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" )

    kerasSetting = 'H:!V:VarTransform=G:FilenameModel=model.h5:NumEpochs=10:BatchSize=1028'

    # Build and save the Keras model that PyKeras will load from model.h5.
    # NOTE(review): input_dim=53 is hard-coded — must match len(varList).
    model = Sequential()
    model.add(Dense(100, activation='relu', input_dim=53))
    model.add(Dense(100, activation="relu"))
    model.add(Dense(100, activation="relu"))
    model.add(Dense(100, activation="relu"))
    model.add(Dense(2, activation="sigmoid"))

    # Set loss and optimizer
    model.compile(loss='categorical_crossentropy', optimizer=Adam(), metrics=['accuracy',])

    # Store model to file
    model.save('model.h5')
    model.summary()

    if methods=="Keras": factory.BookMethod(dataloader, TMVA.Types.kPyKeras, "PyKeras",kerasSetting)

    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()

    outputFile.Close()
    # save plots:
    print "DONE"
Example #23
0
def main():
    """Train a TMVA BDT-family classifier (charged Higgs signal vs. backgrounds).

    Command-line driven (getopt): chooses the method (BDT/BDTG/BDTMitFisher/
    BDTB/BDTD), variable-list key, tree depth, number of trees and signal mass
    point; reads signal and background "ljmet" trees from ``varsList.inputDir``,
    trains, and writes the ROOT output plus TMVA weight files under
    ``weights/<Note>``, then produces the standard TMVA validation plots.

    Relies on module-level globals defined elsewhere in this file:
    ``varsList``, ``weightStrS``, ``weightStrB``, ``cutStrS``, ``cutStrB``,
    ``usage`` and the ``DEFAULT_*`` constants.
    """

    try:
        # retrive command line options
        shortopts = "m:i:n:d:k:l:t:o:vh?"
        longopts = [
            "methods=", "inputfile=", "nTrees=", "maxDepth=", "mass=",
            "varListKey=", "inputtrees=", "outputfile=", "verbose", "help",
            "usage"
        ]
        opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts)

    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    infname = DEFAULT_INFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname = DEFAULT_OUTFNAME
    methods = DEFAULT_METHODS
    nTrees = DEFAULT_NTREES
    mDepth = DEFAULT_MDEPTH
    mass = DEFAULT_MASS
    varListKey = DEFAULT_VARLISTKEY
    # NOTE(review): verbose defaults to True, so the -v/--verbose option
    # handled below can never change anything — dead option.
    verbose = True
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-d", "--maxDepth"):
            mDepth = a
        elif o in ("-k", "--mass"):
            mass = a
        elif o in ("-l", "--varListKey"):
            varListKey = a
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-n", "--nTrees"):
            nTrees = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            # NOTE(review): str.strip() returns a new string; this bare call is
            # a no-op — the un-stripped value is split below.
            a.strip()
            trees = a.rsplit(' ')
            trees.sort()
            trees.reverse()
            if len(trees) - trees.count('') != 2:
                print "ERROR: need to give two trees (each one for signal and background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True

    # Resolve the variable list and build the run tag used for all output names.
    varList = varsList.varList[varListKey]
    nVars = str(len(varList)) + 'vars'
    Note = methods + '_' + varListKey + '_' + nVars + '_mDepth' + mDepth + '_M' + mass
    # NOTE(review): any -o/--outputfile value parsed above is discarded here —
    # the output file name is always rebuilt from the run tag.
    outfname = "weights/TMVA_" + Note + ".root"
    # Print methods
    mlist = methods.replace(' ', ',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut

    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)

    # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI
#     gROOT.SetMacroPath( "./" )
#     gROOT.Macro       ( "./TMVAlogon.C" )
#    gROOT.LoadMacro   ( "./TMVAGui.C" )

# Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile(outfname, 'RECREATE')

    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in
    # front of the "Silent" argument in the option string
    #     factory = TMVA.Factory( "TMVAClassification", outputFile,
    #                             "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" )
    # NOTE(review): "Transformations=I;" carries a stray trailing semicolon
    # (likely a leftover from trimming the I;D;P;G,D list above) — confirm TMVA
    # tolerates it before reusing this option string.
    factory = TMVA.Factory(
        "TMVAClassification", outputFile,
        "!V:!Silent:Color:DrawProgressBar:Transformations=I;:AnalysisType=Classification"
    )

    # Set verbosity
    factory.SetVerbose(verbose)

    # If you wish to modify default settings
    # (please check "src/Config.h" to see all available global options)
    #    gConfig().GetVariablePlotting()).fTimesRMS = 8.0
    # Weight files (and the plots produced at the end) go to weights/<Note>.
    (TMVA.gConfig().GetIONames()).fWeightFileDir = "weights/" + Note

    # Define the input variables that shall be used for the classifier training
    # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    # [all types of expressions that can also be parsed by TTree::Draw( "expression" )]

    # Each varList entry is (expression, label, unit); the jet multiplicity is
    # discrete so it is registered as integer, everything else as float.
    for iVar in varList:
        if iVar[0] == 'NJets_singleLepCalc':
            factory.AddVariable(iVar[0], iVar[1], iVar[2], 'I')
        else:
            factory.AddVariable(iVar[0], iVar[1], iVar[2], 'F')

    # You can add so-called "Spectator variables", which are not used in the MVA training,
    # but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
    # input variables, the response values of all trained MVAs, and the spectator variables

    # Signal: one charged-Higgs sample per mass point, tree name "ljmet".
    inputDir = varsList.inputDir
    print 'mass point ' + mass
    infname = "ChargedHiggs_HplusTB_HplusToTB_M-%s_13TeV_amcatnlo_pythia8_hadd.root" % (
        mass)
    iFileSig = TFile.Open(inputDir + infname)
    sigChain = iFileSig.Get("ljmet")
    #    os.exits(1)
    #BDT machinary
    factory.AddSignalTree(sigChain)
    # Backgrounds: keep the TFile and TTree objects alive in lists so PyROOT
    # does not garbage-collect them while the factory still references them.
    bkg_list = []
    bkg_trees_list = []
    hist_list = []
    weightsList = []
    for i in range(len(varsList.bkg)):
        bkg_list.append(TFile.Open(inputDir + varsList.bkg[i]))
        print inputDir + varsList.bkg[i]
        bkg_trees_list.append(bkg_list[i].Get("ljmet"))
        # presumably GetEntry(0) forces the branches to load before the entry
        # count is queried — TODO confirm it is actually needed here
        bkg_trees_list[i].GetEntry(0)

        # Skip empty samples; TMVA aborts on trees with zero entries.
        if bkg_trees_list[i].GetEntries() == 0:
            continue
        factory.AddBackgroundTree(bkg_trees_list[i], 1)

    signalWeight = 1  #0.0159/sigChain.GetEntries() #xs (pb)

    # ====== register trees ====================================================
    # To give different trees for training and testing, do as follows:
    #    factory.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" )
    #    factory.AddSignalTree( signalTestTree,     signalTestWeight,  "Test" )

    # Use the following code instead of the above two or four lines to add signal and background
    # training and test events "by hand"
    # NOTE that in this case one should not give expressions (such as "var1+var2") in the input
    #      variable definition, but simply compute the expression before adding the event
    #
    #    # --- begin ----------------------------------------------------------
    #
    # ... *** please lookup code in TMVA/macros/TMVAClassification.C ***
    #
    #    # --- end ------------------------------------------------------------
    #
    # ====== end of register trees ==============================================

    # Set individual event weights (the variables must exist in the original TTree)
    #    for signal    : factory.SetSignalWeightExpression    ("weight1*weight2");
    #    for background: factory.SetBackgroundWeightExpression("weight1*weight2");
    #factory.SetBackgroundWeightExpression( "weight" )
    # weightStrS / weightStrB are module-level globals defined elsewhere in this file.
    factory.SetSignalWeightExpression(weightStrS)
    factory.SetBackgroundWeightExpression(weightStrB)

    # Apply additional cuts on the signal and background sample.
    # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )
    # cutStrS / cutStrB are module-level globals defined elsewhere in this file.
    mycutSig = TCut(cutStrS)
    mycutBkg = TCut(cutStrB)

    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
    factory.PrepareTrainingAndTestTree(
        mycutSig,
        mycutBkg,
        #                                         "nTrain_Signal=0:nTrain_Background=0:nTest_Signal=10:nTest_Background=100:SplitMode=Random:NormMode=NumEvents:!V" )
        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V"
    )

    # --------------------------------------------------------------------------------------------------

    # ---- Book MVA methods
    #
    # please lookup the various method configuration options in the corresponding cxx files, eg:
    # src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
    # it is possible to preset ranges in the option string in which the cut optimisation should be done:
    # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

    # Cut optimisation

    # bdtSetting for "BDT"
    bdtSetting = '!H:!V:NTrees=%s:MaxDepth=%s' % (nTrees, mDepth)
    bdtSetting += ':MinNodeSize=2.5%:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20'
    bdtSetting += ':IgnoreNegWeightsInTraining=True'
    # bdtSetting for "BDTMitFisher"
    bdtFSetting = '!H:!V:NTrees=%s' % nTrees
    bdtFSetting += ':MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20'
    bdtFSetting += ':IgnoreNegWeightsInTraining=True'
    # bdtSetting for "BDTG"
    bdtGSetting = '!H:!V:NTrees=%s:MaxDepth=%s' % (nTrees, mDepth)
    bdtGSetting += ':MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20'
    bdtGSetting += ':Pray'  #Pray takes into account the effect of negative bins in BDTG
    #bdtGSetting += ':IgnoreNegWeightsInTraining=True'
    # bdtSetting for "BDTB"
    bdtBSetting = '!H:!V:NTrees=%s' % nTrees
    bdtBSetting += ':MinNodeSize=2.5%:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20'
    bdtBSetting += ':IgnoreNegWeightsInTraining=True'
    # bdtSetting for "BDTD"
    bdtDSetting = '!H:!V:NTrees=%s' % nTrees
    bdtDSetting += ':MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate'
    bdtDSetting += ':IgnoreNegWeightsInTraining=True'
    # NOTE: explicitly setting *nEventsMin* overrides the recommended option.

    # Exactly one method is booked, selected by the -m/--methods string.
    #BOOKING AN ALGORITHM
    if methods == "BDT": factory.BookMethod(TMVA.Types.kBDT, "BDT", bdtSetting)
    if methods == "BDTG":
        factory.BookMethod(TMVA.Types.kBDT, "BDTG", bdtGSetting)
    if methods == "BDTMitFisher":
        factory.BookMethod(TMVA.Types.kBDT, "BDTMitFisher", bdtFSetting)
    if methods == "BDTB":
        factory.BookMethod(TMVA.Types.kBDT, "BDTB", bdtBSetting)
    if methods == "BDTD":
        factory.BookMethod(TMVA.Types.kBDT, "BDTD", bdtDSetting)
    # --------------------------------------------------------------------------------------------------

    # ---- Now you can tell the factory to train, test, and evaluate the MVAs.

    # Train MVAs
    factory.TrainAllMethods()

    # Test MVAs
    factory.TestAllMethods()

    # Evaluate MVAs
    factory.EvaluateAllMethods()

    # Save the output.
    outputFile.Close()
    #
    #     print "=== wrote root file %s\n" % outfname
    #     print "=== TMVAClassification is done!\n"

    # save plots:
    # The TMVA plot macros write into the current directory, so move into the
    # per-run weights folder first; outfname is reached via "../../" from there.
    os.chdir('weights/' + Note)
    #TMVA.mvaeffs( "../../"+outfname ) #Classifier Cut Efficiencies
    gROOT.SetBatch(1)
    TMVA.efficiencies(
        "../../" + outfname
    )  #Classifier Background Rejection vs Signal Efficiency (ROC curve)
    #TMVA.efficiencies( "weights/TMVA_BDTG_APR9_33vars_mDepth3_MLow.root", 3 ) #Classifier 1/(Backgr. Efficiency) vs Signal Efficiency (ROC curve)
    TMVA.mvas("../../" + outfname,
              0)  #Classifier Output Distributions (test sample)
    TMVA.correlations(
        "../../" + outfname)  #Input Variable Linear Correlation Coefficients
    TMVA.variables("../../" + outfname)  #Input variables (training sample)
    #TMVA.mvas( "../../"+outfname ) #Classifier Output Distributions (test and training samples superimposed)
    #gROOT.ProcessLine( ".x ../../mvas.C(\"%s\",3)" % ("../../"+outfname) ) #Classifier Output Distributions (test and training samples superimposed)
    if not gROOT.IsBatch(): TMVA.TMVAGui("../../" + outfname)
    #     os.chdir('plots')
    #     try: os.system('convert CorrelationMatrixS.eps CorrelationMatrixS_'+Note+'.png')
    #     except: pass
    #     try: os.system('convert CorrelationMatrixB.eps CorrelationMatrixB_'+Note+'.png')
    #     except: pass
    #     #try: os.system('convert invBeffvsSeff.eps invBeffvsSeff_'+Note+'.png')
    #     #except: pass
    #     try: os.system('convert mva_'+Note.split('_')[0]+'.eps mva_'+Note+'.png')
    #     except: pass
    #     try: os.system('convert mvaeffs_'+Note.split('_')[0]+'.eps mvaeffs_'+Note+'.png')
    #     except: pass
    #     try: os.system('convert overtrain_'+Note.split('_')[0]+'.eps overtrain_'+Note+'.png')
    #     except: pass
    #     try: os.system('convert rejBvsS.eps rejBvsS_'+Note+'.png')
    #     except: pass
    #     try: os.system('convert variables_id_c1.eps variables_id_c1_'+Note+'.png')
    #     except: pass
    #     try: os.system('convert variables_id_c2.eps variables_id_c2_'+Note+'.png')
    #     except: pass
    #     try: os.system('convert variables_id_c3.eps variables_id_c3_'+Note+'.png')
    #     except: pass
    #     try: os.system('convert variables_id_c4.eps variables_id_c4_'+Note+'.png')
    #     except: pass
    #     try: os.system('convert variables_id_c5.eps variables_id_c5_'+Note+'.png')
    #     except: pass
    #     try: os.system('convert variables_id_c6.eps variables_id_c6_'+Note+'.png')
    #     except: pass
    #     os.system('rm *.eps')
    print "DONE"
Example #24
0
def main(o, args):

    # Import TMVA classes from ROOT
    from ROOT import TMVA, TFile, TCut

    print o

    # Output file
    outputFile = TFile(o.outfile % {"label": o.label}, 'RECREATE')

    atype = "Classification"
    if hasattr(o, "type"):
        atype = str(o.type)
    factory = TMVA.Factory(
        "TMVAClassification", outputFile,
        "!V:!Silent:!Color:!DrawProgressBar:Transformations=I:AnalysisType=%s"
        % atype)

    # Set verbosity
    factory.SetVerbose(o.verbose)

    TMVA.Config.Instance().GetIONames().fWeightFileDir = o.weightsdir

    # variables
    if type(o.variables) == str:
        o.variables = [
            v.lstrip().rstrip() for v in o.variables.split(":") if v != ""
        ]
    allvars = ""
    for v in o.variables:
        factory.AddVariable(str(v))
        if allvars != "": allvars += ":"
        allvars += v.split(":=")[0].lstrip(" ").rstrip(" ")
    print "variables %s" % allvars

    print o.spectators
    for s in o.spectators:
        if not s in o.variables:
            factory.AddSpectator(str(s))

    # categories and sub categories
    categories = []
    subcategories = []
    if hasattr(o, "subcategories") and len(o.subcategories) > 0:
        subcategories = o.subcategories[0]
        for sc in o.subcategories[1:]:
            subcategories = map(
                lambda x: (TCut(x[0][0]) * TCut(x[1][0]), "%s_%s" %
                           (x[0][1], x[1][1])),
                itertools.product(subcategories, sc))

    for cut, name, vars in o.categories:
        myvars = allvars
        if vars != "":
            for v in vars.split(":"):
                myvars = myvars.replace(v, "").replace("::", ":")
            myvars = myvars.rstrip(":")

        vars = str(myvars)
        print vars

        if len(subcategories) > 0:
            for subcut, subname in subcategories:
                if subname == "":
                    subname = subname.replace(" ", "").replace(
                        ">", "_gt_").replace("<", "_lt_").replace(
                            "=", "_eq_").replace("&", "_and_")
                fullname = "%s_%s" % (name, subname)
                categories.append(
                    (TCut(cut) * TCut(subcut), str(fullname), vars))
        else:
            categories.append((TCut(cut), str(name), vars))

    # load tree
    selection = TCut(o.selection)
    for evclass, info in o.classes.iteritems():
        samples = info["samples"]
        for name, weight, cut, ttype in samples:
            tcut = TCut(cut) * selection
            factory.AddTree(mkChain(getListOfFiles(o.indir, o.files), name),
                            str(evclass), float(weight), tcut, int(ttype))
        # weights
        if "weight" in info:
            weight = info["weight"]
            factory.AddSpectator(str("%s_wei := %s" % (evclass, weight)))
            factory.SetWeightExpression(str(weight), str(evclass))
        else:
            factory.SetWeightExpression("1.", str(evclass))

    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
    factory.PrepareTrainingAndTestTree(
        TCut(""), "SplitMode=Random:NormMode=NumEvents:!V")

    # --------------------------------------------------------------------------------------------------
    # Fisher discriminant (same as LD)
    defaultSettings = {
        "BDT":
        "!H:!V:!CreateMVAPdfs:BoostType=Grad:UseBaggedGrad"
        ":GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5"
        ":Shrinkage=0.3:NTrees=1000",
        "Cuts":
        "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart"
    }
    if "FisherD" in o.methods:
        mname = "FisherD%s" % o.label
        fcats = factory.BookMethod(TMVA.Types.kCategory, mname)

        for cut, name, vars in categories:
            print "booking sub-category classifier : %s %s %s" % (cut, name,
                                                                  vars)
            fcats.AddMethod(cut, vars, TMVA.Types.kFisher,
                            "%s_%s" % (mname, name),
                            "!H:!V:Fisher:!CreateMVAPdfs:VarTransform=D")

    if "Fisher" in o.methods:
        mname = "Fisher%s" % o.label
        fcats = factory.BookMethod(TMVA.Types.kCategory, mname)

        for cut, name, vars in categories:
            print "booking sub-category classifier : %s %s %s" % (cut, name,
                                                                  vars)
            fcats.AddMethod(cut, vars, TMVA.Types.kFisher,
                            "%s_%s" % (mname, name),
                            "!H:!V:Fisher:!CreateMVAPdfs")

    if "Likelihood" in o.methods:
        mname = "Likelihood%s" % o.label
        fcats = factory.BookMethod(TMVA.Types.kCategory, mname)

        for cut, name, vars in categories:
            print "booking sub-category classifier : %s %s %s" % (cut, name,
                                                                  vars)
            fcats.AddMethod(
                cut, vars, TMVA.Types.kLikelihood, "%s_%s" % (mname, name),
                "!H:!V:!CreateMVAPdfs:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=150"
            )

    if "LikelihoodD" in o.methods:
        mname = "LikelihoodD%s" % o.label
        fcats = factory.BookMethod(TMVA.Types.kCategory, mname)

        for cut, name, vars in categories:
            print "booking sub-category classifier : %s %s %s" % (cut, name,
                                                                  vars)
            fcats.AddMethod(
                cut, vars, TMVA.Types.kLikelihood, "%s_%s" % (mname, name),
                "!H:!V:!CreateMVAPdfs:!TransformOutput:VarTransform=D:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=150"
            )

    if "BDT" in o.methods:
        mname = str("BDT%s" % o.label)
        settings = defaultSettings["BDT"]
        if hasattr(o, "settings") and "BDT" in o.settings:
            settings = str(o.settings["BDT"])
        print mname, settings
        if len(categories) == 0:
            cats = factory.BookMethod(TMVA.Types.kBDT, mname, settings)
        else:
            cats = factory.BookMethod(TMVA.Types.kCategory, mname)

            for cut, name, vars in categories:
                print "booking sub-category classifier : %s %s %s" % (
                    cut, name, vars)
                cats.AddMethod(cut, vars, TMVA.Types.kBDT,
                               "%s_%s" % (mname, name), settings)

    if "Cuts" in o.methods:
        mname = "Cuts%s" % o.label
        settings = defaultSettings["Cuts"]
        if hasattr(o, "settings") and "Cuts" in o.settings:
            settings = str(o.settings["Cuts"])
        if len(categories) == 0:
            cats = factory.BookMethod(TMVA.Types.kCuts, mname, settings)
        else:
            cats = factory.BookMethod(TMVA.Types.kCategory, mname)

            for cut, name, vars in categories:
                print "booking sub-category classifier : %s %s %s" % (
                    cut, name, vars)
                cats.AddMethod(cut, vars, TMVA.Types.kCuts,
                               "%s_%s" % (mname, name), settings)

    # ---- Now you can tell the factory to train, test, and evaluate the MVAs.
    if o.optimize:
        print "Optimizing?"
        factory.OptimizeAllMethods()

    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()

    # Save the output.
    outputFile.Close()
Example #25
0
def main():

    try:
        # retrive command line options
        shortopts  = "w:m:i:j:f:g:t:o:a:vgh?"
        longopts   = ["weight_fold=", "methods=", "inputfilesig=", "inputfilebkg=", "friendinputfilesig=", "friendinputfilebkg=", "inputtrees=", "outputfile=", "verbose", "gui", "help", "usage"]
        opts, args = getopt.getopt( sys.argv[1:], shortopts, longopts )

    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    infnameSig     = DEFAULT_INFNAMESIG
    infnameBkg     = DEFAULT_INFNAMEBKG
    friendfnameSig = DEFAULT_FRIENDNAMESIG
    friendfnameBkg = DEFAULT_FRIENDNAMEBKG
    treeNameSig    = DEFAULT_TREESIG
    treeNameBkg    = DEFAULT_TREEBKG
    outfname       = DEFAULT_OUTFNAME
    methods        = DEFAULT_METHODS
    weight_fold    = "weights"
    verbose        = False
    gui            = False
    addedcuts      = ""
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-w", "--weight_fold"):
            weight_fold = a
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-i", "--inputfilesig"):
            infnameSig = a
        elif o in ("-j", "--inputfilebkg"):
            infnameBkg = a
        elif o in ("-f", "--friendinputfilesig"):
            friendfnameSig = a
        elif o in ("-g", "--friendinputfilebkg"):
            friendfnameBkg = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-a", "--addedcuts"):
            addedcuts = a
        elif o in ("-t", "--inputtrees"):
            a.strip()
            trees = a.rsplit( ' ' )
            trees.sort()
            trees.reverse()
            if len(trees)-trees.count('') != 2:
                print "ERROR: need to give two trees (each one for signal and background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True
        elif o in ("-g", "--gui"):
            gui = True

    # Print methods
    mlist = methods.replace(' ',',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Print the file
    print "Using file " + infnameSig + " for signal..."
    print "Using file " + infnameBkg + " for background..."

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut
    
    # check ROOT version, give alarm if 5.18
    print "ROOT version is " + str(gROOT.GetVersionCode())
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)
    
    # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI
    gROOT.SetMacroPath( "./" )
## SO I TAKE DEFAULT FORM ROOT#    gROOT.Macro       ( "./TMVAlogon.C" )    
#!    gROOT.LoadMacro   ( "./TMVAGui.C" )
 
   
    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile( outfname, 'RECREATE' )
    
    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in 
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory( "TMVAClassification", outputFile, 
                            "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" )

    # Set verbosity
    factory.SetVerbose( verbose )
    
    # If you wish to modify default settings 
    # (please check "src/Config.h" to see all available global options)
    #    gConfig().GetVariablePlotting()).fTimesRMS = 8.0
    (TMVA.gConfig().GetIONames()).fWeightFileDir = weight_fold;

    # Define the input variables that shall be used for the classifier training
    # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    # [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
    factory.AddVariable( "dR_l1l2", "dR_l1l2", "", 'F' )
    factory.AddVariable( "dR_b1b2", "dR_b1b2", "", 'F' )
    factory.AddVariable( "dR_bl", "dR_bl", "", 'F' )
    factory.AddVariable( "dR_l1l2b1b2", "dR_l1l2b1b2", "", 'F' )
    factory.AddVariable( "MINdR_bl", "MINdR_bl", "", 'F' )
    factory.AddVariable( "dphi_l1l2b1b2", "dphi_l1l2b1b2", "", 'F' )
    factory.AddVariable( "mass_l1l2", "mass_l1l2", "", 'F' )
    factory.AddVariable( "mass_b1b2", "mass_b1b2", "", 'F' )
    factory.AddVariable( "mass_trans", "mass_trans", "", 'F' )
    factory.AddVariable( "MT2", "MT2", "", 'F' )
    factory.AddVariable( "pt_b1b2", "pt_b1b2", "", 'F' )
    #factory.AddVariable( "MMC_h2mass_MaxBin", "MMC_h2mass_MaxBin", "", 'F' )
    #factory.AddVariable( "MMC_h2mass_RMS", "MMC_h2mass_RMS", "", 'F' )
    #factory.AddVariable( "MMC_h2mass_prob", "MMC_h2mass_prob", "", 'F' )

    # You can add so-called "Spectator variables", which are not used in the MVA training, 
    # but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the 
    # input variables, the response values of all trained MVAs, and the spectator variables
    # factory.AddSpectator( "spec1:=var1*2",  "Spectator 1", "units", 'F' )
    # factory.AddSpectator( "spec2:=var1*3",  "Spectator 2", "units", 'F' )

    # Read input data
    if gSystem.AccessPathName( infnameSig ) != 0 or gSystem.AccessPathName( infnameBkg ): gSystem.Exec( "wget http://root.cern.ch/files/" + infname )
        
    inputSig = TFile.Open( infnameSig )
    inputBkg = TFile.Open( infnameBkg )

    # Get the signal and background trees for training
    signal      = inputSig.Get( treeNameSig )
    background  = inputBkg.Get( treeNameBkg )

    ##signal.AddFriend( "eleIDdir/isoT1 = eleIDdir/T1", friendfnameSig )
    ##background.AddFriend( "eleIDdir/isoT1 = eleIDdir/T1", friendfnameBkg )

    # Global event weights (see below for setting event-wise weights)
    signalWeight     = 1.
    backgroundWeight = 1.

#I don't think there's a general answer to this. The safest 'default'
#is to use the event weight such that you have equal amounts of signal
#and background
#for the training. Otherwise, for example: if you look for a rare
#signal and you use the weight to scale the number of events according
#to the expected ratio of signal and background
#according to the luminosity... the classifier sees hardly any signal
#events and "thinks": Oh, I'll just classify everything as background and do
#a good job!
#
#One can try to 'optimize' the training a bit more towards either 'high
#purity' or 'high efficiency' by choosing different weights, but as I
#said, there's no fixed rule. You'd have
#to 'try' and see if you get better results by playing with the weights.

    # ====== register trees ====================================================
    #
    # the following method is the preferred one:
    # you can add an arbitrary number of signal or background trees
    factory.AddSignalTree    ( signal,     signalWeight     )
    factory.AddBackgroundTree( background, backgroundWeight )

    # To give different trees for training and testing, do as follows:
    #    factory.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" )
    #    factory.AddSignalTree( signalTestTree,     signalTestWeight,  "Test" )
    
    # Use the following code instead of the above two or four lines to add signal and background 
    # training and test events "by hand"
    # NOTE that in this case one should not give expressions (such as "var1+var2") in the input 
    #      variable definition, but simply compute the expression before adding the event
    #
    #    # --- begin ----------------------------------------------------------
    #    
    # ... *** please lookup code in TMVA/macros/TMVAClassification.C ***
    #    
    #    # --- end ------------------------------------------------------------
    #
    # ====== end of register trees ==============================================    
            
    # Set individual event weights (the variables must exist in the original TTree)
    #    for signal    : factory.SetSignalWeightExpression    ("weight1*weight2");
    #    for background: factory.SetBackgroundWeightExpression("weight1*weight2");

    # Apply additional cuts on the signal and background sample. 
    # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )
    #mycutSig = TCut( "nu1and2_diBaxis_t>-900 && met_diBaxis_t>-900&& hasb1jet && hasb2jet && hasMET && hasGenMET && hasdRljet && hastwomuons" ) 
    mycutSig = TCut( addedcuts ) 
    #mycutBkg = TCut( "event_n%2!=0 && " + addedcuts ) 
    mycutBkg = TCut( addedcuts ) 
    #mycutBkg = TCut( "nu1and2_diBaxis_t>-900 && met_diBaxis_t>-900&& hasb1jet && hasb2jet && hasMET && hasGenMET && hasdRljet && hastwomuons" ) 

    print mycutSig

    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
    factory.PrepareTrainingAndTestTree( mycutSig, mycutBkg,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" )

    # --------------------------------------------------------------------------------------------------

    # ---- Book MVA methods
    #
    # please lookup the various method configuration options in the corresponding cxx files, eg:
    # src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
    # it is possible to preset ranges in the option string in which the cut optimisation should be done:
    # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

    # Cut optimisation
    if "Cuts" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "Cuts",
                            "!H:!V:FitMethod=MC:EffSel:VarProp[0]=FMax:VarProp[1]=FMin" )

    if "CutsD" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsD",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" )

    if "CutsPCA" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsPCA",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" )

    if "CutsGA" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsGA",
                            "H:!V:FitMethod=GA:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95:VarProp[0]=FMin:VarProp[1]=FMax" )

    if "CutsSA" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsSA",
                            "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" )

    # Likelihood ("naive Bayes estimator")
    if "Likelihood" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "Likelihood",
                            "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" )

    # Decorrelated likelihood
    if "LikelihoodD" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodD",
                            "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" )

    # PCA-transformed likelihood
    if "LikelihoodPCA" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodPCA",
                            "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ) 

    # Use a kernel density estimator to approximate the PDFs
    if "LikelihoodKDE" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodKDE",
                            "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ) 

    # Use a variable-dependent mix of splines and kernel density estimator
    if "LikelihoodMIX" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodMIX",
                            "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ) 

    # Test the multi-dimensional probability density estimator
    # here are the options strings for the MinMax and RMS methods, respectively:
    #      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
    #      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
    if "PDERS" in mlist:
        factory.BookMethod( TMVA.Types.kPDERS, "PDERS",
                            "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" )

    if "PDERSD" in mlist:
        factory.BookMethod( TMVA.Types.kPDERS, "PDERSD",
                            "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" )

    if "PDERSPCA" in mlist:
        factory.BookMethod( TMVA.Types.kPDERS, "PDERSPCA",
                             "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" )

   # Multi-dimensional likelihood estimator using self-adapting phase-space binning
    if "PDEFoam" in mlist:
        factory.BookMethod( TMVA.Types.kPDEFoam, "PDEFoam",
                            "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" )

    if "PDEFoamBoost" in mlist:
        factory.BookMethod( TMVA.Types.kPDEFoam, "PDEFoamBoost",
                            "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" )

    # K-Nearest Neighbour classifier (KNN)
    if "KNN" in mlist:
        factory.BookMethod( TMVA.Types.kKNN, "KNN",
                            "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" )

    # H-Matrix (chi2-squared) method
    if "HMatrix" in mlist:
        factory.BookMethod( TMVA.Types.kHMatrix, "HMatrix", "!H:!V" )

    # Linear discriminant (same as Fisher discriminant)
    if "LD" in mlist:
        factory.BookMethod( TMVA.Types.kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" )

    # Fisher discriminant (same as LD)
    if "Fisher" in mlist:
        factory.BookMethod( TMVA.Types.kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" )

    # Fisher with Gauss-transformed input variables
    if "FisherG" in mlist:
        factory.BookMethod( TMVA.Types.kFisher, "FisherG", "H:!V:VarTransform=Gauss" )

    # Composite classifier: ensemble (tree) of boosted Fisher classifiers
    if "BoostedFisher" in mlist:
        factory.BookMethod( TMVA.Types.kFisher, "BoostedFisher", 
                            "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" )

    # Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
    if "FDA_MC" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_MC",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );

    if "FDA_GA" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_GA",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

    if "FDA_SA" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_SA",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

    if "FDA_MT" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_MT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

    if "FDA_GAMT" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_GAMT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

    if "FDA_MCMT" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_MCMT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

    # TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
    if "MLP" in mlist:
        factory.BookMethod( TMVA.Types.kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" )

    if "MLPBFGS" in mlist:
        factory.BookMethod( TMVA.Types.kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" )

    if "MLPBNN" in mlist:
        factory.BookMethod( TMVA.Types.kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ) # BFGS training with bayesian regulators

    # CF(Clermont-Ferrand)ANN
    if "CFMlpANN" in mlist:
        factory.BookMethod( TMVA.Types.kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ) # n_cycles:#nodes:#nodes:...  

    # Tmlp(Root)ANN
    if "TMlpANN" in mlist:
        factory.BookMethod( TMVA.Types.kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ) #n_cycles:#nodes:#nodes:...

    # Support Vector Machine
    if "SVM" in mlist:
        factory.BookMethod( TMVA.Types.kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" )

    # Boosted Decision Trees
    if "BDTG" in mlist:
        factory.BookMethod( TMVA.Types.kBDT, "BDTG",
                            "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5" )

    if "BDT" in mlist:
        factory.BookMethod( TMVA.Types.kBDT, "BDT",
                            "!H:!V:NTrees=850:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" )

    if "BDTB" in mlist:
        factory.BookMethod( TMVA.Types.kBDT, "BDTB",
                            "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" )

    if "BDTD" in mlist:
        factory.BookMethod( TMVA.Types.kBDT, "BDTD",
                            "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" )

    # RuleFit -- TMVA implementation of Friedman's method
    if "RuleFit" in mlist:
        factory.BookMethod( TMVA.Types.kRuleFit, "RuleFit",
                            "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" )

    # --------------------------------------------------------------------------------------------------
            
    # ---- Now you can tell the factory to train, test, and evaluate the MVAs. 

    # Train MVAs
    factory.TrainAllMethods()
    
    # Test MVAs
    factory.TestAllMethods()
    
    # Evaluate MVAs
    factory.EvaluateAllMethods()    
    
    # Save the output.
    outputFile.Close()
    
    print "=== wrote root file %s\n" % outfname
    print "=== TMVAClassification is done!\n"
    
    # open the GUI for the result macros    
    if( gui ):
        gROOT.ProcessLine( "TMVAGui(\"%s\")" % outfname )
        # keep the ROOT thread running
        gApplication.Run() 
Example #26
0
 def _setFactory(self, outFileName):
     """Open the output ROOT file and build the TMVA factory.

     Creates ``self._fout`` (a RECREATE'd TFile at *outFileName*) and
     ``self._factory`` (a TMVA.Factory configured from the 'factory'
     section of ``self._options``).
     """
     factoryCfg = self._options['factory']
     self._fout = TFile(outFileName, "RECREATE")
     self._factory = TMVA.Factory(
         factoryCfg['name'], self._fout, factoryCfg['options'])
Example #27
0
def main():
    print "\n", "=" * 80
    print "\tDESY 2017 - classification with TMVA"
    print "=" * 80

    # summary root file
    summaryFilename = 'TMVA.root'

    # results directory
    resultsDir = 'results'
    os.system('mkdir -p %s' % resultsDir)

    #------------------------------------------------------------------
    # get signal file and associated Root tree
    sigFilename = "../public/ntuple_HZZ4L.root"
    sigFile, sigTree = getTree(sigFilename)

    # get background file and associated Root tree
    bkgFilename = "../public/ntuple_ZZ4L.root"
    bkgFile, bkgTree = getTree(bkgFilename)

    #------------------------------------------------------------------
    # create a factory for booking machine learning methods
    #------------------------------------------------------------------
    outputFile = TFile("TMVA.root", "recreate")
    options = '''
    !V
    Color
    !Silent
    DrawProgressBar
    AnalysisType=Classification
    Transformations=I;D
    '''
    factory = TMVA.Factory("Z1massZ2mass", outputFile, formatOptions(options))

    #------------------------------------------------------------------
    # set up data set for training and testing
    #------------------------------------------------------------------
    dataLoader = TMVA.DataLoader(resultsDir)

    # define variables to be used
    dataLoader.AddVariable("Z1mass", 'D')
    dataLoader.AddVariable("Z2mass", 'D')

    # define from which trees data are to be taken
    # from and the global weights to be assigned to
    # the training data
    sigWeight = 1.0
    dataLoader.AddSignalTree(sigTree, sigWeight)
    dataLoader.SetSignalWeightExpression("weight")

    bkgWeight = 1.0
    dataLoader.AddBackgroundTree(bkgTree, bkgWeight)
    dataLoader.SetBackgroundWeightExpression("weight")

    # you can apply cuts, if needed
    cut = TCut("")
    options = '''
    SplitMode=Random
    NormMode=EqualNumEvents
    nTrain_Signal=2500
    nTest_Signal=2500
    nTrain_Background=2500
    nTest_Background=2500
    !V 
    '''
    dataLoader.PrepareTrainingAndTestTree(
        cut,  # signal cut
        cut,  # background cut
        formatOptions(options))

    #------------------------------------------------------------------
    # ok, almost done, define machine learning methods to be run
    #------------------------------------------------------------------

    options = '''
    !H
    !V
    VarTransform=None
    '''
    factory.BookMethod(dataLoader, TMVA.Types.kFisher, "Fisher",
                       formatOptions(options))

    options = '''
    !H
    !V
    BoostType=AdaBoost
    NTrees=200
    nEventsMin=100
    nCuts=50
    '''
    factory.BookMethod(dataLoader, TMVA.Types.kBDT, "BDT",
                       formatOptions(options))

    options = '''
    !H
    !V
    NCycles=500
    VarTransform=N
    HiddenLayers=5
    TrainingMethod=BFGS
    '''
    factory.BookMethod(dataLoader, TMVA.Types.kMLP, "MLP",
                       formatOptions(options))

    #------------------------------------------------------------------
    # ok, let's go!
    #------------------------------------------------------------------
    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()

    outputFile.Close()
Example #28
0
def main():

    try:
        # retrive command line options
        shortopts = "m:i:t:b:s:a:o:vh?"
        longopts = [
            "methods=", "inputfile=", "inputtrees=", "bkgList=", "sigList=",
            "anaType=", "outputfile=", "verbose", "help", "usage"
        ]
        opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts)

    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    infname = DEFAULT_INFNAME
    bkgList = DEFAULT_BKGLIST
    sigList = DEFAULT_SIGLIST
    anaType = DEFAULT_ANATYPE
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname = DEFAULT_OUTFNAME
    methods = DEFAULT_METHODS
    verbose = False
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-b", "--bkgList"):
            bkgList = a
        elif o in ("-s", "--sigList"):
            sigList = a
        elif o in ("-a", "--anaType"):
            anaType = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            a.strip()
            trees = a.rsplit(' ')
            trees.sort()
            trees.reverse()
            if len(trees) - trees.count('') != 2:
                print "ERROR: need to give two trees (each one for signal and background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True

    # Print methods
    mlist = methods.replace(' ', ',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut

    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)

    #Edited
    setupTMVA(pathToTMVA)

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile(outfname, 'RECREATE')

    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in
    # front of the "Silent" argument in the option string
    suffix = outfname.split("/")[-1].split(".")[0]
    factory = TMVA.Factory(
        "TMVAClassification_%s" % suffix, outputFile,
        "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P:AnalysisType=Classification"
    )

    # Set verbosity
    factory.SetVerbose(verbose)

    # If you wish to modify default settings
    # (please check "src/Config.h" to see all available global options)
    #    gConfig().GetVariablePlotting()).fTimesRMS = 8.0
    #    gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"

    # Define the input variables that shall be used for the classifier training
    # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    # [all types of expressions that can also be parsed by TTree::Draw( "expression" )]

    #Edited
    #follow 2LSS note Ch5.2: Disciminating variables
    factory.AddVariable("mT2    := sig.mT2", 'F')
    factory.AddVariable("pt     := l12.pt", 'F')
    factory.AddVariable("MET    := sig.MetRel", 'F')
    factory.AddVariable("Ht     := Sum$(jets.pt) + Sum$(leps.pt)", 'F')
    factory.AddVariable("mTl1   := leps.mT[0]", 'F')
    factory.AddVariable("mTl2   := leps.mT[1]", 'F')
    factory.AddVariable("ll_dPhi:= l12.dPhi", 'F')
    factory.AddVariable(
        "l12m   := (int(abs(leps.ID[0]))!=int(abs(leps.ID[1])))*100 + l12.m",
        'F')

    #ISR region
    if (anaType == "doISR"):
        factory.AddVariable("JetMET_dPhi  := jets.MET_dPhi[0]", 'F')
        factory.AddVariable("MET_JetPt_R  := sig.MetRel/jets.pt[0]", 'F')
        factory.AddVariable("l1Pt_JetPt_R := leps.pt[0]/jets.pt[0]", 'F')

    #factory.AddSpectator( "pt1 := leps.pt[0]" , 'F' )
    #factory.AddSpectator( "pt2 := leps.pt[1]" , 'F' )
    #factory.AddSpectator( "ID1 := int(leps.ID[0])" , 'I' )
    #factory.AddSpectator( "ID2 := int(leps.ID[1])" , 'I' )
    #factory.AddSpectator( "nCentralJets := Sum$(jets.pt>20 && abs(jets.eta)<2.4)" , 'I' )

    #FIXME
    setupXsecDB(pathToSUSYTools)
    from ROOT.SUSY import CrossSectionDB
    xsecDB = CrossSectionDB(pathToSUSYTools + "data/mc15_13TeV/")

    #read in training data
    openedInFileList = []

    # Read input sig
    sigList = open(sigList, "r")
    for infname in sigList:
        inFile = TFile.Open(infname[:-1])
        openedInFileList.append(inFile)

        hCutFlow = inFile.FindObjectAny("hCutFlow")
        mcEntry = hCutFlow.GetBinContent(1)

        #FIXME: hard coded extract runNum from filePath
        m = re.match(".*\.([0-9]{6})\..*", infname)
        runNum = int(m.groups()[0])
        xSECxEff = xsecDB.xsectTimesEff(runNum, 125) + xsecDB.xsectTimesEff(
            runNum, 127)  #125,127 is channel no.

        # Get the trees for training
        signal = inFile.Get("Data_")

        # Global event weights (see below for setting event-wise weights)
        #signalWeight = getXSECxEff(xsecDB, infname) * tarLumi / mcEntry
        #signalWeight = xSECxEff * tarLumi / mcEntry
        signalWeight = 1.0 * tarLumi / mcEntry  #treat diff SUSY scenario with equal weight
        if signalWeight <= 0:
            print "Encounter <=0 weight sample %s , skipped" % infname
            continue

        print "mc sig ", runNum, mcEntry, xSECxEff
        factory.AddSignalTree(signal, signalWeight)
    sigList.close()

    # Read input bkg
    bkgList = open(bkgList, "r")
    for infname in bkgList:
        inFile = TFile.Open(infname[:-1])
        openedInFileList.append(inFile)

        if "physics" in infname:
            #its real data
            print "data bkg", infname[:-1]
            background = inFile.Get("CFlip_")
            if background: factory.AddBackgroundTree(background, 1.0)
            background = inFile.Get("FakeLep_")
            if background: factory.AddBackgroundTree(background, 1.0)
        else:
            #its MC data
            hCutFlow = inFile.FindObjectAny("hCutFlow")
            mcEntry = hCutFlow.GetBinContent(1)

            #FIXME: hard coded extract runNum from filePath
            m = re.match(".*\.([0-9]{6})\..*", infname)
            runNum = int(m.groups()[0])
            xSECxEff = xsecDB.xsectTimesEff(runNum)

            # Get  trees for training
            background = inFile.Get("Data_")

            # Global event weights (see below for setting event-wise weights)
            backgroundWeight = xSECxEff * tarLumi / mcEntry
            if backgroundWeight <= 0:
                print "Encounter <=0 weight sample %s , skipped" % infname

            print "mc bkg ", runNum, mcEntry, xSECxEff
            factory.AddBackgroundTree(background, backgroundWeight)
    bkgList.close()

    # event-wise weights
    #factory.SetSignalWeightExpression( "weight" )
    #factory.SetBackgroundWeightExpression( "weight" )
    factory.SetSignalWeightExpression("ElSF*MuSF")
    factory.SetBackgroundWeightExpression(
        "(CFlipWeight0*FakeLepWeight0)!=1.0 ? CFlipWeight0*FakeLepWeight0  : !TMath::IsNaN(weight)? ElSF*MuSF*weight: 0.0"
    )

    # Apply additional cuts on the signal and background sample.
    # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )
    # trigCut   = "sig.trigCode!=0"

    #"HLT_mu24_iloose_L1MU15" for mumu emu, "HLT_e24_lhmedium_iloose_L1EM20VH" for ee
    trigCut = "((nMu>0) && (sig.trigCode & (1<<2))) || ((nMu==0) && (sig.trigCode & (1<<26)))"

    grlCut = "evtInfo.passGRL==1"
    wCut = "weight>0 && weight<1e9"
    tauCut = "1"  # "nTau==0"  FIXME nTau not properly filled in NTUP yet..
    bjetCut = "Sum$(jets.isBJet)==0"
    cosmicCut = "Sum$(leps.isCosmic)==0"

    htCut = "(Sum$(jets.pt) + Sum$(leps.pt))>40"
    posWCut = "FakeLepWeight0>0"

    isrCut = "Sum$(jets.pt>20 && abs(jets.eta)<2.4) %s" % (
        ">0" if anaType == "doISR" else "==0")  #nCentralJets>0 or ==0
    zMassCut = "!(int(abs(leps.ID[0])) == int(abs(leps.ID[1])) && fabs(l12.m - 91.1876)<=5)"

    #commonCut = "&&".join(["(%s)"%cut for cut in [trigCut , grlCut , bjetCut, cosmicCut]])
    commonCut = "&&".join([
        "(%s)" % cut for cut in
        [trigCut, grlCut, wCut, zMassCut, isrCut, tauCut, bjetCut, cosmicCut]
    ])
    commonCut = TCut(commonCut)

    sigCut = "&&".join([
        "(%s)" % cut for cut in
        [trigCut, grlCut, zMassCut, isrCut, tauCut, bjetCut, cosmicCut]
    ])
    sigCut = TCut(sigCut)

    bkgCut = "&&".join([
        "(%s)" % cut for cut in [
            trigCut, grlCut, wCut, zMassCut, isrCut, tauCut, bjetCut,
            cosmicCut, posWCut
        ]
    ])
    bkgCut = TCut(bkgCut)
    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples

    factory.PrepareTrainingAndTestTree(
        sigCut, bkgCut,
        "nTrain_Signal=0:nTrain_Background=0:nTest_Background=0:SplitMode=Random:NormMode=EqualNumEvents:!V"
    )
    #"nTrain_Signal=0:nTrain_Background=2000:SplitMode=Random:NormMode=EqualNumEvents:!V" )

    # --------------------------------------------------------------------------------------------------

    # ---- Book MVA methods
    #
    # please lookup the various method configuration options in the corresponding cxx files, eg:
    # src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
    # it is possible to preset ranges in the option string in which the cut optimisation should be done:
    # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

    # Cut optimisation
    if "Cuts" in mlist:
        factory.BookMethod(
            TMVA.Types.kCuts, "Cuts",
            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart")

    if "CutsD" in mlist:
        factory.BookMethod(
            TMVA.Types.kCuts, "CutsD",
            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate"
        )

    if "CutsPCA" in mlist:
        factory.BookMethod(
            TMVA.Types.kCuts, "CutsPCA",
            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA"
        )

    if "CutsGA" in mlist:
        factory.BookMethod(
            TMVA.Types.kCuts, "CutsGA",
            "H:!V:FitMethod=GA:VarProp=FSmart:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95"
        )

    if "CutsSA" in mlist:
        factory.BookMethod(
            TMVA.Types.kCuts, "CutsSA",
            "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale"
        )

    # Likelihood ("naive Bayes estimator")
    if "Likelihood" in mlist:
        factory.BookMethod(
            TMVA.Types.kLikelihood, "Likelihood",
            "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50"
        )

    # Decorrelated likelihood
    if "LikelihoodD" in mlist:
        factory.BookMethod(
            TMVA.Types.kLikelihood, "LikelihoodD",
            "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate"
        )

    # PCA-transformed likelihood
    if "LikelihoodPCA" in mlist:
        factory.BookMethod(
            TMVA.Types.kLikelihood, "LikelihoodPCA",
            "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA"
        )

    # Use a kernel density estimator to approximate the PDFs
    if "LikelihoodKDE" in mlist:
        factory.BookMethod(
            TMVA.Types.kLikelihood, "LikelihoodKDE",
            "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50"
        )

    # Use a variable-dependent mix of splines and kernel density estimator
    if "LikelihoodMIX" in mlist:
        factory.BookMethod(
            TMVA.Types.kLikelihood, "LikelihoodMIX",
            "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50"
        )

    # Test the multi-dimensional probability density estimator
    # here are the options strings for the MinMax and RMS methods, respectively:
    #      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
    #      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
    if "PDERS" in mlist:
        factory.BookMethod(
            TMVA.Types.kPDERS, "PDERS",
            "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600"
        )

    if "PDERSD" in mlist:
        factory.BookMethod(
            TMVA.Types.kPDERS, "PDERSD",
            "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate"
        )

    if "PDERSPCA" in mlist:
        factory.BookMethod(
            TMVA.Types.kPDERS, "PDERSPCA",
            "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA"
        )

# Multi-dimensional likelihood estimator using self-adapting phase-space binning
    if "PDEFoam" in mlist:
        factory.BookMethod(
            TMVA.Types.kPDEFoam, "PDEFoam",
            "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T"
        )

    if "PDEFoamBoost" in mlist:
        factory.BookMethod(
            TMVA.Types.kPDEFoam, "PDEFoamBoost",
            "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T"
        )

    # K-Nearest Neighbour classifier (KNN)
    if "KNN" in mlist:
        factory.BookMethod(
            TMVA.Types.kKNN, "KNN",
            "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim"
        )

    # H-Matrix (chi2-squared) method
    if "HMatrix" in mlist:
        factory.BookMethod(TMVA.Types.kHMatrix, "HMatrix", "!H:!V")

    # Linear discriminant (same as Fisher discriminant)
    if "LD" in mlist:
        factory.BookMethod(
            TMVA.Types.kLD, "LD",
            "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10"
        )

    # Fisher discriminant (same as LD)
    if "Fisher" in mlist:
        factory.BookMethod(
            TMVA.Types.kFisher, "Fisher",
            "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10"
        )

    # Fisher with Gauss-transformed input variables
    if "FisherG" in mlist:
        factory.BookMethod(TMVA.Types.kFisher, "FisherG",
                           "H:!V:VarTransform=Gauss")

    # Composite classifier: ensemble (tree) of boosted Fisher classifiers
    if "BoostedFisher" in mlist:
        factory.BookMethod(
            TMVA.Types.kFisher, "BoostedFisher",
            "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2"
        )

    # Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
    if "FDA_MC" in mlist:
        factory.BookMethod(
            TMVA.Types.kFDA, "FDA_MC",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1"
        )

    if "FDA_GA" in mlist:
        factory.BookMethod(
            TMVA.Types.kFDA, "FDA_GA",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1"
        )

    if "FDA_SA" in mlist:
        factory.BookMethod(
            TMVA.Types.kFDA, "FDA_SA",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale"
        )

    if "FDA_MT" in mlist:
        factory.BookMethod(
            TMVA.Types.kFDA, "FDA_MT",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch"
        )

    if "FDA_GAMT" in mlist:
        factory.BookMethod(
            TMVA.Types.kFDA, "FDA_GAMT",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim"
        )

    if "FDA_MCMT" in mlist:
        factory.BookMethod(
            TMVA.Types.kFDA, "FDA_MCMT",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20"
        )

    # TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
    if "MLP" in mlist:
        factory.BookMethod(
            TMVA.Types.kMLP, "MLP",
            "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator"
        )

    if "MLPBFGS" in mlist:
        factory.BookMethod(
            TMVA.Types.kMLP, "MLPBFGS",
            "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator"
        )

    if "MLPBNN" in mlist:
        factory.BookMethod(
            TMVA.Types.kMLP, "MLPBNN",
            "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator"
        )  # BFGS training with bayesian regulators

    # CF(Clermont-Ferrand)ANN
    if "CFMlpANN" in mlist:
        factory.BookMethod(TMVA.Types.kCFMlpANN, "CFMlpANN",
                           "!H:!V:NCycles=2000:HiddenLayers=N+1,N"
                           )  # n_cycles:#nodes:#nodes:...

    # Tmlp(Root)ANN
    if "TMlpANN" in mlist:
        factory.BookMethod(
            TMVA.Types.kTMlpANN, "TMlpANN",
            "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"
        )  # n_cycles:#nodes:#nodes:...

    # Support Vector Machine
    if "SVM" in mlist:
        factory.BookMethod(TMVA.Types.kSVM, "SVM",
                           "Gamma=0.25:Tol=0.001:VarTransform=Norm")

    # Boosted Decision Trees
    if "BDTG" in mlist:
        factory.BookMethod(
            TMVA.Types.kBDT, "BDTG",
            "!H:!V:NTrees=1000:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2"
        )

    if "BDT" in mlist:
        factory.BookMethod(
            TMVA.Types.kBDT, "BDT",
            "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20"
        )

    if "BDTB" in mlist:
        factory.BookMethod(
            TMVA.Types.kBDT, "BDTB",
            "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20"
        )

    if "BDTD" in mlist:
        factory.BookMethod(
            TMVA.Types.kBDT, "BDTD",
            "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=2:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate"
        )

    # RuleFit -- TMVA implementation of Friedman's method
    if "RuleFit" in mlist:
        factory.BookMethod(
            TMVA.Types.kRuleFit, "RuleFit",
            "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02"
        )

    # --------------------------------------------------------------------------------------------------

    # ---- Now you can tell the factory to train, test, and evaluate the MVAs.

    # Train MVAs
    factory.TrainAllMethods()

    # Test MVAs
    factory.TestAllMethods()

    # Evaluate MVAs
    factory.EvaluateAllMethods()

    # Save the output.
    outputFile.Close()

    print "=== wrote root file %s\n" % outfname
    print "=== TMVAClassification is done!\n"
# Example #29
# 0
def main():
    """Train a TMVA BDT classifier on di-tau event trees.

    Parses command-line options (method list, input/output file names,
    signal/background tree names, verbosity), books the requested MVA
    methods with a TMVA factory, trains/tests/evaluates them, and finally
    renames the produced BDT weight file so its name encodes the number
    of input variables used.  Exits via sys.exit on bad options.
    """
    try:
        # retrieve command line options
        shortopts = "m:i:t:o:vh?"
        longopts = [
            "methods=", "inputfile=", "inputtrees=", "outputfile=", "verbose",
            "help", "usage"
        ]
        opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts)

    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    # Defaults (module-level constants), overridable from the command line.
    infname = DEFAULT_INFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname = DEFAULT_OUTFNAME
    methods = DEFAULT_METHODS
    verbose = False
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            # BUGFIX: str.strip() returns a new string; the result was
            # previously discarded, so surrounding whitespace was never
            # removed before splitting the two tree names.
            a = a.strip()
            trees = a.rsplit(' ')
            trees.sort()
            trees.reverse()
            if len(trees) - trees.count('') != 2:
                print "ERROR: need to give two trees (each one for signal and background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True

    # NOTE(review): treeNameSig/treeNameBkg are parsed but unused below —
    # the input trees are hard-coded to "eventTree".  Kept for CLI
    # compatibility; confirm before removing the -t option.

    # Print methods
    mlist = methods.replace(' ', ',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut

    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)

    # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI
    gROOT.SetMacroPath("./")
    gROOT.Macro("./TMVAlogon.C")
    gROOT.LoadMacro("./TMVAGui.C")

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile(outfname, 'RECREATE')

    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory(
        "TMVAClassification", outputFile,
        "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification"
    )

    # Set verbosity
    factory.SetVerbose(verbose)

    # If you wish to modify default settings
    # (please check "src/Config.h" to see all available global options)
    #    gConfig().GetVariablePlotting()).fTimesRMS = 8.0
    #    gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"

    # Define the input variables that shall be used for the classifier training
    # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    # [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
    varList = [
        'svMass', 'dRTauTau', 'dRJJ', 'svPt', 'dRhh', 'met', 'mJJReg',
        'metTau1DPhi', 'metTau2DPhi', 'metJ1DPhi', 'metJ2DPhi',
        'metTauPairDPhi', 'metSvTauPairDPhi', 'metJetPairDPhi', 'CSVJ1',
        'CSVJ2'
    ]
    for iVar in varList:
        factory.AddVariable(iVar, 'F')

    # Get the signal and background trees for training
    iFileSig = TFile.Open("/scratch/zmao/relaxed_regression/%s" % (infname))
    iFileBkg = TFile.Open(
        "/scratch/zmao/relaxed_regression/trainSample_relaxedsamebTag.root")

    sigChain = iFileSig.Get("eventTree")
    bkgChain = iFileBkg.Get("eventTree")

    # Global event weights (see below for setting event-wise weights)
    signalWeight = 1  #0.0159/sigChain.GetEntries() #xs (pb)

    # ====== register trees ====================================================
    # you can add an arbitrary number of signal or background trees
    factory.AddSignalTree(sigChain, signalWeight)
    factory.AddBackgroundTree(bkgChain, 1)
    # per-event signal weight taken from the 'triggerEff' branch
    factory.SetSignalWeightExpression('triggerEff')

    # Apply additional cuts on the signal and background sample.
    # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )
    mycutSig = TCut(
        "iso1<1.5 && iso2<1.5 && CSVJ1 > 0.679 && CSVJ2 > 0.244 && abs(eta1)<2.1 && abs(eta2)<2.1 && charge1 + charge2 == 0"
    )
    mycutBkg = TCut("")

    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
    factory.PrepareTrainingAndTestTree(
        mycutSig, mycutBkg,
        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V"
    )

    # ---- Book MVA methods
    # please lookup the various method configuration options in the corresponding cxx files, eg:
    # src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
    if "BDT" in mlist:
        factory.BookMethod(
            TMVA.Types.kBDT, "BDT",
            "!H:!V:NTrees=150:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=-1"
        )

    # ---- Now you can tell the factory to train, test, and evaluate the MVAs.

    # Train MVAs
    factory.TrainAllMethods()

    # Test MVAs
    factory.TestAllMethods()

    # Evaluate MVAs
    factory.EvaluateAllMethods()

    # Save the output.
    outputFile.Close()

    print "=== wrote root file %s\n" % outfname
    print "=== TMVAClassification is done!\n"

    # Rename the produced weight file so its name records how many input
    # variables were used (distinguishes trainings with different var lists).
    ChangeWeightName = 'mv /afs/hep.wisc.edu/home/zmao/CMSSW_5_3_15/src/TMVA-v4.2.0/test/weights/TMVAClassification_BDT.weights.xml /afs/hep.wisc.edu/home/zmao/CMSSW_5_3_15/src/TMVA-v4.2.0/test/weights/TMVAClassification_BDT.weights_QCD_%i.xml' % len(
        varList)
    os.system(ChangeWeightName)
def main():
    """Train TMVA BDT/Likelihood classifiers on H->gg MVA output trees.

    Parses command-line options (method list, mass hypothesis, category,
    philosophy, background method, input/output files, test mode), books
    either the default set of methods or a scan of BDT hyper-parameters
    when running in test mode, then trains/tests/evaluates with a TMVA
    factory.  Exits via sys.exit on bad options.
    """
    try:
        # retrieve command line options
        shortopts = "m:p:M:C:B:i:t:T:o:vh?"
        opts, args = getopt.getopt(sys.argv[1:], shortopts)

    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    # Defaults (module-level constants), overridable from the command line.
    infname = DEFAULT_INFNAME
    methods = DEFAULT_METHODS
    mass = DEFAULT_MASS
    cat = DEFAULT_CAT
    phil = DEFAULT_PHIL
    outfname = DEFAULT_OUTFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    bkg_method = DEFAULT_BACKGROUND
    width = DEFAULT_WIDTH
    verbose = False
    test = False
    testType = DEFAULT_TEST_TYPE
    methTest = False
    testMethod = DEFAULT_TEST_METHOD
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-M", "--mass"):
            mass = int(a)
        elif o in ("-C", "--cat"):
            cat = int(a)
        elif o in ("-p", "--philosophy"):
            phil = a
        elif o in ("-B", "--background"):
            bkg_method = int(a)
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-T", "--test"):
            test = True
            temp = a.split('_')
            if len(temp) == 1:
                testType = temp[0]
                # BUGFIX: the original condition used 'or', which is a
                # tautology (always True), so the error printed even for
                # valid test types.  'and' rejects only invalid ones.
                if testType != "ada" and testType != "grad":
                    print "ERROR: testType must be ada or grad not", testType
            elif len(temp) - temp.count('') == 2:
                methTest = True
                testType = temp[0]
                testMethod = temp[1]
                checkTestType(testType, testMethod)
            else:
                print "ERROR: need to give one or two test options"
                print temp
                sys.exit(1)
        elif o in ("-t", "--inputtrees"):
            # BUGFIX: str.strip() returns a new string; the result was
            # previously discarded, so surrounding whitespace was never
            # removed before splitting the two tree names.
            a = a.strip()
            trees = a.rsplit(' ')
            trees.sort()
            trees.reverse()
            if len(trees) - trees.count('') != 2:
                print "ERROR: need to give two trees (each one for signal and background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True

    # NOTE(review): width_str is only defined for widths 0.02 / 0.07; any
    # other DEFAULT_WIDTH raises NameError below — confirm intent.
    if (width == 0.02): width_str = "_2pt"
    elif (width == 0.07): width_str = "_7pt"
    mass_str = "_" + str("%3.1f" % mass)
    cat_str = "_" + str(cat)
    if cat < 0:
        cat_str = "_all"
    # Encode philosophy, category and (in test mode) the scan type in the
    # output file name.
    if test:
        if methTest:
            outfname = "TMVAStuff/" + outfname + "_" + phil + cat_str + "_test_" + testType + "_" + testMethod + ".root"
        else:
            outfname = "TMVAStuff/" + outfname + "_" + phil + cat_str + "_test_" + testType + ".root"
    else:
        outfname = "TMVAStuff/" + outfname + "_" + phil + cat_str + ".root"

    # Print methods
    mlist = methods.replace(' ', ',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut

    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)

    # Logon not automatically loaded through PyROOT (logon loads TMVA library)
    # load also GUI
    gROOT.SetMacroPath("/vols/cms03/mk1009/h2g/MVA/tmvaMacros/")
    gROOT.Macro("/vols/cms03/mk1009/h2g/MVA/tmvaMacros/TMVAlogon.C")
    gROOT.LoadMacro("/vols/cms03/mk1009/h2g/MVA/tmvaMacros/TMVAGui.C")

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile(outfname, 'RECREATE')

    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for
    # more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory(
        "TMVAClassification", outputFile,
        "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification"
    )

    # Set verbosity
    factory.SetVerbose(verbose)

    factory.AddVariable("bdtoutput", "BDT Output", 'F')
    factory.AddVariable("deltaMOverM", "#DeltaM / M_{Hypth}.", 'F')

    input = TFile.Open(infname)

    # Get the signal and background trees for training; tree names encode
    # the train/test split, the mass point and (background) the width.
    signal_train = input.Get(treeNameSig + "_train" + mass_str)
    signal_test = input.Get(treeNameSig + "_test" + mass_str)

    background_train = input.Get(treeNameBkg + "_train" + width_str + mass_str)
    background_test = input.Get(treeNameBkg + "_test" + width_str + mass_str)

    # Global event weights (see below for setting event-wise weights)
    signalWeight = 1.0
    backgroundWeight = 1.0

    # ====== register trees ====================================================
    factory.AddSignalTree(signal_train, signalWeight, "train")
    factory.AddBackgroundTree(background_train, backgroundWeight, "train")
    factory.AddSignalTree(signal_test, signalWeight, "test")
    factory.AddBackgroundTree(background_test, backgroundWeight, "test")

    # Set individual event weights (the variables must exist in the original
    # TTree)
    factory.SetBackgroundWeightExpression("wt")
    factory.SetSignalWeightExpression("wt")

    # Apply additional cuts on the signal and background sample.
    # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )
    mycut = TCut("fabs(deltaMOverM)<=" + str(width) +
                 " && bdtoutput > -0.5")  #
    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    factory.PrepareTrainingAndTestTree(
        mycut, mycut,
        "nTrain_Signal=0:nTrain_Background=0:NormMode=NumEvents:!V")

    if (not test):
        # Default training: Likelihood variants plus AdaBoost and
        # gradient-boosted BDTs, one per philosophy.
        factory.BookMethod(
            TMVA.Types.kLikelihood, "Likelihood" + phil,
            "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50"
        )
        factory.BookMethod(
            TMVA.Types.kLikelihood, "LikelihoodD" + phil,
            "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate"
        )

        # BDT
        factory.BookMethod(
            TMVA.Types.kBDT, "BDTada" + phil,
            "!H:!V:NTrees=200:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=1.0:SeparationType=GiniIndex:nCuts=50:PruneMethod=NoPruning"
        )
        factory.BookMethod(
            TMVA.Types.kBDT, "BDTgrad" + phil,
            "!H:!V:NTrees=200:MaxDepth=3:BoostType=Grad:Shrinkage=0.5:UseBaggedGrad:GradBaggingFraction=1.0:SeparationType=GiniIndex:nCuts=50:NNodesMax=10"
        )

    else:  # test mode: scan BDT hyper-parameters instead of default booking
        # BDT ada: scan tree count and depth
        if testType == "ada":
            for nTrees in [10, 50, 100, 200, 500]:
                for depth in [2, 3]:
                    factory.BookMethod(
                        TMVA.Types.kBDT, "BDT_ada" + str(phil) + "_" +
                        str(nTrees) + "t_" + str(depth) + "d",
                        "!H:!V:NTrees=" + str(nTrees) +
                        ":nEventsMin=150:MaxDepth=" + str(depth) +
                        ":BoostType=AdaBoost:AdaBoostBeta=1:SeparationType=GiniIndex:nCuts=50:PruneMethod=NoPruning"
                    )

        # BDT grad: scan tree count, depth and shrinkage
        if testType == "grad":
            if testMethod == "nTrees":
                for nTrees in [10, 50, 100, 200, 500]:
                    for depth in [2, 3]:
                        for shrinkage in [0.05, 0.5, 1.]:
                            factory.BookMethod(
                                TMVA.Types.kBDT, "BDT_grad" + str(phil) + "_" +
                                str(nTrees) + "t_" + str(depth) + "d_" +
                                str(shrinkage) + "s", "!H:!V:NTrees=" +
                                str(nTrees) + ":MaxDepth=" + str(depth) +
                                ":BoostType=Grad:Shrinkage=" + str(shrinkage) +
                                ":UseBaggedGrad:GradBaggingFraction=1:SeparationType=GiniIndex:nCuts=50:NNodesMax=10"
                            )

    # ---- Now you can tell the factory to train, test, and evaluate the MVAs.

    # Train MVAs
    factory.TrainAllMethods()
    # Test MVAs
    factory.TestAllMethods()

    # Evaluate MVAs
    factory.EvaluateAllMethods()

    # Save the output.
    outputFile.Close()

    print "=== wrote root file %s\n" % outfname
    print "=== TMVAClassification is done!\n"