Python TMVA.gConfig Examples

Programming Language: Python

Namespace/Package Name: ROOT

Class/Type: TMVA

Method/Function: gConfig

Examples at hotexamples.com: 19

Python TMVA.gConfig - 19 examples found. These are the top rated real world Python examples of ROOT.TMVA.gConfig extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

DataLoader(30)

Factory(30)

Reader(28)

gConfig(15)

TMVAGui(6)

CrossValidation(2)

correlations(2)

efficiencies(2)

mvas(2)

variables(2)

CreateVariableTransforms(1)

GeneticFitterMod(1)

TMVARegGui(1)

TransformationHandler(1)

VariableImportance(1)

mvaeffs(1)

Example #1

Show file

def Evaluate(outdir):
  """Train, test and evaluate a TMVA RuleFit classifier from files in ``outdir``.

  Files read from ``outdir``:
    inputvars.txt    one input-variable name per line
    signals.txt      passed to ``factory.SetInputTrees`` as the signal input
    backgrounds.txt  passed to ``factory.SetInputTrees`` as the background input

  Files written to ``outdir``:
    tmva.log   receives everything subsequently written to ``sys.stdout``
    tmva.root  TMVA output file
    weights/   configured as the TMVA weight-file directory

  NOTE: ``sys.stdout`` stays redirected to ``tmva.log`` after this function
  returns; that behaviour is kept from the original implementation.
  """

  sys.stdout = open(outdir + '/tmva.log', 'w')

  # Output file
  output = TFile(outdir + '/tmva.root', 'RECREATE')

  # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
  # All TMVA output can be suppressed by removing the "!" (not) in
  # front of the "Silent" argument in the option string
  factory = TMVA.Factory("TMVARuleFit", output, "!V:!Silent:Color" )

  # Set the variables used for the analysis.
  # Strip only the line terminator (a plain [:-1] would eat a real character
  # when the last line has no trailing newline) and skip blank lines so that
  # no empty variable name is registered.
  with open(outdir + '/inputvars.txt') as varfile:
    for line in varfile:
      name = line.rstrip('\r\n')
      if name:
        factory.AddVariable(name, 'F')

  # Set the weight directory
  TMVA.gConfig().GetIONames().fWeightFileDir = outdir + "/weights"

  # Limit the creation of correlation plots
  TMVA.gConfig().GetVariablePlotting().fMaxNumOfAllowedVariablesForScatterPlots = 20

  # Set the input file with signal and background events
  factory.SetInputTrees(
    outdir + '/signals.txt',
    outdir + '/backgrounds.txt'
  )

  # Empty cuts: no additional selection on either sample
  cutsig = TCut('')
  cutbkg = TCut('')

  factory.PrepareTrainingAndTestTree( cutsig, cutbkg, "SplitMode=Random:NormMode=NumEvents:!V" )

  factory.BookMethod( TMVA.Types.kRuleFit, "RuleFit",
    "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.00001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" )

  # Train MVAs
  factory.TrainAllMethods()

  # Test MVAs
  factory.TestAllMethods()

  # Evaluate MVAs
  factory.EvaluateAllMethods()

  # Save the output.
  output.Close()

Example #2

Show file

File: tests.py Project: otmanway/root_numpy

    def fit(self, X, y, X_test=None, y_test=None, weights=None, weights_test=None, signal_label=None, **kwargs):
        """Check the event-adding helper's error handling, then book and train ``self.method``.

        Args:
            X, y: training inputs and targets/labels handed to the
                ``rnp.tmva`` event-adding function.
            X_test, y_test: test sample; when ``X_test`` is None the training
                sample (including ``weights``) is reused as the test sample.
            weights, weights_test: optional per-event weights.
            signal_label: forwarded to ``add_classification_events`` only
                (not used when ``self.task == "Regression"``).
            **kwargs: TMVA method options. ``True`` becomes ``name``,
                ``False`` becomes ``!name``, anything else ``name=value``;
                the pieces are joined with ``:``.
        """
        # (re)configure settings since deleting a previous Factory resets all
        # this. This is poor design, TMVA.
        config = TMVA.gConfig()
        config.GetIONames().fWeightFileDir = self.tmpdir
        config.SetSilent(True)
        config.SetDrawProgressBar(False)
        self.factory.DeleteAllMethods()

        extra_kwargs = dict()
        if self.task == "Regression":
            func = rnp.tmva.add_regression_events
        else:
            func = rnp.tmva.add_classification_events
            extra_kwargs["signal_label"] = signal_label

        # test exceptions
        assert_raises(TypeError, func, object(), X, y)
        # Floor division keeps the slice bound an int: with true division the
        # slice itself raises TypeError before func is ever called.
        assert_raises(ValueError, func, self.factory, X, y[: y.shape[0] // 2])
        if weights is not None:
            assert_raises(ValueError, func, self.factory, X, y, weights=weights[: weights.shape[0] // 2])
            assert_raises(ValueError, func, self.factory, X, y, weights=weights[:, np.newaxis])

        assert_raises(ValueError, func, self.factory, [[[1, 2]]], [1])
        assert_raises(ValueError, func, self.factory, [[1, 2]], [[[1]]])

        func(self.factory, X, y, weights=weights, **extra_kwargs)
        if X_test is None:
            X_test = X
            y_test = y
            weights_test = weights
        func(self.factory, X_test, y_test, weights=weights_test, test=True, **extra_kwargs)

        self.factory.PrepareTrainingAndTestTree(TCut("1"), "NormMode=EqualNumEvents")
        # Translate keyword arguments into a TMVA option string.
        options = []
        for param, value in kwargs.items():
            if value is True:
                options.append(param)
            elif value is False:
                options.append("!{0}".format(param))
            else:
                options.append("{0}={1}".format(param, value))
        options = ":".join(options)
        self.factory.BookMethod(self.method, self.method, options)
        self.factory.TrainAllMethods()

Example #3

Show file

File: tests.py Project: ziggi0703/root_numpy

    def fit(self, X, y, X_test=None, y_test=None,
            weights=None, weights_test=None,
            signal_label=None, **kwargs):
        """Check the event-adding helper's error handling, then book and train ``self.method``.

        Args:
            X, y: training inputs and targets/labels handed to the
                ``rnp.tmva`` event-adding function.
            X_test, y_test: optional test sample; test events are added only
                when both are given.
            weights, weights_test: optional per-event weights.
            signal_label: forwarded to ``add_classification_events`` only
                (not used when ``self.task == 'Regression'``).
            **kwargs: TMVA method options, rendered as ``name=value`` pairs
                joined with ``:``; a leading ``:`` is prepended when any
                option is present.
        """
        # (re)configure settings since deleting a previous Factory resets all
        # this. This is poor design, TMVA.
        config = TMVA.gConfig()
        config.GetIONames().fWeightFileDir = self.tmpdir
        config.SetSilent(True)
        config.SetDrawProgressBar(False)
        self.factory.DeleteAllMethods()

        extra_kwargs = dict()
        if self.task == 'Regression':
            func = rnp.tmva.add_regression_events
        else:
            func = rnp.tmva.add_classification_events
            extra_kwargs['signal_label'] = signal_label

        # test exceptions
        assert_raises(TypeError, func, object(), X, y)
        # Floor division keeps the slice bound an int: with true division the
        # slice itself raises TypeError before func is ever called.
        assert_raises(ValueError, func,
                      self.factory, X, y[:y.shape[0] // 2])
        if weights is not None:
            assert_raises(ValueError, func, self.factory, X, y,
                          weights=weights[:weights.shape[0] // 2])
            assert_raises(ValueError, func, self.factory, X, y,
                          weights=weights[:, np.newaxis])

        assert_raises(ValueError, func, self.factory, [[[1, 2]]], [1])
        assert_raises(ValueError, func, self.factory, [[1, 2]], [[[1]]])

        func(self.factory, X, y, weights=weights, **extra_kwargs)
        if X_test is not None and y_test is not None:
            func(self.factory, X_test, y_test,
                 weights=weights_test, test=True, **extra_kwargs)

        self.factory.PrepareTrainingAndTestTree(
            TCut('1'), 'NormMode=EqualNumEvents')
        # Translate keyword arguments into a TMVA option string.
        options = ':'.join(['{0}={1}'.format(param, value)
                            for param, value in kwargs.items()])
        if options:
            options = ':' + options
        self.factory.BookMethod(self.method, self.method, options)
        self.factory.TrainAllMethods()

Example #4

Show file

def main():
    """Train, test and evaluate one TMVA BDT variant for a single mass point.

    Command-line options (parsed with ``getopt``):
      -m/--methods     method to book: BDT, BDTG, BDTMitFisher, BDTB or BDTD
      -n/--nTrees      number of trees
      -d/--maxDepth    maximum tree depth
      -k/--mass        mass point; selects the signal input file name
      -l/--varListKey  key into ``varsList.varList``
      -i/--inputfile, -o/--outputfile, -t/--inputtrees
                       parsed and validated, but the values are not used
                       further in this function (the input and output file
                       names are rebuilt from the other options)
      -v/--verbose     accepted; verbosity already defaults to True
      -h/-?/--help/--usage  print usage and exit

    Relies on names defined outside this function: the ``DEFAULT_*``
    constants, ``usage``, ``varsList``, ``weightStrS``/``weightStrB`` and
    ``cutStrS``/``cutStrB``.

    Output: ``weights/TMVA_<Note>.root`` plus TMVA weight files and plots
    under ``weights/<Note>``. The process working directory is changed to
    ``weights/<Note>`` before plotting and is not restored.

    ``print`` is always called with a single parenthesised argument so the
    function behaves the same under Python 2 and Python 3.
    """

    try:
        # retrieve command line options
        shortopts = "m:i:n:d:k:l:t:o:vh?"
        longopts = [
            "methods=", "inputfile=", "nTrees=", "maxDepth=", "mass=",
            "varListKey=", "inputtrees=", "outputfile=", "verbose", "help",
            "usage"
        ]
        opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts)

    except getopt.GetoptError:
        # print help information and exit:
        print("ERROR: unknown options in argument %s" % sys.argv[1:])
        usage()
        sys.exit(1)

    infname = DEFAULT_INFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname = DEFAULT_OUTFNAME
    methods = DEFAULT_METHODS
    nTrees = DEFAULT_NTREES
    mDepth = DEFAULT_MDEPTH
    mass = DEFAULT_MASS
    varListKey = DEFAULT_VARLISTKEY
    verbose = True
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-d", "--maxDepth"):
            mDepth = a
        elif o in ("-k", "--mass"):
            mass = a
        elif o in ("-l", "--varListKey"):
            varListKey = a
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-n", "--nTrees"):
            nTrees = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            # Empty fragments produced by extra blanks sort last after the
            # descending sort, so the two tree names end up in front.
            trees = a.rsplit(' ')
            trees.sort()
            trees.reverse()
            if len(trees) - trees.count('') != 2:
                print("ERROR: need to give two trees (each one for signal and background)")
                print(trees)
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True

    varList = varsList.varList[varListKey]
    nVars = str(len(varList)) + 'vars'
    # Tag identifying this training; used for the output file and weight dir.
    Note = '%s_%s_%s_mDepth%s_M%s' % (methods, varListKey, nVars, mDepth, mass)
    outfname = "weights/TMVA_" + Note + ".root"
    # Print methods
    mlist = methods.replace(' ', ',').split(',')
    print("=== TMVAClassification: use method(s)...")
    for m in mlist:
        if m.strip() != '':
            print("=== - <%s>" % m.strip())

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut

    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print("*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA")
        print("*** does not run properly (function calls with enums in the argument are ignored).")
        print("*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),")
        print("*** or use another ROOT version (e.g., ROOT 5.19).")
        sys.exit(1)

    # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI
    #     gROOT.SetMacroPath( "./" )
    #     gROOT.Macro       ( "./TMVAlogon.C" )
    #     gROOT.LoadMacro   ( "./TMVAGui.C" )

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile(outfname, 'RECREATE')

    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in
    # front of the "Silent" argument in the option string
    #     factory = TMVA.Factory( "TMVAClassification", outputFile,
    #                             "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" )
    factory = TMVA.Factory(
        "TMVAClassification", outputFile,
        "!V:!Silent:Color:DrawProgressBar:Transformations=I;:AnalysisType=Classification"
    )

    # Set verbosity
    factory.SetVerbose(verbose)

    # If you wish to modify default settings
    # (please check "src/Config.h" to see all available global options)
    #    gConfig().GetVariablePlotting()).fTimesRMS = 8.0
    (TMVA.gConfig().GetIONames()).fWeightFileDir = "weights/" + Note

    # Define the input variables that shall be used for the classifier training
    # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    # [all types of expressions that can also be parsed by TTree::Draw( "expression" )]

    for iVar in varList:
        # The jet multiplicity is registered as an integer, all else as float.
        varType = 'I' if iVar[0] == 'NJets_singleLepCalc' else 'F'
        factory.AddVariable(iVar[0], iVar[1], iVar[2], varType)

    # You can add so-called "Spectator variables", which are not used in the MVA training,
    # but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
    # input variables, the response values of all trained MVAs, and the spectator variables

    inputDir = varsList.inputDir
    print('mass point %s' % mass)
    infname = "ChargedHiggs_HplusTB_HplusToTB_M-%s_13TeV_amcatnlo_pythia8_hadd.root" % (
        mass)
    iFileSig = TFile.Open(inputDir + infname)
    sigChain = iFileSig.Get("ljmet")
    # BDT machinery
    factory.AddSignalTree(sigChain)

    # Every opened file and tree is kept in a list for the rest of the
    # function, as in the original code.
    # NOTE(review): presumably this keeps the ROOT objects alive while the
    # factory uses the trees -- confirm.
    bkg_list = []
    bkg_trees_list = []
    for bkgName in varsList.bkg:
        bkgPath = inputDir + bkgName
        bkgFile = TFile.Open(bkgPath)
        bkg_list.append(bkgFile)
        print(bkgPath)
        bkgTree = bkgFile.Get("ljmet")
        bkg_trees_list.append(bkgTree)
        bkgTree.GetEntry(0)

        # Empty background samples are not registered with the factory.
        if bkgTree.GetEntries() == 0:
            continue
        factory.AddBackgroundTree(bkgTree, 1)

    # ====== register trees ====================================================
    # To give different trees for training and testing, do as follows:
    #    factory.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" )
    #    factory.AddSignalTree( signalTestTree,     signalTestWeight,  "Test" )

    # Use the following code instead of the above two or four lines to add signal and background
    # training and test events "by hand"
    # NOTE that in this case one should not give expressions (such as "var1+var2") in the input
    #      variable definition, but simply compute the expression before adding the event
    #
    #    # --- begin ----------------------------------------------------------
    #
    # ... *** please lookup code in TMVA/macros/TMVAClassification.C ***
    #
    #    # --- end ------------------------------------------------------------
    #
    # ====== end of register trees ==============================================

    # Set individual event weights (the variables must exist in the original TTree)
    #    for signal    : factory.SetSignalWeightExpression    ("weight1*weight2");
    #    for background: factory.SetBackgroundWeightExpression("weight1*weight2");
    factory.SetSignalWeightExpression(weightStrS)
    factory.SetBackgroundWeightExpression(weightStrB)

    # Apply additional cuts on the signal and background sample.
    # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )
    mycutSig = TCut(cutStrS)
    mycutBkg = TCut(cutStrB)

    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
    factory.PrepareTrainingAndTestTree(
        mycutSig,
        mycutBkg,
        #                                         "nTrain_Signal=0:nTrain_Background=0:nTest_Signal=10:nTest_Background=100:SplitMode=Random:NormMode=NumEvents:!V" )
        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V"
    )

    # --------------------------------------------------------------------------------------------------

    # ---- Book MVA methods
    #
    # please lookup the various method configuration options in the corresponding cxx files, eg:
    # src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
    # it is possible to preset ranges in the option string in which the cut optimisation should be done:
    # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

    # bdtSetting for "BDT"
    bdtSetting = '!H:!V:NTrees=%s:MaxDepth=%s' % (nTrees, mDepth)
    bdtSetting += ':MinNodeSize=2.5%:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20'
    bdtSetting += ':IgnoreNegWeightsInTraining=True'
    # bdtSetting for "BDTMitFisher"
    bdtFSetting = '!H:!V:NTrees=%s' % nTrees
    bdtFSetting += ':MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20'
    bdtFSetting += ':IgnoreNegWeightsInTraining=True'
    # bdtSetting for "BDTG"
    bdtGSetting = '!H:!V:NTrees=%s:MaxDepth=%s' % (nTrees, mDepth)
    bdtGSetting += ':MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20'
    bdtGSetting += ':Pray'  # Pray takes into account the effect of negative bins in BDTG
    #bdtGSetting += ':IgnoreNegWeightsInTraining=True'
    # bdtSetting for "BDTB"
    bdtBSetting = '!H:!V:NTrees=%s' % nTrees
    bdtBSetting += ':MinNodeSize=2.5%:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20'
    bdtBSetting += ':IgnoreNegWeightsInTraining=True'
    # bdtSetting for "BDTD"
    bdtDSetting = '!H:!V:NTrees=%s' % nTrees
    bdtDSetting += ':MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate'
    bdtDSetting += ':IgnoreNegWeightsInTraining=True'
    # Note also that explicitly setting *nEventsMin* so far OVERWRITES the recommended option

    # BOOKING AN ALGORITHM
    # Exactly one variant is booked, selected by the exact value of `methods`;
    # any other value books nothing.
    bdtOptionsByMethod = {
        "BDT": bdtSetting,
        "BDTG": bdtGSetting,
        "BDTMitFisher": bdtFSetting,
        "BDTB": bdtBSetting,
        "BDTD": bdtDSetting,
    }
    if methods in bdtOptionsByMethod:
        factory.BookMethod(TMVA.Types.kBDT, methods,
                           bdtOptionsByMethod[methods])
    # --------------------------------------------------------------------------------------------------

    # ---- Now you can tell the factory to train, test, and evaluate the MVAs.

    # Train MVAs
    factory.TrainAllMethods()

    # Test MVAs
    factory.TestAllMethods()

    # Evaluate MVAs
    factory.EvaluateAllMethods()

    # Save the output.
    outputFile.Close()

    # save plots:
    # The plotting macros are run from inside the weight directory, hence the
    # "../../" prefix on the output file name.
    os.chdir('weights/' + Note)
    plotInput = "../../" + outfname
    #TMVA.mvaeffs( plotInput ) #Classifier Cut Efficiencies
    gROOT.SetBatch(1)
    TMVA.efficiencies(plotInput)  # Classifier Background Rejection vs Signal Efficiency (ROC curve)
    TMVA.mvas(plotInput, 0)  # Classifier Output Distributions (test sample)
    TMVA.correlations(plotInput)  # Input Variable Linear Correlation Coefficients
    TMVA.variables(plotInput)  # Input variables (training sample)
    #TMVA.mvas( plotInput ) #Classifier Output Distributions (test and training samples superimposed)
    # The GUI is only started when ROOT is not in batch mode.
    if not gROOT.IsBatch():
        TMVA.TMVAGui(plotInput)
    # An earlier revision converted the produced .eps plots to
    # <name>_<Note>.png with `convert` via os.system and then removed the
    # .eps files; that step is disabled.
    print("DONE")

Example #5

Show file

File: vbf_bdt_trainer.py Project: jbueghly/hzg_analysis

        'BDTD': 0,
        'BDTF': 0,
        'MLP': 0,
        'MLPBFGS': 0,
        'MLPBNN': 0,
        'CFMlpANN': 0,
        'TMlpANN': 0
    }

    factory = t.Factory(
        'vbf_bdt_combined_james_current', outputFile,
        '!V:!Silent:Color:DrawProgressBar:Transformations=I;D;G,D:AnalysisType=Classification'
    )
    factory.Print()

    t.gConfig().GetIONames().fWeightFileDir = outputWeightsDir
    dataloader = t.DataLoader(".")

    dataloader.AddVariable('dijetDEta', 'dijetDEta', 'dijetDEta', 'F')
    dataloader.AddVariable('dijetDPhi', 'dijetDPhi', 'dijetDPhi', 'F')
    dataloader.AddVariable('llgJJDPhi', 'llgJJDPhi', 'llgJJDPhi', 'F')
    dataloader.AddVariable('jPhotonDRMin', 'jPhotonDRMin', 'jPhotonDRMin', 'F')
    dataloader.AddVariable('ptt', 'ptt', 'ptt', 'F')
    dataloader.AddVariable('jetOnePt', 'jetOnePt', 'jetOnePt', 'F')
    dataloader.AddVariable('jetTwoPt', 'jetTwoPt', 'jetTwoPt', 'F')
    dataloader.AddVariable('kin_bdt_james', 'kin_bdt_james', 'kin_bdt_james',
                           'F')
    dataloader.AddVariable('vbfPtBalance', 'vbfPtBalance', 'vbfPtBalance', 'F')
    dataloader.AddVariable('photonZepp', 'photonZepp', 'photonZepp', 'F')

    # trees for training

Example #6

Show file

# Build the train/test split option string from the requested event counts.
splitOptions = "".join([
    "nTrain_Signal=", str(NSIG),
    ":nTrain_Background=", str(NBKG),
    ":nTest_Signal=", str(NSIG_TEST),
    ":nTest_Background=", str(NBKG_TEST),
    ":SplitMode=Random:NormMode=NumEvents:!V",
])
loader.PrepareTrainingAndTestTree(mycutSig, mycutBkg, splitOptions)

# Factory created without an output file; only a job name and options.
factoryOptions = '!V:!ROC:!Silent:Color:!DrawProgressBar:Transformations=I;:AnalysisType=Classification'
factory = TMVA.Factory("Optimization", factoryOptions)

# Directory where TMVA writes the weight files.
ioNames = TMVA.gConfig().GetIONames()
ioNames.fWeightFileDir = '/weights'

# Option string for the PyKeras method, assembled piece by piece.
kerasSetting = "".join([
    '!H:!V:VarTransform=G:FilenameModel=', modelName,
    ':SaveBestOnly=true',
    ':NumEpochs=', str(epochs),
    ':BatchSize=', str(batchSize),
    ':TriesEarlyStopping=', str(patience),
])

factory.BookMethod(loader, TMVA.Types.kPyKeras, "PyKeras", kerasSetting)

# Run the full train / test / evaluate cycle.
for runStage in (factory.TrainAllMethods,
                 factory.TestAllMethods,
                 factory.EvaluateAllMethods):
    runStage()

# ROC integral of the trained PyKeras method for this dataset.
rocDataset = dataset + "/optimize_" + outf_key
ROC = factory.GetROCIntegral(rocDataset, "PyKeras")

Example #7

Show file

File: CutsOptimization.py Project: tahuang1991/delphes

def main():

    try:
        # retrieve command line options
        shortopts  = "w:m:i:j:f:g:t:o:a:vgh?"
        longopts   = ["weight_fold=", "methods=", "inputfilesig=", "inputfilebkg=", "friendinputfilesig=", "friendinputfilebkg=", "inputtrees=", "outputfile=", "verbose", "gui", "help", "usage"]
        opts, args = getopt.getopt( sys.argv[1:], shortopts, longopts )

    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    infnameSig     = DEFAULT_INFNAMESIG
    infnameBkg     = DEFAULT_INFNAMEBKG
    friendfnameSig = DEFAULT_FRIENDNAMESIG
    friendfnameBkg = DEFAULT_FRIENDNAMEBKG
    treeNameSig    = DEFAULT_TREESIG
    treeNameBkg    = DEFAULT_TREEBKG
    outfname       = DEFAULT_OUTFNAME
    methods        = DEFAULT_METHODS
    weight_fold    = "weights"
    verbose        = False
    gui            = False
    addedcuts      = ""
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-w", "--weight_fold"):
            weight_fold = a
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-i", "--inputfilesig"):
            infnameSig = a
        elif o in ("-j", "--inputfilebkg"):
            infnameBkg = a
        elif o in ("-f", "--friendinputfilesig"):
            friendfnameSig = a
        elif o in ("-g", "--friendinputfilebkg"):
            friendfnameBkg = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-a", "--addedcuts"):
            addedcuts = a
        elif o in ("-t", "--inputtrees"):
            a.strip()
            trees = a.rsplit( ' ' )
            trees.sort()
            trees.reverse()
            if len(trees)-trees.count('') != 2:
                print "ERROR: need to give two trees (each one for signal and background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True
        elif o in ("-g", "--gui"):
            gui = True

    # Print methods
    mlist = methods.replace(' ',',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Print the file
    print "Using file " + infnameSig + " for signal..."
    print "Using file " + infnameBkg + " for background..."

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut
    
    # check ROOT version, give alarm if 5.18
    print "ROOT version is " + str(gROOT.GetVersionCode())
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)
    
    # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI
    gROOT.SetMacroPath( "./" )
## SO I TAKE DEFAULT FORM ROOT#    gROOT.Macro       ( "./TMVAlogon.C" )    
    gROOT.LoadMacro   ( "./TMVAGui.C" )
    
    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile( outfname, 'RECREATE' )
    
    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in 
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory( "TMVAClassification", outputFile, 
                            "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" )

    # Set verbosity
    factory.SetVerbose( verbose )
    
    # If you wish to modify default settings 
    # (please check "src/Config.h" to see all available global options)
    #    gConfig().GetVariablePlotting()).fTimesRMS = 8.0
    (TMVA.gConfig().GetIONames()).fWeightFileDir = weight_fold;

    # Define the input variables that shall be used for the classifier training
    # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    # [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
    factory.AddVariable( "dR_l1l2", "dR_l1l2", "", 'F' )
    factory.AddVariable( "dR_b1b2", "dR_b1b2", "", 'F' )
    factory.AddVariable( "dR_bl", "dR_bl", "", 'F' )
    factory.AddVariable( "dR_l1l2b1b2", "dR_l1l2b1b2", "", 'F' )
    factory.AddVariable( "MINdR_bl", "MINdR_bl", "", 'F' )
    factory.AddVariable( "dphi_l1l2b1b2", "dphi_l1l2b1b2", "", 'F' )
    factory.AddVariable( "mass_l1l2", "mass_l1l2", "", 'F' )
    factory.AddVariable( "mass_b1b2", "mass_b1b2", "", 'F' )
    factory.AddVariable( "mass_trans", "mass_trans", "", 'F' )
    factory.AddVariable( "MT2", "MT2", "", 'F' )
    factory.AddVariable( "pt_b1b2", "pt_b1b2", "", 'F' )
    factory.AddVariable( "MMC_h2massweight1_prob", "MMC_h2massweight1_prob", "", 'F' ) ##ADDED

    # You can add so-called "Spectator variables", which are not used in the MVA training, 
    # but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the 
    # input variables, the response values of all trained MVAs, and the spectator variables
    # factory.AddSpectator( "spec1:=var1*2",  "Spectator 1", "units", 'F' )
    # factory.AddSpectator( "spec2:=var1*3",  "Spectator 2", "units", 'F' )

    # Read input data
    if gSystem.AccessPathName( infnameSig ) != 0 or gSystem.AccessPathName( infnameBkg ): gSystem.Exec( "wget http://root.cern.ch/files/" + infname )
        
    inputSig = TFile.Open( infnameSig )
    inputBkg = TFile.Open( infnameBkg )

    # Get the signal and background trees for training
    signal      = inputSig.Get( treeNameSig )
    background  = inputBkg.Get( treeNameBkg )

    ##signal.AddFriend( "eleIDdir/isoT1 = eleIDdir/T1", friendfnameSig )
    ##background.AddFriend( "eleIDdir/isoT1 = eleIDdir/T1", friendfnameBkg )

    # Global event weights (see below for setting event-wise weights)
    signalWeight     = 1.
    backgroundWeight = 1.

# I don't think there's a general answer to this. The safest 'default'
# is to use the event weights such that you have equal amounts of signal
# and background for the training. Otherwise, for example, if you look for
# a rare signal and use the weight to scale the number of events according
# to the expected ratio of signal and background for the luminosity, the
# classifier sees hardly any signal events and "thinks": Oh, I just
# classify everything as background and do a good job!
#
# One can try to 'optimize' the training a bit more for either 'high
# purity' or 'high efficiency' by choosing different weights, but as I
# said, there's no fixed rule. You'd have to 'try' and see if you get
# better results by playing with the weights.

    # ====== register trees ====================================================
    #
    # the following method is the prefered one:
    # you can add an arbitrary number of signal or background trees
    factory.AddSignalTree    ( signal,     signalWeight     )
    factory.AddBackgroundTree( background, backgroundWeight )

    # To give different trees for training and testing, do as follows:
    #    factory.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" )
    #    factory.AddSignalTree( signalTestTree,     signalTestWeight,  "Test" )
    
    # Use the following code instead of the above two or four lines to add signal and background 
    # training and test events "by hand"
    # NOTE that in this case one should not give expressions (such as "var1+var2") in the input 
    #      variable definition, but simply compute the expression before adding the event
    #
    #    # --- begin ----------------------------------------------------------
    #    
    # ... *** please lookup code in TMVA/macros/TMVAClassification.C ***
    #    
    #    # --- end ------------------------------------------------------------
    #
    # ====== end of register trees ==============================================    
            
    # Set individual event weights (the variables must exist in the original TTree)
    #    for signal    : factory.SetSignalWeightExpression    ("weight1*weight2");
    #    for background: factory.SetBackgroundWeightExpression("weight1*weight2");

    # Apply additional cuts on the signal and background sample. 
    # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )
    #mycutSig = TCut( "nu1and2_diBaxis_t>-900 && met_diBaxis_t>-900&& hasb1jet && hasb2jet && hasMET && hasGenMET && hasdRljet && hastwomuons" ) 
    mycutSig = TCut( addedcuts ) 
    #mycutBkg = TCut( "event_n%2!=0 && " + addedcuts ) 
    mycutBkg = TCut( addedcuts ) 
    #mycutBkg = TCut( "nu1and2_diBaxis_t>-900 && met_diBaxis_t>-900&& hasb1jet && hasb2jet && hasMET && hasGenMET && hasdRljet && hastwomuons" ) 

    print mycutSig

    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
    factory.PrepareTrainingAndTestTree( mycutSig, mycutBkg,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" )

    # --------------------------------------------------------------------------------------------------

    # ---- Book MVA methods
    #
    # please lookup the various method configuration options in the corresponding cxx files, eg:
    # src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
    # it is possible to preset ranges in the option string in which the cut optimisation should be done:
    # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

    # Cut optimisation
    if "Cuts" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "Cuts",
                            "!H:!V:FitMethod=MC:EffSel:VarProp[0]=FMax:VarProp[1]=FMin" )

    if "CutsD" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsD",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" )

    if "CutsPCA" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsPCA",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" )

    if "CutsGA" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsGA",
                            "H:!V:FitMethod=GA:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95:VarProp[0]=FMin:VarProp[1]=FMax" )

    if "CutsSA" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsSA",
                            "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" )

    # Likelihood ("naive Bayes estimator")
    if "Likelihood" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "Likelihood",
                            "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" )

    # Decorrelated likelihood
    if "LikelihoodD" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodD",
                            "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" )

    # PCA-transformed likelihood
    if "LikelihoodPCA" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodPCA",
                            "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ) 

    # Use a kernel density estimator to approximate the PDFs
    if "LikelihoodKDE" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodKDE",
                            "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ) 

    # Use a variable-dependent mix of splines and kernel density estimator
    if "LikelihoodMIX" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodMIX",
                            "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ) 

    # Test the multi-dimensional probability density estimator
    # here are the options strings for the MinMax and RMS methods, respectively:
    #      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
    #      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
    if "PDERS" in mlist:
        factory.BookMethod( TMVA.Types.kPDERS, "PDERS",
                            "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" )

    if "PDERSD" in mlist:
        factory.BookMethod( TMVA.Types.kPDERS, "PDERSD",
                            "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" )

    if "PDERSPCA" in mlist:
        factory.BookMethod( TMVA.Types.kPDERS, "PDERSPCA",
                             "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" )

   # Multi-dimensional likelihood estimator using self-adapting phase-space binning
    if "PDEFoam" in mlist:
        factory.BookMethod( TMVA.Types.kPDEFoam, "PDEFoam",
                            "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" )

    if "PDEFoamBoost" in mlist:
        factory.BookMethod( TMVA.Types.kPDEFoam, "PDEFoamBoost",
                            "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" )

    # K-Nearest Neighbour classifier (KNN)
    if "KNN" in mlist:
        factory.BookMethod( TMVA.Types.kKNN, "KNN",
                            "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" )

    # H-Matrix (chi2-squared) method
    if "HMatrix" in mlist:
        factory.BookMethod( TMVA.Types.kHMatrix, "HMatrix", "!H:!V" )

    # Linear discriminant (same as Fisher discriminant)
    if "LD" in mlist:
        factory.BookMethod( TMVA.Types.kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" )

    # Fisher discriminant (same as LD)
    if "Fisher" in mlist:
        factory.BookMethod( TMVA.Types.kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" )

    # Fisher with Gauss-transformed input variables
    if "FisherG" in mlist:
        factory.BookMethod( TMVA.Types.kFisher, "FisherG", "H:!V:VarTransform=Gauss" )

    # Composite classifier: ensemble (tree) of boosted Fisher classifiers
    if "BoostedFisher" in mlist:
        factory.BookMethod( TMVA.Types.kFisher, "BoostedFisher", 
                            "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" )

    # Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
    if "FDA_MC" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_MC",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );

    if "FDA_GA" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_GA",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

    if "FDA_SA" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_SA",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

    if "FDA_MT" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_MT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

    if "FDA_GAMT" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_GAMT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

    if "FDA_MCMT" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_MCMT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

    # TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
    if "MLP" in mlist:
        factory.BookMethod( TMVA.Types.kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" )

    if "MLPBFGS" in mlist:
        factory.BookMethod( TMVA.Types.kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" )

    if "MLPBNN" in mlist:
        factory.BookMethod( TMVA.Types.kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ) # BFGS training with bayesian regulators

    # CF(Clermont-Ferrand)ANN
    if "CFMlpANN" in mlist:
        factory.BookMethod( TMVA.Types.kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ) # n_cycles:#nodes:#nodes:...  

    # Tmlp(Root)ANN
    if "TMlpANN" in mlist:
        factory.BookMethod( TMVA.Types.kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ) #n_cycles:#nodes:#nodes:...

    # Support Vector Machine
    if "SVM" in mlist:
        factory.BookMethod( TMVA.Types.kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" )

    # Boosted Decision Trees
    if "BDTG" in mlist:
        factory.BookMethod( TMVA.Types.kBDT, "BDTG",
                            "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5" )

    if "BDT" in mlist:
        factory.BookMethod( TMVA.Types.kBDT, "BDT",
                            "!H:!V:NTrees=850:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" )

    if "BDTB" in mlist:
        factory.BookMethod( TMVA.Types.kBDT, "BDTB",
                            "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" )

    if "BDTD" in mlist:
        factory.BookMethod( TMVA.Types.kBDT, "BDTD",
                            "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" )

    # RuleFit -- TMVA implementation of Friedman's method
    if "RuleFit" in mlist:
        factory.BookMethod( TMVA.Types.kRuleFit, "RuleFit",
                            "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" )

    # --------------------------------------------------------------------------------------------------
            
    # ---- Now you can tell the factory to train, test, and evaluate the MVAs. 

    # Train MVAs
    factory.TrainAllMethods()
    
    # Test MVAs
    factory.TestAllMethods()
    
    # Evaluate MVAs
    factory.EvaluateAllMethods()    
    
    # Save the output.
    outputFile.Close()
    
    print "=== wrote root file %s\n" % outfname
    print "=== TMVAClassification is done!\n"
    
    # open the GUI for the result macros    
    if( gui ):
        gROOT.ProcessLine( "TMVAGui(\"%s\")" % outfname )
        # keep the ROOT thread running
        gApplication.Run()

Example #8

Show file

File: TMVAClassification_Training.py Project: rhemmer/TTTT_TMVA_DNN

def _lines_after_marker(path, marker):
    """Return the lines of *path* that follow the first line containing *marker*.

    An empty list is returned when no line contains *marker*.
    """
    with open(path, "r") as handle:
        lines = handle.readlines()
    for position, line in enumerate(lines):
        if marker in line:
            return lines[position + 1:]
    return []


def main():  # runs the program
    """Train, test and evaluate a PyKeras classifier through TMVA.

    Command line options (parsed with ``getopt``):
      -d/--dataset  dataset path (default "dataset"); its first "/"-separated
                    component names the TMVA DataLoader and the output folder
      -o/--option   "0" (default): use ``varsList.varList["DNN"]`` and the
                    fixed hyper parameters below;
                    "1": read the variable list and the optimized hyper
                    parameters from files inside the dataset path
      -w/--where    "lpc" (default) selects the LPC input directories, any
                    other value selects the BRUX ones
      -y/--year     2017 (default) or 2018
      -v VALUE      verbosity, interpreted as ``bool(VALUE)``
      --verbose     switch verbosity on
      -h/-?/--help/--usage  print the usage text and exit

    Exits with status 1 on unknown options, an unsupported year or option,
    or (option "1") a dataset path without a hyper parameter file.
    """
    checkRootVer()  # check that ROOT version is correct

    try:  # retrieve command line options
        shortopts = "d:o:v:w:y:h?"  # possible command line options
        longopts = [
            "dataset=", "option=", "where=", "year=", "verbose", "help",
            "usage"
        ]
        opts, args = getopt.getopt(
            sys.argv[1:], shortopts,
            longopts)  # associates command line inputs to variables

    except getopt.GetoptError:  # output error if command line argument invalid
        print("ERROR: unknown options in argument %s" % sys.argv[1:])
        usage()
        sys.exit(1)

    # Defaults, overridden below by whatever was given on the command line.
    settings = {
        "dataset": "dataset",
        "where": "lpc",
        "year": 2017,
        "option": 0,
        "verbose": True,
    }
    flag_to_setting = {
        "-d": "dataset", "--dataset": "dataset",
        "-w": "where", "--where": "where",
        "-y": "year", "--year": "year",
        "-o": "option", "--option": "option",
        "-v": "verbose", "--verbose": "verbose",
    }

    for opt, arg in opts:
        if opt in ("-?", "-h", "--help",
                   "--usage"):  # provides command line help
            usage()
            sys.exit(0)
        if opt == "--verbose":
            # The long flag takes no argument, so getopt hands over '';
            # storing that would turn verbosity *off* via bool('').
            settings["verbose"] = True
        elif opt in flag_to_setting:
            settings[flag_to_setting[opt]] = arg

    # Initialize some containers. The opened files and trees are collected
    # here; presumably this keeps the ROOT objects referenced while TMVA
    # uses them -- TODO confirm.
    bkg_list = []
    bkg_trees_list = []
    sig_list = []
    sig_trees_list = []

    # Normalise the types: defaults are ints/bools while command line values
    # are strings, so compare OPTION as a string and YEAR as an int throughout.
    DATASETPATH = settings["dataset"]
    DATASET = DATASETPATH.split("/")[0]
    OPTION = str(settings["option"])
    VERBOSE = settings["verbose"]
    WHERE = settings["where"]
    try:
        YEAR = int(settings["year"])
    except ValueError:
        YEAR = None

    if YEAR not in (2017, 2018):
        print("ERROR: unsupported year %s (expected 2017 or 2018)" %
              settings["year"])
        usage()
        sys.exit(1)
    if OPTION not in ("0", "1"):
        print("ERROR: unsupported option %s (expected 0 or 1)" % OPTION)
        usage()
        sys.exit(1)

    if WHERE == "lpc":
        if YEAR == 2017:
            inputDir = varsList.inputDirLPC2017
        else:
            inputDir = varsList.inputDirLPC2018
    else:
        if YEAR == 2017:
            inputDir = varsList.inputDirBRUX2017
        else:
            inputDir = varsList.inputDirBRUX2018

    varsListHPOPath = os.path.join(DATASETPATH, "varsListHPO.txt")

    if OPTION == "0":
        print("Using Option 0: default varList")
        varList = varsList.varList["DNN"]
    else:
        print("Using Option 1: selected data from {}".format(DATASETPATH))
        # Every non-empty line after the "Variable List:" header names one
        # variable. The "ROC Value: ..." line that this function appends to
        # the same file at the end of a run is not a variable and is skipped.
        varList = []
        for line in _lines_after_marker(varsListHPOPath, "Variable List:"):
            entry = line.strip()
            if entry and not entry.startswith("ROC Value:"):
                varList.append(entry)

    numVars = len(varList)
    outf_key = str("Keras_" + str(numVars) + "vars")
    OUTF_NAME = DATASET + "/weights/TMVA_" + outf_key + ".root"
    outputfile = TFile(OUTF_NAME, "RECREATE")

    # initialize and set-up TMVA factory

    factory = TMVA.Factory(
        "Training", outputfile,
        "!V:!ROC:Silent:Color:!DrawProgressBar:Transformations=I;:AnalysisType=Classification"
    )

    factory.SetVerbose(bool(VERBOSE))
    (TMVA.gConfig().GetIONames()).fWeightFileDir = "weights/" + outf_key

    # initialize and set-up TMVA loader

    loader = TMVA.DataLoader(DATASET)

    for var in varList:
        if OPTION == "0":
            # default list entries are indexable: (expression, title, unit)
            name, title, unit = var[0], var[1], var[2]
        else:
            # entries read from varsListHPO.txt are bare variable names
            name, title, unit = var, "", ""
        var_type = "I" if name == "NJets_MultiLepCalc" else "F"
        loader.AddVariable(name, title, unit, var_type)

    # Pick the file lists that belong to the requested year.
    if YEAR == 2017:
        sig_files = varsList.sig2017_2
        bkg_files = varsList.bkg2017_2
    else:
        sig_files = varsList.sig2018_2
        bkg_files = varsList.bkg2018_2

    # add signal files
    for sig_name in sig_files:
        sig_file = TFile.Open(inputDir + sig_name)
        sig_tree = sig_file.Get("ljmet")
        sig_list.append(sig_file)
        sig_trees_list.append(sig_tree)
        sig_tree.GetEntry(0)
        loader.AddSignalTree(sig_tree)

    # add background files
    for bkg_name in bkg_files:
        bkg_file = TFile.Open(inputDir + bkg_name)
        bkg_tree = bkg_file.Get("ljmet")
        bkg_list.append(bkg_file)
        bkg_trees_list.append(bkg_tree)
        bkg_tree.GetEntry(0)

        if bkg_tree.GetEntries() == 0:
            continue  # empty trees are not registered with the loader
        loader.AddBackgroundTree(bkg_tree)

    loader.SetSignalWeightExpression(weightStrS)
    loader.SetBackgroundWeightExpression(weightStrB)

    mycutSig = TCut(cutStrS)
    mycutBkg = TCut(cutStrB)

    loader.PrepareTrainingAndTestTree(
        mycutSig, mycutBkg,
        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V"
    )

    ######################################################
    ######################################################
    ######                                          ######
    ######            K E R A S   D N N             ######
    ######                                          ######
    ######################################################
    ######################################################
    HIDDEN = 0
    NODES = 0
    LRATE = 0.
    PATTERN = ""
    REGULATOR = ""
    ACTIVATION = ""
    BATCH_SIZE = 0
    # modify this when implementing hyper parameter optimization:
    model_name = 'TTTT_' + str(numVars) + 'vars_model.h5'

    EPOCHS = 100
    PATIENCE = 20

    # edit these based on hyper parameter optimization results
    if OPTION == "0":
        HIDDEN = 3
        NODES = 100
        LRATE = 0.01
        PATTERN = 'static'
        REGULATOR = 'none'
        ACTIVATION = 'relu'
        BATCH_SIZE = 256
    else:
        # The hyper parameter file is the directory entry whose name contains
        # "params"; when several match, the last one listed wins.
        optFileName = None
        for entry in os.listdir(DATASETPATH):
            if "params" in entry:
                optFileName = entry
        if optFileName is None:
            print("ERROR: no hyper parameter file (name containing 'params') "
                  "found in {}".format(DATASETPATH))
            sys.exit(1)

        def field(text):
            # value part of a "Name: value" line
            return text.split(":")[1].strip()

        # Only lines after the "Optimized Parameters:" header are parsed. The
        # checks are independent on purpose: one line may match several keys.
        for line in _lines_after_marker(
                os.path.join(DATASETPATH, optFileName),
                "Optimized Parameters:"):
            if "Hidden" in line:
                HIDDEN = int(field(line))
            if "Initial" in line:
                NODES = int(field(line))
            if "Batch" in line:
                # the file stores the exponent of the batch size
                BATCH_SIZE = 2**int(field(line))
            if "Learning" in line:
                LRATE = float(field(line))
            if "Pattern" in line:
                PATTERN = str(field(line))
            if "Regulator" in line:
                REGULATOR = str(field(line))
            if "Activation" in line:
                ACTIVATION = str(field(line))

    kerasSetting = '!H:!V:VarTransform=G:FilenameModel=' + model_name + \
                   ':SaveBestOnly=true' + \
                   ':NumEpochs=' + str(EPOCHS) + \
                   ':BatchSize=' + str(BATCH_SIZE) + \
                   ':TriesEarlyStopping=' + str(PATIENCE)

    model = build_model(HIDDEN, NODES, LRATE, REGULATOR, PATTERN, ACTIVATION,
                        numVars)
    model.save(model_name)
    model.summary()

    factory.BookMethod(loader, TMVA.Types.kPyKeras, 'PyKeras', kerasSetting)

    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()

    outputfile.Close()

    print("Finished training in " + str(
        (time.time() - START_TIME) / 60.0) + " minutes.")

    ROC = factory.GetROCIntegral(DATASET, 'PyKeras')
    print('ROC value is: {}'.format(ROC))
    if OPTION == "1":
        # Append to the same file the variable list was read from; the
        # context manager makes sure the line is flushed and the file closed.
        with open(varsListHPOPath, "a") as varsListHPOtxt:
            varsListHPOtxt.write("ROC Value: {}".format(ROC))

Example #9

Show file

import sys
import os
from ROOT import TMVA, TFile, TCut
from config import cfg, variables_iso_only
from os.path import join
import uproot
import root_pandas

# Bring up TMVA and use a finer binning for the ROC curves.
TMVA.Tools.Instance()
TMVA.gConfig().GetVariablePlotting().fNbinsXOfROCCurve = 400

# All outputs live below <out_dir>/<submit_version>.
out_dir_base = join(cfg["out_dir"], cfg['submit_version'])

max_n_per_class = 200000

# Walk over every (ID name, training bin) combination listed in the config.
training_pairs = ((id_key, bin_key)
                  for id_key in cfg["trainings"]
                  for bin_key in cfg["trainings"][id_key])

for idname, training_bin in training_pairs:

    print("Process training pipeline for {0} {1}".format(idname, training_bin))

    # One "legacy" output folder per combination, created on first use.
    out_dir = join(out_dir_base, idname, training_bin, "legacy")
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # Input variables configured for this particular training.
    feature_cols = cfg["trainings"][idname][training_bin]["variables"]

    outfileName = join(out_dir, "TMVA.root")
    print("---> Working with OutfileName = " + outfileName)

Example #10

Show file

def main():
    """Train, test and evaluate one BDT on the variable subset encoded by a seed.

    The seed (-s/--seed) is rendered as a zero-padded 53-character binary
    string. That string names the TMVA DataLoader, and bit k of the seed
    (counted from the least-significant end) decides whether entry k of
    ``varsList.varList[varListKey]`` is used as an input variable. The ROC
    integral of the trained BDT is printed at the end.

    Options that influence the training: -m/--methods, -d/--maxDepth,
    -l/--varListKey, -n/--nTrees, -s/--seed.

    NOTE(review): -i/--inputfile, -o/--outputfile, -t/--inputtrees and
    -v/--verbose are parsed (and -t is validated), but their values are
    overwritten or never read further down; -k/--mass is accepted by getopt
    and not handled at all. Confirm whether that is intended.

    Exits with status 1 on unknown options, a malformed -t argument, or
    when running under ROOT 5.18.
    """

    try:
        # retrieve command line options
        shortopts = "m:i:n:d:k:l:t:o:s:vh?"
        longopts = [
            "methods=", "inputfile=", "nTrees=", "maxDepth=", "mass=",
            "varListKey=", "inputtrees=", "outputfile=", "seed=", "verbose",
            "help", "usage"
        ]
        opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts)

    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    infname = DEFAULT_INFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname = DEFAULT_OUTFNAME
    methods = DEFAULT_METHODS
    nTrees = DEFAULT_NTREES
    mDepth = DEFAULT_MDEPTH
    varListKey = DEFAULT_VARLISTKEY
    verbose = True
    SeedN = DEFAULT_SEED
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-d", "--maxDepth"):
            mDepth = a
        elif o in ("-l", "--varListKey"):
            varListKey = a
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-n", "--nTrees"):
            nTrees = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            # Descending sort puts the empty strings produced by repeated
            # blanks last, so the two tree names end up in front.
            trees = sorted(a.strip().split(' '), reverse=True)
            if len(trees) - trees.count('') != 2:
                print "ERROR: need to give two trees (each one for signal and background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-s", "--seed"):
            SeedN = long(a)
        elif o in ("-v", "--verbose"):
            verbose = True

    varList = varsList.varList[varListKey]
    nVars = str(len(varList)) + 'vars'
    # str() so that a non-string default depth cannot break the concatenation
    # (a depth given on the command line is already a string).
    Note = methods + '_' + varListKey + '_' + nVars + '_mDepth' + str(mDepth)
    outfname = "dataset/weights/TMVA_" + Note + ".root"

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut, TRandom3

    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    fClassifier = TMVA.Factory(
        "VariableImportance",
        "!V:!ROC:!ModelPersistence:Silent:Color:!DrawProgressBar:AnalysisType=Classification"
    )
    str_xbitset = '{:053b}'.format(SeedN)

    seeddl = TMVA.DataLoader(str_xbitset)

    bdtSetting = '!H:!V:NTrees=%s:MaxDepth=%s' % (nTrees, mDepth)
    bdtSetting += ':MinNodeSize=2.5%:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20'
    bdtSetting += ':IgnoreNegWeightsInTraining=True'

    # Pair variable k with bit k of the seed, least-significant bit first.
    # zip() stops at the shorter sequence, so variables beyond the width of
    # the bit string are simply left out; a running index counting down from
    # 52 would turn negative there and wrap around to reuse the low bits.
    for iVar, bit in zip(varList, reversed(str_xbitset)):
        if bit == '1':
            seeddl.AddVariable(iVar[0], iVar[1], iVar[2], 'F')
            print iVar[0]

    (TMVA.gConfig().GetIONames()).fWeightFileDir = "weights/" + Note

    inputDir = varsList.inputDir
    infname = "TTTT_TuneCP5_PSweights_13TeV-amcatnlo-pythia8_hadd.root"
    iFileSig = TFile.Open(inputDir + infname)
    sigChain = iFileSig.Get("ljmet")

    seeddl.AddSignalTree(sigChain)

    # Opened background files and their trees are collected in these lists;
    # presumably this keeps the ROOT objects referenced during training --
    # TODO confirm.
    bkg_list = []
    bkg_trees_list = []

    for bkgName in varsList.bkg:
        bkgFile = TFile.Open(inputDir + bkgName)
        bkgTree = bkgFile.Get("ljmet")
        bkg_list.append(bkgFile)
        bkg_trees_list.append(bkgTree)
        bkgTree.GetEntry(0)

        if bkgTree.GetEntries() == 0:
            continue  # empty trees are not registered with the loader
        seeddl.AddBackgroundTree(bkgTree, 1)

    seeddl.SetSignalWeightExpression(weightStrS)
    seeddl.SetBackgroundWeightExpression(weightStrB)

    mycutSig = TCut(cutStrS)
    mycutBkg = TCut(cutStrB)

    seeddl.PrepareTrainingAndTestTree(
        mycutSig, mycutBkg,
        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V"
    )

    fClassifier.BookMethod(seeddl, TMVA.Types.kBDT, "BDT", bdtSetting)
    fClassifier.TrainAllMethods()
    fClassifier.TestAllMethods()
    fClassifier.EvaluateAllMethods()

    SROC = fClassifier.GetROCIntegral(str_xbitset, "BDT")
    print "ROC-integral : ", str_xbitset, " ", SROC
    print "SEED " + str_xbitset + " DONE"
    fClassifier.DeleteAllMethods()
    fClassifier.fMethodsMap.clear()
    print "=================================================================="
    print "=================================================================="

Example #11

Show file

def main():

    try:
        # retrive command line options
        shortopts  = "w:m:i:j:f:g:t:o:a:vgh?"
        longopts   = ["weight_fold=", "methods=", "inputfilesig=", "inputfilebkg=", "friendinputfilesig=", "friendinputfilebkg=", "inputtrees=", "outputfile=", "verbose", "gui", "help", "usage"]
        opts, args = getopt.getopt( sys.argv[1:], shortopts, longopts )

    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    infnameSig     = DEFAULT_INFNAMESIG
    infnameBkg     = DEFAULT_INFNAMEBKG
    friendfnameSig = DEFAULT_FRIENDNAMESIG
    friendfnameBkg = DEFAULT_FRIENDNAMEBKG
    treeNameSig    = DEFAULT_TREESIG
    treeNameBkg    = DEFAULT_TREEBKG
    outfname       = DEFAULT_OUTFNAME
    methods        = DEFAULT_METHODS
    weight_fold    = "weights"
    verbose        = False
    gui            = False
    addedcuts      = ""
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-w", "--weight_fold"):
            weight_fold = a
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-i", "--inputfilesig"):
            infnameSig = a
        elif o in ("-j", "--inputfilebkg"):
            infnameBkg = a
        elif o in ("-f", "--friendinputfilesig"):
            friendfnameSig = a
        elif o in ("-g", "--friendinputfilebkg"):
            friendfnameBkg = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-a", "--addedcuts"):
            addedcuts = a
        elif o in ("-t", "--inputtrees"):
            a.strip()
            trees = a.rsplit( ' ' )
            trees.sort()
            trees.reverse()
            if len(trees)-trees.count('') != 2:
                print "ERROR: need to give two trees (each one for signal and background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True
        elif o in ("-g", "--gui"):
            gui = True

    # Print methods
    mlist = methods.replace(' ',',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Print the file
    print "Using file " + infnameSig + " for signal..."
    print "Using file " + infnameBkg + " for background..."

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut
    
    # check ROOT version, give alarm if 5.18
    print "ROOT version is " + str(gROOT.GetVersionCode())
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)
    
    # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI
    gROOT.SetMacroPath( "./" )
## SO I TAKE DEFAULT FORM ROOT#    gROOT.Macro       ( "./TMVAlogon.C" )    
#!    gROOT.LoadMacro   ( "./TMVAGui.C" )
 
   
    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile( outfname, 'RECREATE' )
    
    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in 
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory( "TMVAClassification", outputFile, 
                            "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" )

    # Set verbosity
    factory.SetVerbose( verbose )
    
    # If you wish to modify default settings 
    # (please check "src/Config.h" to see all available global options)
    #    gConfig().GetVariablePlotting()).fTimesRMS = 8.0
    (TMVA.gConfig().GetIONames()).fWeightFileDir = weight_fold;

    # Define the input variables that shall be used for the classifier training
    # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    # [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
    factory.AddVariable( "dR_l1l2", "dR_l1l2", "", 'F' )
    factory.AddVariable( "dR_b1b2", "dR_b1b2", "", 'F' )
    factory.AddVariable( "dR_bl", "dR_bl", "", 'F' )
    factory.AddVariable( "dR_l1l2b1b2", "dR_l1l2b1b2", "", 'F' )
    factory.AddVariable( "MINdR_bl", "MINdR_bl", "", 'F' )
    factory.AddVariable( "dphi_l1l2b1b2", "dphi_l1l2b1b2", "", 'F' )
    factory.AddVariable( "mass_l1l2", "mass_l1l2", "", 'F' )
    factory.AddVariable( "mass_b1b2", "mass_b1b2", "", 'F' )
    factory.AddVariable( "mass_trans", "mass_trans", "", 'F' )
    factory.AddVariable( "MT2", "MT2", "", 'F' )
    factory.AddVariable( "pt_b1b2", "pt_b1b2", "", 'F' )
    #factory.AddVariable( "MMC_h2mass_MaxBin", "MMC_h2mass_MaxBin", "", 'F' )
    #factory.AddVariable( "MMC_h2mass_RMS", "MMC_h2mass_RMS", "", 'F' )
    #factory.AddVariable( "MMC_h2mass_prob", "MMC_h2mass_prob", "", 'F' )

    # You can add so-called "Spectator variables", which are not used in the MVA training, 
    # but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the 
    # input variables, the response values of all trained MVAs, and the spectator variables
    # factory.AddSpectator( "spec1:=var1*2",  "Spectator 1", "units", 'F' )
    # factory.AddSpectator( "spec2:=var1*3",  "Spectator 2", "units", 'F' )

    # Read input data
    if gSystem.AccessPathName( infnameSig ) != 0 or gSystem.AccessPathName( infnameBkg ): gSystem.Exec( "wget http://root.cern.ch/files/" + infname )
        
    inputSig = TFile.Open( infnameSig )
    inputBkg = TFile.Open( infnameBkg )

    # Get the signal and background trees for training
    signal      = inputSig.Get( treeNameSig )
    background  = inputBkg.Get( treeNameBkg )

    ##signal.AddFriend( "eleIDdir/isoT1 = eleIDdir/T1", friendfnameSig )
    ##background.AddFriend( "eleIDdir/isoT1 = eleIDdir/T1", friendfnameBkg )

    # Global event weights (see below for setting event-wise weights)
    signalWeight     = 1.
    backgroundWeight = 1.

#I don't think there's a general answer to this. The safest 'default'
#is to use the event weight such that you have equal amounts of signal
#and background
#for the training. Otherwise, for example, if you look for a rare
#signal and you use the weight to scale the number of events according
#to the expected ratio of signal and background
#according to the luminosity, the classifier sees hardly any signal
#events and "thinks": oh, I just classify everything as background and do
#a good job!
#
#One can try to 'optimize' the training a bit more for either 'high
#purity' or 'high efficiency' by choosing different weights, but as I
#said, there's no fixed rule. You'd have
#to 'try' and see if you get better results by playing with the weights.

    # ====== register trees ====================================================
    #
    # the following method is the preferred one:
    # you can add an arbitrary number of signal or background trees
    factory.AddSignalTree    ( signal,     signalWeight     )
    factory.AddBackgroundTree( background, backgroundWeight )

    # To give different trees for training and testing, do as follows:
    #    factory.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" )
    #    factory.AddSignalTree( signalTestTree,     signalTestWeight,  "Test" )
    
    # Use the following code instead of the above two or four lines to add signal and background 
    # training and test events "by hand"
    # NOTE that in this case one should not give expressions (such as "var1+var2") in the input 
    #      variable definition, but simply compute the expression before adding the event
    #
    #    # --- begin ----------------------------------------------------------
    #    
    # ... *** please lookup code in TMVA/macros/TMVAClassification.C ***
    #    
    #    # --- end ------------------------------------------------------------
    #
    # ====== end of register trees ==============================================    
            
    # Set individual event weights (the variables must exist in the original TTree)
    #    for signal    : factory.SetSignalWeightExpression    ("weight1*weight2");
    #    for background: factory.SetBackgroundWeightExpression("weight1*weight2");

    # Apply additional cuts on the signal and background sample. 
    # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )
    #mycutSig = TCut( "nu1and2_diBaxis_t>-900 && met_diBaxis_t>-900&& hasb1jet && hasb2jet && hasMET && hasGenMET && hasdRljet && hastwomuons" ) 
    mycutSig = TCut( addedcuts ) 
    #mycutBkg = TCut( "event_n%2!=0 && " + addedcuts ) 
    mycutBkg = TCut( addedcuts ) 
    #mycutBkg = TCut( "nu1and2_diBaxis_t>-900 && met_diBaxis_t>-900&& hasb1jet && hasb2jet && hasMET && hasGenMET && hasdRljet && hastwomuons" ) 

    print mycutSig

    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
    factory.PrepareTrainingAndTestTree( mycutSig, mycutBkg,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" )

    # --------------------------------------------------------------------------------------------------

    # ---- Book MVA methods
    #
    # please lookup the various method configuration options in the corresponding cxx files, eg:
    # src/MethodCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
    # it is possible to preset ranges in the option string in which the cut optimisation should be done:
    # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

    # Cut optimisation
    if "Cuts" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "Cuts",
                            "!H:!V:FitMethod=MC:EffSel:VarProp[0]=FMax:VarProp[1]=FMin" )

    if "CutsD" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsD",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" )

    if "CutsPCA" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsPCA",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" )

    if "CutsGA" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsGA",
                            "H:!V:FitMethod=GA:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95:VarProp[0]=FMin:VarProp[1]=FMax" )

    if "CutsSA" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsSA",
                            "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" )

    # Likelihood ("naive Bayes estimator")
    if "Likelihood" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "Likelihood",
                            "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" )

    # Decorrelated likelihood
    if "LikelihoodD" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodD",
                            "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" )

    # PCA-transformed likelihood
    if "LikelihoodPCA" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodPCA",
                            "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ) 

    # Use a kernel density estimator to approximate the PDFs
    if "LikelihoodKDE" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodKDE",
                            "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ) 

    # Use a variable-dependent mix of splines and kernel density estimator
    if "LikelihoodMIX" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodMIX",
                            "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ) 

    # Test the multi-dimensional probability density estimator
    # here are the options strings for the MinMax and RMS methods, respectively:
    #      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
    #      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
    if "PDERS" in mlist:
        factory.BookMethod( TMVA.Types.kPDERS, "PDERS",
                            "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" )

    if "PDERSD" in mlist:
        factory.BookMethod( TMVA.Types.kPDERS, "PDERSD",
                            "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" )

    if "PDERSPCA" in mlist:
        factory.BookMethod( TMVA.Types.kPDERS, "PDERSPCA",
                             "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" )

   # Multi-dimensional likelihood estimator using self-adapting phase-space binning
    if "PDEFoam" in mlist:
        factory.BookMethod( TMVA.Types.kPDEFoam, "PDEFoam",
                            "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" )

    if "PDEFoamBoost" in mlist:
        factory.BookMethod( TMVA.Types.kPDEFoam, "PDEFoamBoost",
                            "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" )

    # K-Nearest Neighbour classifier (KNN)
    if "KNN" in mlist:
        factory.BookMethod( TMVA.Types.kKNN, "KNN",
                            "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" )

    # H-Matrix (chi2-squared) method
    if "HMatrix" in mlist:
        factory.BookMethod( TMVA.Types.kHMatrix, "HMatrix", "!H:!V" )

    # Linear discriminant (same as Fisher discriminant)
    if "LD" in mlist:
        factory.BookMethod( TMVA.Types.kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" )

    # Fisher discriminant (same as LD)
    if "Fisher" in mlist:
        factory.BookMethod( TMVA.Types.kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" )

    # Fisher with Gauss-transformed input variables
    if "FisherG" in mlist:
        factory.BookMethod( TMVA.Types.kFisher, "FisherG", "H:!V:VarTransform=Gauss" )

    # Composite classifier: ensemble (tree) of boosted Fisher classifiers
    if "BoostedFisher" in mlist:
        factory.BookMethod( TMVA.Types.kFisher, "BoostedFisher", 
                            "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" )

    # Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
    if "FDA_MC" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_MC",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );

    if "FDA_GA" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_GA",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

    if "FDA_SA" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_SA",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

    if "FDA_MT" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_MT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

    if "FDA_GAMT" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_GAMT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

    if "FDA_MCMT" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_MCMT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

    # TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
    if "MLP" in mlist:
        factory.BookMethod( TMVA.Types.kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" )

    if "MLPBFGS" in mlist:
        factory.BookMethod( TMVA.Types.kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" )

    if "MLPBNN" in mlist:
        factory.BookMethod( TMVA.Types.kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ) # BFGS training with bayesian regulators

    # CF(Clermont-Ferrand)ANN
    if "CFMlpANN" in mlist:
        factory.BookMethod( TMVA.Types.kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ) # n_cycles:#nodes:#nodes:...  

    # Tmlp(Root)ANN
    if "TMlpANN" in mlist:
        factory.BookMethod( TMVA.Types.kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ) #n_cycles:#nodes:#nodes:...

    # Support Vector Machine
    if "SVM" in mlist:
        factory.BookMethod( TMVA.Types.kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" )

    # Boosted Decision Trees
    if "BDTG" in mlist:
        factory.BookMethod( TMVA.Types.kBDT, "BDTG",
                            "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5" )

    if "BDT" in mlist:
        factory.BookMethod( TMVA.Types.kBDT, "BDT",
                            "!H:!V:NTrees=850:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" )

    if "BDTB" in mlist:
        factory.BookMethod( TMVA.Types.kBDT, "BDTB",
                            "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" )

    if "BDTD" in mlist:
        factory.BookMethod( TMVA.Types.kBDT, "BDTD",
                            "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" )

    # RuleFit -- TMVA implementation of Friedman's method
    if "RuleFit" in mlist:
        factory.BookMethod( TMVA.Types.kRuleFit, "RuleFit",
                            "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" )

    # --------------------------------------------------------------------------------------------------
            
    # ---- Now you can tell the factory to train, test, and evaluate the MVAs. 

    # Train MVAs
    factory.TrainAllMethods()
    
    # Test MVAs
    factory.TestAllMethods()
    
    # Evaluate MVAs
    factory.EvaluateAllMethods()    
    
    # Save the output.
    outputFile.Close()
    
    print "=== wrote root file %s\n" % outfname
    print "=== TMVAClassification is done!\n"
    
    # open the GUI for the result macros    
    if( gui ):
        gROOT.ProcessLine( "TMVAGui(\"%s\")" % outfname )
        # keep the ROOT thread running
        gApplication.Run()

Example #12

Show file

File: TMVA_tutorial_regression_tmva.py Project: LailinXu/hepstat-tutorial

##  * Train a BDT with TMVA
##
## \macro_image
## \macro_output
## \macro_code
##
## \author Lailin XU
## Modified from [RegressionKeras.py](https://root.cern/doc/master/RegressionKeras_8py.html) and [TMVARegression.C](https://root.cern/doc/master/TMVARegression_8C.html)

from ROOT import TMVA, TFile, TTree, TCut
from subprocess import call
from os.path import isfile
 
# Setup TMVA
TMVA.Tools.Instance()
(TMVA.gConfig().GetVariablePlotting()).fMaxNumOfAllowedVariablesForScatterPlots = 5 
 
outfileName = 'TMVA_tutorial_reg_1.root'
output = TFile.Open(outfileName, 'RECREATE')
factory = TMVA.Factory('TMVARegression', output, '!V:!Silent:Color:DrawProgressBar:Transformations=D,G:AnalysisType=Regression')
 
# Load data
trfile = "SM_ttbar.root"
if not isfile('tmva_reg_example.root'):
    call(['curl', '-L', '-O', 'http://root.cern.ch/files/tmva_reg_example.root'])
 
data = TFile.Open(trfile)
if not data:
  print("Error! file not opened", trfile)
trname = "tree"
tree = data.Get(trname)

Example #13

Show file

    test_size=0.2,
    #train_size=10000,
    #test_size=5000,
    random_state=0)
# Training
if not args.quiet:
    print 'start training ...'
if args.timeit:
    start = timer()

output = TFile(args.outdir + 'tmva_output.root', 'recreate')
factory = TMVA.Factory(
    'TMVA', output, 'AnalysisType=Classification:'
    '!V:Silent:!DrawProgressBar')
dataloader = TMVA.DataLoader("")
TMVA.gConfig().GetIONames().fWeightFileDir = args.outdir + 'weights/'

for v in var:
    vtype = 'I' if v in [
        'nJet', 'tau0_decaymode', 'tau1_decaymode', 'ntags', 'ntags_loose'
    ] else 'F'
    dataloader.AddVariable(v, vtype)

add_classification_events(dataloader, x_train, y_train, weights=w_train)
add_classification_events(dataloader,
                          x_test,
                          y_test,
                          weights=w_test,
                          test=True)

norm = 'None'

Example #14

Show file

File: TMVAClassification_VariableImportance.py Project: furmada/TTTT_TMVA_DNN

def main():  # runs the program
    try:  # retrieve command line options
        shortopts = "o:w:y:v:s:h?"  # possible command line options
        longopts = [
            "outputfile=", "where=", "year=", "verbose", "seed=", "help",
            "usage"
        ]
        opts, args = getopt.getopt(
            sys.argv[1:], shortopts,
            longopts)  # associates command line inputs to variables

    except getopt.GetoptError:  # output error if command line argument invalid
        print("ERROR: unknown options in argument %s" % sys.argv[1:])
        usage()
        sys.exit(1)

    myArgs = np.array([  # Stores the command line arguments    
        ['-o', '--outputfile', 'outfname', DEFAULT_OUTFNAME],
        ['-v', '--verbose', 'verbose', True],
        ['-w', '--where', 'where', "lpc"],
        ['-y', '--year', 'year', 2017],
        ['-s', '--seed', 'SeedN', DEFAULT_SEED],
    ])

    for opt, arg in opts:
        if opt in myArgs[:, 0]:
            index = np.where(
                myArgs[:,
                       0] == opt)[0][0]  # np.where returns a tuple of arrays
            myArgs[
                index,
                3] = arg  # override the variables with the command line argument
        elif opt in myArgs[:, 1]:
            index = np.where(myArgs[:, 1] == opt)[0][0]
            myArgs[index, 3] = arg
        if opt in ("-?", "-h", "--help",
                   "--usage"):  # provides command line help
            usage()
            sys.exit(0)

    # Initialize some variables after reading in arguments
    SeedN_index = np.where(myArgs[:, 2] == 'SeedN')[0][0]
    outfname_index = np.where(myArgs[:, 2] == 'outfname')[0][0]
    verbose_index = np.where(myArgs[:, 2] == 'verbose')[0][0]
    where_index = np.where(myArgs[:, 2] == 'where')[0][0]
    year_index = np.where(myArgs[:, 2] == 'year')[0][0]

    seed = myArgs[SeedN_index, 3]
    where = myArgs[where_index, 3]
    year = int(myArgs[year_index, 3])
    varList = varsList.varList["DNN"]
    var_length = len(varList)

    str_xbitset = '{:0{}b}'.format(long(myArgs[SeedN_index, 3]), var_length)
    nVars = str_xbitset.count('1')
    outf_key = "DNN_" + str(nVars) + "vars"
    myArgs[outfname_index, 3] = "dataset/weights/TMVA_" + outf_key + ".root"

    print("Seed: {}".format(str_xbitset))

    outputfile = TFile(myArgs[outfname_index, 3], 'RECREATE')

    checkRootVer()  # check that ROOT version is correct

    ######################################################
    ######################################################
    ######                                          ######
    ######                  T M V A                 ######
    ######                                          ######
    ######################################################
    ######################################################

    # Declare some containers
    sig_list = []
    sig_trees_list = []
    bkg_list = []
    bkg_trees_list = []
    hist_list = []
    weightsList = []

    if where == "brux":
        if year == 2017:
            inputDir = varsList.inputDirBRUX2017
        elif year == 2018:
            inputDir = varsList.inputDirBRUX2018
    else:
        inputDir = varsList.inputDirCondor

    # Set up TMVA
    ROOT.TMVA.Tools.Instance()
    ROOT.TMVA.PyMethodBase.PyInitialize()

    fClassifier = TMVA.Factory(
        'VariableImportance',
        '!V:!ROC:Silent:!Color:!DrawProgressBar:Transformations=I;:AnalysisType=Classification'
    )

    fClassifier.SetVerbose(bool(myArgs[verbose_index, 3]))

    loader = TMVA.DataLoader("dataset/" + str_xbitset)

    for indx, var in enumerate(varList):
        if (str_xbitset[indx] == '1'):
            if var[0] == "NJets_MultiLepCalc":
                loader.AddVariable(var[0], var[1], var[2], "I")
            else:
                loader.AddVariable(var[0], var[1], var[2], "F")

    # add signals to loader
    if year == 2017:
        for i in range(len(varsList.sig2017_0)):
            sig_list.append(TFile.Open(inputDir + varsList.sig2017_0[i]))
            sig_trees_list.append(sig_list[i].Get("ljmet"))
            sig_trees_list[i].GetEntry(0)
            loader.AddSignalTree(sig_trees_list[i])
    elif year == 2018:
        for i in range(len(varsList.sig2018_0)):
            sig_list.append(TFile.Open(inputDir + varsList.sig2018_0[i]))
            sig_trees_list.append(sig_list[i].Get("ljmet"))
            sig_trees_list[i].GetEntry(0)
            loader.AddSignalTree(sig_trees_list[i])

    # add backgrounds to loader
    if year == 2017:
        for i in range(len(varsList.bkg2017_0)):
            bkg_list.append(TFile.Open(inputDir + varsList.bkg2017_0[i]))
            bkg_trees_list.append(bkg_list[i].Get("ljmet"))
            bkg_trees_list[i].GetEntry(0)
            if bkg_trees_list[i].GetEntries() == 0: continue
            loader.AddBackgroundTree(bkg_trees_list[i])

    elif year == 2018:
        for i in range(len(varsList.bkg2018_0)):
            bkg_list.append(TFile.Open(inputDir + varsList.bkg2018_0[i]))
            bkg_trees_list.append(bkg_list[i].Get("ljmet"))
            bkg_trees_list[i].GetEntry(0)

            if bkg_trees_list[i].GetEntries() == 0: continue
            loader.AddBackgroundTree(bkg_trees_list[i])

    # set signal and background weights
    loader.SetSignalWeightExpression(weightStrS)
    loader.SetBackgroundWeightExpression(weightStrB)

    # set cut thresholds for signal and background
    mycutSig = TCut(cutStrS)
    mycutBkg = TCut(cutStrB)

    NSIG = 0
    NSIG_TEST = 0
    NBKG = 0
    NBKG_TEST = 0

    loader.PrepareTrainingAndTestTree(
      mycutSig, mycutBkg,
      "nTrain_Signal=" + str(NSIG) + \
      ":nTrain_Background=" + str(NBKG) + \
      ":nTest_Signal=" + str(NSIG_TEST) + \
      ":nTest_Background=" + str(NBKG_TEST) + \
      ":SplitMode=Random:NormMode=NumEvents:!V"
    )

    #####################################################
    #####################################################
    ######                                         ######
    ######            K E R A S   D N N            ######
    ######                                         ######
    #####################################################
    #####################################################

    model_name = "TTTT_TMVA_model.h5"

    model = Sequential()
    model.add(
        Dense(100,
              input_dim=nVars,
              kernel_initializer="glorot_normal",
              activation="relu"))
    for i in range(2):
        model.add(BatchNormalization())
        model.add(
            Dense(100, kernel_initializer="glorot_normal", activation="relu"))
    model.add(Dense(2, activation="sigmoid"))

    model.compile(loss="categorical_crossentropy",
                  optimizer=Adam(),
                  metrics=["accuracy"])

    model.save(model_name)
    model.summary()

    ######################################################
    ######################################################
    ######                                          ######
    ######                  T M V A                 ######
    ######                                          ######
    ######################################################
    ######################################################

    # Declare some containers
    kerasSetting = "!H:!V:VarTransform=G:FilenameModel=" + model_name + \
     ":NumEpochs=15:BatchSize=512" # the trained model has to be specified in this string

    # run the classifier
    fClassifier.BookMethod(loader, TMVA.Types.kPyKeras, "PyKeras",
                           kerasSetting)

    (TMVA.gConfig().GetIONames()
     ).fWeightFileDir = str_xbitset + "/weights/" + outf_key
    #print("New weight file directory: {}".format((TMVA.gConfig().GetIONames()).fWeightFileDir))

    fClassifier.TrainAllMethods()
    fClassifier.TestAllMethods()
    fClassifier.EvaluateAllMethods()

    SROC = fClassifier.GetROCIntegral("dataset/" + str_xbitset, "PyKeras")
    print("ROC-integral: {}".format(SROC))
    fClassifier.DeleteAllMethods()
    fClassifier.fMethodsMap.clear()

    outputfile.Close()

Example #15

Show file

File: TMVA_tutorial_classification_tmva.py Project: LailinXu/hepstat-tutorial

##
## \macro_image
## \macro_output
## \macro_code
##
## \author Lailin XU
## Modified from [ClassificationKeras.py](https://root.cern/doc/master/ClassificationKeras_8py.html) and [TMVAClassification.C](https://root.cern/doc/master/TMVAClassification_8C.html)

from ROOT import TMVA, TFile, TTree, TCut
from subprocess import call
from os.path import isfile

# Setup TMVA
# =======================
TMVA.Tools.Instance()
(TMVA.gConfig().GetVariablePlotting()
 ).fMaxNumOfAllowedVariablesForScatterPlots = 5

outfileName = 'TMVA_tutorial_cla_1.root'
output = TFile.Open(outfileName, 'RECREATE')
# Create the factory object. Later you can choose the methods whose performance you'd like to investigate. The factory is
#    the only TMVA object you have to interact with
#
#    The first argument is the base of the name of all the weightfiles in the directory weight/
#    The second argument is the output file for the training results
factory = TMVA.Factory(
    "TMVAClassification", output,
    "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification"
)

# Load data

Example #16

Show file

File: TMVAClassification.py Project: apana/usercode

def main():
    """Train, test and evaluate a TMVA BDT for the "Dijet" or "Subjet" analysis.

    Command-line options (parsed from ``sys.argv``):
      -a, --analysis <name>     analysis to run, "Dijet" or "Subjet"
                                (default: module-level ANALYSIS)
      -o, --outputfile <file>   output ROOT file (default: module-level OUTFNAME)
      -r, --regression <arg>    use the regression-corrected variables and cuts
                                (default: module-level REGRESSION); the option
                                requires an argument but its value is ignored
      -v, --verbose             enable verbose factory output
      -h, -?, --help, --usage   print the usage text and exit

    Inputs are taken from the module-level SAMPLES mapping: each value is a
    sequence whose element 0 is the sample type ("S" or "B"), element 1 the
    file name relative to INPUTDIR and element 2 the cross section.  Each
    tree (named TREE) is weighted by ``xs / nEVT``, where nEVT is the content
    of bin 1 of the "Count" histogram stored in the same file.

    The weight files are written to the directory "weights_<analysis>".

    The function terminates the process with ``sys.exit(1)`` on bad options,
    an unsupported ROOT version, an unknown analysis or sample type, or an
    unusable input file.
    """
    try:
        # retrieve command line options
        shortopts = "a:o:r:vh?"
        longopts = ["analysis=", "outputfile=", "regression=", "verbose", "help", "usage"]
        opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts)

    except getopt.GetoptError:
        # print help information and exit:
        print("ERROR: unknown options in argument %s" % sys.argv[1:])
        usage()
        sys.exit(1)

    _outfname = OUTFNAME
    _analysis = ANALYSIS
    verbose = False
    _regression = REGRESSION

    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-o", "--outputfile"):
            _outfname = a
        elif o in ("-a", "--analysis"):
            _analysis = a
        elif o in ("-r", "--regression"):
            # The option is declared with a mandatory argument ("r:" /
            # "regression="), but only its presence matters here.
            _regression = True
        elif o in ("-v", "--verbose"):
            verbose = True

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut

    # check ROOT version, give alarm if 5.18
    # (332288 == 0x051200 and 332544 == 0x051300, i.e. 5.18/00 <= version < 5.19/00)
    version_code = gROOT.GetVersionCode()
    if 332288 <= version_code < 332544:
        print("*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA")
        print("*** does not run properly (function calls with enums in the argument are ignored).")
        print("*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),")
        print("*** or use another ROOT version (e.g., ROOT 5.19).")
        sys.exit(1)

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile(_outfname, 'RECREATE')

    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory("TMVAClassification", outputFile,
                           "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification")

    # Set verbosity
    factory.SetVerbose(verbose)

    # If you wish to modify default settings
    # (please check "src/Config.h" to see all available global options)
    #    gConfig().GetVariablePlotting()).fTimesRMS = 8.0
    #    gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"
    TMVA.gConfig().GetIONames().fWeightFileDir = "weights_" + _analysis

    # Define the input variables that shall be used for the classifier training
    # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    # [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
    if _analysis == "Dijet":

        if not _regression:
            factory.AddVariable("H_mass := H.mass", 'F')
            factory.AddVariable("H_pt :=H.pt", 'F')
            factory.AddVariable("hJet_pt1 := hJet_pt[0]", 'F')
            factory.AddVariable("hJet_pt2 := hJet_pt[1]", 'F')
        else:
            factory.AddVariable("HCorr_mass := newHiggsMass", 'F')
            factory.AddVariable("HCorr_pt := newHiggsPt", 'F')
            factory.AddVariable("hJ1Corr_pt := hJet_genPtReg0", 'F')
            factory.AddVariable("hJ2Corr_pt := hJet_genPtReg1", 'F')

        factory.AddVariable("V_pt :=V.pt", 'F')
        factory.AddVariable("H_dR := H.dR", 'F')
        factory.AddVariable("hJ12_MaxCsv := max(hJet_csv[0],hJet_csv[1])", 'F')
        factory.AddVariable("hJ12_MinCsv := min(hJet_csv[0],hJet_csv[1])", 'F')
        factory.AddVariable("HV_dPhi := HVdPhi", 'F')
        factory.AddVariable("H_dEta := H.dEta", 'F')
        factory.AddVariable("NAddJet:=Sum$(aJet_pt>20 && abs(aJet_eta)<4.5)", 'I')
        factory.AddVariable("dPull := deltaPullAngle", 'F')

        # You can add so-called "Spectator variables", which are not used in the MVA training,
        # but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
        # input variables, the response values of all trained MVAs, and the spectator variables
        #factory.AddSpectator("hJet_pt1 := hJet_pt[0]", 'F');
        #factory.AddSpectator("hJet_pt2 := hJet_pt[1]", 'F');

    elif _analysis == "Subjet":

        if not _regression:
            factory.AddVariable("H_mass := FatH.filteredmass", 'F')
            factory.AddVariable("H_pt   := FatH.filteredpt", 'F')
            factory.AddVariable("SJ1_pt := fathFilterJets_pt[0]", 'F')
            factory.AddVariable("SJ2_pt := fathFilterJets_pt[1]", 'F')
            factory.AddVariable("SJ3_pt := Alt$(fathFilterJets_pt[2],0)", 'F')
        else:
            factory.AddVariable("HCorr_mass := newfatHiggsMass", 'F')
            factory.AddVariable("HCorr_pt := newfatHiggsPt", 'F')
            factory.AddVariable("SJ1Corr_pt := fathFilterJets_genPtReg0", 'F')
            # NOTE(review): "fathFilterJetsx_genPtReg1" differs from the
            # "fathFilterJets_genPtReg0" spelling used above -- possibly a typo
            # for "fathFilterJets_genPtReg1"; confirm against the input tree.
            factory.AddVariable("SJ2Corr_pt := fathFilterJetsx_genPtReg1", 'F')
            factory.AddVariable("SJ3_pt := Alt$(fathFilterJets_pt[2],0)", 'F')  #change later

        factory.AddVariable("V_pt   := V.pt", 'F')
        # Absolute azimuthal difference between the filtered fat jet and V,
        # folded back by 2*pi when the raw difference lies outside [-pi, pi].
        factory.AddVariable("HV_dPhi := "
                            "FatH.filteredphi - V.phi > pi ? "
                            "abs(FatH.filteredphi - V.phi - 2*pi) : "
                            "FatH.filteredphi - V.phi < -pi ? "
                            "abs(FatH.filteredphi - V.phi + 2*pi) : "
                            "abs(FatH.filteredphi - V.phi)", 'F')

        factory.AddVariable("SJ1_csv := fathFilterJets_csv[0]", 'F')
        factory.AddVariable("SJ2_csv := fathFilterJets_csv[1]", 'F')
        factory.AddVariable("SJ3_csv := Alt$(fathFilterJets_csv[2],0)", 'F')

        factory.AddVariable("SJ12_dEta := "
                            "nfathFilterJets < 2 ? 0 : "
                            "fabs(fathFilterJets_eta[0] - fathFilterJets_eta[1] )", 'F')

        factory.AddVariable("SJ13_dEta := "
                            "nfathFilterJets < 3 ? 0 : "
                            "abs( fathFilterJets_eta[0] - Alt$(fathFilterJets_eta[2],0))", 'F')

        factory.AddVariable("SJ12_dPhi := "
                            "nfathFilterJets < 2 ? 0 : "
                            "fathFilterJets_phi[0] - fathFilterJets_phi[1] > pi ? "
                            "abs( fathFilterJets_phi[0] - fathFilterJets_phi[1] - 2*pi) : "
                            "fathFilterJets_phi[0] - fathFilterJets_phi[1] < -pi ? "
                            "abs( fathFilterJets_phi[0] - fathFilterJets_phi[1] + 2*pi) : "
                            "abs( fathFilterJets_phi[0] - fathFilterJets_phi[1])", 'F')

        factory.AddVariable("SJ13_dPhi := "
                            "nfathFilterJets < 3 ? 0 : "
                            "fathFilterJets_phi[0] - Alt$(fathFilterJets_phi[2],0) > pi ? "
                            "abs(fathFilterJets_phi[0] - "
                            "Alt$(fathFilterJets_phi[2],0) - 2*pi) : "
                            "fathFilterJets_phi[0] - Alt$(fathFilterJets_phi[2],0) < -pi ? "
                            "abs(fathFilterJets_phi[0] - "
                            "Alt$(fathFilterJets_phi[2],0) + 2*pi) : "
                            "abs(fathFilterJets_phi[0] - Alt$(fathFilterJets_phi[2],0))", 'F')

        factory.AddVariable("SJ12_dR := "
                            "nfathFilterJets < 2 ? 0 : "
                            "deltaR(fathFilterJets_eta[0],fathFilterJets_phi[0],fathFilterJets_eta[1],fathFilterJets_phi[1])", 'F')

        factory.AddVariable("SJ13_dR := nfathFilterJets < 3 ? 0 : "
                            "deltaR(fathFilterJets_eta[0],fathFilterJets_phi[0],Alt$(fathFilterJets_eta[2],0),Alt$(fathFilterJets_phi[2],0))", 'F')

        factory.AddVariable("NAddJet:= "
                            "nfathFilterJets < 2 ? 0 : "
                            "Sum$(aJet_pt>20 && abs(aJet_eta)<4.5 && deltaR(fathFilterJets_eta[0],fathFilterJets_phi[0],aJet_eta,aJet_phi)>0.3 && deltaR(fathFilterJets_eta[1],fathFilterJets_phi[1],aJet_eta,aJet_phi)>0.3)+Sum$(hJet_pt>20 && abs(hJet_eta)<4.5 && deltaR(fathFilterJets_eta[0],fathFilterJets_phi[0],hJet_eta,hJet_phi)>0.3 && deltaR(fathFilterJets_eta[1],fathFilterJets_phi[1],hJet_eta,hJet_phi)>0.3)", 'I')

    else:
        print("Problem specifying analysis. Please choose Dijet or Subjet.")
        sys.exit(1)

    ## Get the Signal and Background trees
    for Sample, SampleInfo in SAMPLES.items():
        SampleType = SampleInfo[0]  # signal or background
        infile = os.path.join(INPUTDIR, SampleInfo[1])
        xs = SampleInfo[2]

        ## get number of step 1 events
        f = TFile.Open(infile)
        # Fail with a clear message instead of crashing on a null/zombie file.
        if not f or f.IsZombie():
            print("ERROR: cannot open input file %s" % infile)
            sys.exit(1)

        h = f.Get("Count")
        if not h:
            print("ERROR: histogram 'Count' not found in %s" % infile)
            sys.exit(1)
        nEVT = int(h.GetBinContent(1))
        if nEVT <= 0:
            print("ERROR: 'Count' histogram in %s reports no events" % infile)
            sys.exit(1)

        # float() guards against Python 2 integer division when xs is an int
        wt = float(xs) / nEVT
        print("%s :  %s" % (Sample, infile))
        print("XS:nEVT:wt:  %s %s %s" % (xs, nEVT, wt))

        theTree = f.Get(TREE)
        if SampleType == "S":
            factory.AddSignalTree(theTree, wt)
        elif SampleType == "B":
            factory.AddBackgroundTree(theTree, wt)
        else:
            print("Trouble extracting SampleType for this sample")
            sys.exit(1)

    # table10 AN-2011/430
    # The individual cuts are collected in a list and AND-ed together below.
    if _analysis == "Dijet":
        cuts = [
            "Vtype == 0",
            "vLepton_pt[0]>20.",
            "H.HiggsFlag > 0",
            "V.mass > 75.0",
            "V.mass < 105.0",
            "V.pt > 100.0",
            "max(hJet_csv[0],hJet_csv[1]) > 0.244",
            "min(hJet_csv[0],hJet_csv[1]) > 0.244",
        ]

        if not _regression:
            cuts += [
                "hJet_pt[0] > 20.0",
                "hJet_pt[1] > 20.0",
                "H.mass > 80.0",
                "H.mass < 150.0",
            ]
        else:
            # Cut on BOTH regression-corrected jets (Reg0 and Reg1), mirroring
            # the pt[0]/pt[1] cuts above; the Reg0 cut used to be listed twice.
            cuts += [
                "hJet_genPtReg0 > 20.0",
                "hJet_genPtReg1 > 20.0",
                "newHiggsMass > 80.0",
                "newHiggsMass < 150.0",
            ]

    elif _analysis == "Subjet":
        cuts = [
            "Vtype == 0",
            "vLepton_pt[0]>20.",
            "FatH.FatHiggsFlag > 0",
            "V.mass > 75.0",
            "V.mass < 105.0",
            "V.pt > 100.0",
            "nfathFilterJets >= 2",
            "max(fathFilterJets_csv[0],fathFilterJets_csv[1]) > 0.244",
            "min(fathFilterJets_csv[0],fathFilterJets_csv[1]) > 0.244",
        ]

        if not _regression:
            cuts += [
                "fathFilterJets_pt[0] > 20.0",
                "fathFilterJets_pt[1] > 20.0",
                "FatH.filteredmass > 80.0",
                "FatH.filteredmass < 150.0",
            ]
        else:
            # Same fix as for Dijet: the second cut targets the second jet.
            cuts += [
                "fathFilterJets_genPtReg0 > 20.0",
                "fathFilterJets_genPtReg1 > 20.0",
                "newfatHiggsMass > 80.0",
                "newfatHiggsMass < 150.0",
            ]

    else:
        # Defensive only: an unknown analysis already exited above.
        print("Problem specifying analysis. Please choose Dijet or Subjet.")
        sys.exit(1)

    cutString = " && ".join(cuts)

    print(cutString)
    # The same selection is applied to signal and background.
    mycutSig = TCut(cutString)
    mycutBkg = TCut(cutString)

    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples

    prepareOptions = "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=None:!V"
    #prepareOptions="SplitMode=Random:!V"
    factory.PrepareTrainingAndTestTree(mycutSig, mycutBkg, prepareOptions)

    # Colon-separated TMVA option string for the boosted decision tree.
    bdtOptions = ":".join([
        "!H",
        "!V",
        "NTrees=850",
        "nEventsMin=150",
        "MaxDepth=3",
        "BoostType=AdaBoost",
        "AdaBoostBeta=0.3",
        "SeparationType=GiniIndex",
        "nCuts=20",
        "PruneMethod=NoPruning",
        # "PruneMethod=CostComplexity"
    ])

    print(bdtOptions)

    factory.BookMethod(TMVA.Types.kBDT, "BDT", bdtOptions)

    # Train MVAs
    factory.TrainAllMethods()

    # Test MVAs
    factory.TestAllMethods()

    # Evaluate MVAs
    factory.EvaluateAllMethods()

    # Save the output.
    outputFile.Close()

    print("=== wrote root file %s\n" % _outfname)
    print("=== TMVAClassification is done!\n")

Example #17

Show file

def TMVAClassification(methods,
                       sigfname,
                       bkgfname,
                       optname,
                       channel,
                       trees,
                       verbose=False):  #="DecayTree,DecayTree"
    # Print methods
    mlist = methods.replace(' ', ',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Define trees
    trees = trees.split(",")
    if len(trees) - trees.count('') != 2:
        print "ERROR: need to give two trees (each one for signal and background)"
        print trees
        sys.exit(1)
    treeNameSig = trees[0]
    treeNameBkg = trees[1]

    # Print output file and directory
    outfname = "TMVA_%s_%s.root" % (channel, optname)
    myWeightDirectory = "weights_%s_%s" % (channel, optname)
    print "=== TMVAClassification: output will be written to:"
    print "=== %s" % outfname
    print "=== %s" % myWeightDirectory

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut

    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)

    # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI
    #gROOT.SetMacroPath( "./" )
    #gROOT.Macro       ( "./tmva/test/TMVAlogon.C" )
    #gROOT.LoadMacro   ( "./tmva/test/TMVAGui.C" ) ###Is this really necessary??

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Setup TMVA
    TMVA.Tools.Instance()

    # Output file
    outputFile = TFile(outfname, 'RECREATE')

    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory(
        "TMVAClassification", outputFile,
        "!V:!Silent:Color:DrawProgressBar:Transformations=I:AnalysisType=Classification"
    )

    # Set verbosity
    factory.SetVerbose(verbose)

    # Load data
    dataloader = TMVA.DataLoader("dataset")

    # If you wish to modify default settings
    # (please check "src/Config.h" to see all available global options)
    #    gConfig().GetVariablePlotting()).fTimesRMS = 8.0
    (TMVA.gConfig().GetIONames()).fWeightFileDir = myWeightDirectory
    # Define the input variables that shall be used for the classifier training
    # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    # [all types of expressions that can also be parsed by TTree::Draw( "expression" )]

    print "*** Training on channel:"
    print "*** %s" % channel
    print "***"
    '''
    if channel == "1":
        #dataloader.AddVariable( "pplus_ProbNNp",                      "Prob(p^{+})",                             "",     'F' );
        #dataloader.AddVariable( "Kminus_ProbNNk",                     "Prob(K^{-})",                             "",     'F' );

        dataloader.AddVariable( "pplus_PT",                           "P_{T}(p^{+})",                             "MeV", 'F' );
        dataloader.AddVariable( "Kminus_PT",                          "P_{T}(K^{-})",                             "MeV", 'F' );
        dataloader.AddVariable( "gamma_PT",                           "P_{T}(#gamma)",                            "MeV", 'F' );
        dataloader.AddVariable( "Lambda_1520_0_PT",                   "P_{T}(#Lambda(1520))",                     "MeV", 'F' );
        dataloader.AddVariable( "B_PT",                               "P_{T}(#Lambda_{b})",                       "MeV", 'F' );

        dataloader.AddVariable( "beta:=(-gamma_P+Kminus_P+pplus_P)/(gamma_P+Kminus_P+pplus_P)","#beta",             "MeV", 'F' );
        dataloader.AddVariable( "MomCons1:=-B_P+gamma_P+Lambda_1520_0_P","P_{tot,1}",                               "MeV", 'F' );
        dataloader.AddVariable( "MomCons2:=-Lambda_1520_0_P+Kminus_P+pplus_P","P_{tot,2}",                          "MeV", 'F' );

        dataloader.AddVariable( "Sum_Kminus_p_eta:=atanh(pplus_PZ/pplus_P)+atanh(Kminus_PZ/Kminus_P)","#eta(K^{-})+#eta(p^{+})","MeV", 'F' );
        dataloader.AddVariable( "Diff_Kminus_p_eta:=atanh(Kminus_PZ/Kminus_P)-atanh(pplus_PZ/pplus_P)","#eta(K^{-})-#eta(p^{+})","MeV", 'F' );

        dataloader.AddVariable( "pplus_IPCHI2_OWNPV",                 "#chi^{2}_{IP}(p^{+})",                       ""  ,  'F' );
        dataloader.AddVariable( "Kminus_IPCHI2_OWNPV",                "#chi^{2}_{IP}(K^{-})",                       ""  ,  'F' );
        dataloader.AddVariable( "B_IPCHI2_OWNPV",                     "#chi^{2}_{IP}(#Lambda_{b})",                 ""  ,  'F' );
        #dataloader.AddVariable( "gamma_IPCHI2_OWNPV",                 "IP #chi^{2}(#gamma)",                       ""  ,  'F' );
        #dataloader.AddVariable( "Lambda_1520_0_IP_OWNPV",             "IP(#Lambda(1520))",                        "mm",  'F' );
        #dataloader.AddVariable( "Lambda_1520_0_IPCHI2_OWNPV",         "IP#chi^{2}(#Lambda(1520))",               "",    'F' );
        
        dataloader.AddVariable( "Lambda_1520_0_OWNPV_CHI2",           "#chi^{2}_{vertex}(#Lambda(1520))",           ""  ,  'F' );
        dataloader.AddVariable( "B_OWNPV_CHI2",                       "#chi^{2}_{vertex}(#Lambda_{b})",             ""  ,  'F' );
        dataloader.AddVariable( "B_DIRA_OWNPV",                       "DIRA(#Lambda_{b})",                          ""  ,  'F' );
        
        #dataloader.AddVariable( "Lambda_1520_0_FDCHI2_OWNPV",         "FD #chi^{2}(#Lambda(1520))",               "",    'F' );
        dataloader.AddVariable( "B_FDCHI2_OWNPV",                     "#chi^{2}_{FD}(#Lambda_{b})",                 "",    'F' );
    '''

    if channel == "2":
        dataloader.AddVariable("pplus_PT", "P_{T}(p^{+})", "MeV", 'F')
        dataloader.AddVariable("Kminus_PT", "P_{T}(K^{-})", "MeV", 'F')
        dataloader.AddVariable("gamma_PT", "P_{T}(#gamma)", "MeV", 'F')
        dataloader.AddVariable("Lambda_1520_0_PT", "P_{T}(#Lambda*)", "MeV",
                               'F')
        dataloader.AddVariable("B_PT", "P_{T}(#Lambda_{b})", "MeV", 'F')

        dataloader.AddVariable(
            "beta:=(-gamma_P+Kminus_P+pplus_P)/(gamma_P+Kminus_P+pplus_P)",
            "#beta", "", 'F')
        #ok
        #dataloader.AddVariable( "MomCons1:=-B_P+gamma_P+Lambda_1520_0_P","P_{tot,1}",                               "MeV", 'F' );#BDT learned Mass check1
        dataloader.AddVariable("MomCons2:=-Lambda_1520_0_P+Kminus_P+pplus_P",
                               "P_{tot,2}", "MeV", 'F')
        #ok

        #dataloader.AddVariable( "Sum_Kminus_p_eta:=atanh(pplus_PZ/pplus_P)+atanh(Kminus_PZ/Kminus_P)","#eta(K^{-})+#eta(p^{+})","", 'F' );#99correlationL_eta
        dataloader.AddVariable(
            "Diff_Kminus_p_eta:=atanh(Kminus_PZ/Kminus_P)-atanh(pplus_PZ/pplus_P)",
            "#eta(K^{-})-#eta(p^{+})", "", 'F')
        dataloader.AddVariable(
            "Lambda_1520_0_eta:=atanh(Lambda_1520_0_PZ/Lambda_1520_0_P)",
            "#eta(#Lambda*)", "", 'F')
        dataloader.AddVariable("gamma_eta:=atanh(gamma_PZ/gamma_P)",
                               "#eta(#gamma)", "", 'F')

        dataloader.AddVariable("pplus_IPCHI2_OWNPV", "#chi^{2}_{IP}(p^{+})",
                               "", 'F')
        #dataloader.AddVariable( "Kminus_IPCHI2_OWNPV",                "#chi^{2}_{IP}(K^{-})",                       ""  ,  'F' );
        dataloader.AddVariable("B_IPCHI2_OWNPV", "#chi^{2}_{IP}(#Lambda_{b})",
                               "", 'F')
        dataloader.AddVariable("Lambda_1520_0_IPCHI2_OWNPV",
                               "#chi^{2}_{IP}(#Lambda*)", "", 'F')

        dataloader.AddVariable("Lambda_1520_0_OWNPV_CHI2",
                               "#chi^{2}_{vertex}(#Lambda*)", "", 'F')
        dataloader.AddVariable("B_OWNPV_CHI2",
                               "#chi^{2}_{vertex}(#Lambda_{b})", "", 'F')
        #dataloader.AddVariable( "B_BMassFit_chi2/B_BMassFit_nDOF",    "#chi^{2}_{DTF}/n_{dof}",                     ""  ,  'F' );#BDT learned Mass check1
        #dataloader.AddVariable( "B_PVFit_chi2/B_PVFit_nDOF",          "#chi^{2}_{DTF}/n_{dof}",                     ""  ,  'F' );#put it out because array

        #dataloader.AddVariable( "B_DIRA_OWNPV",                       "DIRA(#Lambda_{b})",                          ""  ,  'F' ); #not used by BDT
        #dataloader.AddVariable( "Lambda_1520_0_DIRA_OWNPV",           "DIRA(#Lambda*)",                        ""  ,  'F' ); #not used
        #dataloader.AddVariable( "Lambda_1520_0_FDCHI2_OWNPV",         "FD #chi^{2}(#Lambda*)",               "",    'F' ); #not used
        #dataloader.AddVariable( "B_FDCHI2_OWNPV",                     "#chi^{2}_{FD}(#Lambda_{b})",                 "",    'F' ); #not used

    # Add Spectator Variables: not used for Training but written in final TestTree
    #dataloader.AddSpectator( "B_M",                                   "M(#Lambda_{b})",                           "MeV");
    #dataloader.AddSpectator( "Lambda_1520_0_M",                       "M(#Lambda*)",                         "MeV");

    # Read input data
    if gSystem.AccessPathName(sigfname) != 0:
        print "Can not find %s" % sigfname
    if gSystem.AccessPathName(bkgfname) != 0:
        print "Can not find %s" % bkgfname

    inputSig = TFile.Open(sigfname)
    inputBkg = TFile.Open(bkgfname)

    # Get the signal and background trees for training
    signal = inputSig.Get(treeNameSig)
    background = inputBkg.Get(treeNameBkg)

    # Global event weights (see below for setting event-wise weights)
    signalWeight = 1.0
    backgroundWeight = 1.0

    # ====== register trees ====================================================
    #
    # the following method is the prefered one:
    # you can add an arbitrary number of signal or background trees
    dataloader.AddSignalTree(signal, signalWeight)
    dataloader.AddBackgroundTree(background, backgroundWeight)

    # To give different trees for training and testing, do as follows:
    #    dataloader.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" )
    #    dataloader.AddSignalTree( signalTestTree,     signalTestWeight,  "Test" )

    # Use the following code instead of the above two or four lines to add signal and background
    # training and test events "by hand"
    # NOTE that in this case one should not give expressions (such as "var1+var2") in the input
    #      variable definition, but simply compute the expression before adding the event
    #
    #    # --- begin ----------------------------------------------------------
    #
    # ... *** please lookup code in TMVA/macros/TMVAClassification.C ***
    #
    #    # --- end ------------------------------------------------------------
    #
    # ====== end of register trees ==============================================

    # Set individual event weights (the variables must exist in the original TTree)
    #    for signal    : dataloader.SetSignalWeightExpression    ("weight1*weight2");
    #    for background: dataloader.SetBackgroundWeightExpression("weight1*weight2");
    #dataloader.SetBackgroundWeightExpression( "weight" )

    # Apply additional cuts on the signal and background sample.
    # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )

    mycutSig = TCut(
        ""
    )  #"pplus_ProbNNp>0.2 && Kminus_ProbNNk>0.2 && B_PT>4000 && Lambda_1520_0_PT>1500 && gamma_PT>3000 && pplus_PT>1000 && B_FDCHI2_OWNPV>100 && pplus_IPCHI2_OWNPV>50 && Kminus_IPCHI2_OWNPV>40")# B_BKGCAT==0 directly applied in root sample
    #print(sigfname + str( mycutSig ) + treeNameSig)

    mycutBkg = TCut(
        "B_M>6120"
    )  #"pplus_ProbNNp>0.2 && Kminus_ProbNNk>0.2 && B_PT>4000 && Lambda_1520_0_PT>1500 && gamma_PT>3000 && pplus_PT>1000 && B_FDCHI2_OWNPV>100 && pplus_IPCHI2_OWNPV>50 && Kminus_IPCHI2_OWNPV>40 && B_M>6120")#(B_M>6120 || B_M<5120)" )
    #print(bkgfname + str( mycutBkg ) + treeNameBkg)

    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
    dataloader.PrepareTrainingAndTestTree(
        mycutSig, mycutBkg,
        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V"
    )

    # --------------------------------------------------------------------------------------------------

    # ---- Book MVA methods
    #
    # please lookup the various method configuration options in the corresponding cxx files, eg:
    # src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
    # it is possible to preset ranges in the option string in which the cut optimisation should be done:
    # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

    # Cut optimisation
    if "Cuts" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kCuts, "Cuts",
            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart")

    if "CutsD" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kCuts, "CutsD",
            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate"
        )

    if "CutsPCA" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kCuts, "CutsPCA",
            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA"
        )

    if "CutsGA" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kCuts, "CutsGA",
            "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95"
        )

    if "CutsSA" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kCuts, "CutsSA",
            "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale"
        )

    # Likelihood ("naive Bayes estimator")
    if "Likelihood" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kLikelihood, "Likelihood",
            "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50"
        )

    # Decorrelated likelihood
    if "LikelihoodD" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kLikelihood, "LikelihoodD",
            "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate"
        )

    # PCA-transformed likelihood
    if "LikelihoodPCA" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kLikelihood, "LikelihoodPCA",
            "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA"
        )

    # Use a kernel density estimator to approximate the PDFs
    if "LikelihoodKDE" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kLikelihood, "LikelihoodKDE",
            "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50"
        )

    # Use a variable-dependent mix of splines and kernel density estimator
    if "LikelihoodMIX" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kLikelihood, "LikelihoodMIX",
            "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50"
        )

    # Test the multi-dimensional probability density estimator
    # here are the options strings for the MinMax and RMS methods, respectively:
    #      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
    #      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
    if "PDERS" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kPDERS, "PDERS",
            "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600"
        )

    if "PDERSD" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kPDERS, "PDERSD",
            "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate"
        )

    if "PDERSPCA" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kPDERS, "PDERSPCA",
            "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA"
        )

# Multi-dimensional likelihood estimator using self-adapting phase-space binning
    if "PDEFoam" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kPDEFoam, "PDEFoam",
            "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T"
        )

    if "PDEFoamBoost" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kPDEFoam, "PDEFoamBoost",
            "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T"
        )

    # K-Nearest Neighbour classifier (KNN)
    if "KNN" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kKNN, "KNN",
            "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim"
        )

    # H-Matrix (chi2-squared) method
    if "HMatrix" in mlist:
        factory.BookMethod(dataloader, TMVA.Types.kHMatrix, "HMatrix", "!H:!V")

    # Linear discriminant (same as Fisher discriminant)
    if "LD" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kLD, "LD",
            "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10"
        )

    # Fisher discriminant (same as LD)
    if "Fisher" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kFisher, "Fisher",
            "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10"
        )

    # Fisher with Gauss-transformed input variables
    if "FisherG" in mlist:
        factory.BookMethod(dataloader, TMVA.Types.kFisher, "FisherG",
                           "H:!V:VarTransform=Gauss")

    # Composite classifier: ensemble (tree) of boosted Fisher classifiers
    if "BoostedFisher" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kFisher, "BoostedFisher",
            "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2"
        )

    # Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
    if "FDA_MC" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kFDA, "FDA_MC",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1"
        )

    if "FDA_GA" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kFDA, "FDA_GA",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1"
        )

    if "FDA_SA" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kFDA, "FDA_SA",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale"
        )

    if "FDA_MT" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kFDA, "FDA_MT",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch"
        )

    if "FDA_GAMT" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kFDA, "FDA_GAMT",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim"
        )

    if "FDA_MCMT" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kFDA, "FDA_MCMT",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20"
        )

    # TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
    if "MLP" in mlist:
        factory.BookMethod(
            dataloader,
            TMVA.Types.kMLP,
            "MLP",
            #"!H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+3:TestRate=5:!UseRegulator" )#Try
            "!H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator"
        )  #Old

    if "MLPBFGS" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kMLP, "MLPBFGS",
            "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator"
        )

    if "MLPBNN" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kMLP, "MLPBNN",
            "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator"
        )  # BFGS training with bayesian regulators

    # CF(Clermont-Ferrand)ANN
    if "CFMlpANN" in mlist:
        factory.BookMethod(dataloader, TMVA.Types.kCFMlpANN, "CFMlpANN",
                           "!H:!V:NCycles=2000:HiddenLayers=N+1,N"
                           )  # n_cycles:#nodes:#nodes:...

    # Tmlp(Root)ANN
    if "TMlpANN" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kTMlpANN, "TMlpANN",
            "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"
        )  # n_cycles:#nodes:#nodes:...

    # Support Vector Machine
    if "SVM" in mlist:
        factory.BookMethod(dataloader, TMVA.Types.kSVM, "SVM",
                           "Gamma=0.25:Tol=0.001:VarTransform=Norm")

    # Boosted Decision Trees
    if "BDTG" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kBDT, "BDTG",
            "!H:!V:NTrees=600:BoostType=Grad:Shrinkage=0.1:UseBaggedGrad:GradBaggingFraction=0.73:SeparationType=GiniIndex:nCuts=15:MaxDepth=2"
        )  #Settings3
        #"!H:!V:NTrees=300:BoostType=Grad:Shrinkage=0.11:UseBaggedGrad:GradBaggingFraction=0.73:SeparationType=GiniIndex:nCuts=17:MaxDepth=4" )#AnaNote
        #"!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5" )#Old

    if "BDT" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kBDT, "BDT",
            "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning"
        )

    if "BDTB" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kBDT, "BDTB",
            "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning"
        )

    if "BDTD" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kBDT, "BDTD",
            "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate"
        )

    # RuleFit -- TMVA implementation of Friedman's method
    if "RuleFit" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kRuleFit, "RuleFit",
            "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02"
        )

    # --------------------------------------------------------------------------------------------------

    # ---- Now you can tell the factory to train, test, and evaluate the MVAs.

    # Train MVAs
    print("FLAG 0")
    factory.TrainAllMethods()

    # Test MVAs
    factory.TestAllMethods()

    # Evaluate MVAs
    factory.EvaluateAllMethods()

    # Save the output.
    outputFile.Close()

    print "=== wrote root file %s\n" % outfname
    print "=== TMVAClassification is done!\n"

    # open the GUI for the result macros
    if not gROOT.IsBatch(): TMVA.TMVAGui(outfname)

Example #18

Show file

File: TMVAClassificationPyKeras.py Project: Jangbae/TMVA_DNN

def main():
    """Train, test and evaluate a TMVA PyKeras classifier set up from the command line.

    Command-line options (parsed with getopt from ``sys.argv``):
        -m / --methods      method string; PyKeras is booked only when it is
                            exactly "Keras"
        -l / --varListKey   key into ``varsList.varList`` selecting the inputs
        -d / --maxDepth     value embedded in the output and weight-dir names
        -t / --inputtrees   two whitespace-separated tree names (only the
                            count is validated)
        -v / --verbose      verbose factory output (already the default here)
        -h / -? / --help / --usage   print usage and exit
        -i, -o, -n, -k      accepted, but their values are not used: the
                            signal file name is fixed and the output name is
                            derived from the other options

    Relies on module-level names defined elsewhere in the file (``usage``,
    the ``DEFAULT_*`` constants, ``varsList``, ``weightStrS``/``weightStrB``,
    ``cutStrS``/``cutStrB``, and the ROOT / Keras imports).

    Exits with status 1 on bad options or on ROOT 5.18, and with status 0
    after printing the usage text.
    """
    try:
        # retrieve command line options
        shortopts = "m:i:n:d:k:l:t:o:vh?"
        longopts = ["methods=", "inputfile=", "nTrees=", "maxDepth=", "mass=",
                    "varListKey=", "inputtrees=", "outputfile=", "verbose",
                    "help", "usage"]
        opts, _args = getopt.getopt(sys.argv[1:], shortopts, longopts)
    except getopt.GetoptError:
        print("ERROR: unknown options in argument %s" % sys.argv[1:])
        usage()
        sys.exit(1)

    methods = DEFAULT_METHODS
    mDepth = DEFAULT_MDEPTH
    varListKey = DEFAULT_VARLISTKEY
    # NOTE: verbose already defaults to True, so -v does not change anything.
    verbose = True
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-d", "--maxDepth"):
            mDepth = a
        elif o in ("-l", "--varListKey"):
            varListKey = a
        elif o in ("-t", "--inputtrees"):
            # split() drops empty fields, so stray blanks cannot be miscounted
            trees = sorted(a.split(), reverse=True)
            if len(trees) != 2:
                print("ERROR: need to give two trees (each one for signal and background)")
                print(trees)
                sys.exit(1)
            # NOTE: the tree names are only validated; the trees read below
            # are always fetched under the fixed name "ljmet".
        elif o in ("-v", "--verbose"):
            verbose = True
        # -i, -n, -k and -o are accepted for command-line compatibility, but
        # nothing below reads their values.

    varList = varsList.varList[varListKey]
    nVars = str(len(varList)) + 'vars'
    # %-formatting also copes with a non-string DEFAULT_MDEPTH
    Note = '%s_%s_%s_mDepth%s' % (methods, varListKey, nVars, mDepth)
    outfname = "dataset/weights/TMVA_" + Note + ".root"

    # Print methods
    mlist = methods.replace(' ', ',').split(',')
    print("=== TMVAClassification: use method(s)...")
    for m in mlist:
        if m.strip() != '':
            print("=== - <%s>" % m.strip())

    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print("*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA")
        print("*** does not run properly (function calls with enums in the argument are ignored).")
        print("*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),")
        print("*** or use another ROOT version (e.g., ROOT 5.19).")
        sys.exit(1)

    # Output file
    outputFile = TFile(outfname, 'RECREATE')
    factory = TMVA.Factory(
        "TMVAClassification", outputFile,
        "!V:!Silent:Color:DrawProgressBar:Transformations=I;:AnalysisType=Classification")

    factory.SetVerbose(verbose)
    TMVA.gConfig().GetIONames().fWeightFileDir = "weights/" + Note

    dataloader = TMVA.DataLoader('dataset')

    # Each varList entry supplies the first three AddVariable arguments;
    # NJets_singleLepCalc is booked as an integer, everything else as a float.
    for iVar in varList:
        varType = 'I' if iVar[0] == 'NJets_singleLepCalc' else 'F'
        dataloader.AddVariable(iVar[0], iVar[1], iVar[2], varType)

    inputDir = varsList.inputDir
    infname = "TTTT_TuneCP5_PSweights_13TeV-amcatnlo-pythia8_hadd.root"
    iFileSig = TFile.Open(inputDir + infname)
    sigChain = iFileSig.Get("ljmet")
    dataloader.AddSignalTree(sigChain)

    # Keep references to every opened file and tree for the whole training.
    bkg_list = []
    bkg_trees_list = []
    for bkgName in varsList.bkg:
        bkgFile = TFile.Open(inputDir + bkgName)
        bkg_list.append(bkgFile)
        print(inputDir + bkgName)
        bkgTree = bkgFile.Get("ljmet")
        bkg_trees_list.append(bkgTree)
        bkgTree.GetEntry(0)

        # Empty background samples are not handed to the dataloader
        if bkgTree.GetEntries() == 0:
            continue
        dataloader.AddBackgroundTree(bkgTree, 1)

    dataloader.SetSignalWeightExpression(weightStrS)
    dataloader.SetBackgroundWeightExpression(weightStrB)

    mycutSig = TCut(cutStrS)
    mycutBkg = TCut(cutStrB)

    dataloader.PrepareTrainingAndTestTree(
        mycutSig, mycutBkg,
        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V")

    kerasSetting = 'H:!V:VarTransform=G:FilenameModel=model.h5:NumEpochs=10:BatchSize=1028'

    # The input layer is sized from the variables booked above instead of a
    # hard-coded 53, so the model follows the selected variable list.
    model = Sequential()
    model.add(Dense(100, activation='relu', input_dim=len(varList)))
    for _ in range(3):
        model.add(Dense(100, activation="relu"))
    model.add(Dense(2, activation="sigmoid"))

    # Set loss and optimizer
    model.compile(loss='categorical_crossentropy', optimizer=Adam(), metrics=['accuracy', ])

    # Store model to file (the name must match FilenameModel in kerasSetting)
    model.save('model.h5')
    model.summary()

    if methods == "Keras":
        factory.BookMethod(dataloader, TMVA.Types.kPyKeras, "PyKeras", kerasSetting)

    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()

    outputFile.Close()
    print("DONE")

Example #19

Show file

File: TMVARegression.py Project: apana/usercode

# Per-analysis settings for TMVARegression: the branch-name prefix of the
# jets, the extra per-jet inputs booked only for that analysis, whether the
# jet-id cuts are applied, and the branch used in the final "> 100" cut.
_REGRESSION_SETUPS = {
    "Dijet": {
        "prefix": "hJet",
        "extra_inputs": ("nch", "nconstituents", "JECUnc"),
        "id_cut": True,
        "pt_branch": "H.pt",
    },
    "Subjet": {
        "prefix": "fathFilterJets",
        "extra_inputs": (),
        "id_cut": False,
        "pt_branch": "FatH.filteredpt",
    },
}


def _regression_inputs(prefix, extra_inputs=()):
    """Return the ordered (expression, title) pairs booked as input variables."""
    def plain(name):
        branch = "%s_%s" % (prefix, name)
        return (branch, branch)

    fmt = {"p": prefix}
    pt_raw_expr = ("%(p)s_ptRaw*((%(p)s_ptRaw+resolutionBias(fabs(%(p)s_eta))"
                   "*(%(p)s_ptRaw-%(p)s_genPt))/%(p)s_ptRaw)" % fmt)

    inputs = [plain(name) for name in ("pt", "eta", "phi", "e")]
    inputs.append((pt_raw_expr, pt_raw_expr))
    inputs.append(("%(p)s_Mt:=evalMt(%(p)s_pt, %(p)s_eta, %(p)s_phi, %(p)s_e)" % fmt,
                   "%s_Mt" % prefix))
    inputs.append(("%(p)s_Et:=evalEt(%(p)s_pt, %(p)s_eta, %(p)s_phi, %(p)s_e)" % fmt,
                   "%s_Et" % prefix))
    inputs.extend(plain(name) for name in
                  ("ptLeadTrack", "vtxPt", "vtx3dL", "vtx3deL", "vtxMass", "chf"))
    inputs.extend(plain(name) for name in extra_inputs)
    inputs.append(("rho25", "rho25"))
    inputs.append(("MET.et", "MET.et"))
    inputs.append(("METdPhi:=METdeltaPhi(MET.phi, %(p)s_phi[0], %(p)s_phi[1])" % fmt,
                   "METdPhi"))
    return inputs


def _regression_cut(prefix, id_cut, pt_branch):
    """Return the ' && '-joined selection string applied to the regression tree."""
    terms = ["(Vtype == 0 || Vtype == 1)"]
    per_jet = ["%s_pt[%d] > 20.0", "%s_genPt[%d] > 0.0", "%s_eta[%d] < 2.4"]
    if id_cut:
        per_jet.append("%s_id[%d] > 0.0")
    for template in per_jet:
        terms.extend(template % (prefix, index) for index in (0, 1))
    terms.append("max(%(p)s_csv[0],%(p)s_csv[1]) > 0.0" % {"p": prefix})
    terms.append("min(%(p)s_csv[0],%(p)s_csv[1]) > 0.0" % {"p": prefix})
    terms.append("%s > 100" % pt_branch)
    return " && ".join(terms)


def TMVARegression():
    """Book, train, test and evaluate a TMVA BDT regression from the command line.

    Command-line options (parsed with getopt from ``sys.argv``):
        -a / --analysis     "Dijet" or "Subjet" (default: module-level ANALYSIS)
        -o / --outputfile   output ROOT file (default: module-level OUTFNAME)
        -v / --verbose      verbose factory output
        -h / -? / --help / --usage   print usage and exit

    The analysis name is validated before the output file is opened, so an
    invalid value no longer recreates (truncates) that file before exiting.

    Relies on module-level names defined elsewhere in the file (``usage``,
    ``OUTFNAME``, ``ANALYSIS``, ``TFile``, ``TChain``, ``TCut``).

    Exits with status 1 on bad options or an unknown analysis, and with
    status 0 after printing the usage text.
    """
    try:
        # retrieve command line options
        shortopts = "a:o:vh?"
        longopts = ["analysis=", "outputfile=", "verbose", "help", "usage"]
        opts, _args = getopt.getopt(sys.argv[1:], shortopts, longopts)
    except getopt.GetoptError:
        # print help information and exit:
        print("ERROR: unknown options in argument %s" % sys.argv[1:])
        usage()
        sys.exit(1)

    _outfname = OUTFNAME
    _analysis = ANALYSIS
    verbose = False
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-o", "--outputfile"):
            _outfname = a
        elif o in ("-a", "--analysis"):
            _analysis = a
        elif o in ("-v", "--verbose"):
            verbose = True

    # Reject an unknown analysis before any file is touched.
    setup = _REGRESSION_SETUPS.get(_analysis)
    if setup is None:
        print("Problem specifying analysis. Please choose Dijet or Subjet.")
        sys.exit(1)
    prefix = setup["prefix"]

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile(_outfname, 'RECREATE')

    # Create the factory object. The first argument is the base of the name
    # of all the weight files; the second is the output file for the training
    # results. All TMVA output can be suppressed by removing the "!" (not) in
    # front of the "Silent" argument in the option string.
    factory = TMVA.Factory("TMVARegression", outputFile,
                           "!V:!Silent:Color:DrawProgressBar")
    # Set verbosity
    factory.SetVerbose(verbose)

    TMVA.gConfig().GetIONames().fWeightFileDir = "weights_Reg_8TeV" + "_" + _analysis

    # Input variables, followed by the variable carrying the regression target
    for expression, title in _regression_inputs(prefix, setup["extra_inputs"]):
        factory.AddVariable(expression, title, "units", 'F')
    factory.AddTarget(prefix + "_genPt")

    ## Get the Signal trees
    en7TeV = False
    en8TeV = True

    regWeight = 1.
    chain = TChain("tree")

    if en7TeV:  # change the ntuple names later!!
        for mass in (125, 115, 120, 130, 135):
            chain.Add("Step2_output_May11/WH_%d_ForRegression.root" % mass)

    if en8TeV:
        step2Dir = ("/uscmst1b_scratch/lpc1/lpctrig/apana/Higgs/Step2/NtupleV34/"
                    "CMSSW_5_2_5/src/VHbbAnalysis/VHbbDataFormats/bin/Step2/ZH/")
        for mass in (110, 115, 120, 125, 130, 135):
            chain.Add(step2Dir + "ZH_%d_summer12_33b.root" % mass)

    NEntries = chain.GetEntries()
    print("Number of entries on Chain: %s" % NEntries)

    regTree = chain
    factory.AddRegressionTree(regTree, regWeight)

    # This would set individual event weights (the variables defined in the
    # expression need to exist in the original TTree)
    # factory->SetWeightExpression( "var1", "Regression" )

    cutString = _regression_cut(prefix, setup["id_cut"], setup["pt_branch"])
    print(cutString)
    mycut = TCut(cutString)

    # Fixed training/test sample sizes per dataset; the split is random.
    if en7TeV:
        factory.PrepareTrainingAndTestTree(
            mycut,
            "nTrain_Regression=125000:nTest_Regression=125000:SplitMode=Random:NormMode=NumEvents:!V")
    if en8TeV:
        factory.PrepareTrainingAndTestTree(
            mycut,
            "nTrain_Regression=111000:nTest_Regression=111000:SplitMode=Random:NormMode=NumEvents:!V")

    # If no numbers of events are given, half of the events in the tree are
    # used for training, and the other half for testing:
    # factory.PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );

    # ---- Book MVA methods
    # Method configuration options are listed at
    # http://tmva.sourceforge.net/optionRef.html

    # Boosted Decision Trees
    factory.BookMethod(
        TMVA.Types.kBDT, "BDT",
        "!H:!V:NTrees=60:nEventsMin=5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30")

    # ---- Train, test, and evaluate the booked MVAs

    # Train MVAs using the set of training events
    factory.TrainAllMethods()

    # Evaluate all MVAs using the set of test events
    factory.TestAllMethods()

    # Evaluate and compare performance of all configured MVAs
    factory.EvaluateAllMethods()

    NEntries = regTree.GetEntries()
    # Two spaces after the colon reproduce the original two-argument print.
    print("Number of entries on Tree:  %s" % NEntries)

    # Save the output
    outputFile.Close()

    print("==> Wrote root file %s\n" % _outfname)
    print("==> TMVARegression is done!\n")