Example #1
0
def Evaluate(outdir):
  """Train, test and evaluate a TMVA RuleFit classifier from files in `outdir`.

  Files read from `outdir`:
    inputvars.txt    one input variable (or expression) per line
    signals.txt      signal events, handed to factory.SetInputTrees
    backgrounds.txt  background events, handed to factory.SetInputTrees

  Files written to `outdir`:
    tmva.log   receives everything written to Python's sys.stdout during the run
    tmva.root  TMVA output file
    weights/   TMVA weight-file directory

  sys.stdout is restored and both the log and the ROOT output file are
  closed when the function returns, also when an exception is raised.
  """
  # Redirect Python-level output to the log only for the duration of this call
  saved_stdout = sys.stdout
  log = open(outdir + '/tmva.log', 'w')
  sys.stdout = log
  output = None
  try:
    # Output file
    output = TFile(outdir + '/tmva.root', 'RECREATE')

    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory("TMVARuleFit", output, "!V:!Silent:Color")

    # Set the variables used for the analysis. strip() rather than [:-1] so
    # that a last line without a trailing newline keeps its final character;
    # blank lines are skipped instead of registering an empty variable name.
    with open(outdir + '/inputvars.txt') as varfile:
      for line in varfile:
        name = line.strip()
        if name:
          factory.AddVariable(name, 'F')

    # Set the weight directory
    TMVA.gConfig().GetIONames().fWeightFileDir = outdir + "/weights"

    # Limit the creation of correlation plots
    TMVA.gConfig().GetVariablePlotting().fMaxNumOfAllowedVariablesForScatterPlots = 20

    # Set the input files with signal and background events
    factory.SetInputTrees(
      outdir + '/signals.txt',
      outdir + '/backgrounds.txt'
    )

    # No additional selection on either sample
    cutsig = TCut('')
    cutbkg = TCut('')

    factory.PrepareTrainingAndTestTree(cutsig, cutbkg, "SplitMode=Random:NormMode=NumEvents:!V")

    factory.BookMethod(TMVA.Types.kRuleFit, "RuleFit",
      "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.00001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02")

    # Train MVAs
    factory.TrainAllMethods()

    # Test MVAs
    factory.TestAllMethods()

    # Evaluate MVAs
    factory.EvaluateAllMethods()
  finally:
    # Save the output and undo the stdout redirection.
    if output is not None:
      output.Close()
    sys.stdout = saved_stdout
    log.close()
Example #2
0
    def fit(self, X, y, X_test=None, y_test=None, weights=None, weights_test=None, signal_label=None, **kwargs):
        """Load events into the TMVA factory, book ``self.method`` and train it.

        The event-adding function is chosen from ``self.task``: regression
        events for ``"Regression"``, classification events otherwise (in which
        case ``signal_label`` is forwarded). Before the real data is added,
        the function is exercised with malformed input to check that it
        raises the expected exceptions.

        When ``X_test`` is None the training sample (``X``, ``y``,
        ``weights``) is reused as the test sample.

        Each keyword argument becomes one TMVA option: ``name=True`` is
        written as ``name``, ``name=False`` as ``!name`` and anything else as
        ``name=value``; the options are joined with ``:``.
        """
        # (re)configure settings since deleting a previous Factory resets all
        # this. This is poor design, TMVA.
        config = TMVA.gConfig()
        config.GetIONames().fWeightFileDir = self.tmpdir
        config.SetSilent(True)
        config.SetDrawProgressBar(False)
        self.factory.DeleteAllMethods()

        extra_kwargs = dict()
        if self.task == "Regression":
            func = rnp.tmva.add_regression_events
        else:
            func = rnp.tmva.add_classification_events
            extra_kwargs["signal_label"] = signal_label

        # test exceptions
        assert_raises(TypeError, func, object(), X, y)
        # Floor division keeps the slice bound an integer; with true division
        # the slice itself raises TypeError before func is ever called.
        assert_raises(ValueError, func, self.factory, X, y[: y.shape[0] // 2])
        if weights is not None:
            assert_raises(ValueError, func, self.factory, X, y, weights=weights[: weights.shape[0] // 2])
            assert_raises(ValueError, func, self.factory, X, y, weights=weights[:, np.newaxis])

        assert_raises(ValueError, func, self.factory, [[[1, 2]]], [1])
        assert_raises(ValueError, func, self.factory, [[1, 2]], [[[1]]])

        func(self.factory, X, y, weights=weights, **extra_kwargs)
        if X_test is None:
            # No dedicated test sample: test on the training events
            X_test = X
            y_test = y
            weights_test = weights
        func(self.factory, X_test, y_test, weights=weights_test, test=True, **extra_kwargs)

        self.factory.PrepareTrainingAndTestTree(TCut("1"), "NormMode=EqualNumEvents")
        options = []
        for param, value in kwargs.items():
            if value is True:
                options.append(param)
            elif value is False:
                options.append("!{0}".format(param))
            else:
                options.append("{0}={1}".format(param, value))
        options = ":".join(options)
        self.factory.BookMethod(self.method, self.method, options)
        self.factory.TrainAllMethods()
Example #3
0
    def fit(self, X, y, X_test=None, y_test=None,
            weights=None, weights_test=None,
            signal_label=None, **kwargs):
        """Load events into the TMVA factory, book ``self.method`` and train it.

        The event-adding function is chosen from ``self.task``: regression
        events for ``'Regression'``, classification events otherwise (in which
        case ``signal_label`` is forwarded). Before the real data is added,
        the function is exercised with malformed input to check that it
        raises the expected exceptions.

        Test events are only added when both ``X_test`` and ``y_test`` are
        given. Every keyword argument is passed to TMVA as ``name=value``;
        the joined option string gets a leading ``:`` when it is not empty.
        """
        # (re)configure settings since deleting a previous Factory resets all
        # this. This is poor design, TMVA.
        config = TMVA.gConfig()
        config.GetIONames().fWeightFileDir = self.tmpdir
        config.SetSilent(True)
        config.SetDrawProgressBar(False)
        self.factory.DeleteAllMethods()

        extra_kwargs = dict()
        if self.task == 'Regression':
            func = rnp.tmva.add_regression_events
        else:
            func = rnp.tmva.add_classification_events
            extra_kwargs['signal_label'] = signal_label

        # test exceptions
        assert_raises(TypeError, func, object(), X, y)
        # Floor division keeps the slice bound an integer; with true division
        # the slice itself raises TypeError before func is ever called.
        assert_raises(ValueError, func,
                      self.factory, X, y[:y.shape[0] // 2])
        if weights is not None:
            assert_raises(ValueError, func, self.factory, X, y,
                          weights=weights[:weights.shape[0] // 2])
            assert_raises(ValueError, func, self.factory, X, y,
                          weights=weights[:, np.newaxis])

        assert_raises(ValueError, func, self.factory, [[[1, 2]]], [1])
        assert_raises(ValueError, func, self.factory, [[1, 2]], [[[1]]])

        func(self.factory, X, y, weights=weights, **extra_kwargs)
        if X_test is not None and y_test is not None:
            func(self.factory, X_test, y_test,
                 weights=weights_test, test=True, **extra_kwargs)

        self.factory.PrepareTrainingAndTestTree(
            TCut('1'), 'NormMode=EqualNumEvents')
        options = ':'.join(['{0}={1}'.format(param, value)
                            for param, value in kwargs.items()])
        if options:
            options = ':' + options
        self.factory.BookMethod(self.method, self.method, options)
        self.factory.TrainAllMethods()
Example #4
0
def main():
    """Train, test and evaluate one TMVA BDT variant for a single mass point.

    Command-line options are parsed with getopt from sys.argv:
      -m/--methods     one of BDT, BDTG, BDTMitFisher, BDTB, BDTD
      -l/--varListKey  key into varsList.varList selecting the input variables
      -n/--nTrees      number of trees
      -d/--maxDepth    maximum tree depth (used by BDT and BDTG)
      -k/--mass        signal mass point, substituted into the signal file name
      -t/--inputtrees  two tree names (validated, otherwise unused)
      -i, -o           accepted, but the input and output file names are
                       always derived from the other options
      -v/--verbose     verbosity flag (already on by default)
      -h/-?/--help/--usage  print usage and exit

    Output goes to weights/TMVA_<note>.root and weights/<note>/, where <note>
    encodes method, variable list, number of variables, depth and mass.
    The process exits with status 1 on bad options, an unsupported method,
    an unsupported ROOT version, or an unreadable input file or tree.
    Side effect: the working directory is changed to weights/<note>.
    """
    try:
        # retrieve command line options
        shortopts = "m:i:n:d:k:l:t:o:vh?"
        longopts = [
            "methods=", "inputfile=", "nTrees=", "maxDepth=", "mass=",
            "varListKey=", "inputtrees=", "outputfile=", "verbose", "help",
            "usage"
        ]
        opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts)

    except getopt.GetoptError:
        # print help information and exit:
        print("ERROR: unknown options in argument %s" % sys.argv[1:])
        usage()
        sys.exit(1)

    infname = DEFAULT_INFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname = DEFAULT_OUTFNAME
    methods = DEFAULT_METHODS
    nTrees = DEFAULT_NTREES
    mDepth = DEFAULT_MDEPTH
    mass = DEFAULT_MASS
    varListKey = DEFAULT_VARLISTKEY
    verbose = True
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-d", "--maxDepth"):
            mDepth = a
        elif o in ("-k", "--mass"):
            mass = a
        elif o in ("-l", "--varListKey"):
            varListKey = a
        elif o in ("-i", "--inputfile"):
            # NOTE(review): overwritten below by the mass-derived file name
            infname = a
        elif o in ("-n", "--nTrees"):
            nTrees = a
        elif o in ("-o", "--outputfile"):
            # NOTE(review): overwritten below by the Note-derived file name
            outfname = a
        elif o in ("-t", "--inputtrees"):
            # split() discards leading, trailing and repeated blanks, so the
            # list only ever holds real tree names
            trees = sorted(a.split(), reverse=True)
            if len(trees) != 2:
                print("ERROR: need to give two trees (each one for signal and background)")
                print(trees)
                sys.exit(1)
            treeNameSig, treeNameBkg = trees
        elif o in ("-v", "--verbose"):
            verbose = True

    varList = varsList.varList[varListKey]
    nVars = str(len(varList)) + 'vars'
    # str() so that non-string defaults for depth/mass do not break the tag
    Note = (methods + '_' + varListKey + '_' + nVars +
            '_mDepth' + str(mDepth) + '_M' + str(mass))
    outfname = "weights/TMVA_" + Note + ".root"
    # Print methods
    mlist = methods.replace(' ', ',').split(',')
    print("=== TMVAClassification: use method(s)...")
    for m in mlist:
        if m.strip() != '':
            print("=== - <%s>" % m.strip())

    # ---- Option strings of the supported BDT flavours
    # (option reference: http://tmva.sourceforge.net/optionRef.html)

    # bdtSetting for "BDT"
    bdtSetting = '!H:!V:NTrees=%s:MaxDepth=%s' % (nTrees, mDepth)
    bdtSetting += ':MinNodeSize=2.5%:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20'
    bdtSetting += ':IgnoreNegWeightsInTraining=True'
    # bdtSetting for "BDTMitFisher"
    bdtFSetting = '!H:!V:NTrees=%s' % nTrees
    bdtFSetting += ':MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20'
    bdtFSetting += ':IgnoreNegWeightsInTraining=True'
    # bdtSetting for "BDTG"
    bdtGSetting = '!H:!V:NTrees=%s:MaxDepth=%s' % (nTrees, mDepth)
    bdtGSetting += ':MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20'
    bdtGSetting += ':Pray'  # Pray takes into account the effect of negative bins in BDTG
    # bdtSetting for "BDTB"
    bdtBSetting = '!H:!V:NTrees=%s' % nTrees
    bdtBSetting += ':MinNodeSize=2.5%:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20'
    bdtBSetting += ':IgnoreNegWeightsInTraining=True'
    # bdtSetting for "BDTD"
    bdtDSetting = '!H:!V:NTrees=%s' % nTrees
    bdtDSetting += ':MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate'
    bdtDSetting += ':IgnoreNegWeightsInTraining=True'

    bdtSettings = {
        "BDT": bdtSetting,
        "BDTG": bdtGSetting,
        "BDTMitFisher": bdtFSetting,
        "BDTB": bdtBSetting,
        "BDTD": bdtDSetting,
    }
    # Fail before any file is created: an unsupported name would otherwise
    # book nothing and run an empty training.
    if methods not in bdtSettings:
        print("ERROR: unsupported method <%s>, choose one of: %s" %
              (methods, ", ".join(sorted(bdtSettings))))
        sys.exit(1)

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut

    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print("*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA")
        print("*** does not run properly (function calls with enums in the argument are ignored).")
        print("*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),")
        print("*** or use another ROOT version (e.g., ROOT 5.19).")
        sys.exit(1)

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile(outfname, 'RECREATE')

    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory(
        "TMVAClassification", outputFile,
        "!V:!Silent:Color:DrawProgressBar:Transformations=I;:AnalysisType=Classification"
    )

    # Set verbosity
    factory.SetVerbose(verbose)

    # If you wish to modify default settings
    # (please check "src/Config.h" to see all available global options)
    (TMVA.gConfig().GetIONames()).fWeightFileDir = "weights/" + Note

    # Define the input variables that shall be used for the classifier training
    # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    # [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
    for iVar in varList:
        # the jet multiplicity is the only variable registered as integer
        varType = 'I' if iVar[0] == 'NJets_singleLepCalc' else 'F'
        factory.AddVariable(iVar[0], iVar[1], iVar[2], varType)

    inputDir = varsList.inputDir
    print('mass point ' + str(mass))
    infname = "ChargedHiggs_HplusTB_HplusToTB_M-%s_13TeV_amcatnlo_pythia8_hadd.root" % (
        mass)

    # ---- Signal tree
    iFileSig = TFile.Open(inputDir + infname)
    if not iFileSig or iFileSig.IsZombie():
        print("ERROR: cannot open signal file " + inputDir + infname)
        sys.exit(1)
    sigChain = iFileSig.Get("ljmet")
    if not sigChain:
        print("ERROR: no tree 'ljmet' in signal file " + inputDir + infname)
        sys.exit(1)
    factory.AddSignalTree(sigChain)

    # ---- Background trees
    # The files and trees are collected in lists so that they stay
    # referenced for as long as the factory uses them.
    bkg_list = []
    bkg_trees_list = []
    for bkgName in varsList.bkg:
        bkgPath = inputDir + bkgName
        print(bkgPath)
        bkgFile = TFile.Open(bkgPath)
        if not bkgFile or bkgFile.IsZombie():
            print("ERROR: cannot open background file " + bkgPath)
            sys.exit(1)
        bkgTree = bkgFile.Get("ljmet")
        if not bkgTree:
            print("ERROR: no tree 'ljmet' in background file " + bkgPath)
            sys.exit(1)
        bkg_list.append(bkgFile)
        bkg_trees_list.append(bkgTree)
        bkgTree.GetEntry(0)

        # empty trees are not registered with the factory
        if bkgTree.GetEntries() == 0:
            continue
        factory.AddBackgroundTree(bkgTree, 1)

    # Set individual event weights (the variables must exist in the original TTree)
    factory.SetSignalWeightExpression(weightStrS)
    factory.SetBackgroundWeightExpression(weightStrB)

    # Apply additional cuts on the signal and background sample.
    # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )
    mycutSig = TCut(cutStrS)
    mycutBkg = TCut(cutStrB)

    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
    factory.PrepareTrainingAndTestTree(
        mycutSig,
        mycutBkg,
        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V"
    )

    # ---- Book the selected method (all flavours are TMVA.Types.kBDT)
    factory.BookMethod(TMVA.Types.kBDT, methods, bdtSettings[methods])

    # ---- Now you can tell the factory to train, test, and evaluate the MVAs.

    # Train MVAs
    factory.TrainAllMethods()

    # Test MVAs
    factory.TestAllMethods()

    # Evaluate MVAs
    factory.EvaluateAllMethods()

    # Save the output.
    outputFile.Close()

    # save plots: run the TMVA plotting macros from inside the weight
    # directory, hence the "../../" prefix on the output file
    os.chdir('weights/' + Note)
    plotInput = "../../" + outfname
    # Batch mode is forced here, so the interactive TMVA GUI is never started.
    gROOT.SetBatch(1)
    TMVA.efficiencies(plotInput)  # Classifier Background Rejection vs Signal Efficiency (ROC curve)
    TMVA.mvas(plotInput, 0)  # Classifier Output Distributions (test sample)
    TMVA.correlations(plotInput)  # Input Variable Linear Correlation Coefficients
    TMVA.variables(plotInput)  # Input variables (training sample)
    print("DONE")
Example #5
0
        'BDTD': 0,
        'BDTF': 0,
        'MLP': 0,
        'MLPBFGS': 0,
        'MLPBNN': 0,
        'CFMlpANN': 0,
        'TMlpANN': 0
    }

    factory = t.Factory(
        'vbf_bdt_combined_james_current', outputFile,
        '!V:!Silent:Color:DrawProgressBar:Transformations=I;D;G,D:AnalysisType=Classification'
    )
    factory.Print()

    t.gConfig().GetIONames().fWeightFileDir = outputWeightsDir
    dataloader = t.DataLoader(".")

    dataloader.AddVariable('dijetDEta', 'dijetDEta', 'dijetDEta', 'F')
    dataloader.AddVariable('dijetDPhi', 'dijetDPhi', 'dijetDPhi', 'F')
    dataloader.AddVariable('llgJJDPhi', 'llgJJDPhi', 'llgJJDPhi', 'F')
    dataloader.AddVariable('jPhotonDRMin', 'jPhotonDRMin', 'jPhotonDRMin', 'F')
    dataloader.AddVariable('ptt', 'ptt', 'ptt', 'F')
    dataloader.AddVariable('jetOnePt', 'jetOnePt', 'jetOnePt', 'F')
    dataloader.AddVariable('jetTwoPt', 'jetTwoPt', 'jetTwoPt', 'F')
    dataloader.AddVariable('kin_bdt_james', 'kin_bdt_james', 'kin_bdt_james',
                           'F')
    dataloader.AddVariable('vbfPtBalance', 'vbfPtBalance', 'vbfPtBalance', 'F')
    dataloader.AddVariable('photonZepp', 'photonZepp', 'photonZepp', 'F')

    # trees for training
Example #6
0
# Split the loaded events into randomly chosen training and test samples of
# the requested sizes.
loader.PrepareTrainingAndTestTree(
    mycutSig,
    mycutBkg,
    ":".join([
        "nTrain_Signal=" + str(NSIG),
        "nTrain_Background=" + str(NBKG),
        "nTest_Signal=" + str(NSIG_TEST),
        "nTest_Background=" + str(NBKG_TEST),
        "SplitMode=Random",
        "NormMode=NumEvents",
        "!V",
    ]),
)

# Factory created without an output file; only a job name and options.
factory = TMVA.Factory(
    "Optimization",
    ":".join([
        '!V', '!ROC', '!Silent', 'Color', '!DrawProgressBar',
        'Transformations=I;', 'AnalysisType=Classification',
    ]),
)

TMVA.gConfig().GetIONames().fWeightFileDir = '/weights'

# Options for the PyKeras method: model file plus training hyper-parameters.
kerasSetting = ':'.join([
    '!H',
    '!V',
    'VarTransform=G',
    'FilenameModel=' + modelName,
    'SaveBestOnly=true',
    'NumEpochs=' + str(epochs),
    'BatchSize=' + str(batchSize),
    'TriesEarlyStopping=' + str(patience),
])

factory.BookMethod(loader, TMVA.Types.kPyKeras, "PyKeras", kerasSetting)

# Run the full train / test / evaluate cycle for the booked method.
for run_stage in (factory.TrainAllMethods,
                  factory.TestAllMethods,
                  factory.EvaluateAllMethods):
    run_stage()

# ROC integral of the PyKeras method for this dataset.
ROC = factory.GetROCIntegral(dataset + "/optimize_" + outf_key, "PyKeras")
Example #7
0
def main():

    try:
        # retrive command line options
        shortopts  = "w:m:i:j:f:g:t:o:a:vgh?"
        longopts   = ["weight_fold=", "methods=", "inputfilesig=", "inputfilebkg=", "friendinputfilesig=", "friendinputfilebkg=", "inputtrees=", "outputfile=", "verbose", "gui", "help", "usage"]
        opts, args = getopt.getopt( sys.argv[1:], shortopts, longopts )

    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    infnameSig     = DEFAULT_INFNAMESIG
    infnameBkg     = DEFAULT_INFNAMEBKG
    friendfnameSig = DEFAULT_FRIENDNAMESIG
    friendfnameBkg = DEFAULT_FRIENDNAMEBKG
    treeNameSig    = DEFAULT_TREESIG
    treeNameBkg    = DEFAULT_TREEBKG
    outfname       = DEFAULT_OUTFNAME
    methods        = DEFAULT_METHODS
    weight_fold    = "weights"
    verbose        = False
    gui            = False
    addedcuts      = ""
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-w", "--weight_fold"):
            weight_fold = a
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-i", "--inputfilesig"):
            infnameSig = a
        elif o in ("-j", "--inputfilebkg"):
            infnameBkg = a
        elif o in ("-f", "--friendinputfilesig"):
            friendfnameSig = a
        elif o in ("-g", "--friendinputfilebkg"):
            friendfnameBkg = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-a", "--addedcuts"):
            addedcuts = a
        elif o in ("-t", "--inputtrees"):
            a.strip()
            trees = a.rsplit( ' ' )
            trees.sort()
            trees.reverse()
            if len(trees)-trees.count('') != 2:
                print "ERROR: need to give two trees (each one for signal and background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True
        elif o in ("-g", "--gui"):
            gui = True

    # Print methods
    mlist = methods.replace(' ',',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Print the file
    print "Using file " + infnameSig + " for signal..."
    print "Using file " + infnameBkg + " for background..."

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut
    
    # check ROOT version, give alarm if 5.18
    print "ROOT version is " + str(gROOT.GetVersionCode())
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)
    
    # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI
    gROOT.SetMacroPath( "./" )
## SO I TAKE DEFAULT FORM ROOT#    gROOT.Macro       ( "./TMVAlogon.C" )    
    gROOT.LoadMacro   ( "./TMVAGui.C" )
    
    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile( outfname, 'RECREATE' )
    
    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in 
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory( "TMVAClassification", outputFile, 
                            "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" )

    # Set verbosity
    factory.SetVerbose( verbose )
    
    # If you wish to modify default settings 
    # (please check "src/Config.h" to see all available global options)
    #    gConfig().GetVariablePlotting()).fTimesRMS = 8.0
    (TMVA.gConfig().GetIONames()).fWeightFileDir = weight_fold;

    # Define the input variables that shall be used for the classifier training
    # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    # [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
    factory.AddVariable( "dR_l1l2", "dR_l1l2", "", 'F' )
    factory.AddVariable( "dR_b1b2", "dR_b1b2", "", 'F' )
    factory.AddVariable( "dR_bl", "dR_bl", "", 'F' )
    factory.AddVariable( "dR_l1l2b1b2", "dR_l1l2b1b2", "", 'F' )
    factory.AddVariable( "MINdR_bl", "MINdR_bl", "", 'F' )
    factory.AddVariable( "dphi_l1l2b1b2", "dphi_l1l2b1b2", "", 'F' )
    factory.AddVariable( "mass_l1l2", "mass_l1l2", "", 'F' )
    factory.AddVariable( "mass_b1b2", "mass_b1b2", "", 'F' )
    factory.AddVariable( "mass_trans", "mass_trans", "", 'F' )
    factory.AddVariable( "MT2", "MT2", "", 'F' )
    factory.AddVariable( "pt_b1b2", "pt_b1b2", "", 'F' )
    factory.AddVariable( "MMC_h2massweight1_prob", "MMC_h2massweight1_prob", "", 'F' ) ##ADDED

    # You can add so-called "Spectator variables", which are not used in the MVA training, 
    # but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the 
    # input variables, the response values of all trained MVAs, and the spectator variables
    # factory.AddSpectator( "spec1:=var1*2",  "Spectator 1", "units", 'F' )
    # factory.AddSpectator( "spec2:=var1*3",  "Spectator 2", "units", 'F' )

    # Read input data
    if gSystem.AccessPathName( infnameSig ) != 0 or gSystem.AccessPathName( infnameBkg ): gSystem.Exec( "wget http://root.cern.ch/files/" + infname )
        
    inputSig = TFile.Open( infnameSig )
    inputBkg = TFile.Open( infnameBkg )

    # Get the signal and background trees for training
    signal      = inputSig.Get( treeNameSig )
    background  = inputBkg.Get( treeNameBkg )

    ##signal.AddFriend( "eleIDdir/isoT1 = eleIDdir/T1", friendfnameSig )
    ##background.AddFriend( "eleIDdir/isoT1 = eleIDdir/T1", friendfnameBkg )

    # Global event weights (see below for setting event-wise weights)
    signalWeight     = 1.
    backgroundWeight = 1.

# I don't think there's a general answer to this. The safest 'default'
# is to use event weights such that you have equal amounts of signal
# and background for the training. Otherwise, for example, if you look
# for a rare signal and use the weights to scale the number of events
# according to the expected ratio of signal and background for the
# luminosity, the classifier sees hardly any signal events and "thinks":
# "Oh, I'll just classify everything as background and do a good job!"
#
# One can try to 'optimize' the training a bit more for either 'high
# purity' or 'high efficiency' by choosing different weights but, as I
# said, there's no fixed rule. You'd have to 'try' and see if you get
# better results by playing with the weights.

    # ====== register trees ====================================================
    #
    # the following method is the prefered one:
    # you can add an arbitrary number of signal or background trees
    factory.AddSignalTree    ( signal,     signalWeight     )
    factory.AddBackgroundTree( background, backgroundWeight )

    # To give different trees for training and testing, do as follows:
    #    factory.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" )
    #    factory.AddSignalTree( signalTestTree,     signalTestWeight,  "Test" )
    
    # Use the following code instead of the above two or four lines to add signal and background 
    # training and test events "by hand"
    # NOTE that in this case one should not give expressions (such as "var1+var2") in the input 
    #      variable definition, but simply compute the expression before adding the event
    #
    #    # --- begin ----------------------------------------------------------
    #    
    # ... *** please lookup code in TMVA/macros/TMVAClassification.C ***
    #    
    #    # --- end ------------------------------------------------------------
    #
    # ====== end of register trees ==============================================    
            
    # Set individual event weights (the variables must exist in the original TTree)
    #    for signal    : factory.SetSignalWeightExpression    ("weight1*weight2");
    #    for background: factory.SetBackgroundWeightExpression("weight1*weight2");

    # Apply additional cuts on the signal and background sample. 
    # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )
    #mycutSig = TCut( "nu1and2_diBaxis_t>-900 && met_diBaxis_t>-900&& hasb1jet && hasb2jet && hasMET && hasGenMET && hasdRljet && hastwomuons" ) 
    mycutSig = TCut( addedcuts ) 
    #mycutBkg = TCut( "event_n%2!=0 && " + addedcuts ) 
    mycutBkg = TCut( addedcuts ) 
    #mycutBkg = TCut( "nu1and2_diBaxis_t>-900 && met_diBaxis_t>-900&& hasb1jet && hasb2jet && hasMET && hasGenMET && hasdRljet && hastwomuons" ) 

    print mycutSig

    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
    factory.PrepareTrainingAndTestTree( mycutSig, mycutBkg,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" )

    # --------------------------------------------------------------------------------------------------

    # ---- Book MVA methods
    #
    # please lookup the various method configuration options in the corresponding cxx files, eg:
    # src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
    # it is possible to preset ranges in the option string in which the cut optimisation should be done:
    # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

    # Cut optimisation
    if "Cuts" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "Cuts",
                            "!H:!V:FitMethod=MC:EffSel:VarProp[0]=FMax:VarProp[1]=FMin" )

    if "CutsD" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsD",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" )

    if "CutsPCA" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsPCA",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" )

    if "CutsGA" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsGA",
                            "H:!V:FitMethod=GA:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95:VarProp[0]=FMin:VarProp[1]=FMax" )

    if "CutsSA" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsSA",
                            "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" )

    # Likelihood ("naive Bayes estimator")
    if "Likelihood" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "Likelihood",
                            "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" )

    # Decorrelated likelihood
    if "LikelihoodD" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodD",
                            "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" )

    # PCA-transformed likelihood
    if "LikelihoodPCA" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodPCA",
                            "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ) 

    # Use a kernel density estimator to approximate the PDFs
    if "LikelihoodKDE" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodKDE",
                            "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ) 

    # Use a variable-dependent mix of splines and kernel density estimator
    if "LikelihoodMIX" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodMIX",
                            "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ) 

    # Test the multi-dimensional probability density estimator
    # here are the options strings for the MinMax and RMS methods, respectively:
    #      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
    #      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
    if "PDERS" in mlist:
        factory.BookMethod( TMVA.Types.kPDERS, "PDERS",
                            "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" )

    if "PDERSD" in mlist:
        factory.BookMethod( TMVA.Types.kPDERS, "PDERSD",
                            "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" )

    if "PDERSPCA" in mlist:
        factory.BookMethod( TMVA.Types.kPDERS, "PDERSPCA",
                             "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" )

   # Multi-dimensional likelihood estimator using self-adapting phase-space binning
    if "PDEFoam" in mlist:
        factory.BookMethod( TMVA.Types.kPDEFoam, "PDEFoam",
                            "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" )

    if "PDEFoamBoost" in mlist:
        factory.BookMethod( TMVA.Types.kPDEFoam, "PDEFoamBoost",
                            "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" )

    # K-Nearest Neighbour classifier (KNN)
    if "KNN" in mlist:
        factory.BookMethod( TMVA.Types.kKNN, "KNN",
                            "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" )

    # H-Matrix (chi2-squared) method
    if "HMatrix" in mlist:
        factory.BookMethod( TMVA.Types.kHMatrix, "HMatrix", "!H:!V" )

    # Linear discriminant (same as Fisher discriminant)
    if "LD" in mlist:
        factory.BookMethod( TMVA.Types.kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" )

    # Fisher discriminant (same as LD)
    if "Fisher" in mlist:
        factory.BookMethod( TMVA.Types.kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" )

    # Fisher with Gauss-transformed input variables
    if "FisherG" in mlist:
        factory.BookMethod( TMVA.Types.kFisher, "FisherG", "H:!V:VarTransform=Gauss" )

    # Composite classifier: ensemble (tree) of boosted Fisher classifiers
    if "BoostedFisher" in mlist:
        factory.BookMethod( TMVA.Types.kFisher, "BoostedFisher", 
                            "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" )

    # Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
    if "FDA_MC" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_MC",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );

    if "FDA_GA" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_GA",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

    if "FDA_SA" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_SA",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

    if "FDA_MT" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_MT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

    if "FDA_GAMT" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_GAMT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

    if "FDA_MCMT" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_MCMT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

    # TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
    if "MLP" in mlist:
        factory.BookMethod( TMVA.Types.kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" )

    if "MLPBFGS" in mlist:
        factory.BookMethod( TMVA.Types.kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" )

    if "MLPBNN" in mlist:
        factory.BookMethod( TMVA.Types.kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ) # BFGS training with bayesian regulators

    # CF(Clermont-Ferrand)ANN
    if "CFMlpANN" in mlist:
        factory.BookMethod( TMVA.Types.kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ) # n_cycles:#nodes:#nodes:...  

    # Tmlp(Root)ANN
    if "TMlpANN" in mlist:
        factory.BookMethod( TMVA.Types.kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ) #n_cycles:#nodes:#nodes:...

    # Support Vector Machine
    if "SVM" in mlist:
        factory.BookMethod( TMVA.Types.kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" )

    # Boosted Decision Trees
    if "BDTG" in mlist:
        factory.BookMethod( TMVA.Types.kBDT, "BDTG",
                            "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5" )

    if "BDT" in mlist:
        factory.BookMethod( TMVA.Types.kBDT, "BDT",
                            "!H:!V:NTrees=850:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" )

    if "BDTB" in mlist:
        factory.BookMethod( TMVA.Types.kBDT, "BDTB",
                            "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" )

    if "BDTD" in mlist:
        factory.BookMethod( TMVA.Types.kBDT, "BDTD",
                            "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" )

    # RuleFit -- TMVA implementation of Friedman's method
    if "RuleFit" in mlist:
        factory.BookMethod( TMVA.Types.kRuleFit, "RuleFit",
                            "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" )

    # --------------------------------------------------------------------------------------------------
            
    # ---- Now you can tell the factory to train, test, and evaluate the MVAs. 

    # Train MVAs
    factory.TrainAllMethods()
    
    # Test MVAs
    factory.TestAllMethods()
    
    # Evaluate MVAs
    factory.EvaluateAllMethods()    
    
    # Save the output.
    outputFile.Close()
    
    print "=== wrote root file %s\n" % outfname
    print "=== TMVAClassification is done!\n"
    
    # open the GUI for the result macros    
    if( gui ):
        gROOT.ProcessLine( "TMVAGui(\"%s\")" % outfname )
        # keep the ROOT thread running
        gApplication.Run() 
def main():  # runs the program
    """Train, test and evaluate a Keras DNN through TMVA's PyKeras method.

    Command-line options (parsed with ``getopt``):

        -d, --dataset   dataset path (default ``dataset``); the part before
                        the first ``/`` is used as the TMVA DataLoader name
        -w, --where     ``lpc`` selects the LPC input directories from
                        ``varsList``; any other value selects the BRUX ones
        -y, --year      2017 or 2018 (default 2017)
        -o, --option    ``0`` (default): default variable list and fixed
                        hyper parameters; ``1``: both are read from files
                        inside the dataset path
        -v, --verbose   verbosity handed to ``factory.SetVerbose``
        -h, -?, --help, --usage   print the usage text and exit

    The TMVA output is written to
    ``<dataset>/weights/TMVA_Keras_<N>vars.root``. With option ``1`` the ROC
    integral is appended to ``varsListHPO.txt`` in the dataset path.

    Exits with status 1 on unknown options, an unsupported year or option,
    or (option ``1``) when no hyper parameter file is found.
    """
    checkRootVer()  # check that ROOT version is correct

    try:  # retrieve command line options
        shortopts = "d:o:v:w:y:h?"  # possible command line options
        longopts = [
            "dataset=", "option=", "where=", "year=", "verbose", "help",
            "usage"
        ]
        opts, _ = getopt.getopt(
            sys.argv[1:], shortopts,
            longopts)  # associates command line inputs to variables

    except getopt.GetoptError:  # output error if command line argument invalid
        print("ERROR: unknown options in argument %s" % sys.argv[1:])
        usage()
        sys.exit(1)

    # Defaults; overridden below by whatever was given on the command line.
    settings = {
        "dataset": "dataset",
        "where": "lpc",
        "year": 2017,
        "option": 0,
        "verbose": True,
    }
    flagToSetting = {
        "-d": "dataset", "--dataset": "dataset",
        "-w": "where", "--where": "where",
        "-y": "year", "--year": "year",
        "-o": "option", "--option": "option",
        "-v": "verbose", "--verbose": "verbose",
    }

    for opt, arg in opts:
        if opt in ("-?", "-h", "--help", "--usage"):  # command line help
            usage()
            sys.exit(0)
        if opt in flagToSetting:
            settings[flagToSetting[opt]] = arg

    # Normalise types: getopt always hands back strings while the defaults
    # above are int/bool, so compare on one canonical type from here on.
    DATASETPATH = str(settings["dataset"])
    DATASET = DATASETPATH.split("/")[0]
    WHERE = settings["where"]
    OPTION = str(settings["option"])

    try:
        YEAR = int(settings["year"])
    except ValueError:
        YEAR = None
    if YEAR not in (2017, 2018):
        print("ERROR: year must be 2017 or 2018, got %s" % settings["year"])
        usage()
        sys.exit(1)

    if OPTION not in ("0", "1"):
        print("ERROR: option must be 0 or 1, got %s" % OPTION)
        usage()
        sys.exit(1)

    # "--verbose" carries no argument (empty string) and means "on";
    # "-v <value>" is off only for an explicit negative value.
    verboseSetting = settings["verbose"]
    if isinstance(verboseSetting, str):
        VERBOSE = verboseSetting.strip().lower() not in ("0", "false", "no",
                                                         "off")
    else:
        VERBOSE = bool(verboseSetting)

    # Pick the input directory and sample lists for the requested site/year.
    if WHERE == "lpc":
        if YEAR == 2017:
            inputDir = varsList.inputDirLPC2017
        else:
            inputDir = varsList.inputDirLPC2018
    else:
        if YEAR == 2017:
            inputDir = varsList.inputDirBRUX2017
        else:
            inputDir = varsList.inputDirBRUX2018

    if YEAR == 2017:
        sigFileNames = varsList.sig2017_2
        bkgFileNames = varsList.bkg2017_2
    else:
        sigFileNames = varsList.sig2018_2
        bkgFileNames = varsList.bkg2018_2

    varsListHPOPath = os.path.join(DATASETPATH, "varsListHPO.txt")

    if OPTION == "0":
        print("Using Option 0: default varList")
        varList = varsList.varList["DNN"]
    else:
        print("Using Option 1: selected data from {}".format(DATASETPATH))
        with open(varsListHPOPath, "r") as varsFile:
            varsListHPO = varsFile.readlines()
        varList = []
        START = False
        for line in varsListHPO:
            entry = line.strip()
            # Everything after the "Variable List:" header is a variable
            # name; skip blank lines and the "ROC Value:" line that a
            # previous run of this script appended to the same file.
            if START and entry and not entry.startswith("ROC Value:"):
                varList.append(str(entry))
            if "Variable List:" in line:
                START = True

    numVars = len(varList)
    outf_key = str("Keras_" + str(numVars) + "vars")
    OUTF_NAME = DATASET + "/weights/TMVA_" + outf_key + ".root"
    outputfile = TFile(OUTF_NAME, "RECREATE")

    # initialize and set-up TMVA factory

    factory = TMVA.Factory(
        "Training", outputfile,
        "!V:!ROC:Silent:Color:!DrawProgressBar:Transformations=I;:AnalysisType=Classification"
    )

    factory.SetVerbose(VERBOSE)
    (TMVA.gConfig().GetIONames()).fWeightFileDir = "weights/" + outf_key

    # initialize and set-up TMVA loader

    loader = TMVA.DataLoader(DATASET)

    for var in varList:
        if OPTION == "0":
            # default list entries are indexable: (expression, title, unit)
            name, title, unit = var[0], var[1], var[2]
        else:
            # entries read from varsListHPO.txt are plain names
            name, title, unit = var, "", ""
        varType = "I" if name == "NJets_MultiLepCalc" else "F"
        loader.AddVariable(name, title, unit, varType)

    # Keep the opened files and trees in lists so they stay referenced
    # for the whole training.
    sig_list = []
    sig_trees_list = []
    bkg_list = []
    bkg_trees_list = []

    # add signal files
    for fileName in sigFileNames:
        sigFile = TFile.Open(inputDir + fileName)
        sigTree = sigFile.Get("ljmet")
        sig_list.append(sigFile)
        sig_trees_list.append(sigTree)
        sigTree.GetEntry(0)
        loader.AddSignalTree(sigTree)

    # add background files
    for fileName in bkgFileNames:
        bkgFile = TFile.Open(inputDir + fileName)
        bkgTree = bkgFile.Get("ljmet")
        bkg_list.append(bkgFile)
        bkg_trees_list.append(bkgTree)
        bkgTree.GetEntry(0)

        if bkgTree.GetEntries() == 0:  # empty trees are not registered
            continue
        loader.AddBackgroundTree(bkgTree)

    loader.SetSignalWeightExpression(weightStrS)
    loader.SetBackgroundWeightExpression(weightStrB)

    mycutSig = TCut(cutStrS)
    mycutBkg = TCut(cutStrB)

    loader.PrepareTrainingAndTestTree(
        mycutSig, mycutBkg,
        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V"
    )

    ######################################################
    ######################################################
    ######                                          ######
    ######            K E R A S   D N N             ######
    ######                                          ######
    ######################################################
    ######################################################
    HIDDEN = 0
    NODES = 0
    LRATE = 0.
    PATTERN = ""
    REGULATOR = ""
    ACTIVATION = ""
    BATCH_SIZE = 0
    # modify this when implementing hyper parameter optimization:
    model_name = 'TTTT_' + str(numVars) + 'vars_model.h5'

    EPOCHS = 100
    PATIENCE = 20

    # edit these based on hyper parameter optimization results
    if OPTION == "0":
        HIDDEN = 3
        NODES = 100
        LRATE = 0.01
        PATTERN = 'static'
        REGULATOR = 'none'
        ACTIVATION = 'relu'
        BATCH_SIZE = 256
    else:
        optFileName = None
        for fileName in os.listdir(DATASETPATH):
            if "params" in fileName:
                optFileName = fileName  # the last matching file wins
        if optFileName is None:
            print("ERROR: no hyper parameter file (name containing 'params') "
                  "found in {}".format(DATASETPATH))
            sys.exit(1)

        with open(os.path.join(DATASETPATH, optFileName), "r") as optFile:
            optLines = optFile.readlines()

        START = False
        for line in optLines:
            # Only "<key>: <value>" lines after the header are parsed.
            if START:
                if "Hidden" in line:
                    HIDDEN = int(line.split(":")[1].strip())
                if "Initial" in line:
                    NODES = int(line.split(":")[1].strip())
                if "Batch" in line:
                    # the file stores the exponent of the batch size
                    BATCH_SIZE = 2**int(line.split(":")[1].strip())
                if "Learning" in line:
                    LRATE = float(line.split(":")[1].strip())
                if "Pattern" in line:
                    PATTERN = str(line.split(":")[1].strip())
                if "Regulator" in line:
                    REGULATOR = str(line.split(":")[1].strip())
                if "Activation" in line:
                    ACTIVATION = str(line.split(":")[1].strip())
            if "Optimized Parameters:" in line:
                START = True

    kerasSetting = '!H:!V:VarTransform=G:FilenameModel=' + model_name + \
                   ':SaveBestOnly=true' + \
                   ':NumEpochs=' + str(EPOCHS) + \
                   ':BatchSize=' + str(BATCH_SIZE) + \
                   ':TriesEarlyStopping=' + str(PATIENCE)

    model = build_model(HIDDEN, NODES, LRATE, REGULATOR, PATTERN, ACTIVATION,
                        numVars)
    model.save(model_name)
    model.summary()

    factory.BookMethod(loader, TMVA.Types.kPyKeras, 'PyKeras', kerasSetting)

    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()

    outputfile.Close()

    print("Finished training in " + str(
        (time.time() - START_TIME) / 60.0) + " minutes.")

    ROC = factory.GetROCIntegral(DATASET, 'PyKeras')
    print('ROC value is: {}'.format(ROC))
    if OPTION == "1":
        # Append to the same file the variable list was read from.
        with open(varsListHPOPath, "a") as varsListHPOtxt:
            varsListHPOtxt.write("ROC Value: {}".format(ROC))
Example #9
0
import sys
import os
from ROOT import TMVA, TFile, TCut
from config import cfg, variables_iso_only
from os.path import join
import uproot
import root_pandas

# Instantiate the TMVA tools and set the number of x bins of the ROC curve.
TMVA.Tools.Instance()
plotting_config = TMVA.gConfig().GetVariablePlotting()
plotting_config.fNbinsXOfROCCurve = 400

# All outputs go below <out_dir>/<submit_version>.
out_dir_base = join(cfg["out_dir"], cfg['submit_version'])

max_n_per_class = 200000

for idname in cfg["trainings"]:

    bins_for_id = cfg["trainings"][idname]

    for training_bin in bins_for_id:

        print("Process training pipeline for {0} {1}".format(idname, training_bin))

        # One "legacy" output directory per (id, training bin) pair.
        out_dir = join(out_dir_base, idname, training_bin, "legacy")

        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

        feature_cols = bins_for_id[training_bin]["variables"]

        outfileName = join(out_dir, "TMVA.root")
        print("---> Working with OutfileName = " + outfileName)
Example #10
0
def main():
    """Train and evaluate one BDT on the variable subset encoded by a seed.

    The seed (``-s/--seed``) is read as a bit mask over
    ``varsList.varList[varListKey]``: the variable at list position ``i`` is
    added to the DataLoader when bit ``i`` of the seed is set. The 53-digit
    binary form of the seed names the DataLoader and is printed together
    with the resulting ROC integral.

    Command-line options (parsed with ``getopt``): ``-m/--methods``,
    ``-i/--inputfile``, ``-n/--nTrees``, ``-d/--maxDepth``, ``-k/--mass``,
    ``-l/--varListKey``, ``-t/--inputtrees``, ``-o/--outputfile``,
    ``-s/--seed``, ``-v/--verbose``, ``-h/-?/--help/--usage``.

    Exits with status 1 on unknown options, on a malformed ``--inputtrees``
    value, or when running under ROOT 5.18.
    """
    try:
        # retrieve command line options
        shortopts = "m:i:n:d:k:l:t:o:s:vh?"
        longopts = [
            "methods=", "inputfile=", "nTrees=", "maxDepth=", "mass=",
            "varListKey=", "inputtrees=", "outputfile=", "seed=", "verbose",
            "help", "usage"
        ]
        opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts)

    except getopt.GetoptError:
        # print help information and exit:
        print("ERROR: unknown options in argument %s" % sys.argv[1:])
        usage()
        sys.exit(1)

    # NOTE(review): infname, outfname, the tree names and verbose are parsed
    # for command-line compatibility but are not used further down (the
    # signal file and the "ljmet" tree name are hard-coded below).
    infname = DEFAULT_INFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname = DEFAULT_OUTFNAME
    methods = DEFAULT_METHODS
    nTrees = DEFAULT_NTREES
    mDepth = DEFAULT_MDEPTH
    varListKey = DEFAULT_VARLISTKEY
    verbose = True
    SeedN = DEFAULT_SEED
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-d", "--maxDepth"):
            mDepth = a
        elif o in ("-l", "--varListKey"):
            varListKey = a
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-n", "--nTrees"):
            nTrees = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            # Empty fragments from repeated/leading/trailing blanks sort to
            # the end after the reversal and are not counted below.
            trees = a.rsplit(' ')
            trees.sort()
            trees.reverse()
            if len(trees) - trees.count('') != 2:
                print("ERROR: need to give two trees (each one for signal and background)")
                print(trees)
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-s", "--seed"):
            SeedN = int(a)
        elif o in ("-v", "--verbose"):
            verbose = True

    varList = varsList.varList[varListKey]
    nVars = str(len(varList)) + 'vars'
    # str() so that a non-string default depth does not break the
    # concatenation.
    Note = methods + '_' + varListKey + '_' + nVars + '_mDepth' + str(mDepth)

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut, TRandom3

    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print("*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA")
        print("*** does not run properly (function calls with enums in the argument are ignored).")
        print("*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),")
        print("*** or use another ROOT version (e.g., ROOT 5.19).")
        sys.exit(1)

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    fClassifier = TMVA.Factory(
        "VariableImportance",
        "!V:!ROC:!ModelPersistence:Silent:Color:!DrawProgressBar:AnalysisType=Classification"
    )
    str_xbitset = '{:053b}'.format(SeedN)

    seeddl = TMVA.DataLoader(str_xbitset)

    bdtSetting = '!H:!V:NTrees=%s:MaxDepth=%s' % (nTrees, mDepth)
    bdtSetting += ':MinNodeSize=2.5%:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20'
    bdtSetting += ':IgnoreNegWeightsInTraining=True'

    # Bit i of the seed switches variable i on. Testing the bit directly
    # (rather than indexing the 53-character string from its end) stays
    # correct for more than 53 variables and for seeds wider than 53 bits.
    for position, iVar in enumerate(varList):
        if (SeedN >> position) & 1:
            seeddl.AddVariable(iVar[0], iVar[1], iVar[2], 'F')
            print(iVar[0])

    (TMVA.gConfig().GetIONames()).fWeightFileDir = "weights/" + Note

    inputDir = varsList.inputDir
    infname = "TTTT_TuneCP5_PSweights_13TeV-amcatnlo-pythia8_hadd.root"
    iFileSig = TFile.Open(inputDir + infname)
    sigChain = iFileSig.Get("ljmet")

    seeddl.AddSignalTree(sigChain)

    # Keep the opened files and trees in lists so they stay referenced
    # for the whole training.
    bkg_list = []
    bkg_trees_list = []

    for bkgName in varsList.bkg:
        bkgFile = TFile.Open(inputDir + bkgName)
        bkgTree = bkgFile.Get("ljmet")
        bkg_list.append(bkgFile)
        bkg_trees_list.append(bkgTree)
        bkgTree.GetEntry(0)

        if bkgTree.GetEntries() == 0:  # empty trees are not registered
            continue
        seeddl.AddBackgroundTree(bkgTree, 1)

    seeddl.SetSignalWeightExpression(weightStrS)
    seeddl.SetBackgroundWeightExpression(weightStrB)

    mycutSig = TCut(cutStrS)
    mycutBkg = TCut(cutStrB)

    seeddl.PrepareTrainingAndTestTree(
        mycutSig, mycutBkg,
        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V"
    )

    fClassifier.BookMethod(seeddl, TMVA.Types.kBDT, "BDT", bdtSetting)
    fClassifier.TrainAllMethods()
    fClassifier.TestAllMethods()
    fClassifier.EvaluateAllMethods()

    SROC = fClassifier.GetROCIntegral(str_xbitset, "BDT")
    # Single pre-formatted string: same text as the former comma-separated
    # print statement, and also valid under Python 3.
    print("ROC-integral :  %s   %s" % (str_xbitset, SROC))
    print("SEED " + str_xbitset + " DONE")
    fClassifier.DeleteAllMethods()
    fClassifier.fMethodsMap.clear()
    print("==================================================================")
    print("==================================================================")
Example #11
0
def main():

    try:
        # retrive command line options
        shortopts  = "w:m:i:j:f:g:t:o:a:vgh?"
        longopts   = ["weight_fold=", "methods=", "inputfilesig=", "inputfilebkg=", "friendinputfilesig=", "friendinputfilebkg=", "inputtrees=", "outputfile=", "verbose", "gui", "help", "usage"]
        opts, args = getopt.getopt( sys.argv[1:], shortopts, longopts )

    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    infnameSig     = DEFAULT_INFNAMESIG
    infnameBkg     = DEFAULT_INFNAMEBKG
    friendfnameSig = DEFAULT_FRIENDNAMESIG
    friendfnameBkg = DEFAULT_FRIENDNAMEBKG
    treeNameSig    = DEFAULT_TREESIG
    treeNameBkg    = DEFAULT_TREEBKG
    outfname       = DEFAULT_OUTFNAME
    methods        = DEFAULT_METHODS
    weight_fold    = "weights"
    verbose        = False
    gui            = False
    addedcuts      = ""
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-w", "--weight_fold"):
            weight_fold = a
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-i", "--inputfilesig"):
            infnameSig = a
        elif o in ("-j", "--inputfilebkg"):
            infnameBkg = a
        elif o in ("-f", "--friendinputfilesig"):
            friendfnameSig = a
        elif o in ("-g", "--friendinputfilebkg"):
            friendfnameBkg = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-a", "--addedcuts"):
            addedcuts = a
        elif o in ("-t", "--inputtrees"):
            a.strip()
            trees = a.rsplit( ' ' )
            trees.sort()
            trees.reverse()
            if len(trees)-trees.count('') != 2:
                print "ERROR: need to give two trees (each one for signal and background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True
        elif o in ("-g", "--gui"):
            gui = True

    # Print methods
    mlist = methods.replace(' ',',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Print the file
    print "Using file " + infnameSig + " for signal..."
    print "Using file " + infnameBkg + " for background..."

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut
    
    # check ROOT version, give alarm if 5.18
    print "ROOT version is " + str(gROOT.GetVersionCode())
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)
    
    # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI
    gROOT.SetMacroPath( "./" )
## SO I TAKE DEFAULT FORM ROOT#    gROOT.Macro       ( "./TMVAlogon.C" )    
#!    gROOT.LoadMacro   ( "./TMVAGui.C" )
 
   
    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile( outfname, 'RECREATE' )
    
    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in 
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory( "TMVAClassification", outputFile, 
                            "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" )

    # Set verbosity
    factory.SetVerbose( verbose )
    
    # If you wish to modify default settings 
    # (please check "src/Config.h" to see all available global options)
    #    gConfig().GetVariablePlotting()).fTimesRMS = 8.0
    (TMVA.gConfig().GetIONames()).fWeightFileDir = weight_fold;

    # Define the input variables that shall be used for the classifier training
    # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    # [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
    factory.AddVariable( "dR_l1l2", "dR_l1l2", "", 'F' )
    factory.AddVariable( "dR_b1b2", "dR_b1b2", "", 'F' )
    factory.AddVariable( "dR_bl", "dR_bl", "", 'F' )
    factory.AddVariable( "dR_l1l2b1b2", "dR_l1l2b1b2", "", 'F' )
    factory.AddVariable( "MINdR_bl", "MINdR_bl", "", 'F' )
    factory.AddVariable( "dphi_l1l2b1b2", "dphi_l1l2b1b2", "", 'F' )
    factory.AddVariable( "mass_l1l2", "mass_l1l2", "", 'F' )
    factory.AddVariable( "mass_b1b2", "mass_b1b2", "", 'F' )
    factory.AddVariable( "mass_trans", "mass_trans", "", 'F' )
    factory.AddVariable( "MT2", "MT2", "", 'F' )
    factory.AddVariable( "pt_b1b2", "pt_b1b2", "", 'F' )
    #factory.AddVariable( "MMC_h2mass_MaxBin", "MMC_h2mass_MaxBin", "", 'F' )
    #factory.AddVariable( "MMC_h2mass_RMS", "MMC_h2mass_RMS", "", 'F' )
    #factory.AddVariable( "MMC_h2mass_prob", "MMC_h2mass_prob", "", 'F' )

    # You can add so-called "Spectator variables", which are not used in the MVA training, 
    # but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the 
    # input variables, the response values of all trained MVAs, and the spectator variables
    # factory.AddSpectator( "spec1:=var1*2",  "Spectator 1", "units", 'F' )
    # factory.AddSpectator( "spec2:=var1*3",  "Spectator 2", "units", 'F' )

    # Read input data
    if gSystem.AccessPathName( infnameSig ) != 0 or gSystem.AccessPathName( infnameBkg ): gSystem.Exec( "wget http://root.cern.ch/files/" + infname )
        
    inputSig = TFile.Open( infnameSig )
    inputBkg = TFile.Open( infnameBkg )

    # Get the signal and background trees for training
    signal      = inputSig.Get( treeNameSig )
    background  = inputBkg.Get( treeNameBkg )

    ##signal.AddFriend( "eleIDdir/isoT1 = eleIDdir/T1", friendfnameSig )
    ##background.AddFriend( "eleIDdir/isoT1 = eleIDdir/T1", friendfnameBkg )

    # Global event weights (see below for setting event-wise weights)
    signalWeight     = 1.
    backgroundWeight = 1.

# I don't think there's a general answer to this. The safest 'default'
# is to use the event weight such that you have equal amounts of signal
# and background for the training. Otherwise, for example, if you look
# for a rare signal and use the weight to scale the number of events
# according to the expected ratio of signal and background for the
# luminosity, the classifier sees hardly any signal events and "thinks":
# "Oh, I just classify everything as background and do a good job!"
#
# One can try to 'optimize' the training a bit more for either 'high
# purity' or 'high efficiency' by choosing different weights, but as I
# said, there's no fixed rule. You'd have to 'try' and see if you get
# better results by playing with the weights.

    # ====== register trees ====================================================
    #
    # the following method is the preferred one:
    # you can add an arbitrary number of signal or background trees
    factory.AddSignalTree    ( signal,     signalWeight     )
    factory.AddBackgroundTree( background, backgroundWeight )

    # To give different trees for training and testing, do as follows:
    #    factory.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" )
    #    factory.AddSignalTree( signalTestTree,     signalTestWeight,  "Test" )
    
    # Use the following code instead of the above two or four lines to add signal and background 
    # training and test events "by hand"
    # NOTE that in this case one should not give expressions (such as "var1+var2") in the input 
    #      variable definition, but simply compute the expression before adding the event
    #
    #    # --- begin ----------------------------------------------------------
    #    
    # ... *** please lookup code in TMVA/macros/TMVAClassification.C ***
    #    
    #    # --- end ------------------------------------------------------------
    #
    # ====== end of register trees ==============================================    
            
    # Set individual event weights (the variables must exist in the original TTree)
    #    for signal    : factory.SetSignalWeightExpression    ("weight1*weight2");
    #    for background: factory.SetBackgroundWeightExpression("weight1*weight2");

    # Apply additional cuts on the signal and background sample. 
    # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )
    #mycutSig = TCut( "nu1and2_diBaxis_t>-900 && met_diBaxis_t>-900&& hasb1jet && hasb2jet && hasMET && hasGenMET && hasdRljet && hastwomuons" ) 
    mycutSig = TCut( addedcuts ) 
    #mycutBkg = TCut( "event_n%2!=0 && " + addedcuts ) 
    mycutBkg = TCut( addedcuts ) 
    #mycutBkg = TCut( "nu1and2_diBaxis_t>-900 && met_diBaxis_t>-900&& hasb1jet && hasb2jet && hasMET && hasGenMET && hasdRljet && hastwomuons" ) 

    print mycutSig

    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
    factory.PrepareTrainingAndTestTree( mycutSig, mycutBkg,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" )

    # --------------------------------------------------------------------------------------------------

    # ---- Book MVA methods
    #
    # please lookup the various method configuration options in the corresponding cxx files, eg:
    # src/MethodCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
    # it is possible to preset ranges in the option string in which the cut optimisation should be done:
    # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

    # Cut optimisation
    if "Cuts" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "Cuts",
                            "!H:!V:FitMethod=MC:EffSel:VarProp[0]=FMax:VarProp[1]=FMin" )

    if "CutsD" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsD",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" )

    if "CutsPCA" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsPCA",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" )

    if "CutsGA" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsGA",
                            "H:!V:FitMethod=GA:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95:VarProp[0]=FMin:VarProp[1]=FMax" )

    if "CutsSA" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsSA",
                            "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" )

    # Likelihood ("naive Bayes estimator")
    if "Likelihood" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "Likelihood",
                            "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" )

    # Decorrelated likelihood
    if "LikelihoodD" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodD",
                            "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" )

    # PCA-transformed likelihood
    if "LikelihoodPCA" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodPCA",
                            "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ) 

    # Use a kernel density estimator to approximate the PDFs
    if "LikelihoodKDE" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodKDE",
                            "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ) 

    # Use a variable-dependent mix of splines and kernel density estimator
    if "LikelihoodMIX" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodMIX",
                            "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ) 

    # Test the multi-dimensional probability density estimator
    # here are the options strings for the MinMax and RMS methods, respectively:
    #      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
    #      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
    if "PDERS" in mlist:
        factory.BookMethod( TMVA.Types.kPDERS, "PDERS",
                            "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" )

    if "PDERSD" in mlist:
        factory.BookMethod( TMVA.Types.kPDERS, "PDERSD",
                            "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" )

    if "PDERSPCA" in mlist:
        factory.BookMethod( TMVA.Types.kPDERS, "PDERSPCA",
                             "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" )

   # Multi-dimensional likelihood estimator using self-adapting phase-space binning
    if "PDEFoam" in mlist:
        factory.BookMethod( TMVA.Types.kPDEFoam, "PDEFoam",
                            "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" )

    if "PDEFoamBoost" in mlist:
        factory.BookMethod( TMVA.Types.kPDEFoam, "PDEFoamBoost",
                            "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" )

    # K-Nearest Neighbour classifier (KNN)
    if "KNN" in mlist:
        factory.BookMethod( TMVA.Types.kKNN, "KNN",
                            "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" )

    # H-Matrix (chi2-squared) method
    if "HMatrix" in mlist:
        factory.BookMethod( TMVA.Types.kHMatrix, "HMatrix", "!H:!V" )

    # Linear discriminant (same as Fisher discriminant)
    if "LD" in mlist:
        factory.BookMethod( TMVA.Types.kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" )

    # Fisher discriminant (same as LD)
    if "Fisher" in mlist:
        factory.BookMethod( TMVA.Types.kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" )

    # Fisher with Gauss-transformed input variables
    if "FisherG" in mlist:
        factory.BookMethod( TMVA.Types.kFisher, "FisherG", "H:!V:VarTransform=Gauss" )

    # Composite classifier: ensemble (tree) of boosted Fisher classifiers
    if "BoostedFisher" in mlist:
        factory.BookMethod( TMVA.Types.kFisher, "BoostedFisher", 
                            "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" )

    # Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
    if "FDA_MC" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_MC",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );

    if "FDA_GA" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_GA",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

    if "FDA_SA" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_SA",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

    if "FDA_MT" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_MT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

    if "FDA_GAMT" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_GAMT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

    if "FDA_MCMT" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_MCMT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

    # TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
    if "MLP" in mlist:
        factory.BookMethod( TMVA.Types.kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" )

    if "MLPBFGS" in mlist:
        factory.BookMethod( TMVA.Types.kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" )

    if "MLPBNN" in mlist:
        factory.BookMethod( TMVA.Types.kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ) # BFGS training with bayesian regulators

    # CF(Clermont-Ferrand)ANN
    if "CFMlpANN" in mlist:
        factory.BookMethod( TMVA.Types.kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ) # n_cycles:#nodes:#nodes:...  

    # Tmlp(Root)ANN
    if "TMlpANN" in mlist:
        factory.BookMethod( TMVA.Types.kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ) #n_cycles:#nodes:#nodes:...

    # Support Vector Machine
    if "SVM" in mlist:
        factory.BookMethod( TMVA.Types.kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" )

    # Boosted Decision Trees
    if "BDTG" in mlist:
        factory.BookMethod( TMVA.Types.kBDT, "BDTG",
                            "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5" )

    if "BDT" in mlist:
        factory.BookMethod( TMVA.Types.kBDT, "BDT",
                            "!H:!V:NTrees=850:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" )

    if "BDTB" in mlist:
        factory.BookMethod( TMVA.Types.kBDT, "BDTB",
                            "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" )

    if "BDTD" in mlist:
        factory.BookMethod( TMVA.Types.kBDT, "BDTD",
                            "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" )

    # RuleFit -- TMVA implementation of Friedman's method
    if "RuleFit" in mlist:
        factory.BookMethod( TMVA.Types.kRuleFit, "RuleFit",
                            "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" )

    # --------------------------------------------------------------------------------------------------
            
    # ---- Now you can tell the factory to train, test, and evaluate the MVAs. 

    # Train MVAs
    factory.TrainAllMethods()
    
    # Test MVAs
    factory.TestAllMethods()
    
    # Evaluate MVAs
    factory.EvaluateAllMethods()    
    
    # Save the output.
    outputFile.Close()
    
    print "=== wrote root file %s\n" % outfname
    print "=== TMVAClassification is done!\n"
    
    # open the GUI for the result macros    
    if( gui ):
        gROOT.ProcessLine( "TMVAGui(\"%s\")" % outfname )
        # keep the ROOT thread running
        gApplication.Run() 
##  * Train a BDT with TMVA
##
## \macro_image
## \macro_output
## \macro_code
##
## \author Lailin XU
## Modified from [RegressionKeras.py](https://root.cern/doc/master/RegressionKeras_8py.html) and [TMVARegression.C](https://root.cern/doc/master/TMVARegression_8C.html)

from ROOT import TMVA, TFile, TTree, TCut
from subprocess import call
from os.path import isfile
 
# Setup TMVA
TMVA.Tools.Instance()
(TMVA.gConfig().GetVariablePlotting()).fMaxNumOfAllowedVariablesForScatterPlots = 5

# ROOT file that receives the training results, and the factory writing to it
outfileName = 'TMVA_tutorial_reg_1.root'
output = TFile.Open(outfileName, 'RECREATE')
factory = TMVA.Factory('TMVARegression', output, '!V:!Silent:Color:DrawProgressBar:Transformations=D,G:AnalysisType=Regression')

# Load data
trfile = "SM_ttbar.root"
# NOTE(review): the file downloaded here is not the one opened below
# (trfile) and is not referenced in the visible part of this script --
# confirm whether this download is still needed.
if not isfile('tmva_reg_example.root'):
    call(['curl', '-L', '-O', 'http://root.cern.ch/files/tmva_reg_example.root'])

data = TFile.Open(trfile)
if not data:
    print("Error! file not opened", trfile)
    # Stop here: carrying on would call Get() on a null file handle.
    raise SystemExit(1)
trname = "tree"
tree = data.Get(trname)
Example #13
0
    test_size=0.2,
    #train_size=10000,
    #test_size=5000,
    random_state=0)
# Training
if not args.quiet:
    print('start training ...')
if args.timeit:
    start = timer()

# ROOT file that receives the TMVA output, and the factory writing into it
output = TFile(args.outdir + 'tmva_output.root', 'recreate')
_factory_options = 'AnalysisType=Classification:!V:Silent:!DrawProgressBar'
factory = TMVA.Factory('TMVA', output, _factory_options)
dataloader = TMVA.DataLoader("")
TMVA.gConfig().GetIONames().fWeightFileDir = args.outdir + 'weights/'

# Variables registered with type 'I'; every other variable is registered as 'F'
_integer_vars = (
    'nJet',
    'tau0_decaymode',
    'tau1_decaymode',
    'ntags',
    'ntags_loose',
)
for v in var:
    if v in _integer_vars:
        vtype = 'I'
    else:
        vtype = 'F'
    dataloader.AddVariable(v, vtype)

# Register the training sample first, then the test sample
add_classification_events(dataloader, x_train, y_train, weights=w_train)
add_classification_events(
    dataloader, x_test, y_test, weights=w_test, test=True)

norm = 'None'
def _parse_cli_settings(argv):
    """Parse the command line arguments into a plain dict of settings.

    Args:
        argv: the argument list without the program name
            (i.e. ``sys.argv[1:]``).

    Returns:
        A dict with the keys ``outfname``, ``verbose``, ``where``, ``year``
        and ``SeedN``. Values supplied on the command line are the raw
        strings returned by getopt; untouched entries keep their defaults.

    Prints the usage text and exits the process when an option is invalid
    or when help is requested.
    """
    shortopts = "o:w:y:v:s:h?"  # possible command line options
    longopts = [
        "outputfile=", "where=", "year=", "verbose", "seed=", "help",
        "usage"
    ]
    try:
        opts, _ = getopt.getopt(argv, shortopts, longopts)
    except getopt.GetoptError:  # invalid command line argument
        print("ERROR: unknown options in argument %s" % argv)
        usage()
        sys.exit(1)

    # A plain dict keeps every value at full length. The previous mixed-type
    # numpy table was stored as fixed-width strings, so longer values
    # assigned later were silently cut off.
    settings = {
        'outfname': DEFAULT_OUTFNAME,
        'verbose': True,
        'where': "lpc",
        'year': 2017,
        'SeedN': DEFAULT_SEED,
    }
    option_keys = {
        '-o': 'outfname', '--outputfile': 'outfname',
        '-v': 'verbose', '--verbose': 'verbose',
        '-w': 'where', '--where': 'where',
        '-y': 'year', '--year': 'year',
        '-s': 'SeedN', '--seed': 'SeedN',
    }

    for opt, arg in opts:
        if opt in ("-?", "-h", "--help", "--usage"):  # command line help
            usage()
            sys.exit(0)
        key = option_keys.get(opt)
        if key is None:
            continue
        if opt == '--verbose':
            # The long flag carries no value; storing the empty string
            # would read back as "not verbose".
            settings[key] = True
        else:
            settings[key] = arg
    return settings


def _open_input_file(path):
    """Open a ROOT input file, raising IOError when it cannot be opened."""
    root_file = TFile.Open(path)
    if not root_file:
        raise IOError("could not open input file %s" % path)
    return root_file


def _build_dnn_model(n_inputs):
    """Return the compiled Keras network used by the PyKeras method.

    Three 100-node relu layers (batch normalisation in front of the second
    and third) followed by a 2-node sigmoid output layer.
    """
    model = Sequential()
    model.add(
        Dense(100,
              input_dim=n_inputs,
              kernel_initializer="glorot_normal",
              activation="relu"))
    for _ in range(2):
        model.add(BatchNormalization())
        model.add(
            Dense(100, kernel_initializer="glorot_normal", activation="relu"))
    model.add(Dense(2, activation="sigmoid"))

    model.compile(loss="categorical_crossentropy",
                  optimizer=Adam(),
                  metrics=["accuracy"])
    return model


def main():  # runs the program
    """Train and evaluate a PyKeras DNN in TMVA on a subset of the DNN variables.

    The seed given on the command line is read as a bit mask over
    ``varsList.varList["DNN"]``: every '1' bit enables the variable at the
    same position. Signal and background "ljmet" trees for the requested
    year are registered with a TMVA DataLoader, a Keras model is saved to
    disk and booked as a PyKeras method, and the resulting ROC integral is
    printed.

    Exits with status 1 on invalid options or an unsupported year.
    """
    settings = _parse_cli_settings(sys.argv[1:])

    where = settings['where']
    year = int(settings['year'])
    if year not in (2017, 2018):
        # Only these two years have sample lists and input directories;
        # anything else used to end in an unbound inputDir or an empty loader.
        print("ERROR: unsupported year %s (expected 2017 or 2018)" % year)
        sys.exit(1)

    varList = varsList.varList["DNN"]
    var_length = len(varList)

    # Seed as a zero-padded bit string, one character per candidate variable.
    # int() handles arbitrarily large seeds and, unlike long(), exists in
    # both Python 2 and Python 3.
    str_xbitset = '{:0{}b}'.format(int(settings['SeedN']), var_length)
    nVars = str_xbitset.count('1')
    outf_key = "DNN_" + str(nVars) + "vars"
    # NOTE: the output name is always derived from the number of variables;
    # a name passed with -o/--outputfile is overridden here.
    outfname = "dataset/weights/TMVA_" + outf_key + ".root"

    print("Seed: {}".format(str_xbitset))

    outputfile = TFile(outfname, 'RECREATE')

    checkRootVer()  # check that ROOT version is correct

    ######################################################
    ######################################################
    ######                                          ######
    ######                  T M V A                 ######
    ######                                          ######
    ######################################################
    ######################################################

    if where == "brux":
        if year == 2017:
            inputDir = varsList.inputDirBRUX2017
        else:
            inputDir = varsList.inputDirBRUX2018
    else:
        inputDir = varsList.inputDirCondor

    if year == 2017:
        sig_names = varsList.sig2017_0
        bkg_names = varsList.bkg2017_0
    else:
        sig_names = varsList.sig2018_0
        bkg_names = varsList.bkg2018_0

    # Set up TMVA
    ROOT.TMVA.Tools.Instance()
    ROOT.TMVA.PyMethodBase.PyInitialize()

    fClassifier = TMVA.Factory(
        'VariableImportance',
        '!V:!ROC:Silent:!Color:!DrawProgressBar:Transformations=I;:AnalysisType=Classification'
    )

    fClassifier.SetVerbose(bool(settings['verbose']))

    loader = TMVA.DataLoader("dataset/" + str_xbitset)

    # Register only the variables whose bit is set in the seed
    for bit, var in zip(str_xbitset, varList):
        if bit != '1':
            continue
        var_type = "I" if var[0] == "NJets_MultiLepCalc" else "F"
        loader.AddVariable(var[0], var[1], var[2], var_type)

    # The files and trees are kept in these lists so that they stay
    # referenced for as long as the loader is in use.
    sig_list = []
    sig_trees_list = []
    bkg_list = []
    bkg_trees_list = []

    # add signals to loader
    for fname in sig_names:
        sig_file = _open_input_file(inputDir + fname)
        sig_tree = sig_file.Get("ljmet")
        sig_list.append(sig_file)
        sig_trees_list.append(sig_tree)
        sig_tree.GetEntry(0)
        loader.AddSignalTree(sig_tree)

    # add backgrounds to loader (empty trees are not registered)
    for fname in bkg_names:
        bkg_file = _open_input_file(inputDir + fname)
        bkg_tree = bkg_file.Get("ljmet")
        bkg_list.append(bkg_file)
        bkg_trees_list.append(bkg_tree)
        bkg_tree.GetEntry(0)
        if bkg_tree.GetEntries() == 0:
            continue
        loader.AddBackgroundTree(bkg_tree)

    # set signal and background weights
    loader.SetSignalWeightExpression(weightStrS)
    loader.SetBackgroundWeightExpression(weightStrB)

    # set cut thresholds for signal and background
    mycutSig = TCut(cutStrS)
    mycutBkg = TCut(cutStrB)

    # Event counts passed to TMVA for the training/test split
    NSIG = 0
    NSIG_TEST = 0
    NBKG = 0
    NBKG_TEST = 0

    loader.PrepareTrainingAndTestTree(
      mycutSig, mycutBkg,
      "nTrain_Signal=" + str(NSIG) + \
      ":nTrain_Background=" + str(NBKG) + \
      ":nTest_Signal=" + str(NSIG_TEST) + \
      ":nTest_Background=" + str(NBKG_TEST) + \
      ":SplitMode=Random:NormMode=NumEvents:!V"
    )

    #####################################################
    #####################################################
    ######                                         ######
    ######            K E R A S   D N N            ######
    ######                                         ######
    #####################################################
    #####################################################

    model_name = "TTTT_TMVA_model.h5"

    model = _build_dnn_model(nVars)
    model.save(model_name)
    model.summary()

    ######################################################
    ######################################################
    ######                                          ######
    ######                  T M V A                 ######
    ######                                          ######
    ######################################################
    ######################################################

    # Options for the PyKeras method; the saved model file has to be
    # specified in this string
    kerasSetting = "!H:!V:VarTransform=G:FilenameModel=" + model_name + \
     ":NumEpochs=15:BatchSize=512"

    # run the classifier
    fClassifier.BookMethod(loader, TMVA.Types.kPyKeras, "PyKeras",
                           kerasSetting)

    (TMVA.gConfig().GetIONames()
     ).fWeightFileDir = str_xbitset + "/weights/" + outf_key
    #print("New weight file directory: {}".format((TMVA.gConfig().GetIONames()).fWeightFileDir))

    fClassifier.TrainAllMethods()
    fClassifier.TestAllMethods()
    fClassifier.EvaluateAllMethods()

    SROC = fClassifier.GetROCIntegral("dataset/" + str_xbitset, "PyKeras")
    print("ROC-integral: {}".format(SROC))
    fClassifier.DeleteAllMethods()
    fClassifier.fMethodsMap.clear()

    outputfile.Close()
##
## \macro_image
## \macro_output
## \macro_code
##
## \author Lailin XU
## Modified from [ClassificationKeras.py](https://root.cern/doc/master/ClassificationKeras_8py.html) and [TMVAClassification.C](https://root.cern/doc/master/TMVAClassification_8C.html)

from ROOT import TMVA, TFile, TTree, TCut
from subprocess import call
from os.path import isfile

# Setup TMVA
# =======================
TMVA.Tools.Instance()
# Set the maximum number of variables allowed for scatter plots
plot_config = TMVA.gConfig().GetVariablePlotting()
plot_config.fMaxNumOfAllowedVariablesForScatterPlots = 5

# ROOT file that collects the training results
outfileName = 'TMVA_tutorial_cla_1.root'
output = TFile.Open(outfileName, 'RECREATE')

# Create the factory object; the methods to investigate are chosen later and
# the factory is the only TMVA object you have to interact with.
#
#    First argument:  base of the name of all the weight files in the
#                     directory weight/
#    Second argument: output file for the training results
#    Third argument:  colon-separated factory options
factory_options = ":".join([
    "!V",
    "!Silent",
    "Color",
    "DrawProgressBar",
    "Transformations=I;D;P;G,D",
    "AnalysisType=Classification",
])
factory = TMVA.Factory("TMVAClassification", output, factory_options)

# Load data
Example #16
0
def _dijet_variables(regression):
    """Return the (expression, type) TMVA input variables of the Dijet analysis.

    `regression` selects the regression-corrected Higgs/jet quantities instead
    of the uncorrected ones; the remaining variables are common to both modes.
    """
    if not regression:
        higgsVars = [
            ("H_mass := H.mass", 'F'),
            ("H_pt :=H.pt", 'F'),
            ("hJet_pt1 := hJet_pt[0]", 'F'),
            ("hJet_pt2 := hJet_pt[1]", 'F'),
        ]
    else:
        higgsVars = [
            ("HCorr_mass := newHiggsMass", 'F'),
            ("HCorr_pt := newHiggsPt", 'F'),
            ("hJ1Corr_pt := hJet_genPtReg0", 'F'),
            ("hJ2Corr_pt := hJet_genPtReg1", 'F'),
        ]

    return higgsVars + [
        ("V_pt :=V.pt", 'F'),
        ("H_dR := H.dR", 'F'),
        ("hJ12_MaxCsv := max(hJet_csv[0],hJet_csv[1])", 'F'),
        ("hJ12_MinCsv := min(hJet_csv[0],hJet_csv[1])", 'F'),
        ("HV_dPhi := HVdPhi", 'F'),
        ("H_dEta := H.dEta", 'F'),
        ("NAddJet:=Sum$(aJet_pt>20 && abs(aJet_eta)<4.5)", 'I'),
        ("dPull := deltaPullAngle", 'F'),
    ]


def _subjet_variables(regression):
    """Return the (expression, type) TMVA input variables of the Subjet analysis.

    `regression` selects the regression-corrected fat-Higgs/subjet quantities
    instead of the filtered ones; the remaining variables are common to both.
    """
    if not regression:
        higgsVars = [
            ("H_mass := FatH.filteredmass", 'F'),
            ("H_pt   := FatH.filteredpt", 'F'),
            ("SJ1_pt := fathFilterJets_pt[0]", 'F'),
            ("SJ2_pt := fathFilterJets_pt[1]", 'F'),
            ("SJ3_pt := Alt$(fathFilterJets_pt[2],0)", 'F'),
        ]
    else:
        higgsVars = [
            ("HCorr_mass := newfatHiggsMass", 'F'),
            ("HCorr_pt := newfatHiggsPt", 'F'),
            ("SJ1Corr_pt := fathFilterJets_genPtReg0", 'F'),
            # NOTE(review): "fathFilterJetsx" (with an 'x') differs from the
            # "fathFilterJets" prefix used everywhere else -- possibly a typo;
            # confirm against the branch names of the input tree.
            ("SJ2Corr_pt := fathFilterJetsx_genPtReg1", 'F'),
            ("SJ3_pt := Alt$(fathFilterJets_pt[2],0)", 'F'),  # change later
        ]

    return higgsVars + [
        ("V_pt   := V.pt", 'F'),
        ("HV_dPhi := "
         "FatH.filteredphi - V.phi > pi ? "
         "abs(FatH.filteredphi - V.phi - 2*pi) : "
         "FatH.filteredphi - V.phi < -pi ? "
         "abs(FatH.filteredphi - V.phi + 2*pi) : "
         "abs(FatH.filteredphi - V.phi)", 'F'),
        ("SJ1_csv := fathFilterJets_csv[0]", 'F'),
        ("SJ2_csv := fathFilterJets_csv[1]", 'F'),
        ("SJ3_csv := Alt$(fathFilterJets_csv[2],0)", 'F'),
        ("SJ12_dEta := "
         "nfathFilterJets < 2 ? 0 : "
         "fabs(fathFilterJets_eta[0] - fathFilterJets_eta[1] )", 'F'),
        ("SJ13_dEta := "
         "nfathFilterJets < 3 ? 0 : "
         "abs( fathFilterJets_eta[0] - Alt$(fathFilterJets_eta[2],0))", 'F'),
        ("SJ12_dPhi := "
         "nfathFilterJets < 2 ? 0 : "
         "fathFilterJets_phi[0] - fathFilterJets_phi[1] > pi ? "
         "abs( fathFilterJets_phi[0] - fathFilterJets_phi[1] - 2*pi) : "
         "fathFilterJets_phi[0] - fathFilterJets_phi[1] < -pi ? "
         "abs( fathFilterJets_phi[0] - fathFilterJets_phi[1] + 2*pi) : "
         "abs( fathFilterJets_phi[0] - fathFilterJets_phi[1])", 'F'),
        ("SJ13_dPhi := "
         "nfathFilterJets < 3 ? 0 : "
         "fathFilterJets_phi[0] - Alt$(fathFilterJets_phi[2],0) > pi ? "
         "abs(fathFilterJets_phi[0] - "
         "Alt$(fathFilterJets_phi[2],0) - 2*pi) : "
         "fathFilterJets_phi[0] - Alt$(fathFilterJets_phi[2],0) < -pi ? "
         "abs(fathFilterJets_phi[0] - "
         "Alt$(fathFilterJets_phi[2],0) + 2*pi) : "
         "abs(fathFilterJets_phi[0] - Alt$(fathFilterJets_phi[2],0))", 'F'),
        ("SJ12_dR := "
         "nfathFilterJets < 2 ? 0 : "
         "deltaR(fathFilterJets_eta[0],fathFilterJets_phi[0],fathFilterJets_eta[1],fathFilterJets_phi[1])", 'F'),
        ("SJ13_dR := nfathFilterJets < 3 ? 0 : "
         "deltaR(fathFilterJets_eta[0],fathFilterJets_phi[0],Alt$(fathFilterJets_eta[2],0),Alt$(fathFilterJets_phi[2],0))", 'F'),
        ("NAddJet:= "
         "nfathFilterJets < 2 ? 0 : "
         "Sum$(aJet_pt>20 && abs(aJet_eta)<4.5 && deltaR(fathFilterJets_eta[0],fathFilterJets_phi[0],aJet_eta,aJet_phi)>0.3 && deltaR(fathFilterJets_eta[1],fathFilterJets_phi[1],aJet_eta,aJet_phi)>0.3)+Sum$(hJet_pt>20 && abs(hJet_eta)<4.5 && deltaR(fathFilterJets_eta[0],fathFilterJets_phi[0],hJet_eta,hJet_phi)>0.3 && deltaR(fathFilterJets_eta[1],fathFilterJets_phi[1],hJet_eta,hJet_phi)>0.3)", 'I'),
    ]


def _selection_cuts(analysis, regression):
    """Return the list of selection cuts for `analysis` ("Dijet", else Subjet).

    The caller joins the entries with " && " to form the TCut expression.
    """
    # table10 AN-2011/430
    if analysis == "Dijet":
        cuts = [
            "Vtype == 0",
            "vLepton_pt[0]>20.",
            "H.HiggsFlag > 0",
            "V.mass > 75.0",
            "V.mass < 105.0",
            "V.pt > 100.0",
            "max(hJet_csv[0],hJet_csv[1]) > 0.244",
            "min(hJet_csv[0],hJet_csv[1]) > 0.244",
        ]
        if not regression:
            cuts += [
                "hJet_pt[0] > 20.0",
                "hJet_pt[1] > 20.0",
                "H.mass > 80.0",
                "H.mass < 150.0",
            ]
        else:
            cuts += [
                "hJet_genPtReg0 > 20.0",
                # Was a second copy of the hJet_genPtReg0 cut, which left the
                # second jet without a pt requirement.
                "hJet_genPtReg1 > 20.0",
                "newHiggsMass > 80.0 && newHiggsMass < 150.0",
            ]
        return cuts

    cuts = [
        "Vtype == 0",
        "vLepton_pt[0]>20.",
        "FatH.FatHiggsFlag > 0",
        "V.mass > 75.0",
        "V.mass < 105.0",
        "V.pt > 100.0",
        "nfathFilterJets >= 2",
        "max(fathFilterJets_csv[0],fathFilterJets_csv[1]) > 0.244",
        "min(fathFilterJets_csv[0],fathFilterJets_csv[1]) > 0.244",
    ]
    if not regression:
        cuts += [
            "fathFilterJets_pt[0] > 20.0",
            "fathFilterJets_pt[1] > 20.0",
            "FatH.filteredmass > 80.0",
            "FatH.filteredmass < 150.0",
        ]
    else:
        cuts += [
            "fathFilterJets_genPtReg0 > 20.0",
            # Was a second copy of the ..._genPtReg0 cut. The branch name below
            # deliberately mirrors the one used for the SJ2Corr_pt input
            # variable. NOTE(review): confirm the "Jetsx" spelling.
            "fathFilterJetsx_genPtReg1 > 20.0",
            "newfatHiggsMass > 80.0 && newfatHiggsMass < 150.0",
        ]
    return cuts


def main():
    """Train, test and evaluate a TMVA BDT for the Dijet or Subjet analysis.

    Command-line options (parsed from sys.argv with getopt):
      -a/--analysis    analysis name, "Dijet" or "Subjet" (default: ANALYSIS)
      -o/--outputfile  name of the output ROOT file (default: OUTFNAME)
      -r/--regression  use the regression-corrected variables and cuts
      -v/--verbose     make the TMVA factory verbose
      -h/-?/--help/--usage  print the usage text and exit

    Signal and background trees are taken from the files listed in SAMPLES
    (relative to INPUTDIR); each tree is weighted by xs / nEVT, where nEVT is
    the content of bin 1 of the file's "Count" histogram.

    Exits with status 1 on unknown options, an unknown analysis name, ROOT
    version 5.18, or an unusable input sample.
    """
    try:
        # retrieve command line options
        shortopts  = "a:o:r:vh?"
        longopts   = ["analysis=","outputfile=", "regression=", "verbose", "help", "usage"]
        opts, args = getopt.getopt( sys.argv[1:], shortopts, longopts )

    except getopt.GetoptError:
        # print help information and exit:
        print("ERROR: unknown options in argument %s" % sys.argv[1:])
        usage()
        sys.exit(1)

    _outfname   = OUTFNAME
    _analysis   = ANALYSIS
    verbose     = False
    _regression = REGRESSION

    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-o", "--outputfile"):
            _outfname = a
        elif o in ("-a", "--analysis"):
            _analysis = a
        elif o in ("-r", "--regression"):
            # The option is declared with a mandatory argument ("r:" /
            # "regression="), but its value is ignored: presence enables it.
            _regression = True
        elif o in ("-v", "--verbose"):
            verbose = True

    # Validate the analysis name before anything is written: the output file
    # is opened with RECREATE, which would replace an existing file.
    if _analysis not in ("Dijet", "Subjet"):
        print("Problem specifying analysis. Please choose Dijet or Subjet.")
        sys.exit(1)

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut

    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print("*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA")
        print("*** does not run properly (function calls with enums in the argument are ignored).")
        print("*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),")
        print("*** or use another ROOT version (e.g., ROOT 5.19).")
        sys.exit(1)

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile( _outfname, 'RECREATE' )

    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory( "TMVAClassification", outputFile,
                            "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" )

    # Set verbosity
    factory.SetVerbose( verbose )

    # If you wish to modify default settings
    # (please check "src/Config.h" to see all available global options)
    #    gConfig().GetVariablePlotting()).fTimesRMS = 8.0
    #    gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"
    TMVA.gConfig().GetIONames().fWeightFileDir = "weights_" + _analysis

    # Define the input variables that shall be used for the classifier training
    # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    # [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
    if _analysis == "Dijet":
        variables = _dijet_variables(_regression)
    else:
        variables = _subjet_variables(_regression)
    for expression, varType in variables:
        factory.AddVariable(expression, varType)

    # You can add so-called "Spectator variables", which are not used in the MVA training,
    # but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
    # input variables, the response values of all trained MVAs, and the spectator variables
    #factory.AddSpectator("hJet_pt1 := hJet_pt[0]", 'F');
    #factory.AddSpectator("hJet_pt2 := hJet_pt[1]", 'F');

    ## Get the Signal and Background trees
    # NOTE(review): the handles are collected so that every input TFile stays
    # referenced while the factory uses its tree; rebinding the only Python
    # reference on the next iteration may otherwise release the file.
    inputFiles = []
    for Sample, SampleInfo in SAMPLES.items():
        SampleType = SampleInfo[0]  # signal or background
        infile = os.path.join(INPUTDIR, SampleInfo[1])
        xs = SampleInfo[2]

        f = TFile.Open(infile)
        if not f or f.IsZombie():
            print("ERROR: could not open input file %s" % infile)
            sys.exit(1)
        inputFiles.append(f)

        ## get number of step 1 events
        h = f.Get("Count")
        if not h:
            print("ERROR: no 'Count' histogram in %s" % infile)
            sys.exit(1)
        nEVT = int(h.GetBinContent(1))
        if nEVT <= 0:
            print("ERROR: 'Count' histogram in %s reports %d events" % (infile, nEVT))
            sys.exit(1)

        # float() avoids Python 2 integer (floor) division when xs is an int
        wt = float(xs) / nEVT
        print("%s :  %s" % (Sample, infile))
        print("XS:nEVT:wt:  %s %s %s" % (xs, nEVT, wt))

        theTree = f.Get( TREE )
        if not theTree:
            print("ERROR: no tree '%s' in %s" % (TREE, infile))
            sys.exit(1)

        if SampleType == "S":
            factory.AddSignalTree    ( theTree, wt )
        elif SampleType == "B":
            factory.AddBackgroundTree( theTree, wt )
        else:
            print("Trouble extracting SampleType for this sample")
            sys.exit(1)

    # The same selection is applied to signal and background
    cutString = " && ".join(_selection_cuts(_analysis, _regression))

    print(cutString)
    mycutSig = TCut( cutString )
    mycutBkg = TCut( cutString )

    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples

    prepareOptions="nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=None:!V"
    #prepareOptions="SplitMode=Random:!V"
    factory.PrepareTrainingAndTestTree( mycutSig, mycutBkg, prepareOptions)

    bdtOptions = ":".join([
        "!H",
        "!V",
        "NTrees=850",
        "nEventsMin=150",
        "MaxDepth=3",
        "BoostType=AdaBoost",
        "AdaBoostBeta=0.3",
        "SeparationType=GiniIndex",
        "nCuts=20",
        "PruneMethod=NoPruning",
        # "PruneMethod=CostComplexity"
    ])

    print(bdtOptions)

    factory.BookMethod( TMVA.Types.kBDT, "BDT", bdtOptions)

    # Train MVAs
    factory.TrainAllMethods()

    # Test MVAs
    factory.TestAllMethods()

    # Evaluate MVAs
    factory.EvaluateAllMethods()

    # Save the output.
    outputFile.Close()

    print("=== wrote root file %s\n" % _outfname)
    print("=== TMVAClassification is done!\n")
Example #17
0
def TMVAClassification(methods,
                       sigfname,
                       bkgfname,
                       optname,
                       channel,
                       trees,
                       verbose=False):  #="DecayTree,DecayTree"
    # Print methods
    mlist = methods.replace(' ', ',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Define trees
    trees = trees.split(",")
    if len(trees) - trees.count('') != 2:
        print "ERROR: need to give two trees (each one for signal and background)"
        print trees
        sys.exit(1)
    treeNameSig = trees[0]
    treeNameBkg = trees[1]

    # Print output file and directory
    outfname = "TMVA_%s_%s.root" % (channel, optname)
    myWeightDirectory = "weights_%s_%s" % (channel, optname)
    print "=== TMVAClassification: output will be written to:"
    print "=== %s" % outfname
    print "=== %s" % myWeightDirectory

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut

    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)

    # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI
    #gROOT.SetMacroPath( "./" )
    #gROOT.Macro       ( "./tmva/test/TMVAlogon.C" )
    #gROOT.LoadMacro   ( "./tmva/test/TMVAGui.C" ) ###Is this really necessary??

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Setup TMVA
    TMVA.Tools.Instance()

    # Output file
    outputFile = TFile(outfname, 'RECREATE')

    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory(
        "TMVAClassification", outputFile,
        "!V:!Silent:Color:DrawProgressBar:Transformations=I:AnalysisType=Classification"
    )

    # Set verbosity
    factory.SetVerbose(verbose)

    # Load data
    dataloader = TMVA.DataLoader("dataset")

    # If you wish to modify default settings
    # (please check "src/Config.h" to see all available global options)
    #    gConfig().GetVariablePlotting()).fTimesRMS = 8.0
    (TMVA.gConfig().GetIONames()).fWeightFileDir = myWeightDirectory
    # Define the input variables that shall be used for the classifier training
    # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    # [all types of expressions that can also be parsed by TTree::Draw( "expression" )]

    print "*** Training on channel:"
    print "*** %s" % channel
    print "***"
    '''
    if channel == "1":
        #dataloader.AddVariable( "pplus_ProbNNp",                      "Prob(p^{+})",                             "",     'F' );
        #dataloader.AddVariable( "Kminus_ProbNNk",                     "Prob(K^{-})",                             "",     'F' );

        dataloader.AddVariable( "pplus_PT",                           "P_{T}(p^{+})",                             "MeV", 'F' );
        dataloader.AddVariable( "Kminus_PT",                          "P_{T}(K^{-})",                             "MeV", 'F' );
        dataloader.AddVariable( "gamma_PT",                           "P_{T}(#gamma)",                            "MeV", 'F' );
        dataloader.AddVariable( "Lambda_1520_0_PT",                   "P_{T}(#Lambda(1520))",                     "MeV", 'F' );
        dataloader.AddVariable( "B_PT",                               "P_{T}(#Lambda_{b})",                       "MeV", 'F' );

        dataloader.AddVariable( "beta:=(-gamma_P+Kminus_P+pplus_P)/(gamma_P+Kminus_P+pplus_P)","#beta",             "MeV", 'F' );
        dataloader.AddVariable( "MomCons1:=-B_P+gamma_P+Lambda_1520_0_P","P_{tot,1}",                               "MeV", 'F' );
        dataloader.AddVariable( "MomCons2:=-Lambda_1520_0_P+Kminus_P+pplus_P","P_{tot,2}",                          "MeV", 'F' );

        dataloader.AddVariable( "Sum_Kminus_p_eta:=atanh(pplus_PZ/pplus_P)+atanh(Kminus_PZ/Kminus_P)","#eta(K^{-})+#eta(p^{+})","MeV", 'F' );
        dataloader.AddVariable( "Diff_Kminus_p_eta:=atanh(Kminus_PZ/Kminus_P)-atanh(pplus_PZ/pplus_P)","#eta(K^{-})-#eta(p^{+})","MeV", 'F' );

        dataloader.AddVariable( "pplus_IPCHI2_OWNPV",                 "#chi^{2}_{IP}(p^{+})",                       ""  ,  'F' );
        dataloader.AddVariable( "Kminus_IPCHI2_OWNPV",                "#chi^{2}_{IP}(K^{-})",                       ""  ,  'F' );
        dataloader.AddVariable( "B_IPCHI2_OWNPV",                     "#chi^{2}_{IP}(#Lambda_{b})",                 ""  ,  'F' );
        #dataloader.AddVariable( "gamma_IPCHI2_OWNPV",                 "IP #chi^{2}(#gamma)",                       ""  ,  'F' );
        #dataloader.AddVariable( "Lambda_1520_0_IP_OWNPV",             "IP(#Lambda(1520))",                        "mm",  'F' );
        #dataloader.AddVariable( "Lambda_1520_0_IPCHI2_OWNPV",         "IP#chi^{2}(#Lambda(1520))",               "",    'F' );
        
        dataloader.AddVariable( "Lambda_1520_0_OWNPV_CHI2",           "#chi^{2}_{vertex}(#Lambda(1520))",           ""  ,  'F' );
        dataloader.AddVariable( "B_OWNPV_CHI2",                       "#chi^{2}_{vertex}(#Lambda_{b})",             ""  ,  'F' );
        dataloader.AddVariable( "B_DIRA_OWNPV",                       "DIRA(#Lambda_{b})",                          ""  ,  'F' );
        
        #dataloader.AddVariable( "Lambda_1520_0_FDCHI2_OWNPV",         "FD #chi^{2}(#Lambda(1520))",               "",    'F' );
        dataloader.AddVariable( "B_FDCHI2_OWNPV",                     "#chi^{2}_{FD}(#Lambda_{b})",                 "",    'F' );
    '''

    if channel == "2":
        dataloader.AddVariable("pplus_PT", "P_{T}(p^{+})", "MeV", 'F')
        dataloader.AddVariable("Kminus_PT", "P_{T}(K^{-})", "MeV", 'F')
        dataloader.AddVariable("gamma_PT", "P_{T}(#gamma)", "MeV", 'F')
        dataloader.AddVariable("Lambda_1520_0_PT", "P_{T}(#Lambda*)", "MeV",
                               'F')
        dataloader.AddVariable("B_PT", "P_{T}(#Lambda_{b})", "MeV", 'F')

        dataloader.AddVariable(
            "beta:=(-gamma_P+Kminus_P+pplus_P)/(gamma_P+Kminus_P+pplus_P)",
            "#beta", "", 'F')
        #ok
        #dataloader.AddVariable( "MomCons1:=-B_P+gamma_P+Lambda_1520_0_P","P_{tot,1}",                               "MeV", 'F' );#BDT learned Mass check1
        dataloader.AddVariable("MomCons2:=-Lambda_1520_0_P+Kminus_P+pplus_P",
                               "P_{tot,2}", "MeV", 'F')
        #ok

        #dataloader.AddVariable( "Sum_Kminus_p_eta:=atanh(pplus_PZ/pplus_P)+atanh(Kminus_PZ/Kminus_P)","#eta(K^{-})+#eta(p^{+})","", 'F' );#99correlationL_eta
        dataloader.AddVariable(
            "Diff_Kminus_p_eta:=atanh(Kminus_PZ/Kminus_P)-atanh(pplus_PZ/pplus_P)",
            "#eta(K^{-})-#eta(p^{+})", "", 'F')
        dataloader.AddVariable(
            "Lambda_1520_0_eta:=atanh(Lambda_1520_0_PZ/Lambda_1520_0_P)",
            "#eta(#Lambda*)", "", 'F')
        dataloader.AddVariable("gamma_eta:=atanh(gamma_PZ/gamma_P)",
                               "#eta(#gamma)", "", 'F')

        dataloader.AddVariable("pplus_IPCHI2_OWNPV", "#chi^{2}_{IP}(p^{+})",
                               "", 'F')
        #dataloader.AddVariable( "Kminus_IPCHI2_OWNPV",                "#chi^{2}_{IP}(K^{-})",                       ""  ,  'F' );
        dataloader.AddVariable("B_IPCHI2_OWNPV", "#chi^{2}_{IP}(#Lambda_{b})",
                               "", 'F')
        dataloader.AddVariable("Lambda_1520_0_IPCHI2_OWNPV",
                               "#chi^{2}_{IP}(#Lambda*)", "", 'F')

        dataloader.AddVariable("Lambda_1520_0_OWNPV_CHI2",
                               "#chi^{2}_{vertex}(#Lambda*)", "", 'F')
        dataloader.AddVariable("B_OWNPV_CHI2",
                               "#chi^{2}_{vertex}(#Lambda_{b})", "", 'F')
        #dataloader.AddVariable( "B_BMassFit_chi2/B_BMassFit_nDOF",    "#chi^{2}_{DTF}/n_{dof}",                     ""  ,  'F' );#BDT learned Mass check1
        #dataloader.AddVariable( "B_PVFit_chi2/B_PVFit_nDOF",          "#chi^{2}_{DTF}/n_{dof}",                     ""  ,  'F' );#put it out because array

        #dataloader.AddVariable( "B_DIRA_OWNPV",                       "DIRA(#Lambda_{b})",                          ""  ,  'F' ); #not used by BDT
        #dataloader.AddVariable( "Lambda_1520_0_DIRA_OWNPV",           "DIRA(#Lambda*)",                        ""  ,  'F' ); #not used
        #dataloader.AddVariable( "Lambda_1520_0_FDCHI2_OWNPV",         "FD #chi^{2}(#Lambda*)",               "",    'F' ); #not used
        #dataloader.AddVariable( "B_FDCHI2_OWNPV",                     "#chi^{2}_{FD}(#Lambda_{b})",                 "",    'F' ); #not used

    # Add Spectator Variables: not used for Training but written in final TestTree
    #dataloader.AddSpectator( "B_M",                                   "M(#Lambda_{b})",                           "MeV");
    #dataloader.AddSpectator( "Lambda_1520_0_M",                       "M(#Lambda*)",                         "MeV");

    # Read input data
    if gSystem.AccessPathName(sigfname) != 0:
        print "Can not find %s" % sigfname
    if gSystem.AccessPathName(bkgfname) != 0:
        print "Can not find %s" % bkgfname

    inputSig = TFile.Open(sigfname)
    inputBkg = TFile.Open(bkgfname)

    # Get the signal and background trees for training
    signal = inputSig.Get(treeNameSig)
    background = inputBkg.Get(treeNameBkg)

    # Global event weights (see below for setting event-wise weights)
    signalWeight = 1.0
    backgroundWeight = 1.0

    # ====== register trees ====================================================
    #
    # the following method is the prefered one:
    # you can add an arbitrary number of signal or background trees
    dataloader.AddSignalTree(signal, signalWeight)
    dataloader.AddBackgroundTree(background, backgroundWeight)

    # To give different trees for training and testing, do as follows:
    #    dataloader.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" )
    #    dataloader.AddSignalTree( signalTestTree,     signalTestWeight,  "Test" )

    # Use the following code instead of the above two or four lines to add signal and background
    # training and test events "by hand"
    # NOTE that in this case one should not give expressions (such as "var1+var2") in the input
    #      variable definition, but simply compute the expression before adding the event
    #
    #    # --- begin ----------------------------------------------------------
    #
    # ... *** please lookup code in TMVA/macros/TMVAClassification.C ***
    #
    #    # --- end ------------------------------------------------------------
    #
    # ====== end of register trees ==============================================

    # Set individual event weights (the variables must exist in the original TTree)
    #    for signal    : dataloader.SetSignalWeightExpression    ("weight1*weight2");
    #    for background: dataloader.SetBackgroundWeightExpression("weight1*weight2");
    #dataloader.SetBackgroundWeightExpression( "weight" )

    # Apply additional cuts on the signal and background sample.
    # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )

    mycutSig = TCut(
        ""
    )  #"pplus_ProbNNp>0.2 && Kminus_ProbNNk>0.2 && B_PT>4000 && Lambda_1520_0_PT>1500 && gamma_PT>3000 && pplus_PT>1000 && B_FDCHI2_OWNPV>100 && pplus_IPCHI2_OWNPV>50 && Kminus_IPCHI2_OWNPV>40")# B_BKGCAT==0 directly applied in root sample
    #print(sigfname + str( mycutSig ) + treeNameSig)

    mycutBkg = TCut(
        "B_M>6120"
    )  #"pplus_ProbNNp>0.2 && Kminus_ProbNNk>0.2 && B_PT>4000 && Lambda_1520_0_PT>1500 && gamma_PT>3000 && pplus_PT>1000 && B_FDCHI2_OWNPV>100 && pplus_IPCHI2_OWNPV>50 && Kminus_IPCHI2_OWNPV>40 && B_M>6120")#(B_M>6120 || B_M<5120)" )
    #print(bkgfname + str( mycutBkg ) + treeNameBkg)

    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
    dataloader.PrepareTrainingAndTestTree(
        mycutSig, mycutBkg,
        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V"
    )

    # --------------------------------------------------------------------------------------------------

    # ---- Book MVA methods
    #
    # please lookup the various method configuration options in the corresponding cxx files, eg:
    # src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
    # it is possible to preset ranges in the option string in which the cut optimisation should be done:
    # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

    # Cut optimisation
    if "Cuts" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kCuts, "Cuts",
            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart")

    if "CutsD" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kCuts, "CutsD",
            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate"
        )

    if "CutsPCA" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kCuts, "CutsPCA",
            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA"
        )

    if "CutsGA" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kCuts, "CutsGA",
            "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95"
        )

    if "CutsSA" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kCuts, "CutsSA",
            "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale"
        )

    # Likelihood ("naive Bayes estimator")
    if "Likelihood" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kLikelihood, "Likelihood",
            "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50"
        )

    # Decorrelated likelihood
    if "LikelihoodD" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kLikelihood, "LikelihoodD",
            "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate"
        )

    # PCA-transformed likelihood
    if "LikelihoodPCA" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kLikelihood, "LikelihoodPCA",
            "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA"
        )

    # Use a kernel density estimator to approximate the PDFs
    if "LikelihoodKDE" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kLikelihood, "LikelihoodKDE",
            "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50"
        )

    # Use a variable-dependent mix of splines and kernel density estimator
    if "LikelihoodMIX" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kLikelihood, "LikelihoodMIX",
            "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50"
        )

    # Test the multi-dimensional probability density estimator
    # here are the options strings for the MinMax and RMS methods, respectively:
    #      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
    #      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
    if "PDERS" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kPDERS, "PDERS",
            "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600"
        )

    if "PDERSD" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kPDERS, "PDERSD",
            "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate"
        )

    if "PDERSPCA" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kPDERS, "PDERSPCA",
            "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA"
        )

# Multi-dimensional likelihood estimator using self-adapting phase-space binning
    if "PDEFoam" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kPDEFoam, "PDEFoam",
            "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T"
        )

    if "PDEFoamBoost" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kPDEFoam, "PDEFoamBoost",
            "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T"
        )

    # K-Nearest Neighbour classifier (KNN)
    if "KNN" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kKNN, "KNN",
            "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim"
        )

    # H-Matrix (chi2-squared) method
    if "HMatrix" in mlist:
        factory.BookMethod(dataloader, TMVA.Types.kHMatrix, "HMatrix", "!H:!V")

    # Linear discriminant (same as Fisher discriminant)
    if "LD" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kLD, "LD",
            "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10"
        )

    # Fisher discriminant (same as LD)
    if "Fisher" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kFisher, "Fisher",
            "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10"
        )

    # Fisher with Gauss-transformed input variables
    if "FisherG" in mlist:
        factory.BookMethod(dataloader, TMVA.Types.kFisher, "FisherG",
                           "H:!V:VarTransform=Gauss")

    # Composite classifier: ensemble (tree) of boosted Fisher classifiers
    if "BoostedFisher" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kFisher, "BoostedFisher",
            "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2"
        )

    # Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
    if "FDA_MC" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kFDA, "FDA_MC",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1"
        )

    if "FDA_GA" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kFDA, "FDA_GA",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1"
        )

    if "FDA_SA" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kFDA, "FDA_SA",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale"
        )

    if "FDA_MT" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kFDA, "FDA_MT",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch"
        )

    if "FDA_GAMT" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kFDA, "FDA_GAMT",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim"
        )

    if "FDA_MCMT" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kFDA, "FDA_MCMT",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20"
        )

    # TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
    if "MLP" in mlist:
        factory.BookMethod(
            dataloader,
            TMVA.Types.kMLP,
            "MLP",
            #"!H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+3:TestRate=5:!UseRegulator" )#Try
            "!H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator"
        )  #Old

    if "MLPBFGS" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kMLP, "MLPBFGS",
            "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator"
        )

    if "MLPBNN" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kMLP, "MLPBNN",
            "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator"
        )  # BFGS training with bayesian regulators

    # CF(Clermont-Ferrand)ANN
    if "CFMlpANN" in mlist:
        factory.BookMethod(dataloader, TMVA.Types.kCFMlpANN, "CFMlpANN",
                           "!H:!V:NCycles=2000:HiddenLayers=N+1,N"
                           )  # n_cycles:#nodes:#nodes:...

    # Tmlp(Root)ANN
    if "TMlpANN" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kTMlpANN, "TMlpANN",
            "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"
        )  # n_cycles:#nodes:#nodes:...

    # Support Vector Machine
    if "SVM" in mlist:
        factory.BookMethod(dataloader, TMVA.Types.kSVM, "SVM",
                           "Gamma=0.25:Tol=0.001:VarTransform=Norm")

    # Boosted Decision Trees
    if "BDTG" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kBDT, "BDTG",
            "!H:!V:NTrees=600:BoostType=Grad:Shrinkage=0.1:UseBaggedGrad:GradBaggingFraction=0.73:SeparationType=GiniIndex:nCuts=15:MaxDepth=2"
        )  #Settings3
        #"!H:!V:NTrees=300:BoostType=Grad:Shrinkage=0.11:UseBaggedGrad:GradBaggingFraction=0.73:SeparationType=GiniIndex:nCuts=17:MaxDepth=4" )#AnaNote
        #"!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5" )#Old

    if "BDT" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kBDT, "BDT",
            "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning"
        )

    if "BDTB" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kBDT, "BDTB",
            "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning"
        )

    if "BDTD" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kBDT, "BDTD",
            "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate"
        )

    # RuleFit -- TMVA implementation of Friedman's method
    if "RuleFit" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kRuleFit, "RuleFit",
            "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02"
        )

    # --------------------------------------------------------------------------------------------------

    # ---- Now you can tell the factory to train, test, and evaluate the MVAs.

    # Train MVAs
    print("FLAG 0")
    factory.TrainAllMethods()

    # Test MVAs
    factory.TestAllMethods()

    # Evaluate MVAs
    factory.EvaluateAllMethods()

    # Save the output.
    outputFile.Close()

    print "=== wrote root file %s\n" % outfname
    print "=== TMVAClassification is done!\n"

    # open the GUI for the result macros
    if not gROOT.IsBatch(): TMVA.TMVAGui(outfname)
def main():
    """Run a TMVA classification training driven by command-line options.

    The function parses the options, builds a ``TMVA.Factory`` and a
    ``TMVA.DataLoader``, registers the variables listed in
    ``varsList.varList[varListKey]``, attaches the signal tree and every
    non-empty background tree (all read from the "ljmet" tree of their file),
    writes a Keras model to ``model.h5`` and, when ``methods == "Keras"``,
    books it as a PyKeras method before training, testing and evaluating.

    Recognised options: -m/--methods, -i/--inputfile, -n/--nTrees,
    -d/--maxDepth, -k/--mass, -l/--varListKey, -t/--inputtrees,
    -o/--outputfile, -v/--verbose, -h/-?/--help/--usage.

    Exits with status 1 on unknown options, on a malformed -t argument, or
    when the ROOT version code falls in the 5.18 range.
    """
    try:
        # retrieve command line options
        shortopts = "m:i:n:d:k:l:t:o:vh?"
        longopts = ["methods=", "inputfile=", "nTrees=", "maxDepth=", "mass=",
                    "varListKey=", "inputtrees=", "outputfile=", "verbose",
                    "help", "usage"]
        opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts)
    except getopt.GetoptError:
        print("ERROR: unknown options in argument %s" % sys.argv[1:])
        usage()
        sys.exit(1)

    # NOTE: several of these are accepted on the command line but are not
    # used further down in this function (nTrees, mass, treeNameSig,
    # treeNameBkg); infname and outfname are overwritten below.
    infname = DEFAULT_INFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname = DEFAULT_OUTFNAME
    methods = DEFAULT_METHODS
    nTrees = DEFAULT_NTREES
    mDepth = DEFAULT_MDEPTH
    mass = DEFAULT_MASS
    varListKey = DEFAULT_VARLISTKEY
    # NOTE: verbose starts as True, so -v/--verbose cannot change it.
    verbose = True

    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-d", "--maxDepth"):
            mDepth = a
        elif o in ("-k", "--mass"):
            mass = a
        elif o in ("-l", "--varListKey"):
            varListKey = a
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-n", "--nTrees"):
            nTrees = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            # split() drops leading/trailing/repeated whitespace, so no empty
            # entries need to be discounted afterwards.  Reverse alphabetical
            # order decides which name is taken as signal and which as
            # background.
            trees = sorted(a.split(), reverse=True)
            if len(trees) != 2:
                print("ERROR: need to give two trees (each one for signal and background)")
                print(trees)
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True

    varList = varsList.varList[varListKey]
    nVars = str(len(varList)) + 'vars'
    # str() keeps the concatenation valid whether the depth came from the
    # command line (a string) or from a non-string default.
    Note = methods + '_' + varListKey + '_' + nVars + '_mDepth' + str(mDepth)
    # NOTE: this replaces any value given with -o/--outputfile.
    outfname = "dataset/weights/TMVA_" + Note + ".root"

    # Print methods
    mlist = methods.replace(' ', ',').split(',')
    print("=== TMVAClassification: use method(s)...")
    for m in mlist:
        if m.strip() != '':
            print("=== - <%s>" % m.strip())

    # check ROOT version, give alarm if 5.18
    if 332288 <= gROOT.GetVersionCode() < 332544:
        print("*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA")
        print("*** does not run properly (function calls with enums in the argument are ignored).")
        print("*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),")
        print("*** or use another ROOT version (e.g., ROOT 5.19).")
        sys.exit(1)

    # Output file
    outputFile = TFile(outfname, 'RECREATE')
    factory = TMVA.Factory(
        "TMVAClassification", outputFile,
        "!V:!Silent:Color:DrawProgressBar:Transformations=I;:AnalysisType=Classification")

    factory.SetVerbose(verbose)
    (TMVA.gConfig().GetIONames()).fWeightFileDir = "weights/" + Note

    dataloader = TMVA.DataLoader('dataset')

    # Each varList entry supplies the first three AddVariable arguments;
    # NJets_singleLepCalc is registered as integer, everything else as float.
    for iVar in varList:
        varType = 'I' if iVar[0] == 'NJets_singleLepCalc' else 'F'
        dataloader.AddVariable(iVar[0], iVar[1], iVar[2], varType)

    inputDir = varsList.inputDir
    # NOTE: the signal file name is fixed here; -i/--inputfile is ignored.
    infname = "TTTT_TuneCP5_PSweights_13TeV-amcatnlo-pythia8_hadd.root"
    iFileSig = TFile.Open(inputDir + infname)
    sigChain = iFileSig.Get("ljmet")
    dataloader.AddSignalTree(sigChain)

    # The file and tree objects are kept in lists so they stay referenced
    # for the rest of the function.
    bkg_list = []
    bkg_trees_list = []
    for bkgName in varsList.bkg:
        bkgPath = inputDir + bkgName
        bkgFile = TFile.Open(bkgPath)
        bkg_list.append(bkgFile)
        print(bkgPath)
        bkgTree = bkgFile.Get("ljmet")
        bkg_trees_list.append(bkgTree)
        bkgTree.GetEntry(0)

        # Empty background trees are not handed to the dataloader.
        if bkgTree.GetEntries() == 0:
            continue
        dataloader.AddBackgroundTree(bkgTree, 1)

    dataloader.SetSignalWeightExpression(weightStrS)
    dataloader.SetBackgroundWeightExpression(weightStrB)

    mycutSig = TCut(cutStrS)
    mycutBkg = TCut(cutStrB)

    dataloader.PrepareTrainingAndTestTree(
        mycutSig, mycutBkg,
        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V")

    kerasSetting = 'H:!V:VarTransform=G:FilenameModel=model.h5:NumEpochs=10:BatchSize=1028'

    # The first layer must accept one input per variable added to the
    # dataloader, so its width follows the variable list instead of a
    # hard-coded count.
    model = Sequential()
    model.add(Dense(100, activation='relu', input_dim=len(varList)))
    model.add(Dense(100, activation="relu"))
    model.add(Dense(100, activation="relu"))
    model.add(Dense(100, activation="relu"))
    model.add(Dense(2, activation="sigmoid"))

    # Set loss and optimizer
    model.compile(loss='categorical_crossentropy', optimizer=Adam(), metrics=['accuracy'])

    # Store model to file (the name must match FilenameModel in kerasSetting)
    model.save('model.h5')
    model.summary()

    if methods == "Keras":
        factory.BookMethod(dataloader, TMVA.Types.kPyKeras, "PyKeras", kerasSetting)

    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()

    outputFile.Close()
    print("DONE")
Example #19
0
def TMVARegression():
    """Book, train, test and evaluate a TMVA BDT regression.

    Command-line options: -a/--analysis ("Dijet" or "Subjet"),
    -o/--outputfile, -v/--verbose and -h/-?/--help/--usage.  The analysis
    name selects the input variables, the regression target and the
    selection cut.  Any other analysis name prints a message and exits with
    status 1 (after the output file has already been created).
    """
    short_flags = "a:o:vh?"
    long_flags = ["analysis=", "outputfile=", "verbose", "help", "usage"]
    try:
        # retrieve command line options
        opts, args = getopt.getopt(sys.argv[1:], short_flags, long_flags)
    except getopt.GetoptError:
        # print help information and exit
        print("ERROR: unknown options in argument %s" % sys.argv[1:])
        usage()
        sys.exit(1)

    _outfname = OUTFNAME
    _analysis = ANALYSIS
    verbose = False
    for flag, value in opts:
        if flag in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif flag in ("-o", "--outputfile"):
            _outfname = value
        elif flag in ("-a", "--analysis"):
            _analysis = value
        elif flag in ("-v", "--verbose"):
            verbose = True

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file for the training results
    outputFile = TFile(_outfname, 'RECREATE')

    # The factory takes a job name, the output file and an option string.
    factory = TMVA.Factory("TMVARegression", outputFile,
                           "!V:!Silent:Color:DrawProgressBar")
    factory.SetVerbose(verbose)

    # Weight files go into a directory named after the analysis.
    TMVA.gConfig().GetIONames().fWeightFileDir = "_".join(["weights_Reg_8TeV", _analysis])

    def titled(*names):
        # Variables whose title is the same as their expression.
        return [(name, name) for name in names]

    # Per-analysis configuration: (expression, title) pairs in booking
    # order, the regression target, and the clauses of the selection cut.
    if _analysis == "Dijet":
        smeared_pt = "hJet_ptRaw*((hJet_ptRaw+resolutionBias(fabs(hJet_eta))*(hJet_ptRaw-hJet_genPt))/hJet_ptRaw)"
        variables = (
            titled("hJet_pt", "hJet_eta", "hJet_phi", "hJet_e", smeared_pt)
            + [
                ("hJet_Mt:=evalMt(hJet_pt, hJet_eta, hJet_phi, hJet_e)", "hJet_Mt"),
                ("hJet_Et:=evalEt(hJet_pt, hJet_eta, hJet_phi, hJet_e)", "hJet_Et"),
            ]
            + titled(
                "hJet_ptLeadTrack",
                "hJet_vtxPt",
                "hJet_vtx3dL",
                "hJet_vtx3deL",
                "hJet_vtxMass",
                "hJet_chf",
                "hJet_nch",
                "hJet_nconstituents",
                "hJet_JECUnc",
                "rho25",
                "MET.et",
            )
            + [
                ("METdPhi:=METdeltaPhi(MET.phi, hJet_phi[0], hJet_phi[1])", "METdPhi"),
            ]
        )
        target = "hJet_genPt"
        cut_clauses = [
            "(Vtype == 0 || Vtype == 1)",
            "hJet_pt[0] > 20.0",
            "hJet_pt[1] > 20.0",
            "hJet_genPt[0] > 0.0",
            "hJet_genPt[1] > 0.0",
            "hJet_eta[0] < 2.4",
            "hJet_eta[1] < 2.4",
            "hJet_id[0] > 0.0",
            "hJet_id[1] > 0.0",
            "max(hJet_csv[0],hJet_csv[1]) > 0.0",
            "min(hJet_csv[0],hJet_csv[1]) > 0.0",
            "H.pt > 100",
        ]
    elif _analysis == "Subjet":
        smeared_pt = "fathFilterJets_ptRaw*((fathFilterJets_ptRaw+resolutionBias(fabs(fathFilterJets_eta))*(fathFilterJets_ptRaw-fathFilterJets_genPt))/fathFilterJets_ptRaw)"
        variables = (
            titled("fathFilterJets_pt", "fathFilterJets_eta", "fathFilterJets_phi",
                   "fathFilterJets_e", smeared_pt)
            + [
                ("fathFilterJets_Mt:=evalMt(fathFilterJets_pt, fathFilterJets_eta, fathFilterJets_phi, fathFilterJets_e)", "fathFilterJets_Mt"),
                ("fathFilterJets_Et:=evalEt(fathFilterJets_pt, fathFilterJets_eta, fathFilterJets_phi, fathFilterJets_e)", "fathFilterJets_Et"),
            ]
            + titled(
                "fathFilterJets_ptLeadTrack",
                "fathFilterJets_vtxPt",
                "fathFilterJets_vtx3dL",
                "fathFilterJets_vtx3deL",
                "fathFilterJets_vtxMass",
                "fathFilterJets_chf",
                "rho25",
                "MET.et",
            )
            + [
                ("METdPhi:=METdeltaPhi(MET.phi, fathFilterJets_phi[0], fathFilterJets_phi[1])", "METdPhi"),
            ]
        )
        target = "fathFilterJets_genPt"
        cut_clauses = [
            "(Vtype == 0 || Vtype == 1)",
            "fathFilterJets_pt[0] > 20.0",
            "fathFilterJets_pt[1] > 20.0",
            "fathFilterJets_genPt[0] > 0.0",
            "fathFilterJets_genPt[1] > 0.0",
            "fathFilterJets_eta[0] < 2.4",
            "fathFilterJets_eta[1] < 2.4",
            "max(fathFilterJets_csv[0],fathFilterJets_csv[1]) > 0.0",
            "min(fathFilterJets_csv[0],fathFilterJets_csv[1]) > 0.0",
            "FatH.filteredpt > 100",
        ]
    else:
        print("Problem specifying analysis. Please choose Dijet or Subjet.")
        sys.exit(1)

    for expression, title in variables:
        factory.AddVariable(expression, title, "units", 'F')

    # Add the variable carrying the regression target
    factory.AddTarget(target)

    # Input ntuples; only the data sets whose flag is switched on are chained.
    en7TeV = False
    en8TeV = True

    regWeight = 1.
    chain = TChain("tree")

    if en7TeV:  # change the ntuple names later!!
        for ntuple in (
            "Step2_output_May11/WH_125_ForRegression.root",
            "Step2_output_May11/WH_115_ForRegression.root",
            "Step2_output_May11/WH_120_ForRegression.root",
            "Step2_output_May11/WH_130_ForRegression.root",
            "Step2_output_May11/WH_135_ForRegression.root",
        ):
            chain.Add(ntuple)

    if en8TeV:
        for ntuple in (
            "/uscmst1b_scratch/lpc1/lpctrig/apana/Higgs/Step2/NtupleV34/CMSSW_5_2_5/src/VHbbAnalysis/VHbbDataFormats/bin/Step2/ZH/ZH_110_summer12_33b.root",
            "/uscmst1b_scratch/lpc1/lpctrig/apana/Higgs/Step2/NtupleV34/CMSSW_5_2_5/src/VHbbAnalysis/VHbbDataFormats/bin/Step2/ZH/ZH_115_summer12_33b.root",
            "/uscmst1b_scratch/lpc1/lpctrig/apana/Higgs/Step2/NtupleV34/CMSSW_5_2_5/src/VHbbAnalysis/VHbbDataFormats/bin/Step2/ZH/ZH_120_summer12_33b.root",
            "/uscmst1b_scratch/lpc1/lpctrig/apana/Higgs/Step2/NtupleV34/CMSSW_5_2_5/src/VHbbAnalysis/VHbbDataFormats/bin/Step2/ZH/ZH_125_summer12_33b.root",
            "/uscmst1b_scratch/lpc1/lpctrig/apana/Higgs/Step2/NtupleV34/CMSSW_5_2_5/src/VHbbAnalysis/VHbbDataFormats/bin/Step2/ZH/ZH_130_summer12_33b.root",
            "/uscmst1b_scratch/lpc1/lpctrig/apana/Higgs/Step2/NtupleV34/CMSSW_5_2_5/src/VHbbAnalysis/VHbbDataFormats/bin/Step2/ZH/ZH_135_summer12_33b.root",
        ):
            chain.Add(ntuple)

    NEntries = chain.GetEntries()
    print("%s %s" % ("Number of entries on Chain:", NEntries))

    regTree = chain
    factory.AddRegressionTree(regTree, regWeight)

    # All selection clauses are AND-ed together into a single cut.
    cutString = " && ".join(cut_clauses)
    print(cutString)
    mycut = TCut(cutString)

    # Fixed numbers of training and test events are requested per data set.
    if en7TeV:
        factory.PrepareTrainingAndTestTree(mycut, "nTrain_Regression=125000:nTest_Regression=125000:SplitMode=Random:NormMode=NumEvents:!V")
    if en8TeV:
        factory.PrepareTrainingAndTestTree(mycut, "nTrain_Regression=111000:nTest_Regression=111000:SplitMode=Random:NormMode=NumEvents:!V")

    # ---- Book MVA methods
    # Method configuration options are listed at
    # http://tmva.sourceforge.net/optionRef.html

    # Boosted Decision Trees
    factory.BookMethod(
        TMVA.Types.kBDT, "BDT",
        "!H:!V:NTrees=60:nEventsMin=5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30")

    # ---- Train, test and evaluate the booked MVAs, in that order
    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()

    NEntries = regTree.GetEntries()
    print("%s %s" % ("Number of entries on Tree: ", NEntries))

    # Save the output
    outputFile.Close()

    print("==> Wrote root file %s\n" % _outfname)
    print("==> TMVARegression is done!\n")