Example #1
def Evaluate(outdir):

  sys.stdout = open(outdir + '/tmva.log', 'w') 

  # Output file
  output = TFile(outdir + '/tmva.root', 'RECREATE')

  # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
  # All TMVA output can be suppressed by removing the "!" (not) in 
  # front of the "Silent" argument in the option string
  factory = TMVA.Factory("TMVARuleFit", output, "!V:!Silent:Color" )

  # Set the variables used for the analysis
  input = open(outdir + '/inputvars.txt')
  for variable in input.readlines():
    factory.AddVariable(variable[:-1], 'F')

  # Set the weight directory
  TMVA.gConfig().GetIONames().fWeightFileDir = outdir + "/weights"

  # Limit the creation of correlation plots
  TMVA.gConfig().GetVariablePlotting().fMaxNumOfAllowedVariablesForScatterPlots = 20  

  # Set the input file with signal and background events
  factory.SetInputTrees(
    outdir + '/signals.txt',
    outdir + '/backgrounds.txt'
  )

  cutsig = TCut('')
  cutbkg = TCut('')
  
  factory.PrepareTrainingAndTestTree( cutsig, cutbkg, "SplitMode=Random:NormMode=NumEvents:!V" )   

  factory.BookMethod( TMVA.Types.kRuleFit, "RuleFit",
    "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.00001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ) 

  # Train MVAs
  factory.TrainAllMethods()

  # Test MVAs
  factory.TestAllMethods()

  # Evaluate MVAs
  factory.EvaluateAllMethods()

  # Save the output.
  output.Close()
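Once Evaluate() has finished, the weight file it writes can be loaded back with a TMVA.Reader to classify new events. A minimal application sketch follows; the weight-file name assumes TMVA's usual <JobName>_<MethodName>.weights.xml convention, and the events argument is a hypothetical iterable of {variable: value} dicts.

def Apply(outdir, events):

  from array import array
  from ROOT import TMVA

  reader = TMVA.Reader('!Color:!Silent')

  # Re-declare the training variables in the same order, each bound to a float buffer
  buffers = {}
  for variable in open(outdir + '/inputvars.txt').readlines():
    name = variable[:-1]
    buffers[name] = array('f', [0.])
    reader.AddVariable(name, buffers[name])

  # Weight file written by the factory above (assumed file name)
  reader.BookMVA('RuleFit', outdir + '/weights/TMVARuleFit_RuleFit.weights.xml')

  # Fill the buffers event by event and collect the classifier response
  responses = []
  for event in events:
    for name, buf in buffers.items():
      buf[0] = event[name]
    responses.append(reader.EvaluateMVA('RuleFit'))
  return responses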
Example #2
    def fit(self, X, y, X_test=None, y_test=None, weights=None, weights_test=None, signal_label=None, **kwargs):
        # (re)configure settings since deleting a previous Factory resets all
        # this. This is poor design, TMVA.
        config = TMVA.gConfig()
        config.GetIONames().fWeightFileDir = self.tmpdir
        config.SetSilent(True)
        config.SetDrawProgressBar(False)
        self.factory.DeleteAllMethods()

        extra_kwargs = dict()
        if self.task == "Regression":
            func = rnp.tmva.add_regression_events
        else:
            func = rnp.tmva.add_classification_events
            extra_kwargs["signal_label"] = signal_label

        # test exceptions
        assert_raises(TypeError, func, object(), X, y)
        assert_raises(ValueError, func, self.factory, X, y[: y.shape[0] / 2])
        if weights is not None:
            assert_raises(ValueError, func, self.factory, X, y, weights=weights[: weights.shape[0] / 2])
            assert_raises(ValueError, func, self.factory, X, y, weights=weights[:, np.newaxis])

        assert_raises(ValueError, func, self.factory, [[[1, 2]]], [1])
        assert_raises(ValueError, func, self.factory, [[1, 2]], [[[1]]])

        func(self.factory, X, y, weights=weights, **extra_kwargs)
        if X_test is None:
            X_test = X
            y_test = y
            weights_test = weights
        func(self.factory, X_test, y_test, weights=weights_test, test=True, **extra_kwargs)

        self.factory.PrepareTrainingAndTestTree(TCut("1"), "NormMode=EqualNumEvents")
        options = []
        for param, value in kwargs.items():
            if value is True:
                options.append(param)
            elif value is False:
                options.append("!{0}".format(param))
            else:
                options.append("{0}={1}".format(param, value))
        options = ":".join(options)
        self.factory.BookMethod(self.method, self.method, options)
        self.factory.TrainAllMethods()
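The loop above encodes keyword arguments into a TMVA option string: True becomes a bare flag, False a negated flag, and anything else a key=value pair. A standalone illustration of that convention (not part of the original test class):

def to_tmva_options(**kwargs):
    options = []
    for param, value in kwargs.items():
        if value is True:
            options.append(param)
        elif value is False:
            options.append("!{0}".format(param))
        else:
            options.append("{0}={1}".format(param, value))
    return ":".join(options)

# to_tmva_options(NTrees=200, UseBaggedBoost=True, H=False)
# -> 'NTrees=200:UseBaggedBoost:!H' (ordering follows dict iteration order)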
Example #3
def main():

    try:
        # retrieve command line options
        shortopts  = "a:o:r:vh?"
        longopts   = ["analysis=","outputfile=", "regression=", "verbose", "help", "usage"]
        opts, args = getopt.getopt( sys.argv[1:], shortopts, longopts )

    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    _outfname   = OUTFNAME
    _analysis   = ANALYSIS
    verbose     = False
    _regression = REGRESSION

    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-o", "--outputfile"):
            _outfname = a
        elif o in ("-a", "--analysis"):
            _analysis = a
        elif o in ("-r", "--regression"):
            _regression = True
        elif o in ("-v", "--verbose"):
            verbose = True

            
    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut
    
    # check ROOT version, warn if it is 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)
        
    # Import TMVA classes from ROOT
    from ROOT import TMVA


    # Output file
    outputFile = TFile( _outfname, 'RECREATE' )
    
    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in 
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory( "TMVAClassification", outputFile, 
                            "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" )

    # Set verbosity
    factory.SetVerbose( verbose )
    
    # If you wish to modify default settings 
    # (please check "src/Config.h" to see all available global options)
    #    gConfig().GetVariablePlotting()).fTimesRMS = 8.0
    #    gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"
    TMVA.gConfig().GetIONames().fWeightFileDir = "weights_" + _analysis

    # Define the input variables that shall be used for the classifier training
    # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    # [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
    if _analysis == "Dijet":

        if not _regression:
            factory.AddVariable("H_mass := H.mass", 'F');
            factory.AddVariable("H_pt :=H.pt", 'F');
            factory.AddVariable("hJet_pt1 := hJet_pt[0]", 'F')
            factory.AddVariable("hJet_pt2 := hJet_pt[1]", 'F')
        else:
            factory.AddVariable("HCorr_mass := newHiggsMass", 'F');
            factory.AddVariable("HCorr_pt := newHiggsPt", 'F');
            factory.AddVariable("hJ1Corr_pt := hJet_genPtReg0", 'F');
            factory.AddVariable("hJ2Corr_pt := hJet_genPtReg1", 'F');

        factory.AddVariable("V_pt :=V.pt", 'F');
        factory.AddVariable("H_dR := H.dR", 'F');
        factory.AddVariable("hJ12_MaxCsv := max(hJet_csv[0],hJet_csv[1])", 'F');
        factory.AddVariable("hJ12_MinCsv := min(hJet_csv[0],hJet_csv[1])", 'F');
        factory.AddVariable("HV_dPhi := HVdPhi", 'F');
        factory.AddVariable("H_dEta := H.dEta", 'F');
        factory.AddVariable("NAddJet:=Sum$(aJet_pt>20 && abs(aJet_eta)<4.5)", 'I' );
        factory.AddVariable("dPull := deltaPullAngle", 'F');
        

        # You can add so-called "Spectator variables", which are not used in the MVA training, 
        # but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the 
        # input variables, the response values of all trained MVAs, and the spectator variables
        #factory.AddSpectator("hJet_pt1 := hJet_pt[0]", 'F');
        #factory.AddSpectator("hJet_pt2 := hJet_pt[1]", 'F');

    elif _analysis == "Subjet":

        if not _regression:
            factory.AddVariable("H_mass := FatH.filteredmass", 'F');
            factory.AddVariable("H_pt   := FatH.filteredpt", 'F');
            factory.AddVariable("SJ1_pt := fathFilterJets_pt[0]", 'F');
            factory.AddVariable("SJ2_pt := fathFilterJets_pt[1]", 'F');
            factory.AddVariable("SJ3_pt := Alt$(fathFilterJets_pt[2],0)", 'F');
        else:
            factory.AddVariable("HCorr_mass := newfatHiggsMass", 'F');
            factory.AddVariable("HCorr_pt := newfatHiggsPt", 'F');
            factory.AddVariable("SJ1Corr_pt := fathFilterJets_genPtReg0", 'F');
            factory.AddVariable("SJ2Corr_pt := fathFilterJets_genPtReg1", 'F');
            factory.AddVariable("SJ3_pt := Alt$(fathFilterJets_pt[2],0)", 'F'); #change later
            
        factory.AddVariable("V_pt   := V.pt", 'F');
        factory.AddVariable("HV_dPhi := " +\
                             "FatH.filteredphi - V.phi > pi ? " +\
                             "abs(FatH.filteredphi - V.phi - 2*pi) : " +\
                             "FatH.filteredphi - V.phi < -pi ? " +\
                             "abs(FatH.filteredphi - V.phi + 2*pi) : " +\
                             "abs(FatH.filteredphi - V.phi)", 'F' )

        factory.AddVariable("SJ1_csv := fathFilterJets_csv[0]", 'F');
        factory.AddVariable("SJ2_csv := fathFilterJets_csv[1]", 'F');
        factory.AddVariable("SJ3_csv := Alt$(fathFilterJets_csv[2],0)", 'F');

        factory.AddVariable("SJ12_dEta := " +\
                                "nfathFilterJets < 2 ? 0 : " +\
                                "fabs(fathFilterJets_eta[0] - fathFilterJets_eta[1] )", 'F');
        
        factory.AddVariable("SJ13_dEta := " +\
                                "nfathFilterJets < 3 ? 0 : " +\
                                "abs( fathFilterJets_eta[0] - Alt$(fathFilterJets_eta[2],0))", 'F');

        factory.AddVariable("SJ12_dPhi := " +\
                                "nfathFilterJets < 2 ? 0 : " +\
                                "fathFilterJets_phi[0] - fathFilterJets_phi[1] > pi ? " +\
                                "abs( fathFilterJets_phi[0] - fathFilterJets_phi[1] - 2*pi) : " +\
                                "fathFilterJets_phi[0] - fathFilterJets_phi[1] < -pi ? " +\
                                "abs( fathFilterJets_phi[0] - fathFilterJets_phi[1] + 2*pi) : " +\
                                "abs( fathFilterJets_phi[0] - fathFilterJets_phi[1])", 'F');

        factory.AddVariable("SJ13_dPhi := " +\
                                "nfathFilterJets < 3 ? 0 : " +\
                                "fathFilterJets_phi[0] - Alt$(fathFilterJets_phi[2],0) > pi ? " +\
                                "abs(fathFilterJets_phi[0] - " +\
                                "Alt$(fathFilterJets_phi[2],0) - 2*pi) : " +\
                                "fathFilterJets_phi[0] - Alt$(fathFilterJets_phi[2],0) < -pi ? " +\
                                "abs(fathFilterJets_phi[0] - " +\
                                "Alt$(fathFilterJets_phi[2],0) + 2*pi) : " +\
                                "abs(fathFilterJets_phi[0] - Alt$(fathFilterJets_phi[2],0))", 'F');

        factory.AddVariable("SJ12_dR := " +\
                                "nfathFilterJets < 2 ? 0 : " +\
                                "deltaR(fathFilterJets_eta[0],fathFilterJets_phi[0],fathFilterJets_eta[1],fathFilterJets_phi[1])", 'F');
        
        factory.AddVariable("SJ13_dR := nfathFilterJets < 3 ? 0 : " +\
                                "deltaR(fathFilterJets_eta[0],fathFilterJets_phi[0],Alt$(fathFilterJets_eta[2],0),Alt$(fathFilterJets_phi[2],0))", 'F');

        factory.AddVariable("NAddJet:= " +\
                                "nfathFilterJets < 2 ? 0 : " +\
                                "Sum$(aJet_pt>20 && abs(aJet_eta)<4.5 && deltaR(fathFilterJets_eta[0],fathFilterJets_phi[0],aJet_eta,aJet_phi)>0.3 && deltaR(fathFilterJets_eta[1],fathFilterJets_phi[1],aJet_eta,aJet_phi)>0.3)+Sum$(hJet_pt>20 && abs(hJet_eta)<4.5 && deltaR(fathFilterJets_eta[0],fathFilterJets_phi[0],hJet_eta,hJet_phi)>0.3 && deltaR(fathFilterJets_eta[1],fathFilterJets_phi[1],hJet_eta,hJet_phi)>0.3)", 'I' );
       

    else:
        print "Problem specifying analysis. Please choose Dijet or Subjet."
        sys.exit(1)


    ## Get the Signal and Background trees
    for Sample in SAMPLES.keys():
        SampleInfo=SAMPLES[Sample]

        SampleType=SampleInfo[0] # signal or background
        infile=os.path.join(INPUTDIR,SampleInfo[1])
        xs=SampleInfo[2]

        ## get number of step 1 events
        f=TFile.Open(infile)
        h = f.Get("Count")
        nEVT=int(h.GetBinContent(1))

        wt  =xs/(nEVT)        
        print Sample,": ",infile
        print "XS:nEVT:wt: ", xs,nEVT,wt

        theTree      = f.Get( TREE )
        if SampleType == "S":
            factory.AddSignalTree    ( theTree, wt )
        elif SampleType == "B":
            factory.AddBackgroundTree( theTree, wt )        
        else:
            print "Trouble extracting SampleType for this sample"
            sys.exit(1)
        

    # table10 AN-2011/430
    if _analysis == "Dijet":
        cutString=\
            "Vtype == 0"             + " && " +\
            "vLepton_pt[0]>20."      + " && " +\
            "H.HiggsFlag > 0"        + " && " +\
            "V.mass > 75.0"          + " && " +\
            "V.mass < 105.0"         + " && " +\
            "V.pt > 100.0"           + " && " +\
            "max(hJet_csv[0],hJet_csv[1]) > 0.244"  + " && " +\
            "min(hJet_csv[0],hJet_csv[1]) > 0.244" + " && " 

        if not _regression:
            cutString += \
                "hJet_pt[0] > 20.0"      + " && " +\
                "hJet_pt[1] > 20.0"      + " && " +\
                "H.mass > 80.0"          + " && " +\
                "H.mass < 150.0"         

        else:
            cutString += \
                "hJet_genPtReg0 > 20.0"           + " && " +\
                "hJet_genPtReg1 > 20.0"           + " && " +\
                "newHiggsMass > 80.0 && newHiggsMass < 150.0"

    elif _analysis == "Subjet":
        cutString=\
            "Vtype == 0"             + " && " +\
            "vLepton_pt[0]>20."      + " && " +\
            "FatH.FatHiggsFlag > 0"  + " && " +\
            "V.mass > 75.0"          + " && " +\
            "V.mass < 105.0"         + " && " +\
            "V.pt > 100.0"           + " && " +\
            "nfathFilterJets >= 2"   + " && " +\
            "max(fathFilterJets_csv[0],fathFilterJets_csv[1]) > 0.244"  + " && " +\
            "min(fathFilterJets_csv[0],fathFilterJets_csv[1]) > 0.244" + " && "

        if not _regression:
            cutString += \
                "fathFilterJets_pt[0] > 20.0"      + " && " +\
                "fathFilterJets_pt[1] > 20.0"      + " && " +\
                "FatH.filteredmass > 80.0"         + " && " +\
                "FatH.filteredmass < 150.0"     
        else:
            cutString += \
                "fathFilterJets_genPtReg0 > 20.0"           + " && " +\
                "fathFilterJets_genPtReg1 > 20.0"           + " && " +\
                "newfatHiggsMass > 80.0 && newfatHiggsMass < 150.0"


    else:
        print "Problem specifying analysis. Please choose Dijet or Subjet."
        sys.exit(1)


    print cutString
    mycutSig = TCut( cutString ) 
    mycutBkg = TCut( cutString ) 
    
    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples

    prepareOptions="nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=None:!V"
    #prepareOptions="SplitMode=Random:!V"
    factory.PrepareTrainingAndTestTree( mycutSig, mycutBkg, prepareOptions)


    bdtOptions = \
        "!H"                       + ":" +\
        "!V"                       + ":" +\
        "NTrees=850"               + ":" +\
        "nEventsMin=150"           + ":" +\
        "MaxDepth=3"               + ":" +\
        "BoostType=AdaBoost"       + ":" +\
        "AdaBoostBeta=0.3"         + ":" +\
        "SeparationType=GiniIndex" + ":" +\
        "nCuts=20"                 + ":" +\
        "PruneMethod=NoPruning"
    # "PruneMethod=CostComplexity"

    # 

    print bdtOptions

    factory.BookMethod( TMVA.Types.kBDT, "BDT", bdtOptions)
   

    # Train MVAs
    factory.TrainAllMethods()
    
    # Test MVAs
    factory.TestAllMethods()
    
    # Evaluate MVAs
    factory.EvaluateAllMethods()    
    
    # Save the output.
    outputFile.Close()
    
    print "=== wrote root file %s\n" % _outfname
    print "=== TMVAClassification is done!\n"
Example #4
branches_miniTree = {}

print "Initializing TMVA..."
# Setup TMVA
TMVA.Tools.Instance()
#TMVA.PyMethodBase.PyInitialize()
mvaReaderList = [
    'readerA_3glb', 'readerB_3glb', 'readerC_3glb', 'readerA_2glbTrk',
    'readerB_2glbTrk', 'readerC_2glbTrk'
]
mvaReaders = {}
mvaWeights = {}

print "Setting up reader..."
for reader in mvaReaderList:
    mvaReaders[reader] = TMVA.Reader('Color:!Silent')
    tmpstr = 'TMVAClassification_2016vars_' + fileStr(reader)
    mvaWeights[reader] = TString(
        BASEDIR + tmpstr + '/weights/' + tmpstr + '_BDT.weights.xml'
    )  # weights.xml file produced after training; place it in CommonFiles

# Open input file and set branch addresses
rootfile = TFile(filename, 'READ')
rootfile.ls()
tree_bkg = rootfile.Get('TreeB')
tree_ds = rootfile.Get('TreeS_Ds')
tree_bu = rootfile.Get('TreeS_Bu')
tree_bd = rootfile.Get('TreeS_Bd')

for branchName in varlist_train:
    branches[branchName] = array('f', [-999])
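The loop above only allocates the float buffers. A typical continuation, sketched under the assumption that varlist_train holds exactly the variable names used at training time, binds each buffer to the input trees and to every reader, then books the BDT weight files collected earlier:

for branchName in varlist_train:
    for tree in (tree_bkg, tree_ds, tree_bu, tree_bd):
        tree.SetBranchAddress(branchName, branches[branchName])
    for reader in mvaReaderList:
        mvaReaders[reader].AddVariable(branchName, branches[branchName])

for reader in mvaReaderList:
    mvaReaders[reader].BookMVA('BDT', mvaWeights[reader])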
Example #5
from ROOT import TMVA, TFile, TTree, TCut
import ROOT

# Setup TMVA
TMVA.Tools.Instance()
TMVA.PyMethodBase.PyInitialize()

output = TFile.Open('Classification.root', 'RECREATE')
factory = TMVA.Factory('TMVAClassification', output,
         '!V:!Silent:Color:DrawProgressBar:Transformations=D,G:AnalysisType=Classification')
 
data = TFile.Open('ML_train.root')
signal = data.Get('signal_train') #naming things signal breaks ROOT
background = data.Get('background_train')

dataloader = TMVA.DataLoader('dataset')
n = 0
for branch in signal.GetListOfBranches():
    dataloader.AddVariable(branch.GetName())  # declare each branch as an input variable
    n += 1
dataloader.AddSignalTree(signal, 1.0) ### second arg is weight
dataloader.AddBackgroundTree(background, 1.0)
dataloader.PrepareTrainingAndTestTree(TCut(''),
		'nTrain_Signal=25000:nTrain_Background=6716:SplitMode=Random:NormMode=NumEvents:!V') # controlling training populations
		### numbers here chosen to keep nTrain_Signal/Total_Train = n_Test_Signal/Total_Test (equal ratios)
        #'nTest_Signal=2000:nTest_Background=2000:SplitMode=Random:NormMode=NumEvents:!V') # controlling testing populations
 
factory.BookMethod(dataloader, TMVA.Types.kBDT, "BDT", "MaxDepth=5:NTrees=1600:BoostType=AdaBoost:AdaBoostBeta=0.5" ) 

# Run training, test and evaluation
factory.TrainAllMethods()
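The snippet stops after training. As in the other examples, testing, evaluation and closing the output file would normally follow:

factory.TestAllMethods()
factory.EvaluateAllMethods()
output.Close()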
Example #6
def process_event(i_final_histo_name_, i_input_signal_file_name_,
                  i_input_bkg_file_name_):
    print "at start of process event"
    root.TMVA.Tools.Instance()
    #root.TMVA.PyMethodBase.PyInitialize()

    _output_training_name_ = i_final_histo_name_
    _input_signal_name_ = i_input_signal_file_name_
    _input_bkg_ee_qq_name_ = i_input_bkg_file_name_[0]
    _input_bkg_ee_qqqq_name_ = i_input_bkg_file_name_[1]
    _input_bkg_ee_qqqqqq_name_ = i_input_bkg_file_name_[2]

    fileout = root.TFile(_output_training_name_, "RECREATE")

    input_file_signal_ = root.TFile.Open(_input_signal_name_)
    input_file_bkg_ee_qq_ = root.TFile.Open(_input_bkg_ee_qq_name_)
    input_file_bkg_ee_qqqq_ = root.TFile.Open(_input_bkg_ee_qqqq_name_)
    input_file_bkg_ee_qqqqqq_ = root.TFile.Open(_input_bkg_ee_qqqqqq_name_)

    tree_sig = input_file_signal_.Get("MVATrainingVariables")
    tree_bkg_ee_qq = input_file_bkg_ee_qq_.Get("MVATrainingVariables")
    tree_bkg_ee_qqqq = input_file_bkg_ee_qqqq_.Get("MVATrainingVariables")
    tree_bkg_ee_qqqqqq = input_file_bkg_ee_qqqqqq_.Get("MVATrainingVariables")

    # Factory option flags: V toggles verbose output (typically off); Silent suppresses all TMVA output (off by default).
    # Transformations can be I;D;P;U;G,D for identity, decorrelation, PCA, uniform, and Gaussianisation followed by decorrelation.
    # AnalysisType can be Classification, Regression, Multiclass or Auto (default Auto).
    factory = TMVA.Factory(
        "TMVAClassification", fileout, ":".join([
            "!V", "!Silent", "Color", "DrawProgressBar",
            "Transformations=I;D;P;G,D", "AnalysisType=Classification"
        ]))

    dataloader = TMVA.DataLoader('dataset')
    dataloader.SetWeightExpression("eventWeight")
    dataloader.SetSignalWeightExpression("eventWeight")
    dataloader.SetBackgroundWeightExpression("eventWeight")
    dataloader.AddVariable("jet1_mass", 'F')
    dataloader.AddVariable("jet2_mass", 'F')
    dataloader.AddVariable("jet1_theta", 'F')
    dataloader.AddVariable("jet2_theta", 'F')
    dataloader.AddVariable("jet1_D2_beta1", 'F')
    dataloader.AddVariable("jet2_D2_beta1", 'F')
    dataloader.AddVariable("jet1_BTag_rfj_BTagMax", 'F')
    dataloader.AddSpectator("jet1_CTag_rfj_CTagMax", 'F')
    #dataloader.AddVariable("jet1_BTag_rfj_BTagMax", 'F')
    dataloader.AddVariable("jet1_C2_beta1", 'F')
    dataloader.AddVariable("jet2_C2_beta1", 'F')
    dataloader.AddVariable("jet1_tau21", 'F')
    dataloader.AddVariable("jet2_tau21", 'F')
    dataloader.AddSpectator("costheta1_for_Atheta1", 'F')
    dataloader.AddSpectator("costheta2_for_Atheta1theta2", 'F')
    dataloader.AddSpectator("phi_for_Aphis", 'F')
    dataloader.AddVariable("reco_y32", 'F')
    dataloader.AddSpectator("reco_y43", 'F')
    dataloader.AddSpectator("dphi_j1j2", 'F')
    dataloader.AddSpectator("jet1_d21", 'F')
    dataloader.AddSpectator("jet1_d32", 'F')
    dataloader.AddSpectator("jet1_d43", 'F')
    dataloader.AddSpectator("jet2_d21", 'F')
    dataloader.AddSpectator("jet2_d32", 'F')
    dataloader.AddSpectator("jet2_d43", 'F')

    dataloader.AddVariable("jet1_C3_beta1", 'F')
    dataloader.AddVariable("jet2_C3_beta1", 'F')
    dataloader.AddSpectator("jet1_N2_beta1", 'F')
    dataloader.AddSpectator("jet2_N2_beta1", 'F')
    dataloader.AddSpectator("jet1_N3_beta1", 'F')
    dataloader.AddSpectator("jet2_N3_beta1", 'F')

    def_weight = 1

    dataloader.AddSignalTree(tree_sig, def_weight, "Training and Testing")
    dataloader.AddBackgroundTree(tree_bkg_ee_qq, def_weight,
                                 "Training and Testing")
    dataloader.AddBackgroundTree(tree_bkg_ee_qqqq, def_weight,
                                 "Training and Testing")
    dataloader.AddBackgroundTree(tree_bkg_ee_qqqqqq, def_weight,
                                 "Training and Testing")

    #method = factory.BookMethod(ROOT.TMVA.Types.kBDT, "BDT",
    #               ":".join([
    #                   "!H",
    #                   "!V",
    #                   "NTrees=850",-->default 800
    #                   "nEventsMin=150", -->default is 0
    #                   "MaxDepth=3",-->default
    #                   "BoostType=AdaBoost",-->default
    #                   "AdaBoostBeta=0.5", -->default
    #                   "SeparationType=GiniIndex",-->default, e.g. SDivSqrtSPlusB"
    #                   "nCuts=20",-->default
    #                   "PruneMethod=NoPruning",-->default
    #                   ]))

    #factory.TrainAllMethods()
    #factory.TestAllMethods()
    #factory.EvaluateAllMethods()

    cut_S = TCut("")
    cut_B = TCut("")
    dataloader.PrepareTrainingAndTestTree(
        cut_S,
        cut_B,
        ":".join([
            "V",
            "nTrain_Signal=0",
            "nTrain_Background=0",
            "SplitMode=Random",
            "NormMode=NumEvents",
            #"NormMode=None",
        ]))

    factory.BookMethod(
        dataloader,
        root.TMVA.Types.kBDT,
        "BDT",
        ":".join([
            "!H",
            "V",
            "NTrees=300",
            "MaxDepth=3",
            #"BoostType=Grad",
            #"Shrinkage=1.00",
            "BoostType=AdaBoost",
            "AdaBoostBeta=0.20",
            #"SeparationType=GiniIndexWithLaplace",
            "SeparationType=GiniIndex",
            "nCuts=-1",
            "PruneMethod=NoPruning",
            #"SkipNormalization",
        ]))

    #factory.WriteDataInformation()

    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()
    fileout.Close()

    return None
Example #7
## This tutorial shows how to apply a trained model to new data (regression).
##
## \macro_code
##
## \date 2017
## \author TMVA Team

from ROOT import TMVA, TFile, TString
from array import array
from subprocess import call
from os.path import isfile

# Setup TMVA
TMVA.Tools.Instance()
TMVA.PyMethodBase.PyInitialize()
reader = TMVA.Reader("Color:!Silent")

# Load data
if not isfile('tmva_reg_example.root'):
    call(['curl', '-O', 'http://root.cern.ch/files/tmva_reg_example.root'])

data = TFile.Open('tmva_reg_example.root')
tree = data.Get('TreeR')

branches = {}
for branch in tree.GetListOfBranches():
    branchName = branch.GetName()
    branches[branchName] = array('f', [-999])
    tree.SetBranchAddress(branchName, branches[branchName])
    if branchName != 'fvalue':
        reader.AddVariable(branchName, branches[branchName])
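The listing ends before the trained model is booked and applied. A minimal continuation is sketched below; the weight-file path is an assumption and depends on where the corresponding regression training wrote its weights.

# Book the trained regression method (assumed weight-file location)
reader.BookMVA('PyKeras', TString('dataset/weights/TMVARegression_PyKeras.weights.xml'))

# Evaluate the regression target event by event
for i in range(tree.GetEntries()):
    tree.GetEntry(i)
    target = reader.EvaluateRegression('PyKeras')[0]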
Example #8
def RunTraining(dataset, optimization):
    # Setup TMVA
    TMVA.PyMethodBase.PyInitialize()
    output = TFile.Open('GGFKiller_%s_%s.root' % (dataset, optimization),
                        'RECREATE')
    factory = TMVA.Factory(
        'TMVAClassification%s%s' % (dataset, optimization), output,
        '!V:!Silent:Color:DrawProgressBar:Transformations=I:AnalysisType=Classification'
    )
    #Locate and add data files
    file_GGF_HH_2016 = "../FullNtuples/New2016/SKIM_GluGluToHHTo4B_node_SM_13TeV-madgraph_PM.root"
    file_VBF_HH_2016 = "../FullNtuples/New2016/SKIM_VBFHHTo4B_CV_1_C2V_1_C3_1_13TeV-madgraph_PM.root"
    file_VBF2_HH_2016 = "../FullNtuples/New2016/SKIM_VBFHHTo4B_CV_1_C2V_2_C3_1_13TeV-madgraph_PM.root"
    file_GGF_HH_2017 = "../FullNtuples/New2017/SKIM_GluGluToHHTo4B_node_SM_13TeV-madgraph_correctedcfg_PM.root"
    file_VBF_HH_2017 = "../FullNtuples/New2017/SKIM_VBFHHTo4B_CV_1_C2V_1_C3_1_13TeV-madgraph_PM.root"
    file_VBF2_HH_2017 = "../FullNtuples/New2017/SKIM_VBFHHTo4B_CV_1_C2V_2_C3_1_13TeV-madgraph_PM.root"
    file_GGF_HH_2018 = "../FullNtuples/New2018/SKIM_GluGluToHHTo4B_node_SM_TuneCP5_PSWeights_13TeV-madgraph-pythia8_PM.root"
    file_VBF_HH_2018 = "../FullNtuples/New2018/SKIM_VBFHHTo4B_CV_1_C2V_1_C3_1_TuneCP5_PSWeights_13TeV-madgraph-pythia8_PM.root"
    file_VBF2_HH_2018 = "../FullNtuples/New2018/SKIM_VBFHHTo4B_CV_1_C2V_2_C3_1_TuneCP5_PSWeights_13TeV-madgraph-pythia8_PM.root"
    #Add the data files
    ch_sig = TChain("bbbbTree")
    ch_bkg = TChain("bbbbTree")
    # Add files
    if optimization == "SM":
        seed = 2020
        if dataset == '2016':
            ch_sig.AddFile(file_VBF_HH_2016)
            ch_bkg.AddFile(file_GGF_HH_2016)
        elif dataset == '2017':
            ch_sig.AddFile(file_VBF_HH_2017)
            ch_bkg.AddFile(file_GGF_HH_2017)
        elif dataset == '2018':
            ch_sig.AddFile(file_VBF_HH_2018)
            ch_bkg.AddFile(file_GGF_HH_2018)
        else:
            ch_sig.AddFile(file_VBF_HH_2016)
            ch_bkg.AddFile(file_GGF_HH_2016)
            ch_sig.AddFile(file_VBF_HH_2017)
            ch_bkg.AddFile(file_GGF_HH_2017)
            ch_sig.AddFile(file_VBF_HH_2018)
            ch_bkg.AddFile(file_GGF_HH_2018)
    else:
        if dataset == '2016':
            seed = 2017
            ch_sig.AddFile(file_VBF2_HH_2016)
            ch_bkg.AddFile(file_GGF_HH_2016)
        elif dataset == '2017':
            seed = 2020
            ch_sig.AddFile(file_VBF2_HH_2017)
            ch_bkg.AddFile(file_GGF_HH_2017)
        elif dataset == '2018':
            seed = 2021
            ch_sig.AddFile(file_VBF2_HH_2018)
            ch_bkg.AddFile(file_GGF_HH_2018)
        else:
            ch_sig.AddFile(file_VBF2_HH_2016)
            ch_bkg.AddFile(file_GGF_HH_2016)
            ch_sig.AddFile(file_VBF2_HH_2017)
            ch_bkg.AddFile(file_GGF_HH_2017)
            ch_sig.AddFile(file_VBF2_HH_2018)
            ch_bkg.AddFile(file_GGF_HH_2018)
    #Load data to TMVA
    dataloader = TMVA.DataLoader('GGFKiller')
    if optimization == "SM":
        dataloader.AddVariable("H1_pt")
        dataloader.AddVariable("H2_pt")
        dataloader.AddVariable("JJ_j1_pt")
        dataloader.AddVariable("JJ_j2_pt")
        dataloader.AddVariable("abs_JJ_eta:=abs(JJ_eta)")
        dataloader.AddVariable("h1h2_deltaR")
        dataloader.AddVariable("h1j1_deltaR")
        dataloader.AddVariable("h1j2_deltaR")
        dataloader.AddVariable("h2j1_deltaR")
        dataloader.AddVariable("h2j2_deltaR")
        #dataloader.AddVariable("abs_j1etaj2eta:=abs(j1etaj2eta)")
        dataloader.AddVariable("abs_costh_JJ_j1_vbfcm:=abs(costh_JJ_j1_vbfcm)")
        dataloader.AddVariable("abs_costh_JJ_j2_vbfcm:=abs(costh_JJ_j2_vbfcm)")
        #dataloader.AddVariable("j1j2_deltaEta")
        dataloader.AddVariable("JJ_m")
    else:
        dataloader.AddVariable("H1_pt")
        dataloader.AddVariable("H2_pt")
        dataloader.AddVariable("JJ_j1_pt")
        dataloader.AddVariable("JJ_j2_pt")
        dataloader.AddVariable("abs_JJ_eta:=abs(JJ_eta)")
        dataloader.AddVariable("h1h2_deltaR")
        dataloader.AddVariable("h1j1_deltaR")
        dataloader.AddVariable("h1j2_deltaR")
        dataloader.AddVariable("h2j1_deltaR")
        dataloader.AddVariable("h2j2_deltaR")
        dataloader.AddVariable("abs_costh_JJ_j1_vbfcm:=abs(costh_JJ_j1_vbfcm)")
        dataloader.AddVariable("abs_costh_JJ_j2_vbfcm:=abs(costh_JJ_j2_vbfcm)")
        #dataloader.AddVariable("j1j2_deltaEta")
        dataloader.AddVariable("JJ_m")
    trainingsamplefraction = 0.50
    nTrain_Signal = int(
        ch_sig.GetEntries('VBFEvent==1') * trainingsamplefraction)
    nTrain_Background = int(
        ch_bkg.GetEntries('VBFEvent==1') * trainingsamplefraction)
    print("[INFO] ML TRAINING STARTING . . .")
    print("[INFO] Signal/Background Training Fraction is %f" %
          trainingsamplefraction)
    dataloader.AddSignalTree(ch_sig, 1.0)
    dataloader.AddBackgroundTree(ch_bkg, 1.0)
    dataloader.SetSignalWeightExpression('XS*preVBFSelEff')
    dataloader.SetBackgroundWeightExpression('XS*preVBFSelEff')
    dataloader.PrepareTrainingAndTestTree(
        TCut('VBFEvent==1'),
        'nTrain_Signal=%i:nTrain_Background=%i:SplitMode=Random:!V:SplitSeed=%i'
        % (nTrain_Signal, nTrain_Background, seed))
    print("[INFO] Boosted Decision Tree Training Starting . . .")
    if optimization == 'SM':
        #best option FOR SM
        if dataset == '2016':
            nTrees = [200]
            nCuts = [200]
            nDepth = [2]
        elif dataset == '2017':
            nTrees = [250]
            nCuts = [200]
            nDepth = [2]
        else:
            nTrees = [200]
            nCuts = [200]
            nDepth = [2]
    else:
        #best option for BSM
        if dataset == '2016':
            nTrees = [200]
            nCuts = [200]
            nDepth = [2]
        elif dataset == '2017':
            nTrees = [300]
            nCuts = [350]
            nDepth = [2]
        elif dataset == '2018':
            nTrees = [200]
            nCuts = [200]
            nDepth = [2]
        else:
            nTrees = [160]
            nCuts = [160]
            nDepth = [3]
    for i in nTrees:
        for j in nCuts:
            for k in nDepth:
                factory.BookMethod(
                    dataloader, TMVA.Types.kBDT, 'BDT_%i_%i_%i' % (i, j, k),
                    '!H:!V:NTrees=%i:MinNodeSize=2.5:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=%i:MaxDepth=%i'
                    % (i, j, k))
    # Run training, test and evaluation
    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()
Example #9
def main():  # runs the program
    try:  # retrieve command line options
        shortopts = "o:w:y:v:s:h?"  # possible command line options
        longopts = [
            "outputfile=", "where=", "year=", "verbose", "seed=", "help",
            "usage"
        ]
        opts, args = getopt.getopt(
            sys.argv[1:], shortopts,
            longopts)  # associates command line inputs to variables

    except getopt.GetoptError:  # output error if command line argument invalid
        print("ERROR: unknown options in argument %s" % sys.argv[1:])
        usage()
        sys.exit(1)

    myArgs = np.array([  # Stores the command line arguments    
        ['-o', '--outputfile', 'outfname', DEFAULT_OUTFNAME],
        ['-v', '--verbose', 'verbose', True],
        ['-w', '--where', 'where', "lpc"],
        ['-y', '--year', 'year', 2017],
        ['-s', '--seed', 'SeedN', DEFAULT_SEED],
    ])

    for opt, arg in opts:
        if opt in myArgs[:, 0]:
            index = np.where(myArgs[:, 0] == opt)[0][0]  # np.where returns a tuple of arrays
            myArgs[index, 3] = arg  # override the variables with the command line argument
        elif opt in myArgs[:, 1]:
            index = np.where(myArgs[:, 1] == opt)[0][0]
            myArgs[index, 3] = arg
        if opt in ("-?", "-h", "--help",
                   "--usage"):  # provides command line help
            usage()
            sys.exit(0)

    # Initialize some variables after reading in arguments
    SeedN_index = np.where(myArgs[:, 2] == 'SeedN')[0][0]
    outfname_index = np.where(myArgs[:, 2] == 'outfname')[0][0]
    verbose_index = np.where(myArgs[:, 2] == 'verbose')[0][0]
    where_index = np.where(myArgs[:, 2] == 'where')[0][0]
    year_index = np.where(myArgs[:, 2] == 'year')[0][0]

    seed = myArgs[SeedN_index, 3]
    where = myArgs[where_index, 3]
    year = int(myArgs[year_index, 3])
    varList = varsList.varList["DNN"]
    var_length = len(varList)

    str_xbitset = '{:0{}b}'.format(long(myArgs[SeedN_index, 3]), var_length)
    nVars = str_xbitset.count('1')
    outf_key = "DNN_" + str(nVars) + "vars"
    myArgs[outfname_index, 3] = "dataset/weights/TMVA_" + outf_key + ".root"

    print("Seed: {}".format(str_xbitset))

    outputfile = TFile(myArgs[outfname_index, 3], 'RECREATE')

    checkRootVer()  # check that ROOT version is correct

    ######################################################
    ######################################################
    ######                                          ######
    ######                  T M V A                 ######
    ######                                          ######
    ######################################################
    ######################################################

    # Declare some containers
    sig_list = []
    sig_trees_list = []
    bkg_list = []
    bkg_trees_list = []
    hist_list = []
    weightsList = []

    if where == "brux":
        if year == 2017:
            inputDir = varsList.inputDirBRUX2017
        elif year == 2018:
            inputDir = varsList.inputDirBRUX2018
    else:
        inputDir = varsList.inputDirCondor

    # Set up TMVA
    ROOT.TMVA.Tools.Instance()
    ROOT.TMVA.PyMethodBase.PyInitialize()

    fClassifier = TMVA.Factory(
        'VariableImportance',
        '!V:!ROC:Silent:!Color:!DrawProgressBar:Transformations=I;:AnalysisType=Classification'
    )

    fClassifier.SetVerbose(bool(myArgs[verbose_index, 3]))

    loader = TMVA.DataLoader("dataset/" + str_xbitset)

    for indx, var in enumerate(varList):
        if (str_xbitset[indx] == '1'):
            if var[0] == "NJets_MultiLepCalc":
                loader.AddVariable(var[0], var[1], var[2], "I")
            else:
                loader.AddVariable(var[0], var[1], var[2], "F")

    # add signals to loader
    if year == 2017:
        for i in range(len(varsList.sig2017_0)):
            sig_list.append(TFile.Open(inputDir + varsList.sig2017_0[i]))
            sig_trees_list.append(sig_list[i].Get("ljmet"))
            sig_trees_list[i].GetEntry(0)
            loader.AddSignalTree(sig_trees_list[i])
    elif year == 2018:
        for i in range(len(varsList.sig2018_0)):
            sig_list.append(TFile.Open(inputDir + varsList.sig2018_0[i]))
            sig_trees_list.append(sig_list[i].Get("ljmet"))
            sig_trees_list[i].GetEntry(0)
            loader.AddSignalTree(sig_trees_list[i])

    # add backgrounds to loader
    if year == 2017:
        for i in range(len(varsList.bkg2017_0)):
            bkg_list.append(TFile.Open(inputDir + varsList.bkg2017_0[i]))
            bkg_trees_list.append(bkg_list[i].Get("ljmet"))
            bkg_trees_list[i].GetEntry(0)
            if bkg_trees_list[i].GetEntries() == 0: continue
            loader.AddBackgroundTree(bkg_trees_list[i])

    elif year == 2018:
        for i in range(len(varsList.bkg2018_0)):
            bkg_list.append(TFile.Open(inputDir + varsList.bkg2018_0[i]))
            bkg_trees_list.append(bkg_list[i].Get("ljmet"))
            bkg_trees_list[i].GetEntry(0)

            if bkg_trees_list[i].GetEntries() == 0: continue
            loader.AddBackgroundTree(bkg_trees_list[i])

    # set signal and background weights
    loader.SetSignalWeightExpression(weightStrS)
    loader.SetBackgroundWeightExpression(weightStrB)

    # set cut thresholds for signal and background
    mycutSig = TCut(cutStrS)
    mycutBkg = TCut(cutStrB)

    NSIG = 0
    NSIG_TEST = 0
    NBKG = 0
    NBKG_TEST = 0

    loader.PrepareTrainingAndTestTree(
      mycutSig, mycutBkg,
      "nTrain_Signal=" + str(NSIG) + \
      ":nTrain_Background=" + str(NBKG) + \
      ":nTest_Signal=" + str(NSIG_TEST) + \
      ":nTest_Background=" + str(NBKG_TEST) + \
      ":SplitMode=Random:NormMode=NumEvents:!V"
    )

    #####################################################
    #####################################################
    ######                                         ######
    ######            K E R A S   D N N            ######
    ######                                         ######
    #####################################################
    #####################################################

    model_name = "TTTT_TMVA_model.h5"

    model = Sequential()
    model.add(
        Dense(100,
              input_dim=nVars,
              kernel_initializer="glorot_normal",
              activation="relu"))
    for i in range(2):
        model.add(BatchNormalization())
        model.add(
            Dense(100, kernel_initializer="glorot_normal", activation="relu"))
    model.add(Dense(2, activation="sigmoid"))

    model.compile(loss="categorical_crossentropy",
                  optimizer=Adam(),
                  metrics=["accuracy"])

    model.save(model_name)
    model.summary()

    ######################################################
    ######################################################
    ######                                          ######
    ######                  T M V A                 ######
    ######                                          ######
    ######################################################
    ######################################################

    # PyKeras booking options
    kerasSetting = "!H:!V:VarTransform=G:FilenameModel=" + model_name + \
     ":NumEpochs=15:BatchSize=512" # the trained model has to be specified in this string

    # run the classifier
    fClassifier.BookMethod(loader, TMVA.Types.kPyKeras, "PyKeras",
                           kerasSetting)

    TMVA.gConfig().GetIONames().fWeightFileDir = str_xbitset + "/weights/" + outf_key
    #print("New weight file directory: {}".format((TMVA.gConfig().GetIONames()).fWeightFileDir))

    fClassifier.TrainAllMethods()
    fClassifier.TestAllMethods()
    fClassifier.EvaluateAllMethods()

    SROC = fClassifier.GetROCIntegral("dataset/" + str_xbitset, "PyKeras")
    print("ROC-integral: {}".format(SROC))
    fClassifier.DeleteAllMethods()
    fClassifier.fMethodsMap.clear()

    outputfile.Close()
Example #10
import ROOT, array
from ROOT import TFile, TH1F, TGraph, TCanvas, TLegend, TTree, TList
from ROOT import TMVA, TMath
import sys

print sys.argv
process =  sys.argv[1] 

reader = TMVA.Reader("!V")
nJet = array.array('f',[0])
mindr_lep1_jet = array.array('f',[0])
mindr_lep2_jet = array.array('f',[0])
mindr_lep3_jet = array.array('f',[0])
avg_dr_jet = array.array('f',[0])
lep1_abs_eta = array.array('f',[0])
lep2_abs_eta = array.array('f',[0])
lep3_abs_eta = array.array('f',[0])
max_lep_eta = array.array('f',[0])
lep1_conePt = array.array('f',[0])
lep2_conePt = array.array('f',[0])
lep3_conePt = array.array('f',[0])
mindr_tau_jet = array.array('f',[0])
ptmiss = array.array('f',[0])
mT_lep1 = array.array('f',[0])
mT_lep2 = array.array('f',[0])
mT_lep3 = array.array('f',[0])
htmiss = array.array('f',[0])
dr_leps = array.array('f',[0])
tau_pt = array.array('f',[0])
tau_abs_eta = array.array('f',[0])
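The buffers above still have to be registered with the reader in the same order as at training time, and a weight file has to be booked before the MVA can be evaluated. A sketch of those steps, in which the weight-file path and method name are assumptions:

# Register each buffer with the reader (one call per array declared above, in training order)
reader.AddVariable('nJet', nJet)
reader.AddVariable('mindr_lep1_jet', mindr_lep1_jet)
reader.AddVariable('mindr_lep2_jet', mindr_lep2_jet)
reader.AddVariable('mindr_lep3_jet', mindr_lep3_jet)
reader.AddVariable('avg_dr_jet', avg_dr_jet)
# ... and so on for the remaining variables ...

# Book the trained method from its weight file (assumed path)
reader.BookMVA('BDT', 'weights/%s_BDT.weights.xml' % process)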
Example #11
# Declare Factory
from ROOT import TMVA, TFile, TTree, TCut, TString

# Setup TMVA
TMVA.Tools.Instance()

inputFile = TFile.Open(
    "https://github.com/iml-wg/tmvatutorials/raw/master/inputdata.root")
outputFile = TFile.Open("TMVAOutputDNN.root", "RECREATE")

factory = TMVA.Factory(
    "TMVAClassification", outputFile,
    "!V:!Silent:Color:!DrawProgressBar:AnalysisType=Classification")

# Declare Variables in DataLoader
loader = TMVA.DataLoader("dataset_dnn")

loader.AddVariable("var1")
loader.AddVariable("var2")
loader.AddVariable("var3")
loader.AddVariable("var4")
loader.AddVariable("var5 := var1-var3")
loader.AddVariable("var6 := var1+var2")

# Setup Dataset(s)
tsignal = inputFile.Get("Sig")
tbackground = inputFile.Get("Bkg")

loader.AddSignalTree(tsignal)
loader.AddBackgroundTree(tbackground)
loader.PrepareTrainingAndTestTree(TCut(''),
    # the original listing is cut off here; these split options are a typical, assumed completion
    'nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V')
Example #12
## \macro_code
##
## \author Lailin XU

from ROOT import TMVA, TFile, TTree, TCut, TH1F, TCanvas, gROOT, TLegend
from subprocess import call
from os.path import isfile
from array import array

gROOT.SetStyle("ATLAS")

# Setup TMVA
TMVA.Tools.Instance()

# Reader. One reader for each application.
reader = TMVA.Reader("Color:!Silent")
reader_S = TMVA.Reader("Color:!Silent")
reader_B = TMVA.Reader("Color:!Silent")

# Inputs
# =============
# Load data
# An unknown sample
trfile = "Zp2TeV_ttbar.root"
data = TFile.Open(trfile)
tree = data.Get('tree')

# Known signal
trfile_S = "Zp1TeV_ttbar.root"
data_S = TFile.Open(trfile_S)
tree_S = data_S.Get('tree')
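Only the trees are loaded so far; each reader still needs the input variables bound to shared buffers and a weight file booked before it can be evaluated on the three samples. A sketch of that wiring, in which the variable name and the weight-file path are assumptions:

# Shared float buffer, registered with every reader in the training order (assumed name and path)
mtt = array('f', [0.])
for r in (reader, reader_S, reader_B):
    r.AddVariable('mtt', mtt)
    r.BookMVA('BDT', 'dataset/weights/TMVAClassification_BDT.weights.xml')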
Example #13
def  RunTraining(dataset,optimization):
	 # Setup TMVA
	 TMVA.PyMethodBase.PyInitialize()     
	 output = TFile.Open('GGFQCDKiller_BR_%s_%s.root'%(dataset,optimization), 'RECREATE')
	 factory = TMVA.Factory('TMVAClassification%s%s'%(dataset,optimization), output,
			 '!V:!Silent:Color:DrawProgressBar:Transformations=I:AnalysisType=Classification')
	 #Load Data
	 #Load 2016 SIMULATION
	 file_GGF_HH_2016        = "../FullNtuples/New2016/SKIM_GluGluToHHTo4B_node_SM_13TeV-madgraph_BR.root"
	 file_QCD_MODEL_2016     = "../FullNtuples/New2016/SKIM_BKG_MODEL_BR.root"
	 #Load 2017 SIMULATION
	 file_GGF_HH_2017        = "../FullNtuples/New2017/SKIM_GluGluToHHTo4B_node_SM_13TeV-madgraph_correctedcfg_BR.root"
	 file_QCD_MODEL_2017     = "../FullNtuples/New2017/SKIM_BKG_MODEL_BR.root"
	 #Load 2018 SIMULATION
	 file_GGF_HH_2018        = "../FullNtuples/New2018/SKIM_GluGluToHHTo4B_node_SM_TuneCP5_PSWeights_13TeV-madgraph-pythia8_BR.root"
	 file_QCD_MODEL_2018     = "../FullNtuples/New2018/SKIM_BKG_MODEL_BR.root"

	 #COMBINE THE INFORMATION
	 ch_sig = TChain("bbbbTree")
	 ch_bkg = TChain("bbbbTree") 
	 #AddfILES       
	 if dataset=='2016':
		ch_sig.AddFile(file_GGF_HH_2016)
		ch_bkg.AddFile(file_QCD_MODEL_2016) 
	 elif dataset=='2017': 
		ch_sig.AddFile(file_GGF_HH_2017)
		ch_bkg.AddFile(file_QCD_MODEL_2017)   
	 elif dataset=='2018': 	      
		ch_sig.AddFile(file_GGF_HH_2018)
		ch_bkg.AddFile(file_QCD_MODEL_2018)  
	 else:
		ch_sig.AddFile(file_GGF_HH_2016)
		ch_bkg.AddFile(file_QCD_MODEL_2016)  
	 #LOAD VARIABLES	
	 dataloader = TMVA.DataLoader('GGFQCDKiller')
	 dataloader.AddVariable("H1_pt")
	 dataloader.AddVariable("H2_pt")
	 dataloader.AddVariable("H1_m")
	 dataloader.AddVariable("H2_m")
	 dataloader.AddVariable("HH_m")
	 dataloader.AddVariable("h1h2_deltaEta")
	 dataloader.AddVariable("H1_bb_deltaR")
	 dataloader.AddVariable("H2_bb_deltaR")
	 dataloader.AddVariable("abs_costh_HH_b1_cm:=abs(costh_HH_b1_ggfcm)")
	 dataloader.AddVariable("HH_btag_b3_bscore")
	 dataloader.AddVariable("HH_btag_b3_bres")	 
	 trainingsamplefraction = 0.50 #next,also try 0.5 0.6
	 nTrain_Signal          = int(ch_sig.GetEntries('GGFSignalRegion==1 && (Weight_MVA==0 || Weight_MVA==1)')*trainingsamplefraction) 
	 nTrain_Background      = int(ch_bkg.GetEntries('GGFSignalRegion==1 && (Weight_MVA==0 || Weight_MVA==1)')*trainingsamplefraction)
	 print("[INFO] ML TRAINING STARTING . . .")
	 print("[INFO] Signal/Background Training Fraction is %f"%trainingsamplefraction) 
	 dataloader.AddSignalTree(ch_sig, 1.0)
	 dataloader.AddBackgroundTree(ch_bkg, 1.0)
	 dataloader.SetBackgroundWeightExpression('Weight_AnaGGF')
	 #best option FOR SM
	 if optimization=='SM':
		 if dataset=='2016':
			 seed=2019
			 nTrees =[350]
			 nCuts = [200]
			 nDepth = [2]
		 elif dataset=='2017':
			 seed=2019
			 nTrees =[300]
			 nCuts = [300]
			 nDepth = [2]
		 elif dataset=='2018':
			 seed=2020
			 nTrees =[250]
			 nCuts = [250]
			 nDepth = [2]
		 elif dataset=='20172018':
			 seed=2020
			 nTrees =[250]
			 nCuts = [250]
			 nDepth = [2]
		 else:
			 seed=2020
			 nTrees =[200]
			 nCuts = [200]
			 nDepth = [2]	 
	 else:
		 if dataset=='2016':
			 seed=2019
			 nTrees =[300]
			 nCuts = [300]
			 nDepth = [2]
		 elif dataset=='2017':
			 seed=2020
			 nTrees =[300]
			 nCuts = [300]
			 nDepth = [2]
		 elif dataset=='2018':
			 seed=2021
			 nTrees =[300]
			 nCuts = [300]
			 nDepth = [2]
		 elif dataset=='20172018':
			 seed=2020
			 nTrees =[250,300,350]
			 nCuts = [250,300,350]
			 nDepth = [2]
		 else:
			 seed=2020
			 nTrees =[200]
			 nCuts = [200]
			 nDepth = [2]
	 dataloader.PrepareTrainingAndTestTree(TCut('GGFSignalRegion==1 && (Weight_MVA==0 || Weight_MVA==1)'),'nTrain_Signal=%i:nTrain_Background=%i:SplitMode=Random:!V:SplitSeed=%i'%(nTrain_Signal,nTrain_Background,seed))
	 for i in nTrees:
	   for j in nCuts:
		 for k in nDepth:
			 factory.BookMethod(dataloader, TMVA.Types.kBDT, 'BDT3_%i_%i_%i'%(i,j,k),
			   '!H:!V:NTrees=%i:MinNodeSize=2.5:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=%i:MaxDepth=%i'%(i,j,k))
	 # Run training, test and evaluation
	 factory.TrainAllMethods()
	 factory.TestAllMethods()
	 factory.EvaluateAllMethods()
Example #14
def main():

    usage = 'usage: %prog [options]'
    parser = optparse.OptionParser(usage)
    parser.add_option('-s', '--signal_sample',        dest='input_file_name_signal'  ,      help='signal sample path',      default='samples/DiLepTR_ttH_bInclude.root',        type='string')
    parser.add_option('-x', '--bckg1_sample',        dest='input_file_name_ttJets'  ,      help='background sample 1 path',      default='samples/DiLepTR_ttJets_bInclude.root',        type='string')
    parser.add_option('-y', '--bckg2_sample',        dest='input_file_name_ttV'  ,      help='background sample 2 path',      default='samples/DiLepTR_ttV_bInclude.root',        type='string')
    parser.add_option('-a', '--activation',        dest='activation_function'  ,      help='activation function',      default='relu',        type='string')
    parser.add_option('-l', '--hidden_layers',        dest='number_of_hidden_layers'  ,      help='number of hidden layers',      default='2',        type='int')
    parser.add_option('-t', '--var_transform',        dest='var_transform_name'  ,      help='transformation used on input variables',      default='None',        type='string')
    parser.add_option('-j', '--json',        dest='json'  ,      help='json file with list of variables',      default=None,        type='string')
    parser.add_option('-r', '--learning_rate',        dest='learning_rate'  ,      help='learning rate',      default=0.01,        type='float')
    parser.add_option('-n', '--num_epochs',        dest='num_epochs'  ,      help='number of epochs',      default=10,        type='string')

    (opt, args) = parser.parse_args()

    number_of_hidden_layers = opt.number_of_hidden_layers
    activation_function = opt.activation_function
    var_transform_name = opt.var_transform_name
    num_epochs = opt.num_epochs
    jsonFile = open(opt.json,'r')
    new_variable_list = json.load(jsonFile,encoding='utf-8').items()
    learning_rate = opt.learning_rate
    layer_nodes = 40

    # Setup TMVA interface to use Keras
    TMVA.Tools.Instance()
    TMVA.PyMethodBase.PyInitialize()

    if ',' in var_transform_name:
        var_transform_name_list = var_transform_name.split(',')
        new_var_transform_name = '+'.join(var_transform_name_list)
        print 'new_var_transform_name: ', new_var_transform_name
    else:
        print 'var_transform_name = ', var_transform_name
        new_var_transform_name = var_transform_name
        print 'new_var_transform_name: ' , new_var_transform_name

    num_inputs = 0
    for key, value in new_variable_list:
        num_inputs = num_inputs + 1
    print 'num inputs = ' , str(num_inputs)
    classifier_parent_dir = 'V8-DNN_%sVars_%sHLs_%s_%s-VarTrans_%s-learnRate_%s-epochs-%s-nodes' % (str(num_inputs),str(number_of_hidden_layers),activation_function,new_var_transform_name,str(learning_rate),num_epochs,str(layer_nodes))
    classifier_samples_dir = classifier_parent_dir+"/outputs"
    if not os.path.exists(classifier_samples_dir):
        os.makedirs(classifier_samples_dir)

    output_file_name = '%s/%s.root'%(classifier_samples_dir,classifier_parent_dir)
    output_file = TFile.Open(output_file_name,'RECREATE')

    # 'AnalysisType' defines the kind of analysis being run, e.g. multiclass or Classification.
    # VarTransform: Decorrelation, PCA-transformation, Gaussianisation, Normalisation (applied to all classes if none is specified).
    # A transformation specified on the factory object is only used for informative purposes (it is not applied to the classifier inputs);
    # distributions in the output file show how the variables would look if transformed.
    factory_name = 'Factory_%s' % (classifier_parent_dir)
    factory_string = '!V:!Silent:Color:DrawProgressBar:Transformations=%s:AnalysisType=multiclass' % var_transform_name
    factory = TMVA.Factory(factory_name, output_file,factory_string)

    #Load data
    input_file_name_signal = opt.input_file_name_signal
    data_signal = TFile.Open(input_file_name_signal)
    signal = data_signal.Get('syncTree')

    input_file_name_ttJets = opt.input_file_name_ttJets
    data_bckg_ttJets = TFile.Open(input_file_name_ttJets)
    background_ttJets = data_bckg_ttJets.Get('syncTree')

    input_file_name_ttV = opt.input_file_name_ttV
    data_bckg_ttV = TFile.Open(input_file_name_ttV)
    background_ttV = data_bckg_ttV.Get('syncTree')

    # Declare a dataloader interface
    dataloader_name = classifier_parent_dir
    dataloader = TMVA.DataLoader(dataloader_name)

    # Can add selection cuts via:
    # dataloader.AddTree(background_ttJets, 'Background_1', 'myvar > cutBarrelOnly && myEventTypeVar=1', backgroundWeight)

    ### Global event weights ###
    signalWeight = 1.
    backgroundWeight0 = 1.
    backgroundWeight1 = 1.
    dataloader.AddTree(signal, 'ttH', signalWeight)
    dataloader.AddTree(background_ttV, 'ttV', backgroundWeight0)
    dataloader.AddTree(background_ttJets, 'ttJets', backgroundWeight1)

    branches = {}
    for key, value in new_variable_list:
        dataloader.AddVariable(str(key))
        branches[key] = array('f', [-999])
        print 'variable: ', key
        branchName = ''
        #if 'hadTop_BDT' in key:
        #    branchName = 'hadTop_BDT'
        #elif 'Hj1_BDT' in key:
        #    branchName = 'Hj1_BDT'
        #else:
        #    branchName = key
        branchName = key
    dataloader.AddSpectator('nEvent','F')

    # Nominal event weight:
    # event weight = puWgtNom * trigWgtNom * lepSelEffNom * genWgt * xsecWgt (* 0 or 1 depending on if it passes event selection)

    dataloader.SetWeightExpression("EventWeight", "ttH")
    dataloader.SetWeightExpression("EventWeight", "ttV")
    dataloader.SetWeightExpression("EventWeight", "ttJets")

    # NormMode: Overall renormalisation of event-by-event weights used in training.
    # "NumEvents" = average weight of 1 per event, independently renormalised for signal and background.
    # "EqualNumEvents" = average weight of 1 per signal event, sum of weights in background equal to sum of weights for signal.
    #dataloader.PrepareTrainingAndTestTree(TCut(''), 'V:NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:NormMode=EqualNumEvents')
    dataloader.PrepareTrainingAndTestTree(TCut(''), 'V:SplitMode=Random:NormMode=EqualNumEvents')

    # Generate model:
    model = Sequential()

    # Add layers to DNN
    '''
    Dense:
    # Number of nodes
    init= # Initialisation
    activation= # Activation
    input_dim= # Shape of inputs (Number of inputs). Argument only needed for first layer.
    '''

    # first hidden layer
    model.add(Dense(layer_nodes, init='glorot_normal', activation=activation_function, input_dim=len(new_variable_list)))

    #Randomly set a fraction of input units (given by the argument) to 0 at each update during training (helps prevent overfitting).
    #model.add(Dropout(0.2))


    # Hidden layers
    for x in xrange(number_of_hidden_layers):
        model.add(Dense(layer_nodes, activation=activation_function))

    # Output layer
    # 'softmax' in the final layer keeps the outputs in the range 0-1 and normalised to 1, so they can be read as predicted class probabilities.
    model.add(Dense(3, activation='softmax'))

    # Set loss and optimizer
    # categorical_crossentropy: logarithmic loss function for multi-class targets (binary_crossentropy is the two-class analogue)
    model.compile(loss='categorical_crossentropy', optimizer=SGD(lr=learning_rate), metrics=['accuracy',])

    # Store model in file
    model.save('model.h5')
    model.summary()

    # Book methods
    # Choose classifier and define hyperparameters e.g number of epochs, model filename (as chosen above) etc.
    # VarTransform: Decorrelate, PCA, Gauss, Norm, None.
    # Transformations used in booking are used for actual training.
    logs_dir = classifier_parent_dir+'/logs'
    #factory_string_bookMethod = 'H:!V:VarTransform=%s:FilenameModel=model.h5:NumEpochs=%s:BatchSize=100:Tensorboard=%s' % (var_transform_name, num_epochs, logs_dir)
    factory_string_bookMethod = 'H:!V:VarTransform=%s:FilenameModel=model.h5:NumEpochs=%s:BatchSize=100' % (var_transform_name, num_epochs)
    factory.BookMethod(dataloader, TMVA.Types.kPyKeras, "DNN", factory_string_bookMethod)

    # Run training, testing and evaluation
    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()
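As in the other examples, the output file is normally closed once evaluation has finished, so that the trained multiclass responses are flushed to disk:

output_file.Close()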
Example #15
def main():

    try:
        # retrieve command line options
        shortopts = "m:i:n:d:k:l:t:o:vh?"
        longopts = [
            "methods=", "inputfile=", "nTrees=", "maxDepth=", "mass=",
            "varListKey=", "inputtrees=", "outputfile=", "verbose", "help",
            "usage"
        ]
        opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts)

    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    infname = DEFAULT_INFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname = DEFAULT_OUTFNAME
    methods = DEFAULT_METHODS
    nTrees = DEFAULT_NTREES
    mDepth = DEFAULT_MDEPTH
    varListKey = DEFAULT_VARLISTKEY
    verbose = True
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-d", "--maxDepth"):
            mDepth = a
        elif o in ("-l", "--varListKey"):
            varListKey = a
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-n", "--nTrees"):
            nTrees = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            a = a.strip()
            trees = a.rsplit(' ')
            trees.sort()
            trees.reverse()
            if len(trees) - trees.count('') != 2:
                print "ERROR: need to give two trees (one for signal and one for background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True

    varList = varsList.varList[varListKey]
    nVars = str(len(varList)) + 'vars'
    Note = methods + '_' + varListKey + '_' + nVars + '_mDepth' + mDepth
    outfname = "dataset/weights/TMVA_" + Note + ".root"
    # Print methods
    mlist = methods.replace(' ', ',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut

    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile(outfname, 'RECREATE')

    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory(
        "TMVAClassification", outputFile,
        "!V:!Silent:Color:DrawProgressBar:Transformations=I;:AnalysisType=Classification"
    )

    loader = TMVA.DataLoader("dataset")

    # Set verbosity
    #     factory.SetVerbose( verbose )

    # If you wish to modify default settings
    # (please check "src/Config.h" to see all available global options)
    #    gConfig().GetVariablePlotting().fTimesRMS = 8.0
    (TMVA.gConfig().GetIONames()).fWeightFileDir = "weights/" + Note

    # Define the input variables that shall be used for the classifier training
    # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    # [all types of expressions that can also be parsed by TTree::Draw( "expression" )]

    for iVar in varList:
        if iVar[0] == 'NJets_JetSubCalc':
            loader.AddVariable(iVar[0], iVar[1], iVar[2], 'I')
        else:
            loader.AddVariable(iVar[0], iVar[1], iVar[2], 'F')

    # You can add so-called "Spectator variables", which are not used in the MVA training,
    # but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
    # input variables, the response values of all trained MVAs, and the spectator variables

    inputDir = varsList.inputDir
    infname = "TTTT_TuneCP5_13TeV-amcatnlo-pythia8_hadd.root"
    iFileSig = TFile.Open(inputDir + infname)
    sigChain = iFileSig.Get("ljmet")

    loader.AddSignalTree(sigChain)
    bkg_list = []
    bkg_trees_list = []
    hist_list = []
    weightsList = []
    bkgList = varsList.bkg

    for i in range(len(bkgList)):
        bkg_list.append(TFile.Open(inputDir + bkgList[i]))
        print inputDir + bkgList[i]
        bkg_trees_list.append(bkg_list[i].Get("ljmet"))
        bkg_trees_list[i].GetEntry(0)

        if bkg_trees_list[i].GetEntries() == 0:
            continue
        loader.AddBackgroundTree(bkg_trees_list[i], 1)

    signalWeight = 1  #0.0159/sigChain.GetEntries() #xs (pb)

    # ====== register trees ====================================================
    # To give different trees for training and testing, do as follows:
    #    loader.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" )
    #    loader.AddSignalTree( signalTestTree,     signalTestWeight,  "Test" )

    # Use the following code instead of the above two or four lines to add signal and background
    # training and test events "by hand"
    # NOTE that in this case one should not give expressions (such as "var1+var2") in the input
    #      variable definition, but simply compute the expression before adding the event
    #
    #    # --- begin ----------------------------------------------------------
    #
    # ... *** please lookup code in TMVA/macros/TMVAClassification.C ***
    #
    #    # --- end ------------------------------------------------------------
    #
    # ====== end of register trees ==============================================

    # Set individual event weights (the variables must exist in the original TTree)
    #    for signal    : loader.SetSignalWeightExpression    ("weight1*weight2");
    #    for background: loader.SetBackgroundWeightExpression("weight1*weight2");
    loader.SetSignalWeightExpression(weightStrS)
    loader.SetBackgroundWeightExpression(weightStrB)

    # Apply additional cuts on the signal and background sample.
    # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )
    mycutSig = TCut(cutStrS)
    mycutBkg = TCut(cutStrB)

    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
    loader.PrepareTrainingAndTestTree(
        mycutSig, mycutBkg,
        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V"
    )

    # --------------------------------------------------------------------------------------------------

    # ---- Book MVA methods
    #
    # please lookup the various method configuration options in the corresponding cxx files, eg:
    # src/MethodCuts.cxx, etc., or here: http://tmva.sourceforge.net/optionRef.html
    # it is possible to preset ranges in the option string in which the cut optimisation should be done:
    # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

    # Cut optimisation

    # bdtSetting for "BDT"
    bdtSetting = '!H:!V:NTrees=%s:MaxDepth=%s' % (nTrees, mDepth)
    bdtSetting += ':MinNodeSize=2.5%:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20'
    bdtSetting += ':IgnoreNegWeightsInTraining=True'
    # bdtSetting for "BDTMitFisher"
    bdtFSetting = '!H:!V:NTrees=%s' % nTrees
    bdtFSetting += ':MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20'
    bdtFSetting += ':IgnoreNegWeightsInTraining=True'
    # bdtSetting for "BDTG"
    bdtGSetting = '!H:!V:NTrees=%s:MaxDepth=%s' % (nTrees, mDepth)
    bdtGSetting += ':MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20'
    bdtGSetting += ':Pray'  #Pray takes into account the effect of negative bins in BDTG
    #bdtGSetting += ':IgnoreNegWeightsInTraining=True'
    # bdtSetting for "BDTB"
    bdtBSetting = '!H:!V:NTrees=%s' % nTrees
    bdtBSetting += ':MinNodeSize=2.5%:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20'
    bdtBSetting += ':IgnoreNegWeightsInTraining=True'
    # bdtSetting for "BDTD"
    bdtDSetting = '!H:!V:NTrees=%s' % nTrees
    bdtDSetting += ':MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate'
    bdtDSetting += ':IgnoreNegWeightsInTraining=True'
    # Note: explicitly setting nEventsMin overrides the recommended MinNodeSize option

    #BOOKING AN ALGORITHM
    #     if methods=="BDT": factory.BookMethod( TMVA.Types.kBDT, "BDT",bdtSetting)
    if methods == "BDT":
        factory.BookMethod(loader, TMVA.Types.kBDT, "BDT", bdtSetting)
    if methods == "BDTG":
        factory.BookMethod(TMVA.Types.kBDT, "BDTG", bdtGSetting)
    if methods == "BDTMitFisher":
        factory.BookMethod(TMVA.Types.kBDT, "BDTMitFisher", bdtFSetting)
    if methods == "BDTB":
        factory.BookMethod(TMVA.Types.kBDT, "BDTB", bdtBSetting)
    if methods == "BDTD":
        factory.BookMethod(TMVA.Types.kBDT, "BDTD", bdtDSetting)
    # --------------------------------------------------------------------------------------------------

    # ---- Now you can tell the factory to train, test, and evaluate the MVAs.

    # Train MVAs
    print "train all method"
    factory.TrainAllMethods()

    print "test all method"
    # Test MVAs
    factory.TestAllMethods()

    # Evaluate MVAs
    factory.EvaluateAllMethods()

    # Save the output.
    outputFile.Close()
    # Open the TMVA GUI on the output file before changing directory,
    # since outfname is relative to the original working directory.
    if not gROOT.IsBatch(): TMVA.TMVAGui(outfname)

    # save plots:
    os.chdir('dataset/weights/' + Note)
    print "DONE"
Ejemplo n.º 16
0
    for period, channel in itertools.product(periods, channels):
        dijetDEta = array.array('f', [-999])
        dijetDPhi = array.array('f', [-999])
        llgJJDPhi = array.array('f', [-999])
        jPhotonDRMin = array.array('f', [-999])
        ptt = array.array('f', [-999])
        jetOnePt = array.array('f', [-999])
        jetTwoPt = array.array('f', [-999])
        kin_bdt = array.array('f', [-999])
        vbfPtBalance = array.array('f', [-999])
        photonZepp = array.array('f', [-999])

        kin_bdt_james = array.array('f', [-999])

        vbfWeightsFile = 'trained_bdts/vbf_bdt_combined_ming-yan_current.xml'
        vbf_reader = t.Reader("!Color:Silent")

        vbf_reader.AddVariable('dijetDEta', dijetDEta)
        vbf_reader.AddVariable('dijetDPhi', dijetDPhi)
        vbf_reader.AddVariable('llgJJDPhi', llgJJDPhi)
        vbf_reader.AddVariable('jPhotonDRMin', jPhotonDRMin)
        vbf_reader.AddVariable('ptt', ptt)
        vbf_reader.AddVariable('jetOnePt', jetOnePt)
        vbf_reader.AddVariable('jetTwoPt', jetTwoPt)
        vbf_reader.AddVariable('kin_bdt', kin_bdt)
        vbf_reader.AddVariable('vbfPtBalance', vbfPtBalance)
        vbf_reader.AddVariable('photonZepp', photonZepp)

        vbf_reader.BookMVA('BDT method', vbfWeightsFile)

        #vbfWeightsFileJames = 'trained_bdts/vbf_bdt_combined_james_current_half_signal_BDT.weights.xml'
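The snippet above ends before the booked reader is actually used; a hedged sketch of the per-event application follows. The tree and branch names are placeholders, not taken from the original code.

# Hypothetical event loop: copy each event's inputs into the bound arrays,
# then evaluate the booked BDT through the reader.
for event in tree:  # `tree` is a placeholder for the analysis ntuple
    dijetDEta[0] = event.dijetDEta
    dijetDPhi[0] = event.dijetDPhi
    llgJJDPhi[0] = event.llgJJDPhi
    jPhotonDRMin[0] = event.jPhotonDRMin
    ptt[0] = event.ptt
    jetOnePt[0] = event.jetOnePt
    jetTwoPt[0] = event.jetTwoPt
    kin_bdt[0] = event.kin_bdt
    vbfPtBalance[0] = event.vbfPtBalance
    photonZepp[0] = event.photonZepp
    vbf_score = vbf_reader.EvaluateMVA('BDT method')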
Ejemplo n.º 17
0
def main(o, args):

    # Import TMVA classes from ROOT
    from ROOT import TMVA, TFile, TCut

    print o

    # Output file
    outputFile = TFile(o.outfile % {"label": o.label}, 'RECREATE')

    atype = "Classification"
    if hasattr(o, "type"):
        atype = str(o.type)
    factory = TMVA.Factory(
        "TMVAClassification", outputFile,
        "!V:!Silent:!Color:!DrawProgressBar:Transformations=I:AnalysisType=%s"
        % atype)

    # Set verbosity
    factory.SetVerbose(o.verbose)

    TMVA.Config.Instance().GetIONames().fWeightFileDir = o.weightsdir

    # variables
    if type(o.variables) == str:
        o.variables = [
            v.lstrip().rstrip() for v in o.variables.split(":") if v != ""
        ]
    allvars = ""
    for v in o.variables:
        factory.AddVariable(str(v))
        if allvars != "": allvars += ":"
        allvars += v.split(":=")[0].lstrip(" ").rstrip(" ")
    print "variables %s" % allvars

    print o.spectators
    for s in o.spectators:
        if not s in o.variables:
            factory.AddSpectator(str(s))

    # categories and sub categories
    categories = []
    subcategories = []
    if hasattr(o, "subcategories") and len(o.subcategories) > 0:
        subcategories = o.subcategories[0]
        for sc in o.subcategories[1:]:
            subcategories = map(
                lambda x: (TCut(x[0][0]) * TCut(x[1][0]), "%s_%s" %
                           (x[0][1], x[1][1])),
                itertools.product(subcategories, sc))

    for cut, name, vars in o.categories:
        myvars = allvars
        if vars != "":
            for v in vars.split(":"):
                myvars = myvars.replace(v, "").replace("::", ":")
            myvars = myvars.rstrip(":")

        vars = str(myvars)
        print vars

        if len(subcategories) > 0:
            for subcut, subname in subcategories:
                if subname == "":
                    subname = subname.replace(" ", "").replace(
                        ">", "_gt_").replace("<", "_lt_").replace(
                            "=", "_eq_").replace("&", "_and_")
                fullname = "%s_%s" % (name, subname)
                categories.append(
                    (TCut(cut) * TCut(subcut), str(fullname), vars))
        else:
            categories.append((TCut(cut), str(name), vars))

    # load tree
    selection = TCut(o.selection)
    for evclass, info in o.classes.iteritems():
        samples = info["samples"]
        for name, weight, cut, ttype in samples:
            tcut = TCut(cut) * selection
            factory.AddTree(mkChain(getListOfFiles(o.indir, o.files), name),
                            str(evclass), float(weight), tcut, int(ttype))
        # weights
        if "weight" in info:
            weight = info["weight"]
            factory.AddSpectator(str("%s_wei := %s" % (evclass, weight)))
            factory.SetWeightExpression(str(weight), str(evclass))
        else:
            factory.SetWeightExpression("1.", str(evclass))

    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
    factory.PrepareTrainingAndTestTree(
        TCut(""), "SplitMode=Random:NormMode=NumEvents:!V")

    # --------------------------------------------------------------------------------------------------
    # Fisher discriminant (same as LD)
    defaultSettings = {
        "BDT":
        "!H:!V:!CreateMVAPdfs:BoostType=Grad:UseBaggedGrad"
        ":GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5"
        ":Shrinkage=0.3:NTrees=1000",
        "Cuts":
        "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart"
    }
    if "FisherD" in o.methods:
        mname = "FisherD%s" % o.label
        fcats = factory.BookMethod(TMVA.Types.kCategory, mname)

        for cut, name, vars in categories:
            print "booking sub-category classifier : %s %s %s" % (cut, name,
                                                                  vars)
            fcats.AddMethod(cut, vars, TMVA.Types.kFisher,
                            "%s_%s" % (mname, name),
                            "!H:!V:Fisher:!CreateMVAPdfs:VarTransform=D")

    if "Fisher" in o.methods:
        mname = "Fisher%s" % o.label
        fcats = factory.BookMethod(TMVA.Types.kCategory, mname)

        for cut, name, vars in categories:
            print "booking sub-category classifier : %s %s %s" % (cut, name,
                                                                  vars)
            fcats.AddMethod(cut, vars, TMVA.Types.kFisher,
                            "%s_%s" % (mname, name),
                            "!H:!V:Fisher:!CreateMVAPdfs")

    if "Likelihood" in o.methods:
        mname = "Likelihood%s" % o.label
        fcats = factory.BookMethod(TMVA.Types.kCategory, mname)

        for cut, name, vars in categories:
            print "booking sub-category classifier : %s %s %s" % (cut, name,
                                                                  vars)
            fcats.AddMethod(
                cut, vars, TMVA.Types.kLikelihood, "%s_%s" % (mname, name),
                "!H:!V:!CreateMVAPdfs:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=150"
            )

    if "LikelihoodD" in o.methods:
        mname = "LikelihoodD%s" % o.label
        fcats = factory.BookMethod(TMVA.Types.kCategory, mname)

        for cut, name, vars in categories:
            print "booking sub-category classifier : %s %s %s" % (cut, name,
                                                                  vars)
            fcats.AddMethod(
                cut, vars, TMVA.Types.kLikelihood, "%s_%s" % (mname, name),
                "!H:!V:!CreateMVAPdfs:!TransformOutput:VarTransform=D:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=150"
            )

    if "BDT" in o.methods:
        mname = "BDT%s" % o.label
        settings = defaultSettings["BDT"]
        if hasattr(o, "settings") and "BDT" in o.settings:
            settings = str(o.settings["BDT"])
        if len(categories) == 0:
            cats = factory.BookMethod(TMVA.Types.kBDT, mname, settings)
        else:
            cats = factory.BookMethod(TMVA.Types.kCategory, mname)

            for cut, name, vars in categories:
                print "booking sub-category classifier : %s %s %s" % (
                    cut, name, vars)
                cats.AddMethod(cut, vars, TMVA.Types.kBDT,
                               "%s_%s" % (mname, name), settings)

    if "Cuts" in o.methods:
        mname = "Cuts%s" % o.label
        settings = defaultSettings["Cuts"]
        if hasattr(o, "settings") and "Cuts" in o.settings:
            settings = str(o.settings["Cuts"])
        if len(categories) == 0:
            cats = factory.BookMethod(TMVA.Types.kCuts, mname, settings)
        else:
            cats = factory.BookMethod(TMVA.Types.kCategory, mname)

            for cut, name, vars in categories:
                print "booking sub-category classifier : %s %s %s" % (
                    cut, name, vars)
                cats.AddMethod(cut, vars, TMVA.Types.kCuts,
                               "%s_%s" % (mname, name), settings)

    # ---- Now you can tell the factory to train, test, and evaluate the MVAs.
    if o.optimize:
        print "Optimizing?"
        factory.OptimizeAllMethods()

    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()

    # Save the output.
    outputFile.Close()
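mkChain and getListOfFiles are helper functions defined outside the snippet above; a plausible minimal sketch of what they provide, inferred from how they are called (the glob-pattern handling and the tree-name argument are assumptions):

import glob
import os
from ROOT import TChain

def getListOfFiles(indir, patterns):
    # collect the input ROOT files matching the configured patterns under indir
    files = []
    for pattern in patterns:
        files += glob.glob(os.path.join(indir, pattern))
    return files

def mkChain(files, treename):
    # chain the tree named `treename` across all selected files
    chain = TChain(treename)
    for fname in files:
        chain.Add(fname)
    return chain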
Ejemplo n.º 18
0
def main():

    name = str(options.name)

    #inputFile_sig = TFile.Open("/home/net3/afortman/projects/hotpot/oct_sim/layertests/hazel_both_smearf_1M_35ns_e100_split.root")
    #inputFile_bkg = TFile.Open("/home/net3/afortman/projects/hotpot/oct_sim/layertests/hazel_bkg_smearf_"+name+"_train.root")

    inputFile_sig = TFile.Open(
        "/home/net3/afortman/projects/hotpot/oct_sim/hazel_sig_smear0.root")
    inputFile_bkg = TFile.Open(
        "/home/net3/afortman/projects/hotpot/oct_sim/hazel_bkg_smear0.root")

    outputFile = TFile.Open("TMVAOutput_" + name + ".root", "RECREATE")

    factory = TMVA.Factory(
        "TMVAClassification", outputFile,
        "!V:!Silent:Color:!DrawProgressBar:AnalysisType=Classification")

    loader = TMVA.DataLoader("dataset0")

    #loader.AddVariable("EventNumHazel",'I')
    #loader.AddVariable("EventNumGingko",'I')
    #loader.AddVariable("trigger_gingko",'I')
    #loader.AddVariable("iroad_x",'I')
    #loader.AddVariable("iroad_u",'I')
    #loader.AddVariable("iroad_v",'I')
    loader.AddVariable("Hit_plane0", 'I')
    loader.AddVariable("Hit_plane1", 'I')
    loader.AddVariable("Hit_plane2", 'I')
    loader.AddVariable("Hit_plane3", 'I')
    loader.AddVariable("Hit_plane4", 'I')
    loader.AddVariable("Hit_plane5", 'I')
    loader.AddVariable("Hit_plane6", 'I')
    loader.AddVariable("Hit_plane7", 'I')
    #loader.AddVariable("Hit_n",'I')
    #loader.AddVariable("dtheta")
    #loader.AddVariable("chi2")

    #tsignal = inputFile_sig.Get("hazel_train")
    tsignal = inputFile_sig.Get("hazel")
    tbackground = inputFile_bkg.Get("hazel")

    loader.AddSignalTree(tsignal)
    loader.AddBackgroundTree(tbackground)
    #loader.PrepareTrainingAndTestTree(TCut(""),"nTrain_Signal=1000:nTrain_Background=1000:SplitMode=Random:NormMode=NumEvents:!V")

    ######### for restriction to 8 layers
    #loader.PrepareTrainingAndTestTree(TCut("Hit_n==8"),"nTest_Signal=1000:nTest_Background=1000:SplitMode=Random:NormMode=NumEvents:!V")

    loader.PrepareTrainingAndTestTree(
        TCut(""),
        "nTest_Signal=1000:nTest_Background=1000:SplitMode=Random:NormMode=NumEvents:!V"
    )

    #loader.PrepareTrainingAndTestTree(TCut(""),"SplitMode=Random:NormMode=NumEvents:!V")

    # General layout
    layoutString = TString("Layout=TANH|128,TANH|128,TANH|128,LINEAR")

    # Training strategies
    training0 = TString("LearningRate=1e-1,Momentum=0.9,Repetitions=1,"
                        "ConvergenceSteps=2,BatchSize=256,TestRepetitions=10,"
                        "WeightDecay=1e-4,Regularization=L2,"
                        "DropConfig=0.0+0.5+0.5+0.5, Multithreading=True")
    training1 = TString("LearningRate=1e-2,Momentum=0.9,Repetitions=1,"
                        "ConvergenceSteps=2,BatchSize=256,TestRepetitions=10,"
                        "WeightDecay=1e-4,Regularization=L2,"
                        "DropConfig=0.0+0.0+0.0+0.0, Multithreading=True")
    trainingStrategyString = TString("TrainingStrategy=")
    trainingStrategyString += training0 + TString("|") + training1

    # General Options
    dnnOptions = TString("!H:!V:ErrorStrategy=CROSSENTROPY:VarTransform=N:"
                         "WeightInitialization=XAVIERUNIFORM")
    dnnOptions.Append(":")
    dnnOptions.Append(layoutString)
    dnnOptions.Append(":")
    dnnOptions.Append(trainingStrategyString)

    # Standard implementation, no dependencies.
    stdOptions = dnnOptions + ":Architecture=CPU"
    factory.BookMethod(loader, TMVA.Types.kDNN, "DNN", stdOptions)

    #factory.BookMethod(loader, TMVA.Types.kDNN, "DNN","Architecture=CPU")

    ##Boosted Decision Trees
    #factory.BookMethod(loader,TMVA.Types.kBDT, "BDT","!V:NTrees=200:MinNodeSize=2.5%:MaxDepth=2:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" )

    factory.BookMethod(loader, TMVA.Types.kBDT, "BDT")

    ##Multi-Layer Perceptron (Neural Network)
    factory.BookMethod(
        loader, TMVA.Types.kMLP, "MLP",
        "!H:!V:NeuronType=tanh:VarTransform=N:NCycles=100:HiddenLayers=N+5:TestRate=5:!UseRegulator"
    )

    #factory.BookMethod(loader, TMVA.Types.kKNN, "kNN" );

    # CPU implementation, using BLAS
    #cpuOptions = dnnOptions + ":Architecture=CPU"
    #factory.BookMethod(loader, TMVA.Types.kDNN, "DNN CPU", cpuOptions)

    factory.TrainAllMethods()

    factory.TestAllMethods()
    factory.EvaluateAllMethods()

    c = factory.GetROCCurve(loader)
    c.Draw()
    c.SaveAs("dataset0/roc_TMVA_smear0.pdf")
Ejemplo n.º 19
0
def runJob():
    TMVA.Tools.Instance()
    TMVA.PyMethodBase.PyInitialize()

    output = TFile.Open('TMVA_SSSF.root', 'RECREATE')
    factory = TMVA.Factory(
        'TMVAClassification', output,
        '!V:!Silent:Color:DrawProgressBar:AnalysisType=Classification')
    # factory = TMVA.Factory('TMVAClassification', output,   '!V:!Silent:Color:DrawProgressBar:Transformations=D,G:AnalysisType=Classification')

    dataloader = TMVA.DataLoader('datasetSSSF04Feb')
    for br in config.mvaVariables:
        dataloader.AddVariable(br)

    for sampleName, sample in config.samples.items():
        if config.structure[sampleName]['isData'] == 1:
            continue

        print sampleName
        sample['tree'] = TChain("Events")
        for f in sample['name']:
            sample['tree'].Add(f)

        if config.structure[sampleName]['isSignal'] == 1:
            dataloader.AddSignalTree(sample['tree'], 1.0)
        else:
            dataloader.AddBackgroundTree(sample['tree'], 1.0)
        # output_dim += 1
    dataloader.PrepareTrainingAndTestTree(
        TCut(config.cut), 'SplitMode=Random:NormMode=NumEvents:!V')

    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDTG0",
        "!H:!V:NTrees=400:MinNodeSize=1.0%:BoostType=Grad:Shrinkage=0.03:UseBaggedBoost:GradBaggingFraction=0.4:nCuts=100:MaxDepth=1"
    )
    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDTG1",
        "!H:!V:NTrees=400:MinNodeSize=1.0%:BoostType=Grad:Shrinkage=0.03:UseBaggedBoost:GradBaggingFraction=0.4:nCuts=500:MaxDepth=2"
    )

    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDTG4D3",
        "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=500:MaxDepth=3"
    )
    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDTG4C3",
        "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=300:MaxDepth=2"
    )
    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDTG4SK01",
        "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.01:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=500:MaxDepth=2"
    )
    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDTG4F07",
        "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.7:nCuts=500:MaxDepth=2"
    )
    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDTG4F07NT",
        "!H:!V:NTrees=1000:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.7:nCuts=500:MaxDepth=2"
    )
    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDTG4F07NC",
        "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.7:nCuts=1000:MaxDepth=2"
    )

    # Run training, test and evaluation
    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()

    output.Close()
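Since all of the BDT variants above are booked against the same dataloader, their ROC curves can be compared in one plot after evaluation, mirroring the GetROCCurve usage in the earlier DNN example. A short sketch, which would go just before output.Close():

# hedged sketch: overlay the ROC curves of every booked method and save the canvas
roc = factory.GetROCCurve(dataloader)
roc.Draw()
roc.SaveAs('TMVA_SSSF_ROC.pdf')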
Ejemplo n.º 20
0
def main():
    print "\n", "=" * 80
    print "\tmonoHZZ4L - classification with TMVA"
    print "=" * 80

    # summary root file
    summaryFilename = 'TMVA.root'

    # results directory
    resultsDir = 'results'
    os.system('mkdir -p %s' % resultsDir)

    #------------------------------------------------------------------
    # get signal file and associated Root tree
    weightname = "f_weight"  # name of event weight variable
    sigFilename = '../../ntuple_Zprime_MZp01700_MA00300.root'
    sigFile, sigTree = getTree(sigFilename)

    # get background file and associated Root tree
    bkgFilename = '../../ntuple_SM.root'
    bkgFile, bkgTree = getTree(bkgFilename)

    #------------------------------------------------------------------
    # create a factory for booking machine learning methods
    #------------------------------------------------------------------
    outputFile = TFile("TMVA.root", "recreate")
    options = '''
    !V
    Color
    !Silent
    DrawProgressBar
    AnalysisType=Classification
    Transformations=I;D
    '''
    factory = TMVA.Factory("HZZ4L", outputFile, formatOptions(options))

    #------------------------------------------------------------------
    # set up data set for training and testing
    #------------------------------------------------------------------
    dataLoader = TMVA.DataLoader(resultsDir)

    # define all MELA variables here
    dataLoader.AddVariable("f_mass4l", 'D')
    dataLoader.AddVariable("f_D_bkg_kin", 'D')

    # define from which trees data are to be taken
    # and the global weights and event-by-event weights
    # to be assigned to the training data
    sigWeight = 1.0
    dataLoader.AddSignalTree(sigTree, sigWeight)
    dataLoader.SetSignalWeightExpression("f_weight")

    bkgWeight = 1.0
    dataLoader.AddBackgroundTree(bkgTree, bkgWeight)
    dataLoader.SetBackgroundWeightExpression("f_weight")

    # you can apply cuts, if needed
    cut = TCut('!f_outlier')

    options = '''
    SplitMode=Block
    NormMode=EqualNumEvents
    nTrain_Signal=5000
    nTest_Signal=5000
    nTrain_Background=5000
    nTest_Background=5000
    !V 
    '''
    dataLoader.PrepareTrainingAndTestTree(
        cut,  # signal cut
        cut,  # background cut
        formatOptions(options))

    #------------------------------------------------------------------
    # ok, almost done, define machine learning methods to be run
    #------------------------------------------------------------------

    options = '''
    !H
    !V
    BoostType=AdaBoost
    NTrees=5000
    nEventsMin=100
    nCuts=50
    '''
    factory.BookMethod(dataLoader, TMVA.Types.kBDT, "BDT",
                       formatOptions(options))

    options = '''
    !H
    !V
    NCycles=400
    VarTransform=N
    HiddenLayers=10
    TrainingMethod=BFGS
    '''
    factory.BookMethod(dataLoader, TMVA.Types.kMLP, "MLP",
                       formatOptions(options))

    #------------------------------------------------------------------
    # ok, let's go!
    #------------------------------------------------------------------
    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()

    outputFile.Close()
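The formatOptions helper used throughout this example is defined elsewhere in the script; a minimal sketch of what it presumably does, namely collapsing the multi-line option text into TMVA's colon-separated option string:

def formatOptions(options):
    # join the whitespace-separated tokens of a multi-line option string
    # into TMVA's "opt1:opt2:..." format
    return ':'.join(token for token in options.split() if token)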
Ejemplo n.º 21
0
for regressedvar in regressedvars:
    for cat in cats:
        if regressedvar:
            prefix = "BDT_Reg_common5_input_"
            midfix = "Reg_"
            outputpostfix = "reg"
        else:
            prefix = "BDT_common5_input_"
            midfix = ""
            outputpostfix = "noreg"

        print "Training the BDT for",cat,"with",outputpostfix,"variables"

        outputFile = TFile(str(sys.argv[3])+"_"+cat+"_"+outputpostfix+".root","RECREATE")

        factory = TMVA.Factory( "weights_"+str(sys.argv[3])+"_"+cat+"_"+outputpostfix, outputFile,"V:!Silent:Color:DrawProgressBar:AnalysisType=Classification" )

        weightexpression = "Weight_XS*Weight_ElectronSFID*Weight_MuonSFID*Weight_MuonSFIso*Weight_ElectronSFGFS*Weight_MuonSFHIP*Weight_pu69p2*Weight_CSV"
        #Add Variables

        if cat == "6j4t":
            
            factory.AddVariable("Evt_"+midfix+"blr_ETH_transformed","Evt_"+midfix+"blr_ETH_transformed","units",'F')
            #factory.AddVariable(prefix+"Evt_blr_ETH_transformed",prefix+"Evt_blr_ETH_transformed","units",'F')
            factory.AddVariable("Evt_Deta_JetsAverage","Evt_Deta_JetsAverage","units",'F')
            factory.AddVariable(prefix+"avg_dr_tagged_jets",prefix+"avg_dr_tagged_jets","units",'F')
            factory.AddVariable(prefix+"tagged_dijet_mass_closest_to_125",prefix+"tagged_dijet_mass_closest_to_125","units",'F')
            factory.AddVariable(prefix+"closest_tagged_dijet_mass",prefix+"closest_tagged_dijet_mass","units",'F')
            factory.AddVariable(prefix+"fifth_highest_CSV",prefix+"fifth_highest_CSV","units",'F')
            factory.AddVariable(prefix+"best_higgs_mass",prefix+"best_higgs_mass","units",'F')
            factory.AddVariable(prefix+"sphericity",prefix+"sphericity","units",'F')
def main():



    NTupName   = "varTree"    
    verbose    = True
    
    alg        = "AK10LCTRIMF5R20"
    spectators = ["m"]
    cuts       = ["eta>-1.2","eta<1.2","pt>200","pt<350","m>61","m<85","TruthRecoMatch==1"]
    vars       = ["TauWTA2TauWTA1","ZCUT12","Dip23","TJetVol","ActiveArea","PullC10","Angularity"]
    methods    = "Likelihood"

    print "Starting and getting arguments:"
    allargs = sys.argv[1:]    
    if len(allargs)<5:
        print "You input these args"
        print allargs
        print "Not enough args, please try again"
        return 1
    else:
        alg        = allargs[0]
        spectators = allargs[1].split(",")
        cuts       = allargs[2].split(",")
        vars       = allargs[3].split(",")
        methods    = allargs[4]
    
    print "Running with args:"
    print "  alg        = ",alg        
    print "  spectators = ",spectators 
    print "  cuts       = ",cuts       
    print "  vars       = ",vars       
    print "  methods    = ",methods    
    

    # Print methods
    mlist = methods.replace(' ',',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()



    #===============================
    #Read training and test data
    #===============================
    InputDir = "Data/20150709/"
    print "Getting inputs from: ",InputDir
    s1 = TFile(InputDir+"wprime1000_wprime2000.root");
    b1 = TFile(InputDir+"dijet.root");

    # Output file
    OutFileName="testout.root"
    outputFile = TFile( OutFileName, 'RECREATE' )
    
    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in 
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory( "TMVAClassification", outputFile, 
                            "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" )

    # Set verbosity
    factory.SetVerbose( verbose )

#     weight=""
#     weight+="pass_selection*EventWeight*CrossSection*("
#     weight+="akt10"+alg+"_trim_pt>"+pt1+" && "
#     weight+="akt10"+alg+"_trim_pt<"+pt2
#     if m1!="0":
#         weight+=" && akt10"+alg+"_trim_mass>"+m1+" && "
#         weight+="akt10"+alg+"_trim_mass<"+m2
#     weight+=")"
#     
#     #Get signal and background histograms
#     if variable=="mass":
#         histname = "akt10"+alg+"_trim_"+variable
#     else:
#         histname = alg+"_"+variable
    
    #======================================
    # Predefined cuts - for instance on M(j1)
    #======================================
    mycuts = "1.0"
    mycutb = "1.0"

    for cut in cuts:
        placecut=cut
        if cut[:2]=="pt" or cut[:3]=="eta" or cut[:4]=="mass":
            placecut = "* (akt10"+alg+"_trim_"+cut+")"
        else:
            placecut="* ("+cut+") "
        mycuts += placecut
        mycutb += placecut

    
    print "MyCutsSig: ",mycuts
    print "MyCutsBkg: ",mycutb

    #===================================
    #Spectator variables from tree
    #=====================================
    for spec in spectators:
        factory.AddSpectator( spec , 'F' )
        
    #===================================
    #MVA variables from tree
    #=====================================
    for var in vars:
        factory.AddVariable( var , 'F' )

    #===============================
    #Read training and test data
    #===============================
    print "Getting trees ..."
    st1 = s1.Get(NTupName)
    bt1 = b1.Get(NTupName)

    #=========================================
    # global event weights per tree (see below for setting event-wise weights)
    #=========================================
    ws1 = 1.0
    wb1 = 1.0

    #=========================================
    # You can add an arbitrary number of signal or background trees
    #=========================================
    factory.AddSignalTree    ( st1, ws1 );
    factory.SetSignalWeightExpression("EventWeight*CrossSection");

    factory.AddBackgroundTree( bt1, wb1 );
    factory.SetBackgroundWeightExpression("EventWeight*CrossSection");
    
    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
    mycutSig = TCut(mycuts)
    mycutBkg = TCut(mycutb)
    
    factory.PrepareTrainingAndTestTree( mycutSig, mycutBkg,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" )

    # --------------------------------------------------------------------------------------------------

    # ---- Book MVA methods
    #
    # please lookup the various method configuration options in the corresponding cxx files, eg:
    # src/MethodCuts.cxx, etc., or here: http://tmva.sourceforge.net/optionRef.html
    # it is possible to preset ranges in the option string in which the cut optimisation should be done:
    # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

    # Cut optimisation
    if "Cuts" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "Cuts",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" )

    if "CutsD" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsD",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" )

    if "CutsPCA" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsPCA",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" )

    if "CutsGA" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsGA",
                            "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" )

    if "CutsSA" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsSA",
                            "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" )

    # Likelihood ("naive Bayes estimator")
    if "Likelihood" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "Likelihood",
                            "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" )

    # Decorrelated likelihood
    if "LikelihoodD" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodD",
                            "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" )

    # PCA-transformed likelihood
    if "LikelihoodPCA" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodPCA",
                            "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ) 

    # Use a kernel density estimator to approximate the PDFs
    if "LikelihoodKDE" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodKDE",
                            "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ) 

    # Use a variable-dependent mix of splines and kernel density estimator
    if "LikelihoodMIX" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodMIX",
                            "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ) 

    # Test the multi-dimensional probability density estimator
    # here are the options strings for the MinMax and RMS methods, respectively:
    #      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
    #      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
    if "PDERS" in mlist:
# DEFAULT
#         factory.BookMethod( TMVA.Types.kPDERS, "PDERS",
#                             "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" )
# Choose a rigid volume range so it doesn't take so long
        factory.BookMethod( TMVA.Types.kPDERS, "PDERS",
                            "!H:!V:NormTree=T:VolumeRangeMode=Unscaled:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" )



    if "PDERSD" in mlist:
        factory.BookMethod( TMVA.Types.kPDERS, "PDERSD",
                            "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" )

    if "PDERSPCA" in mlist:
        factory.BookMethod( TMVA.Types.kPDERS, "PDERSPCA",
                             "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" )

    # Multi-dimensional likelihood estimator using self-adapting phase-space binning
    if "PDEFoam" in mlist:
        factory.BookMethod( TMVA.Types.kPDEFoam, "PDEFoam",
                            "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" )

    if "PDEFoamBoost" in mlist:
        factory.BookMethod( TMVA.Types.kPDEFoam, "PDEFoamBoost",
                            "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" )

    # K-Nearest Neighbour classifier (KNN)
    if "KNN" in mlist:
        factory.BookMethod( TMVA.Types.kKNN, "KNN",
                            "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" )

    # H-Matrix (chi2-squared) method
    if "HMatrix" in mlist:
        factory.BookMethod( TMVA.Types.kHMatrix, "HMatrix", "!H:!V" )

    # Linear discriminant (same as Fisher discriminant)
    if "LD" in mlist:
        factory.BookMethod( TMVA.Types.kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" )

    # Fisher discriminant (same as LD)
    if "Fisher" in mlist:
        factory.BookMethod( TMVA.Types.kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" )

    # Fisher with Gauss-transformed input variables
    if "FisherG" in mlist:
        factory.BookMethod( TMVA.Types.kFisher, "FisherG", "H:!V:VarTransform=Gauss" )

    # Composite classifier: ensemble (tree) of boosted Fisher classifiers
    if "BoostedFisher" in mlist:
        factory.BookMethod( TMVA.Types.kFisher, "BoostedFisher", 
                            "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" )

    # Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
    if "FDA_MC" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_MC",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );

    if "FDA_GA" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_GA",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

    if "FDA_SA" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_SA",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

    if "FDA_MT" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_MT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

    if "FDA_GAMT" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_GAMT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

    if "FDA_MCMT" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_MCMT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

    # TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
    if "MLP" in mlist:
        factory.BookMethod( TMVA.Types.kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" )

    if "MLPBFGS" in mlist:
        factory.BookMethod( TMVA.Types.kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" )

    if "MLPBNN" in mlist:
        factory.BookMethod( TMVA.Types.kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ) # BFGS training with bayesian regulators

    # CF(Clermont-Ferrand)ANN
    if "CFMlpANN" in mlist:
        factory.BookMethod( TMVA.Types.kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ) # n_cycles:#nodes:#nodes:...  

    # Tmlp(Root)ANN
    if "TMlpANN" in mlist:
        factory.BookMethod( TMVA.Types.kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ) # n_cycles:#nodes:#nodes:...

    # Support Vector Machine
    if "SVM" in mlist:
        factory.BookMethod( TMVA.Types.kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" )

    # Boosted Decision Trees
    if "BDTG" in mlist:
        factory.BookMethod( TMVA.Types.kBDT, "BDTG",
                            "!H:!V:NTrees=1000:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" )                        

    if "BDT" in mlist:
        factory.BookMethod( TMVA.Types.kBDT, "BDT",
                           "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" )

    if "BDTB" in mlist:
        factory.BookMethod( TMVA.Types.kBDT, "BDTB",
                           "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" )

    if "BDTD" in mlist:
        factory.BookMethod( TMVA.Types.kBDT, "BDTD",
                           "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" )

    # RuleFit -- TMVA implementation of Friedman's method
    if "RuleFit" in mlist:
        factory.BookMethod( TMVA.Types.kRuleFit, "RuleFit",
                            "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" )

    # --------------------------------------------------------------------------------------------------
            
    # ---- Now you can tell the factory to train, test, and evaluate the MVAs. 

    # Train MVAs
    factory.TrainAllMethods()
    
    # Test MVAs
    factory.TestAllMethods()
    
    # Evaluate MVAs
    factory.EvaluateAllMethods()    
    
    # Save the output.
    outputFile.Close()
    
    print "=== wrote root file %s\n" % OutFileName
    print "=== TMVAClassification is done!\n"
Ejemplo n.º 23
0
import numpy as np
from ROOT import TMVA, TFile, TCut
from root_numpy.tmva import add_classification_events

RNG = np.random.RandomState(42)

# Build a toy dataset: Gaussian signal and background samples in n_vars dimensions
n_vars = 5
n_events = 1000
signal = RNG.multivariate_normal(
    np.ones(n_vars), np.diag(np.ones(n_vars)), n_events)
background = RNG.multivariate_normal(
    np.ones(n_vars) * -1, np.diag(np.ones(n_vars)), n_events)
X = np.concatenate([signal, background])
y = np.ones(X.shape[0])
w = RNG.randint(1, 10, n_events * 2)
y[signal.shape[0]:] *= -1
permute = RNG.permutation(y.shape[0])
X = X[permute]
y = y[permute]

# Split into training and test datasets
X_train, y_train, w_train = X[:n_events], y[:n_events], w[:n_events]
X_test, y_test, w_test = X[n_events:], y[n_events:], w[n_events:]

output = TFile('tmva_output.root', 'recreate')
factory = TMVA.Factory('classifier', output,
                       'AnalysisType=Classification:'
                       '!V:Silent:!DrawProgressBar')
for n in range(n_vars):
    factory.AddVariable('f{0}'.format(n), 'F')

# Call root_numpy's utility functions to add events from the arrays
add_classification_events(factory, X_train, y_train, weights=w_train)
add_classification_events(factory, X_test, y_test, weights=w_test, test=True)

# Train a classifier
factory.PrepareTrainingAndTestTree(TCut('1'), 'NormMode=EqualNumEvents')
factory.BookMethod('Fisher', 'Fisher',
                   'Fisher:VarTransform=None:CreateMVAPdfs:'
                   'PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:'
                   'NsmoothMVAPdf=10')
factory.TrainAllMethods()
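The example above trains the Fisher classifier but stops before applying it; a minimal evaluation sketch follows, assuming TMVA's default weight-file naming ('weights/classifier_Fisher.weights.xml') for the factory and method names used above.

from array import array
from root_numpy.tmva import evaluate_reader

# book a reader on the trained Fisher weights (path assumes the TMVA defaults)
reader = TMVA.Reader()
for n in range(n_vars):
    reader.AddVariable('f{0}'.format(n), array('f', [0.]))
reader.BookMVA('Fisher', 'weights/classifier_Fisher.weights.xml')

# evaluate the classifier on the held-out test sample in one call
scores = evaluate_reader(reader, 'Fisher', X_test)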
def runJob():
    TMVA.Tools.Instance()
    TMVA.PyMethodBase.PyInitialize()

    dataloader = TMVA.DataLoader('dataset_24March')
    output = TFile.Open('TMVA_24March.root', 'RECREATE')
    factory = TMVA.Factory(
        'TMVAClassification', output,
        '!V:!Silent:Color:DrawProgressBar:AnalysisType=Classification')

    for br in config.mvaVariables:
        dataloader.AddVariable(br)

    for sampleName, sample in config.samples.items():
        if config.structure[sampleName]['isData'] == 1:
            continue

        sample['tree'] = TChain("Events")
        for f in sample['name']:
            sample['tree'].Add(f)

        if config.structure[sampleName]['isSignal'] == 1:
            dataloader.AddSignalTree(sample['tree'], 1.0)
        else:
            dataloader.AddBackgroundTree(sample['tree'], 1.0)
        # output_dim += 1
    dataloader.PrepareTrainingAndTestTree(
        TCut(config.cut),
        'SplitMode=Random:SplitSeed=10:NormMode=EqualNumEvents')

    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDT",
        "!H:!V:NTrees=500:MinNodeSize=0.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.1:SeparationType=GiniIndex:nCuts=500"
    )
    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDT1",
        "!H:!V:NTrees=1000:MinNodeSize=0.5%:MaxDepth=2:BoostType=AdaBoost:AdaBoostBeta=0.1:SeparationType=GiniIndex:nCuts=1000"
    )
    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDT2",
        "!H:!V:NTrees=800:MinNodeSize=0.5%:MaxDepth=1:BoostType=AdaBoost:AdaBoostBeta=0.2:SeparationType=GiniIndex:nCuts=1000"
    )
    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDTG4D3",
        "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=500:MaxDepth=3"
    )
    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDTG4C3",
        "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=300:MaxDepth=2"
    )
    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDTG4SK01",
        "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.01:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=500:MaxDepth=2"
    )
    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDTG4F07",
        "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.7:nCuts=500:MaxDepth=2"
    )
    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDTG4SK01F07",
        "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.01:UseBaggedBoost:GradBaggingFraction=0.7:nCuts=500:MaxDepth=2"
    )
    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDTB",
        "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20")
    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDTB2",
        "!H:!V:NTrees=800:BoostType=Bagging:SeparationType=GiniIndex:nCuts=50")
    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDTB3",
        "!H:!V:NTrees=1000:BoostType=Bagging:SeparationType=GiniIndex:nCuts=100"
    )

    # Run training, test and evaluation
    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()

    output.Close()
Ejemplo n.º 25
0
def main():

    try:
        # retrieve command line options
        shortopts  = "m:o:l:s:vh?"
        longopts   = ["methods=", "outputfile=", "lepton=", "signal=", "verbose", "help", "usage"]
        opts, args = getopt.getopt( sys.argv[1:], shortopts, longopts )

    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    lepton = DEFAULT_LEPTON
    outfname    = DEFAULT_OUTFNAME
    methods     = DEFAULT_METHODS
    verbose     = False
    signal      = DEFAULT_SIGNAL
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-s", "--signal"):
            signal = a
        elif o in ("-v", "--verbose"):
            verbose = True
        elif o in ("-l", "--lepton"):
            if a == "electron":
                lepton = ELECTRON
            elif a == "muon":
                lepton = MUON
            else:
                print "*** Lepton must be 'electron' or 'muon ****"
                sys.exit(1)


    # Print methods
    mlist = methods.replace(' ',',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut
    
    # check ROOT version, give alarm if 5.18 
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)
    
    # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI
    gROOT.SetMacroPath( "./" )
    gROOT.Macro       ( "./TMVAlogon.C" )    
    gROOT.LoadMacro   ( "./TMVAGui.C" )
    
    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile( outfname, 'RECREATE' )
    
    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in 
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory( "TMVAClassification", outputFile, 
                            "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" )

    # Set verbosity
    factory.SetVerbose( verbose )
    
    # let's open the input files
    if lepton == ELECTRON:

        print "Lepton is ELECTRON."
        path = "/data3/jmitrevs/lepphoton/elphoton_ntuple2/mergedFiles/"

        wino_600_200FileName = path + "wino_600_200_el.root"
        wino_600_500FileName = path + "wino_600_500_el.root"
        wino_1000_200FileName = path + "wino_1000_200_el.root"
        wino_1500_300FileName = path + "wino_1500_300_el.root"
        
        WlepnuFileName_Np0 = path + "Wenu_Np0.root"
        WlepnuFileName_Np1 = path + "Wenu_Np1.root"
        WlepnuFileName_Np2 = path + "Wenu_Np2.root"
        WlepnuFileName_Np3 = path + "Wenu_Np3.root"
        WlepnuFileName_Np4 = path + "Wenu_Np4.root"
        WlepnuFileName_Np5 = path + "Wenu_Np5.root"

        ZleplepFileName_Np0 = path + "Zee_Np0.root"
        ZleplepFileName_Np1 = path + "Zee_Np1.root"
        ZleplepFileName_Np2 = path + "Zee_Np2.root"
        ZleplepFileName_Np3 = path + "Zee_Np3.root"
        ZleplepFileName_Np4 = path + "Zee_Np4.root"
        ZleplepFileName_Np5 = path + "Zee_Np5.root"

        st_tchan_lepnuFileName   = path + "st_tchan_enu.root"
        st_schan_lepnuFileName   = path + "st_schan_enu.root"
        ZleplepgammaFileName = path + "Zeegamma.root"

    elif lepton == MUON:

        print "Lepton is MUON."
        path = "/data3/jmitrevs/lepphoton/muphoton2/mergedFiles/"

        wino_600_200FileName = path + "wino_600_200_mu.root"
        wino_600_500FileName = path + "wino_600_500_mu.root"
        wino_1000_200FileName = path + "wino_1000_200_mu.root"
        wino_1500_300FileName = path + "wino_1500_300_mu.root"

        WlepnuFileName_Np0 = path + "Wmunu_Np0.root"
        WlepnuFileName_Np1 = path + "Wmunu_Np1.root"
        WlepnuFileName_Np2 = path + "Wmunu_Np2.root"
        WlepnuFileName_Np3 = path + "Wmunu_Np3.root"
        WlepnuFileName_Np4 = path + "Wmunu_Np4.root"
        WlepnuFileName_Np5 = path + "Wmunu_Np5.root"

        ZleplepFileName_Np0 = path + "Zmumu_Np0.root"
        ZleplepFileName_Np1 = path + "Zmumu_Np1.root"
        ZleplepFileName_Np2 = path + "Zmumu_Np2.root"
        ZleplepFileName_Np3 = path + "Zmumu_Np3.root"
        ZleplepFileName_Np4 = path + "Zmumu_Np4.root"
        ZleplepFileName_Np5 = path + "Zmumu_Np5.root"

        st_tchan_lepnuFileName   = path + "st_tchan_munu.root"
        st_schan_lepnuFileName   = path + "st_schan_munu.root"
        ZleplepgammaFileName = path + "Zmumugamma.root"

    else:
        raise ValueError("Lepton has to be ELECTRON or MUON.")

    
    WtaunuFileName_Np0 = path + "Wtaunu_Np0.root"
    WtaunuFileName_Np1 = path + "Wtaunu_Np1.root"
    WtaunuFileName_Np2 = path + "Wtaunu_Np2.root"
    WtaunuFileName_Np3 = path + "Wtaunu_Np3.root"
    WtaunuFileName_Np4 = path + "Wtaunu_Np4.root"
    WtaunuFileName_Np5 = path + "Wtaunu_Np5.root"

    ZtautauFileName_Np0 = path + "Ztautau_Np0.root"
    ZtautauFileName_Np1 = path + "Ztautau_Np1.root"
    ZtautauFileName_Np2 = path + "Ztautau_Np2.root"
    ZtautauFileName_Np3 = path + "Ztautau_Np3.root"
    ZtautauFileName_Np4 = path + "Ztautau_Np4.root"
    ZtautauFileName_Np5 = path + "Ztautau_Np5.root"

    st_tchan_taunuFileName = path + "st_tchan_taunu.root"    
    st_schan_taunuFileName = path + "st_schan_taunu.root"
    st_WtFileName   = path + "st_Wt.root"    
    
    WgammaFileName_Np0 = path + "Wgamma_Np0.root"
    WgammaFileName_Np1 = path + "Wgamma_Np1.root"
    WgammaFileName_Np2 = path + "Wgamma_Np2.root"
    WgammaFileName_Np3 = path + "Wgamma_Np3.root"
    WgammaFileName_Np4 = path + "Wgamma_Np4.root"
    WgammaFileName_Np5 = path + "Wgamma_Np5.root"
    
    ttbarFileName = path + "ttbar.root"

    WWFileName = path + "WW.root"
    WZFileName = path + "WZ.root"
    ZZFileName = path + "ZZ.root"

    ZtautaugammaFileName = path + "Ztautaugamma.root"

    
    ###########################################

    
    wino_600_200File = TFile(wino_600_200FileName)
    wino_600_500File = TFile(wino_600_500FileName)
    wino_1000_200File = TFile(wino_1000_200FileName)
    wino_1500_300File = TFile(wino_1500_300FileName)

    WlepnuFile_Np0 = TFile(WlepnuFileName_Np0)
    WlepnuFile_Np1 = TFile(WlepnuFileName_Np1)
    WlepnuFile_Np2 = TFile(WlepnuFileName_Np2)
    WlepnuFile_Np3 = TFile(WlepnuFileName_Np3)
    WlepnuFile_Np4 = TFile(WlepnuFileName_Np4)
    WlepnuFile_Np5 = TFile(WlepnuFileName_Np5)
    
    WtaunuFile_Np0 = TFile(WtaunuFileName_Np0)
    WtaunuFile_Np1 = TFile(WtaunuFileName_Np1)
    WtaunuFile_Np2 = TFile(WtaunuFileName_Np2)
    WtaunuFile_Np3 = TFile(WtaunuFileName_Np3)
    WtaunuFile_Np4 = TFile(WtaunuFileName_Np4)
    WtaunuFile_Np5 = TFile(WtaunuFileName_Np5)

    ZleplepFile_Np0 = TFile(ZleplepFileName_Np0)
    ZleplepFile_Np1 = TFile(ZleplepFileName_Np1)
    ZleplepFile_Np2 = TFile(ZleplepFileName_Np2)
    ZleplepFile_Np3 = TFile(ZleplepFileName_Np3)
    ZleplepFile_Np4 = TFile(ZleplepFileName_Np4)
    ZleplepFile_Np5 = TFile(ZleplepFileName_Np5)
    
    ZtautauFile_Np0 = TFile(ZtautauFileName_Np0)
    ZtautauFile_Np1 = TFile(ZtautauFileName_Np1)
    ZtautauFile_Np2 = TFile(ZtautauFileName_Np2)
    ZtautauFile_Np3 = TFile(ZtautauFileName_Np3)
    ZtautauFile_Np4 = TFile(ZtautauFileName_Np4)
    ZtautauFile_Np5 = TFile(ZtautauFileName_Np5)
    
    WgammaFile_Np0 = TFile(WgammaFileName_Np0)
    WgammaFile_Np1 = TFile(WgammaFileName_Np1)
    WgammaFile_Np2 = TFile(WgammaFileName_Np2)
    WgammaFile_Np3 = TFile(WgammaFileName_Np3)
    WgammaFile_Np4 = TFile(WgammaFileName_Np4)
    WgammaFile_Np5 = TFile(WgammaFileName_Np5)
    
    ttbarFile = TFile(ttbarFileName)
    
    st_tchan_lepnuFile   = TFile(st_tchan_lepnuFileName)
    st_tchan_taunuFile = TFile(st_tchan_taunuFileName)
    
    st_schan_lepnuFile   = TFile(st_schan_lepnuFileName)
    st_schan_taunuFile = TFile(st_schan_taunuFileName)
    
    st_WtFile   = TFile(st_WtFileName)

    WWFile = TFile(WWFileName)
    WZFile = TFile(WZFileName)
    ZZFile = TFile(ZZFileName)
    
    ZleplepgammaFile = TFile(ZleplepgammaFileName)
    ZtautaugammaFile = TFile(ZtautaugammaFileName)
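    # Note: the repetitive Np0-Np5 blocks above (and the matching Get("GammaLepton")
    # calls below) could also be generated in a loop.  A sketch, not part of the
    # original script, for the electron-channel W+jets samples:
    #   WlepnuFiles = [TFile(path + "Wenu_Np%d.root" % i) for i in range(6)]
    #   WlepnuTrees = [f.Get("GammaLepton") for f in WlepnuFiles]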

    ###########################################

    
    wino_600_200Tree = wino_600_200File.Get("GammaLepton")
    wino_600_500Tree = wino_600_500File.Get("GammaLepton")
    wino_1000_200Tree = wino_1000_200File.Get("GammaLepton")
    wino_1500_300Tree = wino_1500_300File.Get("GammaLepton")

    WlepnuTree_Np0 = WlepnuFile_Np0.Get("GammaLepton")
    WlepnuTree_Np1 = WlepnuFile_Np1.Get("GammaLepton")
    WlepnuTree_Np2 = WlepnuFile_Np2.Get("GammaLepton")
    WlepnuTree_Np3 = WlepnuFile_Np3.Get("GammaLepton")
    WlepnuTree_Np4 = WlepnuFile_Np4.Get("GammaLepton")
    WlepnuTree_Np5 = WlepnuFile_Np5.Get("GammaLepton")
    
    WtaunuTree_Np0 = WtaunuFile_Np0.Get("GammaLepton")
    WtaunuTree_Np1 = WtaunuFile_Np1.Get("GammaLepton")
    WtaunuTree_Np2 = WtaunuFile_Np2.Get("GammaLepton")
    WtaunuTree_Np3 = WtaunuFile_Np3.Get("GammaLepton")
    WtaunuTree_Np4 = WtaunuFile_Np4.Get("GammaLepton")
    WtaunuTree_Np5 = WtaunuFile_Np5.Get("GammaLepton")

    ZleplepTree_Np0 = ZleplepFile_Np0.Get("GammaLepton")
    ZleplepTree_Np1 = ZleplepFile_Np1.Get("GammaLepton")
    ZleplepTree_Np2 = ZleplepFile_Np2.Get("GammaLepton")
    ZleplepTree_Np3 = ZleplepFile_Np3.Get("GammaLepton")
    ZleplepTree_Np4 = ZleplepFile_Np4.Get("GammaLepton")
    ZleplepTree_Np5 = ZleplepFile_Np5.Get("GammaLepton")
    
    ZtautauTree_Np0 = ZtautauFile_Np0.Get("GammaLepton")
    ZtautauTree_Np1 = ZtautauFile_Np1.Get("GammaLepton")
    ZtautauTree_Np2 = ZtautauFile_Np2.Get("GammaLepton")
    ZtautauTree_Np3 = ZtautauFile_Np3.Get("GammaLepton")
    ZtautauTree_Np4 = ZtautauFile_Np4.Get("GammaLepton")
    ZtautauTree_Np5 = ZtautauFile_Np5.Get("GammaLepton")
    
    WgammaTree_Np0 = WgammaFile_Np0.Get("GammaLepton")
    WgammaTree_Np1 = WgammaFile_Np1.Get("GammaLepton")
    WgammaTree_Np2 = WgammaFile_Np2.Get("GammaLepton")
    WgammaTree_Np3 = WgammaFile_Np3.Get("GammaLepton")
    WgammaTree_Np4 = WgammaFile_Np4.Get("GammaLepton")
    WgammaTree_Np5 = WgammaFile_Np5.Get("GammaLepton")
    
    ttbarTree = ttbarFile.Get("GammaLepton")
    
    st_tchan_lepnuTree   = st_tchan_lepnuFile.Get("GammaLepton")
    st_tchan_taunuTree = st_tchan_taunuFile.Get("GammaLepton")
    
    st_schan_lepnuTree   = st_schan_lepnuFile.Get("GammaLepton")
    st_schan_taunuTree = st_schan_taunuFile.Get("GammaLepton")
    
    st_WtTree   = st_WtFile.Get("GammaLepton")

    WWTree = WWFile.Get("GammaLepton")
    WZTree = WZFile.Get("GammaLepton")
    ZZTree = ZZFile.Get("GammaLepton")
    
    ZleplepgammaTree = ZleplepgammaFile.Get("GammaLepton")
    ZtautaugammaTree = ZtautaugammaFile.Get("GammaLepton")
    
    ##############################
    # and now the weights

    # wino_600_200_scale = 7.005
    # wino_600_500_scale = 3.03021
    # wino_1000_200_scale = 4.1325
    # wino_1500_300_scale = 0.16
    # Wlepnu_Np0_scale = 12.0052623622
    # Wlepnu_Np1_scale = 3.13076456857
    # Wlepnu_Np2_scale = 0.60296853897
    # Wlepnu_Np3_scale = 0.603183318846
    # Wlepnu_Np4_scale = 0.62088
    # Wlepnu_Np5_scale = 0.600008571551
    # Wtaunu_Np0_scale = 12.1457006649
    # Wtaunu_Np1_scale = 3.12868868923
    # Wtaunu_Np2_scale = 0.602359552172
    # Wtaunu_Np3_scale = 0.602586672951
    # Wtaunu_Np4_scale = 0.62088496708
    # Wtaunu_Np5_scale = 0.638769230769
    # Zleplep_Np0_scale = 0.631361988532
    # Zleplep_Np1_scale = 0.629541167757
    # Zleplep_Np2_scale = 0.625618828688
    # Zleplep_Np3_scale = 0.634090909091
    # Zleplep_Np4_scale = 0.6
    # Zleplep_Np5_scale = 0.51875
    # Ztautau_Np0_scale = 0.631228327261
    # Ztautau_Np1_scale = 0.631347664299
    # Ztautau_Np2_scale = 0.622916409433
    # Ztautau_Np3_scale = 0.640077378243
    # Ztautau_Np4_scale = 0.581269375646
    # Ztautau_Np5_scale = 0.48125
    # Wgamma_Np0_scale = 0.0129441737417
    # Wgamma_Np1_scale = 0.0635170304401
    # Wgamma_Np2_scale = 0.140920227273
    # Wgamma_Np3_scale = 0.140622611111
    # Wgamma_Np4_scale = 0.134589
    # Wgamma_Np5_scale = 0.123308
    # ttbar_scale = 0.0384505023442
    # st_tchan_lepnu_scale = 0.200916540624
    # st_tchan_taunu_scale = 0.201132004918
    # st_schan_lepnu_scale = 0.0092735093327
    # st_schan_taunu_scale = 0.00926981472204
    # st_Wt_scale = 0.0916407781992
    # WW_scale = 0.0342151663714
    # WZ_scale = 0.110873818259
    # ZZ_scale = 0.0252773011092
    # Zleplepgamma_scale = 0.963
    # Ztautaugamma_scale = 0.941960800016

    #################ntuple_pt25
    # wino_600_200_scale = 1.401
    # wino_600_500_scale = 3.03021
    # wino_1000_200_scale = 4.1325
    # wino_1500_300_scale = 0.16
    # Wlepnu_Np0_scale = 12.0052623622
    # Wlepnu_Np1_scale = 3.13076456857
    # Wlepnu_Np2_scale = 0.60296853897
    # Wlepnu_Np3_scale = 0.603183318846
    # Wlepnu_Np4_scale = 0.62088
    # Wlepnu_Np5_scale = 0.600008571551
    # Wtaunu_Np0_scale = 12.1457006649
    # Wtaunu_Np1_scale = 3.12868868923
    # Wtaunu_Np2_scale = 0.602359552172
    # Wtaunu_Np3_scale = 0.602586672951
    # Wtaunu_Np4_scale = 0.62088496708
    # Wtaunu_Np5_scale = 0.638769230769
    # Zleplep_Np0_scale = 0.631361988532
    # Zleplep_Np1_scale = 0.629541167757
    # Zleplep_Np2_scale = 0.625618828688
    # Zleplep_Np3_scale = 0.634090909091
    # Zleplep_Np4_scale = 0.6
    # Zleplep_Np5_scale = 0.51875
    # Ztautau_Np0_scale = 0.631228327261
    # Ztautau_Np1_scale = 0.631347664299
    # Ztautau_Np2_scale = 0.622916409433
    # Ztautau_Np3_scale = 0.640077378243
    # Ztautau_Np4_scale = 0.581269375646
    # Ztautau_Np5_scale = 0.48125
    # Wgamma_Np0_scale = 1.08706263428
    # Wgamma_Np1_scale = 0.734676952566
    # Wgamma_Np2_scale = 0.733754057143
    # Wgamma_Np3_scale = 0.149752323594
    # Wgamma_Np4_scale = 0.157524392683
    # Wgamma_Np5_scale = 0.1281354
    # ttbar_scale = 0.0384505023442
    # st_tchan_lepnu_scale = 0.200916540624
    # st_tchan_taunu_scale = 0.201132004918
    # st_Wt_scale = 0.0916407781992
    # WW_scale = 0.0342151663714
    # WZ_scale = 0.110873818259
    # ZZ_scale = 0.0252773011092
    # Zleplepgamma_scale = 0.963
    # Ztautaugamma_scale = 0.941960800016
    # gamma_Np1_scale = 4.06453310851
    # gamma_Np2_scale = 3.3709968686
    # gamma_Np3_scale = 1.38728943513
    # gamma_Np4_scale = 1.41464077802
    # gamma_Np5_scale = 1.23661096137


    if lepton == ELECTRON:
        wino_600_200_scale = 0.291875
        wino_600_500_scale = 2.69352
        wino_1000_200_scale = 4.1325
        wino_1500_300_scale = 0.0093269
        wino_1000_100_scale = 69.5
        wino_800_700_scale = 0.2328
        Wlepnu_Np0_scale = 12.0052623622
        Wlepnu_Np1_scale = 3.13076456857
        Wlepnu_Np2_scale = 0.60296853897
        Wlepnu_Np3_scale = 0.603183318846
        Wlepnu_Np4_scale = 0.62088
        Wlepnu_Np5_scale = 0.600008571551
        Wtaunu_Np0_scale = 12.1457006649
        Wtaunu_Np1_scale = 3.12868868923
        Wtaunu_Np2_scale = 0.602359552172
        Wtaunu_Np3_scale = 0.602586672951
        Wtaunu_Np4_scale = 0.62088496708
        Wtaunu_Np5_scale = 0.638769230769
        Zleplep_Np0_scale = 0.631361988532
        Zleplep_Np1_scale = 0.629541167757
        Zleplep_Np2_scale = 0.625618828688
        Zleplep_Np3_scale = 0.634090909091
        Zleplep_Np4_scale = 0.6
        Zleplep_Np5_scale = 0.51875
        Ztautau_Np0_scale = 0.631228327261
        Ztautau_Np1_scale = 0.631347664299
        Ztautau_Np2_scale = 0.622916409433
        Ztautau_Np3_scale = 0.640077378243
        Ztautau_Np4_scale = 0.581269375646
        Ztautau_Np5_scale = 0.48125
        Wgamma_Np0_scale = 0.0132834003639
        Wgamma_Np1_scale = 0.0651816146862
        Wgamma_Np2_scale = 0.144613309091
        Wgamma_Np3_scale = 0.144307893333
        Wgamma_Np4_scale = 0.13811616
        Wgamma_Np5_scale = 0.12653952
        ttbar_scale = 0.0384505023442
        st_tchan_lepnu_scale = 0.200916540624
        st_tchan_taunu_scale = 0.201132004918
        st_Wt_scale = 0.0916407781992
        WW_scale = 0.0342151663714
        WZ_scale = 0.110873818259
        ZZ_scale = 0.0252773011092
        Zleplepgamma_scale = 0.963
        Ztautaugamma_scale = 0.941960800016
        gamma_Np1_scale = 4.17064063358
        gamma_Np2_scale = 3.35244054801
        gamma_Np3_scale = 1.36994217452
        gamma_Np4_scale = 1.41464077802
        gamma_Np5_scale = 1.23661096137
    elif lepton == MUON:
        wino_600_200_scale = 0.291875
        Wlepnu_Np0_scale = 11.9925371604
        Wlepnu_Np1_scale = 3.13058966
        Wlepnu_Np2_scale = 0.601616497017
        Wlepnu_Np3_scale = 0.605913424797
        Wlepnu_Np4_scale = 0.606001176701
        Wlepnu_Np5_scale = 0.593142857143
        Wtaunu_Np0_scale = 12.1457006649
        Wtaunu_Np1_scale = 3.12868868923
        Wtaunu_Np2_scale = 0.602359552172
        Wtaunu_Np3_scale = 0.602586672951
        Wtaunu_Np4_scale = 0.62088496708
        Wtaunu_Np5_scale = 0.638769230769
        Zleplep_Np0_scale = 0.631664271554
        Zleplep_Np1_scale = 0.628327597475
        Zleplep_Np2_scale = 0.62551337696
        Zleplep_Np3_scale = 0.635795454545
        Zleplep_Np4_scale = 0.572916666667
        Zleplep_Np5_scale = 0.48125
        Ztautau_Np0_scale = 0.631228327261
        Ztautau_Np1_scale = 0.631347664299
        Ztautau_Np2_scale = 0.622916409433
        Ztautau_Np3_scale = 0.640077378243
        Ztautau_Np4_scale = 0.581269375646
        Ztautau_Np5_scale = 0.48125
        Wgamma_Np0_scale = 0.0132834003639
        Wgamma_Np1_scale = 0.0651816146862
        Wgamma_Np2_scale = 0.144613309091
        Wgamma_Np3_scale = 0.144307893333
        Wgamma_Np4_scale = 0.13811616
        Wgamma_Np5_scale = 0.12653952
        ttbar_scale = 0.0384505023442
        st_tchan_lepnu_scale = 0.201919368378
        st_tchan_taunu_scale = 0.201132004918
        st_Wt_scale = 0.0916407781992
        WW_scale = 0.0342151663714
        WZ_scale = 0.110873818259
        ZZ_scale = 0.0252773011092
        Zleplepgamma_scale = 0.963963963964
        Ztautaugamma_scale = 0.941960800016
        gamma_Np1_scale = 4.08704733658
        gamma_Np2_scale = 3.35244054801
        gamma_Np3_scale = 1.36994217452
        gamma_Np4_scale = 1.41464077802
        gamma_Np5_scale = 1.23661096137
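        # Note: this MUON branch only defines wino_600_200_scale, so with the weights
        # as written only signal == "600_200" can be trained in the muon channel; the
        # other signal choices below would raise a NameError here.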
        

    if signal == "600_200":
        factory.AddSignalTree(wino_600_200Tree, wino_600_200_scale)
    elif signal == "600_500":
        factory.AddSignalTree(wino_600_500Tree, wino_600_500_scale)
    elif signal == "1000_200":
        factory.AddSignalTree(wino_1000_200Tree, wino_1000_200_scale)
    elif signal == "1500_300":
        factory.AddSignalTree(wino_1500_300Tree, wino_1500_300_scale)
    else:
        print "*** signal designation not supported: %s ****" % signal
        sys.exit(1)

    factory.AddBackgroundTree(WlepnuTree_Np0, Wlepnu_Np0_scale)
    factory.AddBackgroundTree(WlepnuTree_Np1, Wlepnu_Np1_scale)
    factory.AddBackgroundTree(WlepnuTree_Np2, Wlepnu_Np2_scale)
    factory.AddBackgroundTree(WlepnuTree_Np3, Wlepnu_Np3_scale)
    factory.AddBackgroundTree(WlepnuTree_Np4, Wlepnu_Np4_scale)
    factory.AddBackgroundTree(WlepnuTree_Np5, Wlepnu_Np5_scale)
    
    #factory.AddBackgroundTree(WtaunuTree_Np0, Wtaunu_Np0_scale)
    factory.AddBackgroundTree(WtaunuTree_Np1, Wtaunu_Np1_scale)
    factory.AddBackgroundTree(WtaunuTree_Np2, Wtaunu_Np2_scale)
    factory.AddBackgroundTree(WtaunuTree_Np3, Wtaunu_Np3_scale)
    factory.AddBackgroundTree(WtaunuTree_Np4, Wtaunu_Np4_scale)
    factory.AddBackgroundTree(WtaunuTree_Np5, Wtaunu_Np5_scale)

    # factory.AddBackgroundTree(ZleplepTree_Np0, Zleplep_Np0_scale)
    # factory.AddBackgroundTree(ZleplepTree_Np1, Zleplep_Np1_scale)
    # factory.AddBackgroundTree(ZleplepTree_Np2, Zleplep_Np2_scale)
    # factory.AddBackgroundTree(ZleplepTree_Np3, Zleplep_Np3_scale)
    # factory.AddBackgroundTree(ZleplepTree_Np4, Zleplep_Np4_scale)
    # factory.AddBackgroundTree(ZleplepTree_Np5, Zleplep_Np5_scale)
    
    # factory.AddBackgroundTree(ZtautauTree_Np0, Ztautau_Np0_scale)
    # factory.AddBackgroundTree(ZtautauTree_Np1, Ztautau_Np1_scale)
    # factory.AddBackgroundTree(ZtautauTree_Np2, Ztautau_Np2_scale)
    # factory.AddBackgroundTree(ZtautauTree_Np3, Ztautau_Np3_scale)
    # factory.AddBackgroundTree(ZtautauTree_Np4, Ztautau_Np4_scale)
    # factory.AddBackgroundTree(ZtautauTree_Np5, Ztautau_Np5_scale)
    
    factory.AddBackgroundTree(WgammaTree_Np0, Wgamma_Np0_scale)
    factory.AddBackgroundTree(WgammaTree_Np1, Wgamma_Np1_scale)
    factory.AddBackgroundTree(WgammaTree_Np2, Wgamma_Np2_scale)
    factory.AddBackgroundTree(WgammaTree_Np3, Wgamma_Np3_scale)
    factory.AddBackgroundTree(WgammaTree_Np4, Wgamma_Np4_scale)
    factory.AddBackgroundTree(WgammaTree_Np5, Wgamma_Np5_scale)
    
    factory.AddBackgroundTree(ttbarTree, ttbar_scale)
    
    factory.AddBackgroundTree(st_tchan_lepnuTree, st_tchan_lepnu_scale)
    factory.AddBackgroundTree(st_tchan_taunuTree, st_tchan_taunu_scale)
    
    # factory.AddBackgroundTree(st_schan_lepnuTree, st_schan_lepnu_scale)
    # factory.AddBackgroundTree(st_schan_taunuTree, st_schan_taunu_scale)
    
    factory.AddBackgroundTree(st_WtTree, st_Wt_scale)

    factory.AddBackgroundTree(WWTree, WW_scale)
    factory.AddBackgroundTree(WZTree, WZ_scale)
    factory.AddBackgroundTree(ZZTree, ZZ_scale)
    
    factory.AddBackgroundTree(ZleplepgammaTree, Zleplepgamma_scale)
    factory.AddBackgroundTree(ZtautaugammaTree, Ztautaugamma_scale)
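    # The numbers passed above are global per-tree weights (cross-section scales).
    # Per-event weights could be layered on top of them, e.g. (hypothetical branch name,
    # not used by this script):
    #   factory.SetSignalWeightExpression("eventWeight")
    #   factory.SetBackgroundWeightExpression("eventWeight")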
    


    # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    # [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
    factory.AddVariable( "MET := sqrt(Metx*Metx+Mety*Mety)", 'F' )
    # factory.AddVariable( "HT", 'F' )
    factory.AddVariable( "PhotonPt[0]", 'F' )
    #factory.AddVariable( "ElectronPt[0]", 'F' )
    if lepton == ELECTRON:
        factory.AddVariable( "mTel", 'F' )
    else:
        factory.AddVariable( "mTmu", 'F' )
    #factory.AddVariable( "abs(PhotonEta[0])", 'F' )
    #factory.AddVariable( "abs(ElectronEta[0])", 'F' )

    # Apply additional cuts on the signal and background sample. 
    # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )
    #mycutSig = TCut( "abs(PhotonEta[0]) < 2.01 && abs(ElectronEta[0]) < 2.01" ) 

    if lepton == ELECTRON:
        mycutSig = TCut( "sqrt((PhotonEta[0]-ElectronEta[0])*(PhotonEta[0]-ElectronEta[0]) + (PhotonPhi[0]-ElectronPhi[0])*(PhotonPhi[0]-ElectronPhi[0])) > 0.7")
    else:
        mycutSig = TCut( "sqrt((PhotonEta[0]-MuonEta[0])*(PhotonEta[0]-MuonEta[0]) + (PhotonPhi[0]-MuonPhi[0])*(PhotonPhi[0]-MuonPhi[0])) > 0.7")
    #mycutSig = TCut( "PhotonPt[0] > 85000" ) 
    mycutBkg = mycutSig 
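    # Note: the delta-R above uses the raw phi difference, which is not wrapped
    # into [-pi, pi].  A wrap-safe variant (an illustration only, not what this
    # script uses) could be written as:
    #   mycutSig = TCut("sqrt(pow(PhotonEta[0]-ElectronEta[0],2)"
    #                   " + pow(acos(cos(PhotonPhi[0]-ElectronPhi[0])),2)) > 0.7")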
    
    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
    factory.PrepareTrainingAndTestTree( mycutSig, mycutBkg,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" )

    # --------------------------------------------------------------------------------------------------

    # ---- Book MVA methods
    #
    # please look up the various method configuration options in the corresponding cxx files, e.g.:
    # src/MethodCuts.cxx, etc., or here: http://tmva.sourceforge.net/optionRef.html
    # it is possible to preset ranges in the option string in which the cut optimisation should be done:
    # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

    # Cut optimisation
    if "Cuts" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "Cuts",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" )

    if "CutsD" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsD",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" )

    if "CutsPCA" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsPCA",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" )

    # if "CutsGA" in mlist:
    #     factory.BookMethod( TMVA.Types.kCuts, "CutsGA",
    #                         "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" )

    # if "CutsSA" in mlist:
    #     factory.BookMethod( TMVA.Types.kCuts, "CutsSA",
    #                         "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" )


    # --------------------------------------------------------------------------------------------------
            
    # ---- Now you can tell the factory to train, test, and evaluate the MVAs. 

    # Train MVAs
    factory.TrainAllMethods()
    
    # Test MVAs
    factory.TestAllMethods()
    
    # Evaluate MVAs
    factory.EvaluateAllMethods()    
    
    # Save the output.
    outputFile.Close()
    
    print "=== wrote root file %s\n" % outfname
    print "=== TMVAClassification is done!\n"
    
    # open the GUI for the result macros    
    gROOT.ProcessLine( "TMVAGui(\"%s\")" % outfname )
    
    # keep the ROOT thread running
    gApplication.Run() 
Ejemplo n.º 26
0
def main(): # runs the program
  checkRootVer() # check that ROOT version is correct
  
  try: # retrieve command line options
    shortopts   = "d:o:v:w:y:h?" # possible command line options
    longopts    = ["dataset=",
                   "option=",
                   "where=",
                   "year=",
                   "verbose",
                   "help",
                   "usage"]
    opts, args = getopt.getopt( sys.argv[1:], shortopts, longopts ) # associates command line inputs to variables
  
  except getopt.GetoptError: # output error if command line argument invalid
    print("ERROR: unknown options in argument %s" %sys.argv[1:])
    usage()
    sys.exit(1)
  
  myArgs = np.array([ # Stores the command line arguments   
    ['-d','--dataset','dataset','dataset'],
    ['-w','--where','where','lpc'],
    ['-y','--year','year',2017],
    ['-o','--option','option', 0],    
    ['-v','--verbose','verbose', True]
  ], dtype = "object")
  
  for opt, arg in opts:
    if opt in myArgs[:,0]:
      index = np.where(myArgs[:,0] == opt)[0][0] # np.where returns a tuple of arrays
      myArgs[index,3] = str(arg) # override the variables with the command line argument
    elif opt in myArgs[:,1]:
      index = np.where(myArgs[:,1] == opt)[0][0] 
      myArgs[index,3] = arg
    if opt in ("-?", "-h", "--help", "--usage"): # provides command line help
      usage()
      sys.exit(0)
  
  # Initialize some containers
  bkg_list = []
  bkg_trees_list = []
  sig_list = []
  sig_trees_list = []
  
  # Initialize some variables after reading in arguments
  option_index = np.where(myArgs[:,2] == 'option')[0][0]
  dataset_index = np.where(myArgs[:,2] == 'dataset')[0][0]
  verbose_index = np.where(myArgs[:,2] == 'verbose')[0][0]
  where_index = np.where(myArgs[:,2] == 'where')[0][0]
  year_index = np.where(myArgs[:,2] == 'year')[0][0]

  DATASETPATH = myArgs[dataset_index][3]
  DATASET = DATASETPATH.split("/")[0]
  OPTION = str(myArgs[option_index][3])   # coerce to str so the "0"/"1" comparisons below also work for the default value
  VERBOSE = myArgs[verbose_index][3]
  WHERE = myArgs[where_index][3]
  YEAR = int(myArgs[year_index][3])       # coerce to int so the == 2017/2018 comparisons below also work for command-line input
  
  if WHERE == "lpc":
    if YEAR == 2017:
      inputDir = varsList.inputDirLPC2017
    elif YEAR == 2018:
      inputDir = varsList.inputDirLPC2018
  else:
    if YEAR == 2017:
      inputDir = varsList.inputDirBRUX2017
    elif YEAR == 2018:
      inputDir = varsList.inputDirBRUX2018
 
  if OPTION == "0":
    print("Using Option 0: default varList")
    varList = varsList.varList["DNN"]
  
  elif OPTION == "1":
    print("Using Option 1: selected data from {}".format(DATASETPATH))
    varsListHPO = open( DATASETPATH + "/varsListHPO.txt", "r" ).readlines()
    varList = []
    START = False
    for line in varsListHPO:
      if START == True:
        varList.append(str(line.strip()))
      if "Variable List:" in line:
        START = True

  numVars = len(varList)
  outf_key = str("Keras_" + str(numVars) + "vars") 
  OUTF_NAME = DATASET + "/weights/TMVA_" + outf_key + ".root"
  outputfile = TFile( OUTF_NAME, "RECREATE" )

  # initialize and set-up TMVA factory
  
  factory = TMVA.Factory( "Training", outputfile,
    "!V:!ROC:Silent:Color:!DrawProgressBar:Transformations=I;:AnalysisType=Classification" )
    
  factory.SetVerbose(bool( myArgs[verbose_index,3] ) )
  (TMVA.gConfig().GetIONames()).fWeightFileDir = "weights/" + outf_key
  
  # initialize and set-up TMVA loader
  
  loader = TMVA.DataLoader( DATASET )
  
  if OPTION == "0":
    for var in varList:
      if var[0] == "NJets_MultiLepCalc": loader.AddVariable(var[0],var[1],var[2],'I')
      else: loader.AddVariable(var[0],var[1],var[2],"F")
  if OPTION == "1":
    for var in varList:
      if var == "NJets_MultiLepCalc": loader.AddVariable(var,"","","I")
      else: loader.AddVariable(var,"","","F")
 
  # add signal files
  if YEAR == 2017:
    for i in range( len( varsList.sig2017_2 ) ):
      sig_list.append( TFile.Open( inputDir + varsList.sig2017_2[i] ) )
      sig_trees_list.append( sig_list[i].Get("ljmet") )
      sig_trees_list[i].GetEntry(0)
      loader.AddSignalTree( sig_trees_list[i] )
      
  elif YEAR == 2018:
    for i in range( len( varsList.sig2018_2 ) ):
      sig_list.append( TFile.Open( inputDir + varsList.sig2018_2[i] ) )
      sig_trees_list.append( sig_list[i].Get("ljmet") )
      sig_trees_list[i].GetEntry(0)
      loader.AddSignalTree( sig_trees_list[i] )
  
  # add background files
  if YEAR == 2017:
    for i in range( len( varsList.bkg2017_2 ) ):
      bkg_list.append( TFile.Open( inputDir + varsList.bkg2017_2[i] ) )
      bkg_trees_list.append( bkg_list[i].Get( "ljmet" ) )
      bkg_trees_list[i].GetEntry(0)

      if bkg_trees_list[i].GetEntries() == 0:
        continue
      loader.AddBackgroundTree( bkg_trees_list[i] )

  elif YEAR == 2018:
    for i in range( len( varsList.bkg2018_2 ) ):
      bkg_list.append( TFile.Open( inputDir + varsList.bkg2018_2[i] ) )
      bkg_trees_list.append( bkg_list[i].Get( "ljmet" ) )
      bkg_trees_list[i].GetEntry(0)

      if bkg_trees_list[i].GetEntries() == 0:
        continue
      loader.AddBackgroundTree( bkg_trees_list[i] )
  
  loader.SetSignalWeightExpression( weightStrS )
  loader.SetBackgroundWeightExpression( weightStrB )
  
  mycutSig = TCut( cutStrS )
  mycutBkg = TCut( cutStrB )
  
  loader.PrepareTrainingAndTestTree( mycutSig, mycutBkg, 
    "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V"
  )
 
######################################################
######################################################
######                                          ######
######            K E R A S   D N N             ######
######                                          ######
######################################################
######################################################
  HIDDEN=0
  NODES=0
  LRATE=0.
  PATTERN=""
  REGULATOR=""
  ACTIVATION=""
  BATCH_SIZE=0 
  # modify this when implementing hyper parameter optimization:
  model_name = 'TTTT_' + str(numVars) + 'vars_model.h5'
  
  EPOCHS = 100
  PATIENCE = 20
  
  # edit these based on hyper parameter optimization results
  if OPTION == "0":
    HIDDEN = 3
    NODES = 100
    LRATE = 0.01
    PATTERN = 'static'
    REGULATOR = 'none'
    ACTIVATION = 'relu'
    BATCH_SIZE = 256
  if OPTION == "1":
    datasetDir = os.listdir(DATASETPATH)
    for file in datasetDir:
      if "params" in file: optFileName = file
    optFile = open(DATASETPATH + "/" + optFileName,"r").readlines()
    START = False
    for line in optFile:
      if START == True:
        if "Hidden" in line: HIDDEN = int(line.split(":")[1].strip())
        if "Initial" in line: NODES = int(line.split(":")[1].strip())
        if "Batch" in line: BATCH_SIZE = 2**int(line.split(":")[1].strip())
        if "Learning" in line: LRATE = float(line.split(":")[1].strip())
        if "Pattern" in line: PATTERN = str(line.split(":")[1].strip())
        if "Regulator" in line: REGULATOR = str(line.split(":")[1].strip())
        if "Activation" in line: ACTIVATION = str(line.split(":")[1].strip())
      if "Optimized Parameters:" in line: START = True
  kerasSetting = '!H:!V:VarTransform=G:FilenameModel=' + model_name + \
                 ':SaveBestOnly=true' + \
                 ':NumEpochs=' + str(EPOCHS) + \
                 ':BatchSize=' + str(BATCH_SIZE) + \
                 ':TriesEarlyStopping=' + str(PATIENCE)
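  # With the Option 0 defaults above, kerasSetting expands to something like:
  #   "!H:!V:VarTransform=G:FilenameModel=TTTT_<numVars>vars_model.h5:SaveBestOnly=true:NumEpochs=100:BatchSize=256:TriesEarlyStopping=20"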
  
  model = build_model(HIDDEN,NODES,LRATE,REGULATOR,PATTERN,ACTIVATION,numVars)
  model.save( model_name )
  model.summary()
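  # build_model is defined elsewhere in this script; a minimal sketch consistent
  # with the call above (assumptions: Keras Sequential API, Adam optimiser, the
  # 'static' pattern meaning constant layer width, regulator handling omitted):
  #   def build_model(hidden, nodes, lrate, regulator, pattern, activation, input_dim):
  #     from keras.models import Sequential
  #     from keras.layers import Dense
  #     from keras.optimizers import Adam
  #     model = Sequential()
  #     model.add(Dense(nodes, input_dim=input_dim, activation=activation))
  #     for _ in range(hidden - 1):
  #       model.add(Dense(nodes, activation=activation))
  #     model.add(Dense(2, activation='softmax'))  # two-class output expected by TMVA PyKeras
  #     model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=lrate), metrics=['accuracy'])
  #     return model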
  
  factory.BookMethod(
    loader,
    TMVA.Types.kPyKeras,
    'PyKeras',
    kerasSetting
  )
  
  factory.TrainAllMethods()
  factory.TestAllMethods()
  factory.EvaluateAllMethods()
  
  outputfile.Close()
  
  print("Finished training in " + str((time.time() - START_TIME) / 60.0) + " minutes.")
  
  ROC = factory.GetROCIntegral( DATASET, 'PyKeras')
  print('ROC value is: {}'.format(ROC))
  if OPTION == "1":
   varsListHPOtxt = open(DATASETPATH + "/varsListHPO.txt","a")
   varsListHPOtxt.write("ROC Value: {}".format(ROC))
Ejemplo n.º 27
0
def main():

    try:
        # retrieve command line options
        shortopts = "m:i:t:o:vh?"
        longopts = [
            "methods=", "inputfile=", "inputtrees=", "outputfile=", "verbose",
            "help", "usage"
        ]
        opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts)

    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    infname = DEFAULT_INFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname = DEFAULT_OUTFNAME
    methods = DEFAULT_METHODS
    verbose = False
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            a = a.strip()
            trees = a.rsplit(' ')
            trees.sort()
            trees.reverse()
            if len(trees) - trees.count('') != 2:
                print "ERROR: need to give two trees (each one for signal and background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True

    # Print methods
    mlist = methods.replace(' ', ',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut

    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)

    # The logon macro is not loaded automatically through PyROOT (it loads the TMVA library); also load the GUI
    gROOT.SetMacroPath("./")
    gROOT.Macro("./TMVAlogon.C")
    gROOT.LoadMacro("./TMVAGui.C")

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile(outfname, 'RECREATE')

    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory(
        "TMVAClassification", outputFile,
        "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification"
    )

    # Set verbosity
    factory.SetVerbose(verbose)

    # If you wish to modify default settings
    # (please check "src/Config.h" to see all available global options)
    #    (TMVA.gConfig().GetVariablePlotting()).fTimesRMS = 8.0
    #    (TMVA.gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"

    # Define the input variables that shall be used for the classifier training
    # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    # [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
    factory.AddVariable("myvar1 := var1+var2", 'F')
    factory.AddVariable("myvar2 := var1-var2", "Expression 2", "", 'F')
    factory.AddVariable("var3", "Variable 3", "units", 'F')
    factory.AddVariable("var4", "Variable 4", "units", 'F')

    # You can add so-called "Spectator variables", which are not used in the MVA training,
    # but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
    # input variables, the response values of all trained MVAs, and the spectator variables
    factory.AddSpectator("spec1:=var1*2", "Spectator 1", "units", 'F')
    factory.AddSpectator("spec2:=var1*3", "Spectator 2", "units", 'F')

    # Read input data
    if gSystem.AccessPathName(infname) != 0:
        gSystem.Exec("wget http://root.cern.ch/files/" + infname)

    input = TFile.Open(infname)

    # Get the signal and background trees for training
    signal = input.Get(treeNameSig)
    background = input.Get(treeNameBkg)

    # Global event weights (see below for setting event-wise weights)
    signalWeight = 1.0
    backgroundWeight = 1.0

    # ====== register trees ====================================================
    #
    # the following method is the preferred one:
    # you can add an arbitrary number of signal or background trees
    factory.AddSignalTree(signal, signalWeight)
    factory.AddBackgroundTree(background, backgroundWeight)

    # To give different trees for training and testing, do as follows:
    #    factory.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" )
    #    factory.AddSignalTree( signalTestTree,     signalTestWeight,  "Test" )

    # Use the following code instead of the above two or four lines to add signal and background
    # training and test events "by hand"
    # NOTE that in this case one should not give expressions (such as "var1+var2") in the input
    #      variable definition, but simply compute the expression before adding the event
    #
    #    # --- begin ----------------------------------------------------------
    #
    # ... *** please lookup code in TMVA/macros/TMVAClassification.C ***
    #
    #    # --- end ------------------------------------------------------------
    #
    # ====== end of register trees ==============================================
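    #    A sketch of that "by hand" interface of the old (pre-DataLoader) Factory,
    #    assuming four input variables per event (see TMVA/macros/TMVAClassification.C
    #    for the authoritative version):
    #
    #      from ROOT import std
    #      vars = std.vector('double')(4)
    #      # ... fill vars[0..3] with the input values computed for one event ...
    #      factory.AddSignalTrainingEvent( vars, signalWeight )
    #      factory.AddBackgroundTrainingEvent( vars, backgroundWeight )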

    # Set individual event weights (the variables must exist in the original TTree)
    #    for signal    : factory.SetSignalWeightExpression    ("weight1*weight2");
    #    for background: factory.SetBackgroundWeightExpression("weight1*weight2");
    factory.SetBackgroundWeightExpression("weight")

    # Apply additional cuts on the signal and background sample.
    # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )
    mycutSig = TCut("")
    mycutBkg = TCut("")

    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
    factory.PrepareTrainingAndTestTree(
        mycutSig, mycutBkg,
        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V"
    )

    # --------------------------------------------------------------------------------------------------

    # ---- Book MVA methods
    #
    # please look up the various method configuration options in the corresponding cxx files, e.g.:
    # src/MethodCuts.cxx, etc., or here: http://tmva.sourceforge.net/optionRef.html
    # it is possible to preset ranges in the option string in which the cut optimisation should be done:
    # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

    # Cut optimisation
    if "Cuts" in mlist:
        factory.BookMethod(
            TMVA.Types.kCuts, "Cuts",
            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart")

    if "CutsD" in mlist:
        factory.BookMethod(
            TMVA.Types.kCuts, "CutsD",
            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate"
        )

    if "CutsPCA" in mlist:
        factory.BookMethod(
            TMVA.Types.kCuts, "CutsPCA",
            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA"
        )

    if "CutsGA" in mlist:
        factory.BookMethod(
            TMVA.Types.kCuts, "CutsGA",
            "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95"
        )

    if "CutsSA" in mlist:
        factory.BookMethod(
            TMVA.Types.kCuts, "CutsSA",
            "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale"
        )

    # Likelihood ("naive Bayes estimator")
    if "Likelihood" in mlist:
        factory.BookMethod(
            TMVA.Types.kLikelihood, "Likelihood",
            "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50"
        )

    # Decorrelated likelihood
    if "LikelihoodD" in mlist:
        factory.BookMethod(
            TMVA.Types.kLikelihood, "LikelihoodD",
            "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate"
        )

    # PCA-transformed likelihood
    if "LikelihoodPCA" in mlist:
        factory.BookMethod(
            TMVA.Types.kLikelihood, "LikelihoodPCA",
            "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA"
        )

    # Use a kernel density estimator to approximate the PDFs
    if "LikelihoodKDE" in mlist:
        factory.BookMethod(
            TMVA.Types.kLikelihood, "LikelihoodKDE",
            "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50"
        )

    # Use a variable-dependent mix of splines and kernel density estimator
    if "LikelihoodMIX" in mlist:
        factory.BookMethod(
            TMVA.Types.kLikelihood, "LikelihoodMIX",
            "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50"
        )

    # Test the multi-dimensional probability density estimator
    # here are the options strings for the MinMax and RMS methods, respectively:
    #      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
    #      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
    if "PDERS" in mlist:
        factory.BookMethod(
            TMVA.Types.kPDERS, "PDERS",
            "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600"
        )

    if "PDERSD" in mlist:
        factory.BookMethod(
            TMVA.Types.kPDERS, "PDERSD",
            "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate"
        )

    if "PDERSPCA" in mlist:
        factory.BookMethod(
            TMVA.Types.kPDERS, "PDERSPCA",
            "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA"
        )

# Multi-dimensional likelihood estimator using self-adapting phase-space binning
    if "PDEFoam" in mlist:
        factory.BookMethod(
            TMVA.Types.kPDEFoam, "PDEFoam",
            "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T"
        )

    if "PDEFoamBoost" in mlist:
        factory.BookMethod(
            TMVA.Types.kPDEFoam, "PDEFoamBoost",
            "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T"
        )

    # K-Nearest Neighbour classifier (KNN)
    if "KNN" in mlist:
        factory.BookMethod(
            TMVA.Types.kKNN, "KNN",
            "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim"
        )

    # H-Matrix (chi-squared) method
    if "HMatrix" in mlist:
        factory.BookMethod(TMVA.Types.kHMatrix, "HMatrix", "!H:!V")

    # Linear discriminant (same as Fisher discriminant)
    if "LD" in mlist:
        factory.BookMethod(
            TMVA.Types.kLD, "LD",
            "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10"
        )

    # Fisher discriminant (same as LD)
    if "Fisher" in mlist:
        factory.BookMethod(
            TMVA.Types.kFisher, "Fisher",
            "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10"
        )

    # Fisher with Gauss-transformed input variables
    if "FisherG" in mlist:
        factory.BookMethod(TMVA.Types.kFisher, "FisherG",
                           "H:!V:VarTransform=Gauss")

    # Composite classifier: ensemble (tree) of boosted Fisher classifiers
    if "BoostedFisher" in mlist:
        factory.BookMethod(
            TMVA.Types.kFisher, "BoostedFisher",
            "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2"
        )

    # Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
    if "FDA_MC" in mlist:
        factory.BookMethod(
            TMVA.Types.kFDA, "FDA_MC",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1"
        )

    if "FDA_GA" in mlist:
        factory.BookMethod(
            TMVA.Types.kFDA, "FDA_GA",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1"
        )

    if "FDA_SA" in mlist:
        factory.BookMethod(
            TMVA.Types.kFDA, "FDA_SA",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale"
        )

    if "FDA_MT" in mlist:
        factory.BookMethod(
            TMVA.Types.kFDA, "FDA_MT",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch"
        )

    if "FDA_GAMT" in mlist:
        factory.BookMethod(
            TMVA.Types.kFDA, "FDA_GAMT",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim"
        )

    if "FDA_MCMT" in mlist:
        factory.BookMethod(
            TMVA.Types.kFDA, "FDA_MCMT",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20"
        )

    # TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
    if "MLP" in mlist:
        factory.BookMethod(
            TMVA.Types.kMLP, "MLP",
            "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator"
        )

    if "MLPBFGS" in mlist:
        factory.BookMethod(
            TMVA.Types.kMLP, "MLPBFGS",
            "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator"
        )

    if "MLPBNN" in mlist:
        factory.BookMethod(
            TMVA.Types.kMLP, "MLPBNN",
            "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator"
        )  # BFGS training with bayesian regulators

    # CF(Clermont-Ferrand)ANN
    if "CFMlpANN" in mlist:
        factory.BookMethod(TMVA.Types.kCFMlpANN, "CFMlpANN",
                           "!H:!V:NCycles=2000:HiddenLayers=N+1,N"
                           )  # n_cycles:#nodes:#nodes:...

    # Tmlp(Root)ANN
    if "TMlpANN" in mlist:
        factory.BookMethod(
            TMVA.Types.kTMlpANN, "TMlpANN",
            "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"
        )  # n_cycles:#nodes:#nodes:...

    # Support Vector Machine
    if "SVM" in mlist:
        factory.BookMethod(TMVA.Types.kSVM, "SVM",
                           "Gamma=0.25:Tol=0.001:VarTransform=Norm")

    # Boosted Decision Trees
    if "BDTG" in mlist:
        factory.BookMethod(
            TMVA.Types.kBDT, "BDTG",
            "!H:!V:NTrees=1000:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2"
        )

    if "BDT" in mlist:
        factory.BookMethod(
            TMVA.Types.kBDT, "BDT",
            "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20"
        )

    if "BDTB" in mlist:
        factory.BookMethod(
            TMVA.Types.kBDT, "BDTB",
            "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20"
        )

    if "BDTD" in mlist:
        factory.BookMethod(
            TMVA.Types.kBDT, "BDTD",
            "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate"
        )

    # RuleFit -- TMVA implementation of Friedman's method
    if "RuleFit" in mlist:
        factory.BookMethod(
            TMVA.Types.kRuleFit, "RuleFit",
            "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02"
        )

    # --------------------------------------------------------------------------------------------------

    # ---- Now you can tell the factory to train, test, and evaluate the MVAs.

    # Train MVAs
    factory.TrainAllMethods()

    # Test MVAs
    factory.TestAllMethods()

    # Evaluate MVAs
    factory.EvaluateAllMethods()

    # Save the output.
    outputFile.Close()

    print "=== wrote root file %s\n" % outfname
    print "=== TMVAClassification is done!\n"

    # open the GUI for the result macros
    gROOT.ProcessLine("TMVAGui(\"%s\")" % outfname)

    # keep the ROOT thread running
    gApplication.Run()
Ejemplo n.º 28
0
def TMVAClassification(methods,
                       sigfname,
                       bkgfname,
                       optname,
                       channel,
                       trees,
                       verbose=False):  #="DecayTree,DecayTree"
    # Print methods
    mlist = methods.replace(' ', ',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Define trees
    trees = trees.split(",")
    if len(trees) - trees.count('') != 2:
        print "ERROR: need to give two trees (each one for signal and background)"
        print trees
        sys.exit(1)
    treeNameSig = trees[0]
    treeNameBkg = trees[1]

    # Print output file and directory
    outfname = "TMVA_%s_%s.root" % (channel, optname)
    myWeightDirectory = "weights_%s_%s" % (channel, optname)
    print "=== TMVAClassification: output will be written to:"
    print "=== %s" % outfname
    print "=== %s" % myWeightDirectory

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut

    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)

    # The logon macro is not loaded automatically through PyROOT (it loads the TMVA library); also load the GUI
    #gROOT.SetMacroPath( "./" )
    #gROOT.Macro       ( "./tmva/test/TMVAlogon.C" )
    #gROOT.LoadMacro   ( "./tmva/test/TMVAGui.C" ) ###Is this really necessary??

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Setup TMVA
    TMVA.Tools.Instance()

    # Output file
    outputFile = TFile(outfname, 'RECREATE')

    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory(
        "TMVAClassification", outputFile,
        "!V:!Silent:Color:DrawProgressBar:Transformations=I:AnalysisType=Classification"
    )

    # Set verbosity
    factory.SetVerbose(verbose)

    # Load data
    dataloader = TMVA.DataLoader("dataset")

    # If you wish to modify default settings
    # (please check "src/Config.h" to see all available global options)
    #    (TMVA.gConfig().GetVariablePlotting()).fTimesRMS = 8.0
    (TMVA.gConfig().GetIONames()).fWeightFileDir = myWeightDirectory
    # Define the input variables that shall be used for the classifier training
    # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    # [all types of expressions that can also be parsed by TTree::Draw( "expression" )]

    print "*** Training on channel:"
    print "*** %s" % channel
    print "***"
    '''
    if channel == "1":
        #dataloader.AddVariable( "pplus_ProbNNp",                      "Prob(p^{+})",                             "",     'F' );
        #dataloader.AddVariable( "Kminus_ProbNNk",                     "Prob(K^{-})",                             "",     'F' );

        dataloader.AddVariable( "pplus_PT",                           "P_{T}(p^{+})",                             "MeV", 'F' );
        dataloader.AddVariable( "Kminus_PT",                          "P_{T}(K^{-})",                             "MeV", 'F' );
        dataloader.AddVariable( "gamma_PT",                           "P_{T}(#gamma)",                            "MeV", 'F' );
        dataloader.AddVariable( "Lambda_1520_0_PT",                   "P_{T}(#Lambda(1520))",                     "MeV", 'F' );
        dataloader.AddVariable( "B_PT",                               "P_{T}(#Lambda_{b})",                       "MeV", 'F' );

        dataloader.AddVariable( "beta:=(-gamma_P+Kminus_P+pplus_P)/(gamma_P+Kminus_P+pplus_P)","#beta",             "MeV", 'F' );
        dataloader.AddVariable( "MomCons1:=-B_P+gamma_P+Lambda_1520_0_P","P_{tot,1}",                               "MeV", 'F' );
        dataloader.AddVariable( "MomCons2:=-Lambda_1520_0_P+Kminus_P+pplus_P","P_{tot,2}",                          "MeV", 'F' );

        dataloader.AddVariable( "Sum_Kminus_p_eta:=atanh(pplus_PZ/pplus_P)+atanh(Kminus_PZ/Kminus_P)","#eta(K^{-})+#eta(p^{+})","MeV", 'F' );
        dataloader.AddVariable( "Diff_Kminus_p_eta:=atanh(Kminus_PZ/Kminus_P)-atanh(pplus_PZ/pplus_P)","#eta(K^{-})-#eta(p^{+})","MeV", 'F' );

        dataloader.AddVariable( "pplus_IPCHI2_OWNPV",                 "#chi^{2}_{IP}(p^{+})",                       ""  ,  'F' );
        dataloader.AddVariable( "Kminus_IPCHI2_OWNPV",                "#chi^{2}_{IP}(K^{-})",                       ""  ,  'F' );
        dataloader.AddVariable( "B_IPCHI2_OWNPV",                     "#chi^{2}_{IP}(#Lambda_{b})",                 ""  ,  'F' );
        #dataloader.AddVariable( "gamma_IPCHI2_OWNPV",                 "IP #chi^{2}(#gamma)",                       ""  ,  'F' );
        #dataloader.AddVariable( "Lambda_1520_0_IP_OWNPV",             "IP(#Lambda(1520))",                        "mm",  'F' );
        #dataloader.AddVariable( "Lambda_1520_0_IPCHI2_OWNPV",         "IP#chi^{2}(#Lambda(1520))",               "",    'F' );
        
        dataloader.AddVariable( "Lambda_1520_0_OWNPV_CHI2",           "#chi^{2}_{vertex}(#Lambda(1520))",           ""  ,  'F' );
        dataloader.AddVariable( "B_OWNPV_CHI2",                       "#chi^{2}_{vertex}(#Lambda_{b})",             ""  ,  'F' );
        dataloader.AddVariable( "B_DIRA_OWNPV",                       "DIRA(#Lambda_{b})",                          ""  ,  'F' );
        
        #dataloader.AddVariable( "Lambda_1520_0_FDCHI2_OWNPV",         "FD #chi^{2}(#Lambda(1520))",               "",    'F' );
        dataloader.AddVariable( "B_FDCHI2_OWNPV",                     "#chi^{2}_{FD}(#Lambda_{b})",                 "",    'F' );
    '''

    if channel == "2":
        dataloader.AddVariable("pplus_PT", "P_{T}(p^{+})", "MeV", 'F')
        dataloader.AddVariable("Kminus_PT", "P_{T}(K^{-})", "MeV", 'F')
        dataloader.AddVariable("gamma_PT", "P_{T}(#gamma)", "MeV", 'F')
        dataloader.AddVariable("Lambda_1520_0_PT", "P_{T}(#Lambda*)", "MeV",
                               'F')
        dataloader.AddVariable("B_PT", "P_{T}(#Lambda_{b})", "MeV", 'F')

        dataloader.AddVariable(
            "beta:=(-gamma_P+Kminus_P+pplus_P)/(gamma_P+Kminus_P+pplus_P)",
            "#beta", "", 'F')
        #ok
        #dataloader.AddVariable( "MomCons1:=-B_P+gamma_P+Lambda_1520_0_P","P_{tot,1}",                               "MeV", 'F' );#BDT learned Mass check1
        dataloader.AddVariable("MomCons2:=-Lambda_1520_0_P+Kminus_P+pplus_P",
                               "P_{tot,2}", "MeV", 'F')
        #ok

        #dataloader.AddVariable( "Sum_Kminus_p_eta:=atanh(pplus_PZ/pplus_P)+atanh(Kminus_PZ/Kminus_P)","#eta(K^{-})+#eta(p^{+})","", 'F' );#99correlationL_eta
        dataloader.AddVariable(
            "Diff_Kminus_p_eta:=atanh(Kminus_PZ/Kminus_P)-atanh(pplus_PZ/pplus_P)",
            "#eta(K^{-})-#eta(p^{+})", "", 'F')
        dataloader.AddVariable(
            "Lambda_1520_0_eta:=atanh(Lambda_1520_0_PZ/Lambda_1520_0_P)",
            "#eta(#Lambda*)", "", 'F')
        dataloader.AddVariable("gamma_eta:=atanh(gamma_PZ/gamma_P)",
                               "#eta(#gamma)", "", 'F')

        dataloader.AddVariable("pplus_IPCHI2_OWNPV", "#chi^{2}_{IP}(p^{+})",
                               "", 'F')
        #dataloader.AddVariable( "Kminus_IPCHI2_OWNPV",                "#chi^{2}_{IP}(K^{-})",                       ""  ,  'F' );
        dataloader.AddVariable("B_IPCHI2_OWNPV", "#chi^{2}_{IP}(#Lambda_{b})",
                               "", 'F')
        dataloader.AddVariable("Lambda_1520_0_IPCHI2_OWNPV",
                               "#chi^{2}_{IP}(#Lambda*)", "", 'F')

        dataloader.AddVariable("Lambda_1520_0_OWNPV_CHI2",
                               "#chi^{2}_{vertex}(#Lambda*)", "", 'F')
        dataloader.AddVariable("B_OWNPV_CHI2",
                               "#chi^{2}_{vertex}(#Lambda_{b})", "", 'F')
        #dataloader.AddVariable( "B_BMassFit_chi2/B_BMassFit_nDOF",    "#chi^{2}_{DTF}/n_{dof}",                     ""  ,  'F' );#BDT learned Mass check1
        #dataloader.AddVariable( "B_PVFit_chi2/B_PVFit_nDOF",          "#chi^{2}_{DTF}/n_{dof}",                     ""  ,  'F' );#put it out because array

        #dataloader.AddVariable( "B_DIRA_OWNPV",                       "DIRA(#Lambda_{b})",                          ""  ,  'F' ); #not used by BDT
        #dataloader.AddVariable( "Lambda_1520_0_DIRA_OWNPV",           "DIRA(#Lambda*)",                        ""  ,  'F' ); #not used
        #dataloader.AddVariable( "Lambda_1520_0_FDCHI2_OWNPV",         "FD #chi^{2}(#Lambda*)",               "",    'F' ); #not used
        #dataloader.AddVariable( "B_FDCHI2_OWNPV",                     "#chi^{2}_{FD}(#Lambda_{b})",                 "",    'F' ); #not used

    # Add Spectator Variables: not used for Training but written in final TestTree
    #dataloader.AddSpectator( "B_M",                                   "M(#Lambda_{b})",                           "MeV");
    #dataloader.AddSpectator( "Lambda_1520_0_M",                       "M(#Lambda*)",                         "MeV");

    # Read input data
    if gSystem.AccessPathName(sigfname) != 0:
        print "Cannot find %s" % sigfname
    if gSystem.AccessPathName(bkgfname) != 0:
        print "Cannot find %s" % bkgfname

    inputSig = TFile.Open(sigfname)
    inputBkg = TFile.Open(bkgfname)

    # Get the signal and background trees for training
    signal = inputSig.Get(treeNameSig)
    background = inputBkg.Get(treeNameBkg)

    # Global event weights (see below for setting event-wise weights)
    signalWeight = 1.0
    backgroundWeight = 1.0

    # ====== register trees ====================================================
    #
    # the following method is the preferred one:
    # you can add an arbitrary number of signal or background trees
    dataloader.AddSignalTree(signal, signalWeight)
    dataloader.AddBackgroundTree(background, backgroundWeight)

    # To give different trees for training and testing, do as follows:
    #    dataloader.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" )
    #    dataloader.AddSignalTree( signalTestTree,     signalTestWeight,  "Test" )

    # Use the following code instead of the above two or four lines to add signal and background
    # training and test events "by hand"
    # NOTE that in this case one should not give expressions (such as "var1+var2") in the input
    #      variable definition, but simply compute the expression before adding the event
    #
    #    # --- begin ----------------------------------------------------------
    #
    # ... *** please lookup code in TMVA/macros/TMVAClassification.C ***
    #
    #    # --- end ------------------------------------------------------------
    #
    # ====== end of register trees ==============================================

    # Set individual event weights (the variables must exist in the original TTree)
    #    for signal    : dataloader.SetSignalWeightExpression    ("weight1*weight2");
    #    for background: dataloader.SetBackgroundWeightExpression("weight1*weight2");
    #dataloader.SetBackgroundWeightExpression( "weight" )

    # Apply additional cuts on the signal and background sample.
    # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )

    mycutSig = TCut(
        ""
    )  #"pplus_ProbNNp>0.2 && Kminus_ProbNNk>0.2 && B_PT>4000 && Lambda_1520_0_PT>1500 && gamma_PT>3000 && pplus_PT>1000 && B_FDCHI2_OWNPV>100 && pplus_IPCHI2_OWNPV>50 && Kminus_IPCHI2_OWNPV>40")# B_BKGCAT==0 directly applied in root sample
    #print(sigfname + str( mycutSig ) + treeNameSig)

    mycutBkg = TCut(
        "B_M>6120"
    )  #"pplus_ProbNNp>0.2 && Kminus_ProbNNk>0.2 && B_PT>4000 && Lambda_1520_0_PT>1500 && gamma_PT>3000 && pplus_PT>1000 && B_FDCHI2_OWNPV>100 && pplus_IPCHI2_OWNPV>50 && Kminus_IPCHI2_OWNPV>40 && B_M>6120")#(B_M>6120 || B_M<5120)" )
    #print(bkgfname + str( mycutBkg ) + treeNameBkg)

    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
    dataloader.PrepareTrainingAndTestTree(
        mycutSig, mycutBkg,
        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V"
    )
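    # nTrain_Signal=0 / nTrain_Background=0 leaves the train/test split to TMVA:
    # the events surviving the cuts above are divided between training and test
    # samples (SplitMode=Random shuffles them first).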

    # --------------------------------------------------------------------------------------------------

    # ---- Book MVA methods
    #
    # please look up the various method configuration options in the corresponding cxx files, e.g.
    # src/MethodCuts.cxx, etc., or here: http://tmva.sourceforge.net/optionRef.html
    # it is possible to preset ranges in the option string in which the cut optimisation should be done:
    # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1:...", where [2] is the third input variable

    # Cut optimisation
    if "Cuts" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kCuts, "Cuts",
            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart")

    if "CutsD" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kCuts, "CutsD",
            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate"
        )

    if "CutsPCA" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kCuts, "CutsPCA",
            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA"
        )

    if "CutsGA" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kCuts, "CutsGA",
            "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95"
        )

    if "CutsSA" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kCuts, "CutsSA",
            "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale"
        )

    # Likelihood ("naive Bayes estimator")
    if "Likelihood" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kLikelihood, "Likelihood",
            "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50"
        )

    # Decorrelated likelihood
    if "LikelihoodD" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kLikelihood, "LikelihoodD",
            "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate"
        )

    # PCA-transformed likelihood
    if "LikelihoodPCA" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kLikelihood, "LikelihoodPCA",
            "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA"
        )

    # Use a kernel density estimator to approximate the PDFs
    if "LikelihoodKDE" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kLikelihood, "LikelihoodKDE",
            "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50"
        )

    # Use a variable-dependent mix of splines and kernel density estimator
    if "LikelihoodMIX" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kLikelihood, "LikelihoodMIX",
            "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50"
        )

    # Test the multi-dimensional probability density estimator
    # here are the options strings for the MinMax and RMS methods, respectively:
    #      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
    #      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
    if "PDERS" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kPDERS, "PDERS",
            "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600"
        )

    if "PDERSD" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kPDERS, "PDERSD",
            "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate"
        )

    if "PDERSPCA" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kPDERS, "PDERSPCA",
            "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA"
        )

    # Multi-dimensional likelihood estimator using self-adapting phase-space binning
    if "PDEFoam" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kPDEFoam, "PDEFoam",
            "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T"
        )

    if "PDEFoamBoost" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kPDEFoam, "PDEFoamBoost",
            "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T"
        )

    # K-Nearest Neighbour classifier (KNN)
    if "KNN" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kKNN, "KNN",
            "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim"
        )

    # H-Matrix (chi-squared) method
    if "HMatrix" in mlist:
        factory.BookMethod(dataloader, TMVA.Types.kHMatrix, "HMatrix", "!H:!V")

    # Linear discriminant (same as Fisher discriminant)
    if "LD" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kLD, "LD",
            "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10"
        )

    # Fisher discriminant (same as LD)
    if "Fisher" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kFisher, "Fisher",
            "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10"
        )

    # Fisher with Gauss-transformed input variables
    if "FisherG" in mlist:
        factory.BookMethod(dataloader, TMVA.Types.kFisher, "FisherG",
                           "H:!V:VarTransform=Gauss")

    # Composite classifier: ensemble (tree) of boosted Fisher classifiers
    if "BoostedFisher" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kFisher, "BoostedFisher",
            "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2"
        )

    # Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
    if "FDA_MC" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kFDA, "FDA_MC",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1"
        )

    if "FDA_GA" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kFDA, "FDA_GA",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1"
        )

    if "FDA_SA" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kFDA, "FDA_SA",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale"
        )

    if "FDA_MT" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kFDA, "FDA_MT",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch"
        )

    if "FDA_GAMT" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kFDA, "FDA_GAMT",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim"
        )

    if "FDA_MCMT" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kFDA, "FDA_MCMT",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20"
        )

    # TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
    if "MLP" in mlist:
        factory.BookMethod(
            dataloader,
            TMVA.Types.kMLP,
            "MLP",
            #"!H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+3:TestRate=5:!UseRegulator" )#Try
            "!H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator"
        )  #Old

    if "MLPBFGS" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kMLP, "MLPBFGS",
            "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator"
        )

    if "MLPBNN" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kMLP, "MLPBNN",
            "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator"
        )  # BFGS training with bayesian regulators

    # CF(Clermont-Ferrand)ANN
    if "CFMlpANN" in mlist:
        factory.BookMethod(dataloader, TMVA.Types.kCFMlpANN, "CFMlpANN",
                           "!H:!V:NCycles=2000:HiddenLayers=N+1,N"
                           )  # n_cycles:#nodes:#nodes:...

    # Tmlp(Root)ANN
    if "TMlpANN" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kTMlpANN, "TMlpANN",
            "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"
        )  # n_cycles:#nodes:#nodes:...

    # Support Vector Machine
    if "SVM" in mlist:
        factory.BookMethod(dataloader, TMVA.Types.kSVM, "SVM",
                           "Gamma=0.25:Tol=0.001:VarTransform=Norm")

    # Boosted Decision Trees
    if "BDTG" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kBDT, "BDTG",
            "!H:!V:NTrees=600:BoostType=Grad:Shrinkage=0.1:UseBaggedGrad:GradBaggingFraction=0.73:SeparationType=GiniIndex:nCuts=15:MaxDepth=2"
        )  #Settings3
        #"!H:!V:NTrees=300:BoostType=Grad:Shrinkage=0.11:UseBaggedGrad:GradBaggingFraction=0.73:SeparationType=GiniIndex:nCuts=17:MaxDepth=4" )#AnaNote
        #"!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5" )#Old

    if "BDT" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kBDT, "BDT",
            "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning"
        )

    if "BDTB" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kBDT, "BDTB",
            "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning"
        )

    if "BDTD" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kBDT, "BDTD",
            "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate"
        )

    # RuleFit -- TMVA implementation of Friedman's method
    if "RuleFit" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kRuleFit, "RuleFit",
            "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02"
        )

    # --------------------------------------------------------------------------------------------------

    # ---- Now you can tell the factory to train, test, and evaluate the MVAs.

    # Train MVAs
    print("FLAG 0")
    factory.TrainAllMethods()

    # Test MVAs
    factory.TestAllMethods()

    # Evaluate MVAs
    factory.EvaluateAllMethods()

    # Save the output.
    outputFile.Close()

    print "=== wrote root file %s\n" % outfname
    print "=== TMVAClassification is done!\n"

    # open the GUI for the result macros
    if not gROOT.IsBatch(): TMVA.TMVAGui(outfname)
Ejemplo n.º 29
0
def main():

    try:
        # retrieve command line options
        shortopts = "m:i:n:d:k:l:t:o:s:vh?"
        longopts = [
            "methods=", "inputfile=", "nTrees=", "maxDepth=", "mass=",
            "varListKey=", "inputtrees=", "outputfile=", "seed=", "verbose",
            "help", "usage"
        ]
        opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts)

    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    infname = DEFAULT_INFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname = DEFAULT_OUTFNAME
    methods = DEFAULT_METHODS
    nTrees = DEFAULT_NTREES
    mDepth = DEFAULT_MDEPTH
    varListKey = DEFAULT_VARLISTKEY
    verbose = True
    SeedN = DEFAULT_SEED
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-d", "--maxDepth"):
            mDepth = a
        elif o in ("-l", "--varListKey"):
            varListKey = a
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-n", "--nTrees"):
            nTrees = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            a = a.strip()
            trees = a.rsplit(' ')
            trees.sort()
            trees.reverse()
            if len(trees) - trees.count('') != 2:
                print "ERROR: need to give two trees (each one for signal and background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-s", "--seed"):
            SeedN = long(a)
        elif o in ("-v", "--verbose"):
            verbose = True

    varList = varsList.varList[varListKey]
    nVars = str(len(varList)) + 'vars'
    Note = methods + '_' + varListKey + '_' + nVars + '_mDepth' + mDepth
    outfname = "dataset/weights/TMVA_" + Note + ".root"

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut, TRandom3

    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    fClassifier = TMVA.Factory(
        "VariableImportance",
        "!V:!ROC:!ModelPersistence:Silent:Color:!DrawProgressBar:AnalysisType=Classification"
    )
    str_xbitset = '{:053b}'.format(SeedN)

    seeddl = TMVA.DataLoader(str_xbitset)

    bdtSetting = '!H:!V:NTrees=%s:MaxDepth=%s' % (nTrees, mDepth)
    bdtSetting += ':MinNodeSize=2.5%:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20'
    bdtSetting += ':IgnoreNegWeightsInTraining=True'

    index = 52
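    # str_xbitset is the seed written out as a 53-bit string; the loop below
    # walks it from position 52 (the least significant bit) downwards, so bit i
    # of the seed decides whether the i-th entry of varList enters the training.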

    for iVar in varList:
        if (str_xbitset[index] == '1'):
            seeddl.AddVariable(iVar[0], iVar[1], iVar[2], 'F')
            print iVar[0]
        index = index - 1

    (TMVA.gConfig().GetIONames()).fWeightFileDir = "weights/" + Note

    inputDir = varsList.inputDir
    infname = "TTTT_TuneCP5_PSweights_13TeV-amcatnlo-pythia8_hadd.root"
    iFileSig = TFile.Open(inputDir + infname)
    sigChain = iFileSig.Get("ljmet")

    seeddl.AddSignalTree(sigChain)
    bkg_list = []
    bkg_trees_list = []
    bkgList = varsList.bkg

    for i in range(len(bkgList)):
        bkg_list.append(TFile.Open(inputDir + bkgList[i]))
        bkg_trees_list.append(bkg_list[i].Get("ljmet"))
        bkg_trees_list[i].GetEntry(0)

        if bkg_trees_list[i].GetEntries() == 0:
            continue
        seeddl.AddBackgroundTree(bkg_trees_list[i], 1)

    signalWeight = 1

    seeddl.SetSignalWeightExpression(weightStrS)
    seeddl.SetBackgroundWeightExpression(weightStrB)

    mycutSig = TCut(cutStrS)
    mycutBkg = TCut(cutStrB)

    seeddl.PrepareTrainingAndTestTree(
        mycutSig, mycutBkg,
        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V"
    )

    fClassifier.BookMethod(seeddl, TMVA.Types.kBDT, "BDT", bdtSetting)
    fClassifier.TrainAllMethods()
    fClassifier.TestAllMethods()
    fClassifier.EvaluateAllMethods()
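    # GetROCIntegral below returns the area under the ROC curve of the booked
    # BDT on the test sample of this dataloader (named after the bit string);
    # it serves as the figure of merit for this particular variable subset.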

    SROC = fClassifier.GetROCIntegral(str_xbitset, "BDT")
    print "ROC-integral : ", str_xbitset, " ", SROC
    print "SEED " + str_xbitset + " DONE"
    fClassifier.DeleteAllMethods()
    fClassifier.fMethodsMap.clear()
    print "=================================================================="
    print "=================================================================="
Ejemplo n.º 30
0
import array                       # array.array is used for the reader inputs

from ROOT import TMVA              # TMVA.Reader is used below

file1 = open('../mvavars1.txt')    # assumed path, by analogy with mvavars2/3
file2 = open('../mvavars2.txt')
file3 = open('../mvavars3.txt')

mvavars1 = []
mvavars2 = []
mvavars3 = []

for line in file1:
	mvavars1.append(line.strip())
for line in file2:
	mvavars2.append(line.strip())
for line in file3:
	print line
	mvavars3.append(line.strip())

reader3 = TMVA.Reader()
var3_ = []
for i, var3 in enumerate(mvavars1):
	var3_.append(array.array('f',[0]))
	reader3.AddVariable(var3,var3_[i])
reader3.BookMVA("MLP","weights/TMVAClassification_HZZ_2e2mu_MLP.weights.xml")
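# A minimal usage sketch (assumed, not part of the original snippet): after
# BookMVA, each candidate would be classified by filling the bound arrays and
# calling EvaluateMVA with the same method name; 'events' and the branch names
# are placeholders here.
#
#   for event in events:
#       for i, name in enumerate(mvavars1):
#           var3_[i][0] = getattr(event, name)   # fill the arrays bound above
#       mlp_output = reader3.EvaluateMVA("MLP")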


#weight	weight	ele1_pt	ele1_eta	ele1_phi	ele1_charge	ele1_trackIso	ele1_EcalIso	ele1_HcalIso	ele1_X	ele1_SIP	ele2_pt	ele2_eta	ele2_phi	ele2_charge	ele2_trackIso	ele2_EcalIso	ele2_HcalIso	ele2_X	ele2_SIP	ele3_pt	ele3_eta	ele3_phi	ele3_charge	ele3_trackIso	ele3_EcalIso	ele3_HcalIso	ele3_X	ele3_SIP	ele4_pt	ele4_eta	ele4_phi	ele4_charge	ele4_trackIso	ele4_EcalIso	ele4_HcalIso	ele4_X	ele4_SIP	

#worst_iso_X	second_worst_iso_X	worst_vertex	second_worst_vertex	mZ	mZstar	mbestH	index	channel	sample	end

def loop(hweights, sig, hs, c1):


	#------- sig events ---------
Ejemplo n.º 31
0
def main(o, args):

    # Import TMVA classes from ROOT
    from ROOT import TMVA, TFile, TCut, TString

    # Output file
    outputFile = TFile(o.outfile % {"label": o.label}, 'RECREATE')

    atype = "Classification"
    if hasattr(o, "type"):
        atype = str(o.type)
    factory = TMVA.Factory(
        "TMVAClassification", outputFile,
        "!V:!Silent:!Color:!DrawProgressBar:Transformations=I:AnalysisType=%s"
        % atype)

    # Set verbosity
    factory.SetVerbose(o.verbose)

    TMVA.Config.Instance().GetIONames().fWeightFileDir = o.weightsdir

    # variables
    if type(o.variables) == str:
        o.variables = [
            v.lstrip().rstrip() for v in o.variables.split(":") if v != ""
        ]
    allvars = ""
    for v in o.variables:
        factory.AddVariable(str(v))
        if allvars != "": allvars += ":"
        allvars += v.split(":=")[0].lstrip(" ").rstrip(" ")
    #print "variables %s" % allvars

    #print o.spectators
    for s in o.spectators:
        if not s in o.variables:
            factory.AddSpectator(str(s))

    # categories and sub categories
    categories = []
    subcategories = []
    if hasattr(o, "subcategories") and len(o.subcategories) > 0:
        subcategories = o.subcategories[0]
        for sc in o.subcategories[1:]:
            subcategories = map(
                lambda x: (TCut(x[0][0]) * TCut(x[1][0]), "%s_%s" %
                           (x[0][1], x[1][1])),
                itertools.product(subcategories, sc))
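            # The TCut product above acts as a logical AND of the two sub-cuts,
            # and the names are joined with an underscore, so each resulting
            # (cut, name) pair describes one combined sub-category.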

    for cut, name, vars in o.categories:
        myvars = allvars
        if vars != "":
            for v in vars.split(":"):
                myvars = myvars.replace(v, "").replace("::", ":")
            myvars = myvars.rstrip(":")

        vars = str(myvars)
        #print vars

        if len(subcategories) > 0:
            for subcut, subname in subcategories:
                if subname == "":
                    subname = subname.replace(" ", "").replace(
                        ">", "_gt_").replace("<", "_lt_").replace(
                            "=", "_eq_").replace("&", "_and_")
                fullname = "%s_%s" % (name, subname)
                categories.append(
                    (TCut(cut) * TCut(subcut), str(fullname), vars))
        else:
            categories.append((TCut(cut), str(name), vars))

    # load tree
    selection = TCut(o.selection)
    from ROOT import TTree
    from array import array
    for evclass, info in o.classes.iteritems():
        samples = info["samples"]
        for filePattern, name, weight, cut, ttype, ID in samples:
            #tcut=TCut(cut)*selection
            chain = mkChain(getListOfFiles(o.indir, filePattern), name)
            #            print ID
            #            br = array('i',[ID])
            #            #use an auxiliary tree to store the procID...
            #            tempf = TFile('procID_'+str(name.split('/')[-1])+'_'+str(ttype)+'.root', "RECREATE")
            #            TF = TTree("TF_"+str(name.split('/')[-1])+'_'+str(ttype), "friend tree with procID")
            #            TF.Branch("procID", br, "procID/I" )
            #            for i in range(chain.GetEntries()):
            #                TF.Fill()
            #            TF.Write()
            #            tempf.Close()
            #            #... and add it as a friend to the main chain
            #            chain.AddFriend('TF_'+str(name.split('/')[-1])+'_'+str(ttype),'procID_'+str(name.split('/')[-1])+'_'+str(ttype)+'.root')
            #then add the chain
            for s in o.commonCuts:
                cut = cut + " * (" + str(s) + ")"
            tcut = TCut(cut)
            factory.AddTree(chain, str(evclass), float(weight), tcut,
                            int(ttype))
        # weights
        if "weight" in info:
            weight = info["weight"]
            factory.AddSpectator(str("%s_wei := %s" % (evclass, weight)))
            factory.SetWeightExpression(str(weight), str(evclass))
        else:
            factory.SetWeightExpression("1.", str(evclass))

    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
    factory.PrepareTrainingAndTestTree(
        TCut(""), "SplitMode=Random:NormMode=NumEvents:!V")

    # --------------------------------------------------------------------------------------------------
    # Default method settings (may be overridden via o.settings)
    defaultSettings = {
        "BDT":
        "!H:!V:!CreateMVAPdfs:BoostType=Grad:UseBaggedBoost:NegWeightTreatment=NoNegWeightsInTraining"
        ":GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=2000:MinNodeSize=0.125"
        ":Shrinkage=0.1:NTrees=200:!UseYesNoLeaf:MaxDepth=3",
        "Cuts":
        "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart"
    }
    if "FisherD" in o.methods:
        mname = "FisherD%s" % o.label
        fcats = factory.BookMethod(TMVA.Types.kCategory, mname)

        for cut, name, vars in categories:
            print "booking sub-category classifier : %s %s %s" % (cut, name,
                                                                  vars)
            fcats.AddMethod(cut, vars, TMVA.Types.kFisher,
                            "%s_%s" % (mname, name),
                            "!H:!V:Fisher:!CreateMVAPdfs:VarTransform=D")

    if "Fisher" in o.methods:
        mname = "Fisher%s" % o.label
        fcats = factory.BookMethod(TMVA.Types.kCategory, TString(mname))

        for cut, name, vars in categories:
            print "booking sub-category classifier : %s %s %s" % (cut, name,
                                                                  vars)
            fcats.AddMethod(cut, vars, TMVA.Types.kFisher,
                            "%s_%s" % (mname, name),
                            "!H:!V:Fisher:!CreateMVAPdfs")

    if "Likelihood" in o.methods:
        mname = "Likelihood%s" % o.label
        fcats = factory.BookMethod(TMVA.Types.kCategory, mname)

        for cut, name, vars in categories:
            print "booking sub-category classifier : %s %s %s" % (cut, name,
                                                                  vars)
            fcats.AddMethod(
                cut, vars, TMVA.Types.kLikelihood, "%s_%s" % (mname, name),
                "!H:!V:!CreateMVAPdfs:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=150"
            )

    if "LikelihoodD" in o.methods:
        mname = "LikelihoodD%s" % o.label
        fcats = factory.BookMethod(TMVA.Types.kCategory, mname)

        for cut, name, vars in categories:
            print "booking sub-category classifier : %s %s %s" % (cut, name,
                                                                  vars)
            fcats.AddMethod(
                cut, vars, TMVA.Types.kLikelihood, "%s_%s" % (mname, name),
                "!H:!V:!CreateMVAPdfs:!TransformOutput:VarTransform=D:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=150"
            )

    if "BDT" in o.methods:
        mname = "BDT%s" % o.label
        settings = defaultSettings["BDT"]
        if hasattr(o, "settings") and "BDT" in o.settings:
            settings = str(o.settings["BDT"])
        if len(categories) == 0:
            print "booking method %s with settings %s" % (TMVA.Types.kBDT,
                                                          settings)
            cats = factory.BookMethod(TMVA.Types.kBDT, TString(mname),
                                      settings)
        else:
            cats = factory.BookMethod(TMVA.Types.kCategory, mname)

            for cut, name, vars in categories:
                print "booking sub-category classifier : %s %s %s" % (
                    cut, name, vars)
                cats.AddMethod(cut, vars, TMVA.Types.kBDT,
                               "%s_%s" % (mname, name), settings)

    if "Cuts" in o.methods:
        mname = "Cuts%s" % o.label
        settings = defaultSettings["Cuts"]
        if hasattr(o, "settings") and "Cuts" in o.settings:
            settings = str(o.settings["Cuts"])
        if len(categories) == 0:
            cats = factory.BookMethod(TMVA.Types.kCuts, mname, settings)
        else:
            cats = factory.BookMethod(TMVA.Types.kCategory, mname)

            for cut, name, vars in categories:
                print "booking sub-category classifier : %s %s %s" % (
                    cut, name, vars)
                cats.AddMethod(cut, vars, TMVA.Types.kCuts,
                               "%s_%s" % (mname, name), settings)

    # ---- Now you can tell the factory to train, test, and evaluate the MVAs.
    if o.optimize:
        print "Optimizing?"
        factory.OptimizeAllMethods()

    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()

    # Save the output.
    outputFile.Close()
Ejemplo n.º 32
0
def TMVARegression():

    try:
        # retrieve command line options
        shortopts  = "a:o:vh?"
        longopts   = ["analysis=","outputfile=", "verbose", "help", "usage"]
        opts, args = getopt.getopt( sys.argv[1:], shortopts, longopts )
        
    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)
        
    _outfname   = OUTFNAME
    _analysis   = ANALYSIS
    verbose     = False
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-o", "--outputfile"):
            _outfname = a
        elif o in ("-a", "--analysis"):
            _analysis = a
        elif o in ("-v", "--verbose"):
            verbose = True

    
            
    # Import TMVA classes from ROOT
    from ROOT import TMVA, TFile, TChain, TCut

    # Output file
    outputFile = TFile( _outfname, 'RECREATE' )

    # Create the factory object. Later you can choose the methods
    # whose performance you'd like to investigate. The factory will
    # then run the performance analysis for you.
    #
    # The first argument is the base of the name of all the
    # weightfiles in the directory weights_Reg/
    #
    # The second argument is the output file for the training results
    # All TMVA output can be suppressed by removing the "!" (not) in
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory( "TMVARegression", outputFile,
                            "!V:!Silent:Color:DrawProgressBar" )
    # Set verbosity
    factory.SetVerbose( verbose )
    
    TMVA.gConfig().GetIONames().fWeightFileDir = "weights_Reg_8TeV" + "_" + _analysis

    if _analysis == "Dijet":
  
        factory.AddVariable("hJet_pt", "hJet_pt", "units", 'F')
        factory.AddVariable("hJet_eta", "hJet_eta", "units", 'F')
        factory.AddVariable("hJet_phi", "hJet_phi", "units", 'F')
        factory.AddVariable("hJet_e", "hJet_e", "units", 'F')
        factory.AddVariable("hJet_ptRaw*((hJet_ptRaw+resolutionBias(fabs(hJet_eta))*(hJet_ptRaw-hJet_genPt))/hJet_ptRaw)", "hJet_ptRaw*((hJet_ptRaw+resolutionBias(fabs(hJet_eta))*(hJet_ptRaw-hJet_genPt))/hJet_ptRaw)", "units", 'F')
        factory.AddVariable("hJet_Mt:=evalMt(hJet_pt, hJet_eta, hJet_phi, hJet_e)","hJet_Mt", "units", 'F')
        factory.AddVariable("hJet_Et:=evalEt(hJet_pt, hJet_eta, hJet_phi, hJet_e)","hJet_Et", "units", 'F')
        factory.AddVariable("hJet_ptLeadTrack", "hJet_ptLeadTrack", "units", 'F')
        factory.AddVariable("hJet_vtxPt", "hJet_vtxPt", "units", 'F')
        factory.AddVariable("hJet_vtx3dL", "hJet_vtx3dL", "units", 'F')
        factory.AddVariable("hJet_vtx3deL", "hJet_vtx3deL", "units", 'F')
        factory.AddVariable("hJet_vtxMass", "hJet_vtxMass", "units", 'F')
        factory.AddVariable("hJet_chf", "hJet_chf", "units", 'F')
        factory.AddVariable("hJet_nch", "hJet_nch", "units", 'F')
        factory.AddVariable("hJet_nconstituents", "hJet_nconstituents", "units", 'F')
        factory.AddVariable("hJet_JECUnc", "hJet_JECUnc", "units", 'F')
        factory.AddVariable("rho25", "rho25", "units", 'F')
        factory.AddVariable("MET.et", "MET.et", "units", 'F')
        factory.AddVariable("METdPhi:=METdeltaPhi(MET.phi, hJet_phi[0], hJet_phi[1])","METdPhi", "units",'F')

        #Add the variable carrying the regression target
        factory.AddTarget( "hJet_genPt" )

    elif _analysis == "Subjet":

        factory.AddVariable("fathFilterJets_pt", "fathFilterJets_pt", "units", 'F')
        factory.AddVariable("fathFilterJets_eta", "fathFilterJets_eta", "units", 'F')
        factory.AddVariable("fathFilterJets_phi", "fathFilterJets_phi", "units", 'F')
        factory.AddVariable("fathFilterJets_e", "fathFilterJets_e", "units", 'F')
        factory.AddVariable("fathFilterJets_ptRaw*((fathFilterJets_ptRaw+resolutionBias(fabs(fathFilterJets_eta))*(fathFilterJets_ptRaw-fathFilterJets_genPt))/fathFilterJets_ptRaw)", "fathFilterJets_ptRaw*((fathFilterJets_ptRaw+resolutionBias(fabs(fathFilterJets_eta))*(fathFilterJets_ptRaw-fathFilterJets_genPt))/fathFilterJets_ptRaw)", "units", 'F')
        factory.AddVariable("fathFilterJets_Mt:=evalMt(fathFilterJets_pt, fathFilterJets_eta, fathFilterJets_phi, fathFilterJets_e)","fathFilterJets_Mt", "units", 'F')
        factory.AddVariable("fathFilterJets_Et:=evalEt(fathFilterJets_pt, fathFilterJets_eta, fathFilterJets_phi, fathFilterJets_e)","fathFilterJets_Et", "units", 'F')
        factory.AddVariable("fathFilterJets_ptLeadTrack", "fathFilterJets_ptLeadTrack", "units", 'F')
        factory.AddVariable("fathFilterJets_vtxPt", "fathFilterJets_vtxPt", "units", 'F')
        factory.AddVariable("fathFilterJets_vtx3dL", "fathFilterJets_vtx3dL", "units", 'F')
        factory.AddVariable("fathFilterJets_vtx3deL", "fathFilterJets_vtx3deL", "units", 'F')
        factory.AddVariable("fathFilterJets_vtxMass", "fathFilterJets_vtxMass", "units", 'F')
        factory.AddVariable("fathFilterJets_chf", "fathFilterJets_chf", "units", 'F')
        factory.AddVariable("rho25", "rho25", "units", 'F')
        factory.AddVariable("MET.et", "MET.et", "units", 'F')
        factory.AddVariable("METdPhi:=METdeltaPhi(MET.phi, fathFilterJets_phi[0], fathFilterJets_phi[1])","METdPhi", "units",'F')

        factory.AddTarget("fathFilterJets_genPt")

    else:
        print "Problem specifying analysis. Please choose Dijet or Subjet."
        sys.exit(1) 

    ## Get the Signal trees
    en7TeV = False
    en8TeV = True

    regWeight = 1.
    chain = TChain("tree")

    if en7TeV: #change the ntuple names later!!
        chain.Add("Step2_output_May11/WH_125_ForRegression.root")
        chain.Add("Step2_output_May11/WH_115_ForRegression.root")
        chain.Add("Step2_output_May11/WH_120_ForRegression.root")
        chain.Add("Step2_output_May11/WH_130_ForRegression.root")
        chain.Add("Step2_output_May11/WH_135_ForRegression.root")
        

    #if en8TeV and _analysis == "Dijet":
    #    chain.Add("dcache:/pnfs/cms/WAX/11/store/user/lpchbb/apana/Step1V33_Step2_V2/DiJetPt_ZH_ZToLL_HToBB_M-110_8TeV-powheg-herwigpp.root")
        

    if en8TeV: 
        chain.Add("/uscmst1b_scratch/lpc1/lpctrig/apana/Higgs/Step2/NtupleV34/CMSSW_5_2_5/src/VHbbAnalysis/VHbbDataFormats/bin/Step2/ZH/ZH_110_summer12_33b.root")
        chain.Add("/uscmst1b_scratch/lpc1/lpctrig/apana/Higgs/Step2/NtupleV34/CMSSW_5_2_5/src/VHbbAnalysis/VHbbDataFormats/bin/Step2/ZH/ZH_115_summer12_33b.root")
        chain.Add("/uscmst1b_scratch/lpc1/lpctrig/apana/Higgs/Step2/NtupleV34/CMSSW_5_2_5/src/VHbbAnalysis/VHbbDataFormats/bin/Step2/ZH/ZH_120_summer12_33b.root")
        chain.Add("/uscmst1b_scratch/lpc1/lpctrig/apana/Higgs/Step2/NtupleV34/CMSSW_5_2_5/src/VHbbAnalysis/VHbbDataFormats/bin/Step2/ZH/ZH_125_summer12_33b.root")
        chain.Add("/uscmst1b_scratch/lpc1/lpctrig/apana/Higgs/Step2/NtupleV34/CMSSW_5_2_5/src/VHbbAnalysis/VHbbDataFormats/bin/Step2/ZH/ZH_130_summer12_33b.root")
        chain.Add("/uscmst1b_scratch/lpc1/lpctrig/apana/Higgs/Step2/NtupleV34/CMSSW_5_2_5/src/VHbbAnalysis/VHbbDataFormats/bin/Step2/ZH/ZH_135_summer12_33b.root")

        
    NEntries = chain.GetEntries()
    print "Number of entries on Chain:",NEntries

    regTree = chain
    
    factory.AddRegressionTree( regTree, regWeight )

    #This would set individual event weights (the variables defined in the
    #expression need to exist in the original TTree)
    #factory.SetWeightExpression( "var1", "Regression" )


    if _analysis == "Dijet":
        cutString=\
            "(Vtype == 0 || Vtype == 1)"         + " && " +\
            "hJet_pt[0] > 20.0"                     + " && " +\
            "hJet_pt[1] > 20.0"                     + " && " +\
            "hJet_genPt[0] > 0.0"                   + " && " +\
            "hJet_genPt[1] > 0.0"                   + " && " +\
            "hJet_eta[0] < 2.4"                     + " && " +\
            "hJet_eta[1] < 2.4"                     + " && " +\
            "hJet_id[0] > 0.0"                      + " && " +\
            "hJet_id[1] > 0.0"                      + " && " +\
            "max(hJet_csv[0],hJet_csv[1]) > 0.0"    + " && " +\
            "min(hJet_csv[0],hJet_csv[1]) > 0.0"    + " && " +\
            "H.pt > 100"


    elif _analysis == "Subjet":
        cutString=\
            "(Vtype == 0 || Vtype == 1)"                   + " && " +\
            "fathFilterJets_pt[0] > 20.0"                     + " && " +\
            "fathFilterJets_pt[1] > 20.0"                     + " && " +\
            "fathFilterJets_genPt[0] > 0.0"                   + " && " +\
            "fathFilterJets_genPt[1] > 0.0"                   + " && " +\
            "fathFilterJets_eta[0] < 2.4"                     + " && " +\
            "fathFilterJets_eta[1] < 2.4"                     + " && " +\
            "max(fathFilterJets_csv[0],fathFilterJets_csv[1]) > 0.0"    + " && " +\
            "min(fathFilterJets_csv[0],fathFilterJets_csv[1]) > 0.0"    + " && " +\
            "FatH.filteredpt > 100"

    else:
        print "Problem specifying analysis. Please choose Dijet or Subjet."
        sys.exit(1)

    print cutString
    mycut = TCut( cutString )
        
    
    # tell the factory to use all remaining events in the trees after training for testing. The number is 25% of the events after cuts:
    if en7TeV:
        factory.PrepareTrainingAndTestTree( mycut, "nTrain_Regression=125000:nTest_Regression=125000:SplitMode=Random:NormMode=NumEvents:!V" )
    if en8TeV:
        factory.PrepareTrainingAndTestTree( mycut, "nTrain_Regression=111000:nTest_Regression=111000:SplitMode=Random:NormMode=NumEvents:!V" )

    #If no numbers of events are given, half of the events in the tree are used 
    #for training, and the other half for testing:
    #factory.PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );  

    #---- Book MVA methods
   
    #please look up the various method configuration options in the corresponding cxx files, e.g.
    #src/MethodCuts.cxx, etc., or here: http://tmva.sourceforge.net/optionRef.html
    #it is possible to preset ranges in the option string in which the cut optimisation should be done:
    #"...:CutRangeMin[2]=-1:CutRangeMax[2]=1:...", where [2] is the third input variable


    #Boosted Decision Trees
    factory.BookMethod( TMVA.Types.kBDT, "BDT",
                        "!H:!V:NTrees=60:nEventsMin=5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30" )

    
    # -------------------------------------------------------------------------------------------

    #---- Now you can tell the factory to train, test, and evaluate the MVAs

    # Train MVAs using the set of training events
    factory.TrainAllMethods()

    # ---- Evaluate all MVAs using the set of test events
    factory.TestAllMethods()

    # ----- Evaluate and compare performance of all configured MVAs
    factory.EvaluateAllMethods()    

    # --------------------------------------------------------------

    
    NEntries = regTree.GetEntries()
    print "Number of entries on Tree: ",NEntries

    # Save the output
    outputFile.Close()

    print "==> Wrote root file %s\n" % _outfname
    print "==> TMVARegression is done!\n"       
Ejemplo n.º 33
0
from rootpy.tree import Cut
from rootpy.io import root_open
from ROOT import TMVA

infile = root_open('sample.root')
outfile = root_open('tmva_output.root', 'recreate')
factory = TMVA.Factory("TMVAClassification", outfile,
                       "AnalysisType=Classification")
factory.AddVariable("a", 'F')
factory.AddVariable("b", 'F')
factory.SetInputTrees(infile.sample, Cut('label==1'), Cut('label==0'))
factory.PrepareTrainingAndTestTree(Cut(), Cut(),
                                   "SplitMode=Random:NormMode=NumEvents")
factory.BookMethod(
    TMVA.Types.kBDT, "BDT",
    "NTrees=850:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=-1"
)
factory.TrainAllMethods()
factory.TestAllMethods()
factory.EvaluateAllMethods()
outfile.close()
infile.close()
Ejemplo n.º 34
0
def main():

    try:
        # retrieve command line options
        shortopts  = "w:m:i:j:f:g:t:o:a:vgh?"
        longopts   = ["weight_fold=", "methods=", "inputfilesig=", "inputfilebkg=", "friendinputfilesig=", "friendinputfilebkg=", "inputtrees=", "outputfile=", "addedcuts=", "verbose", "gui", "help", "usage"]
        opts, args = getopt.getopt( sys.argv[1:], shortopts, longopts )

    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    infnameSig     = DEFAULT_INFNAMESIG
    infnameBkg     = DEFAULT_INFNAMEBKG
    friendfnameSig = DEFAULT_FRIENDNAMESIG
    friendfnameBkg = DEFAULT_FRIENDNAMEBKG
    treeNameSig    = DEFAULT_TREESIG
    treeNameBkg    = DEFAULT_TREEBKG
    outfname       = DEFAULT_OUTFNAME
    methods        = DEFAULT_METHODS
    weight_fold    = "weights"
    verbose        = False
    gui            = False
    addedcuts      = ""
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-w", "--weight_fold"):
            weight_fold = a
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-i", "--inputfilesig"):
            infnameSig = a
        elif o in ("-j", "--inputfilebkg"):
            infnameBkg = a
        elif o in ("-f", "--friendinputfilesig"):
            friendfnameSig = a
        elif o in ("-g", "--friendinputfilebkg"):
            friendfnameBkg = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-a", "--addedcuts"):
            addedcuts = a
        elif o in ("-t", "--inputtrees"):
            a = a.strip()
            trees = a.rsplit( ' ' )
            trees.sort()
            trees.reverse()
            if len(trees)-trees.count('') != 2:
                print "ERROR: need to give two trees (each one for signal and background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True
        elif o in ("-g", "--gui"):
            gui = True

    # Print methods
    mlist = methods.replace(' ',',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Print the file
    print "Using file " + infnameSig + " for signal..."
    print "Using file " + infnameBkg + " for background..."

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut
    
    # check ROOT version, give alarm if 5.18
    print "ROOT version is " + str(gROOT.GetVersionCode())
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)
    
    # The logon macro is not loaded automatically through PyROOT (it loads the TMVA library); load the GUI macro as well
    gROOT.SetMacroPath( "./" )
## SO I TAKE DEFAULT FROM ROOT #    gROOT.Macro( "./TMVAlogon.C" )
    gROOT.LoadMacro   ( "./TMVAGui.C" )
    
    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile( outfname, 'RECREATE' )
    
    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in 
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory( "TMVAClassification", outputFile, 
                            "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" )

    # Set verbosity
    factory.SetVerbose( verbose )
    
    # If you wish to modify default settings 
    # (please check "src/Config.h" to see all available global options)
    #    TMVA.gConfig().GetVariablePlotting().fTimesRMS = 8.0
    (TMVA.gConfig().GetIONames()).fWeightFileDir = weight_fold

    # Define the input variables that shall be used for the classifier training
    # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    # [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
    factory.AddVariable( "dR_l1l2", "dR_l1l2", "", 'F' )
    factory.AddVariable( "dR_b1b2", "dR_b1b2", "", 'F' )
    factory.AddVariable( "dR_bl", "dR_bl", "", 'F' )
    factory.AddVariable( "dR_l1l2b1b2", "dR_l1l2b1b2", "", 'F' )
    factory.AddVariable( "MINdR_bl", "MINdR_bl", "", 'F' )
    factory.AddVariable( "dphi_l1l2b1b2", "dphi_l1l2b1b2", "", 'F' )
    factory.AddVariable( "mass_l1l2", "mass_l1l2", "", 'F' )
    factory.AddVariable( "mass_b1b2", "mass_b1b2", "", 'F' )
    factory.AddVariable( "mass_trans", "mass_trans", "", 'F' )
    factory.AddVariable( "MT2", "MT2", "", 'F' )
    factory.AddVariable( "pt_b1b2", "pt_b1b2", "", 'F' )
    factory.AddVariable( "MMC_h2massweight1_prob", "MMC_h2massweight1_prob", "", 'F' ) ##ADDED

    # You can add so-called "Spectator variables", which are not used in the MVA training, 
    # but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the 
    # input variables, the response values of all trained MVAs, and the spectator variables
    # factory.AddSpectator( "spec1:=var1*2",  "Spectator 1", "units", 'F' )
    # factory.AddSpectator( "spec2:=var1*3",  "Spectator 2", "units", 'F' )

    # Read input data
    if gSystem.AccessPathName( infnameSig ) != 0:
        print "Cannot find %s" % infnameSig
    if gSystem.AccessPathName( infnameBkg ) != 0:
        print "Cannot find %s" % infnameBkg
        
    inputSig = TFile.Open( infnameSig )
    inputBkg = TFile.Open( infnameBkg )

    # Get the signal and background trees for training
    signal      = inputSig.Get( treeNameSig )
    background  = inputBkg.Get( treeNameBkg )

    ##signal.AddFriend( "eleIDdir/isoT1 = eleIDdir/T1", friendfnameSig )
    ##background.AddFriend( "eleIDdir/isoT1 = eleIDdir/T1", friendfnameBkg )

    # Global event weights (see below for setting event-wise weights)
    signalWeight     = 1.
    backgroundWeight = 1.

#I don't think there's a general answer to this. The safest 'default'
#is to use event weights such that you have equal amounts of signal
#and background for the training. Otherwise, for example, if you look
#for a rare signal and use the weights to scale the number of events
#according to the expected signal-to-background ratio for the
#luminosity, the classifier sees hardly any signal events and
#"thinks": Oh, I'll just classify everything as background and do a
#good job!
#
#One can try to 'optimize' the training a bit more towards either
#'high purity' or 'high efficiency' by choosing different weights,
#but as I said, there's no fixed rule. You'd have to try and see if
#you get better results by playing with the weights.

    # ====== register trees ====================================================
    #
    # the following method is the preferred one:
    # you can add an arbitrary number of signal or background trees
    factory.AddSignalTree    ( signal,     signalWeight     )
    factory.AddBackgroundTree( background, backgroundWeight )

    # To give different trees for training and testing, do as follows:
    #    factory.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" )
    #    factory.AddSignalTree( signalTestTree,     signalTestWeight,  "Test" )
    
    # Use the following code instead of the above two or four lines to add signal and background 
    # training and test events "by hand"
    # NOTE that in this case one should not give expressions (such as "var1+var2") in the input 
    #      variable definition, but simply compute the expression before adding the event
    #
    #    # --- begin ----------------------------------------------------------
    #    
    # ... *** please lookup code in TMVA/macros/TMVAClassification.C ***
    #    
    #    # --- end ------------------------------------------------------------
    #
    # ====== end of register trees ==============================================    
            
    # Set individual event weights (the variables must exist in the original TTree)
    #    for signal    : factory.SetSignalWeightExpression    ("weight1*weight2");
    #    for background: factory.SetBackgroundWeightExpression("weight1*weight2");

    # Apply additional cuts on the signal and background sample. 
    # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )
    #mycutSig = TCut( "nu1and2_diBaxis_t>-900 && met_diBaxis_t>-900&& hasb1jet && hasb2jet && hasMET && hasGenMET && hasdRljet && hastwomuons" ) 
    mycutSig = TCut( addedcuts ) 
    #mycutBkg = TCut( "event_n%2!=0 && " + addedcuts ) 
    mycutBkg = TCut( addedcuts ) 
    #mycutBkg = TCut( "nu1and2_diBaxis_t>-900 && met_diBaxis_t>-900&& hasb1jet && hasb2jet && hasMET && hasGenMET && hasdRljet && hastwomuons" ) 

    print mycutSig

    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
    factory.PrepareTrainingAndTestTree( mycutSig, mycutBkg,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" )

    # --------------------------------------------------------------------------------------------------

    # ---- Book MVA methods
    #
    # please look up the various method configuration options in the corresponding cxx files, e.g.
    # src/MethodCuts.cxx, etc., or here: http://tmva.sourceforge.net/optionRef.html
    # it is possible to preset ranges in the option string in which the cut optimisation should be done:
    # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1:...", where [2] is the third input variable

    # Cut optimisation
    if "Cuts" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "Cuts",
                            "!H:!V:FitMethod=MC:EffSel:VarProp[0]=FMax:VarProp[1]=FMin" )

    if "CutsD" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsD",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" )

    if "CutsPCA" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsPCA",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" )

    if "CutsGA" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsGA",
                            "H:!V:FitMethod=GA:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95:VarProp[0]=FMin:VarProp[1]=FMax" )

    if "CutsSA" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsSA",
                            "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" )

    # Likelihood ("naive Bayes estimator")
    if "Likelihood" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "Likelihood",
                            "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" )

    # Decorrelated likelihood
    if "LikelihoodD" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodD",
                            "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" )

    # PCA-transformed likelihood
    if "LikelihoodPCA" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodPCA",
                            "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ) 

    # Use a kernel density estimator to approximate the PDFs
    if "LikelihoodKDE" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodKDE",
                            "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ) 

    # Use a variable-dependent mix of splines and kernel density estimator
    if "LikelihoodMIX" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodMIX",
                            "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ) 

    # Test the multi-dimensional probability density estimator
    # here are the options strings for the MinMax and RMS methods, respectively:
    #      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
    #      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
    if "PDERS" in mlist:
        factory.BookMethod( TMVA.Types.kPDERS, "PDERS",
                            "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" )

    if "PDERSD" in mlist:
        factory.BookMethod( TMVA.Types.kPDERS, "PDERSD",
                            "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" )

    if "PDERSPCA" in mlist:
        factory.BookMethod( TMVA.Types.kPDERS, "PDERSPCA",
                             "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" )

    # Multi-dimensional likelihood estimator using self-adapting phase-space binning
    if "PDEFoam" in mlist:
        factory.BookMethod( TMVA.Types.kPDEFoam, "PDEFoam",
                            "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" )

    if "PDEFoamBoost" in mlist:
        factory.BookMethod( TMVA.Types.kPDEFoam, "PDEFoamBoost",
                            "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" )

    # K-Nearest Neighbour classifier (KNN)
    if "KNN" in mlist:
        factory.BookMethod( TMVA.Types.kKNN, "KNN",
                            "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" )

    # H-Matrix (chi-squared) method
    if "HMatrix" in mlist:
        factory.BookMethod( TMVA.Types.kHMatrix, "HMatrix", "!H:!V" )

    # Linear discriminant (same as Fisher discriminant)
    if "LD" in mlist:
        factory.BookMethod( TMVA.Types.kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" )

    # Fisher discriminant (same as LD)
    if "Fisher" in mlist:
        factory.BookMethod( TMVA.Types.kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" )

    # Fisher with Gauss-transformed input variables
    if "FisherG" in mlist:
        factory.BookMethod( TMVA.Types.kFisher, "FisherG", "H:!V:VarTransform=Gauss" )

    # Composite classifier: ensemble (tree) of boosted Fisher classifiers
    if "BoostedFisher" in mlist:
        factory.BookMethod( TMVA.Types.kFisher, "BoostedFisher", 
                            "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" )

    # Function discrimination analysis (FDA) -- exercises various fitters; the recommended ones are Minuit, GA and SA.
    # In the Formula, (0)...(4) are the fit parameters and x0...x3 the input variables; ParRanges lists one range per parameter.
    if "FDA_MC" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_MC",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" )

    if "FDA_GA" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_GA",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

    if "FDA_SA" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_SA",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

    if "FDA_MT" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_MT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

    if "FDA_GAMT" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_GAMT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

    if "FDA_MCMT" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_MCMT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

    # TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
    if "MLP" in mlist:
        factory.BookMethod( TMVA.Types.kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" )

    if "MLPBFGS" in mlist:
        factory.BookMethod( TMVA.Types.kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" )

    if "MLPBNN" in mlist:
        factory.BookMethod( TMVA.Types.kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ) # BFGS training with bayesian regulators

    # CF (Clermont-Ferrand) ANN
    if "CFMlpANN" in mlist:
        factory.BookMethod( TMVA.Types.kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ) # n_cycles:#nodes:#nodes:...  

    # TMlp (ROOT) ANN
    if "TMlpANN" in mlist:
        factory.BookMethod( TMVA.Types.kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ) #n_cycles:#nodes:#nodes:...

    # Support Vector Machine
    if "SVM" in mlist:
        factory.BookMethod( TMVA.Types.kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" )

    # Boosted Decision Trees
    if "BDTG" in mlist:
        factory.BookMethod( TMVA.Types.kBDT, "BDTG",
                            "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5" )

    if "BDT" in mlist:
        factory.BookMethod( TMVA.Types.kBDT, "BDT",
                            "!H:!V:NTrees=850:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" )

    if "BDTB" in mlist:
        factory.BookMethod( TMVA.Types.kBDT, "BDTB",
                            "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" )

    if "BDTD" in mlist:
        factory.BookMethod( TMVA.Types.kBDT, "BDTD",
                            "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" )

    # RuleFit -- TMVA implementation of Friedman's method
    if "RuleFit" in mlist:
        factory.BookMethod( TMVA.Types.kRuleFit, "RuleFit",
                            "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" )

    # --------------------------------------------------------------------------------------------------
            
    # ---- Now you can tell the factory to train, test, and evaluate the MVAs. 

    # Train MVAs
    factory.TrainAllMethods()
    
    # Test MVAs
    factory.TestAllMethods()
    
    # Evaluate MVAs
    factory.EvaluateAllMethods()    
    
    # Save the output.
    outputFile.Close()
    
    print "=== wrote root file %s\n" % outfname
    print "=== TMVAClassification is done!\n"
    
    # open the GUI for the result macros    
    if gui:
        gROOT.ProcessLine( "TMVAGui(\"%s\")" % outfname )
        # keep the ROOT thread running
        gApplication.Run() 
    '', 'F', 0.00, 2.00,
    array.array('f', [0]),
    TBranch(), 0.00, 1.10, 22
]
variables['pttot_lep1_lep2'] = [
    '', 'F', 0.00, 2.00,
    array.array('f', [0]),
    TBranch(), 0.00, 1.10, 22
]
variables['ptdiff_lep1_lep2'] = [
    '', 'F', 0.00, 2.00,
    array.array('f', [0]),
    TBranch(), 0.00, 1.10, 22
]

reader = TMVA.Reader()
hists = {}

for varName, var in sorted(variables.iteritems()):
    if withRecoTraining and withRelativeVariables and varName == Energy_scale:
        continue
    reader.AddVariable(varName, var[4])

if withRecoTraining:
    # a single shared float buffer is registered for every spectator branch below
    a = array.array('f', [0])
    reader.AddSpectator("truth_mass", a)
    reader.AddSpectator("visible_mass", a)
    reader.AddSpectator("mmc0_resonance_m", a)
    reader.AddSpectator("mmc1_resonance_m", a)
    reader.AddSpectator("mass_collinear_tau1_tau2", a)