def runJob():
    TMVA.Tools.Instance()
    TMVA.PyMethodBase.PyInitialize()
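    # Tools.Instance() loads the TMVA library; PyMethodBase.PyInitialize() initialises the interface required by the PyMVA methods (e.g. PyKeras)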

    output = TFile.Open('TMVA.root', 'RECREATE')
    factory = TMVA.Factory('TMVAClassification', output,
            '!V:!Silent:Color:DrawProgressBar:Transformations=D,G:AnalysisType=Classification')
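    # Factory options: !V (no verbose output), !Silent (keep console output),
    # Transformations=D,G (decorrelation and Gaussianisation applied for the input-variable plots),
    # AnalysisType=Classification (binary signal/background classification)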

    dataloader = TMVA.DataLoader('dataset')
Example #2
def get_correlation_matrix(year, variables):
    # Returns the correlation matrix of the given variables
    # Get signal and background paths
    signal_path = os.path.join(os.getcwd(),
                               varsList.step2Sample2017 if year == 2017 else varsList.step2Sample2018,
                               varsList.sig2017_0[0] if year == 2017 else varsList.sig2018_0[0])
    bkgrnd_path = os.path.join(os.getcwd(),
                               varsList.step2Sample2017 if year == 2017 else varsList.step2Sample2018,
                               varsList.bkg2017_0[0] if year == 2017 else varsList.bkg2018_0[0])

    # Create TMVA object
    loader = TMVA.DataLoader("tmva_data")

    # Load used variables
    for var in variables:
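        # look up the (expression, title, unit) entry for this variable in the master "DNN" list; skip names that are not found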
        try:
            var_data = varsList.varList["DNN"][[v[0] for v in varsList.varList["DNN"]].index(var)]
            loader.AddVariable(var_data[0], var_data[1], var_data[2], "F")
        except ValueError:
            print("[WARN] The variable {} was not found. Omitting.".format(var))

    # Open ROOT files
    signal_f = TFile.Open(signal_path)
    signal = signal_f.Get("ljmet")
    bkgrnd_f = TFile.Open(bkgrnd_path)
    bkgrnd = bkgrnd_f.Get("ljmet")

    # Load signal and background
    loader.AddSignalTree(signal)
    loader.fTreeS = signal
    loader.AddBackgroundTree(bkgrnd)
    loader.fTreeB = bkgrnd

    # Set weights
    loader.SetSignalWeightExpression(weight_string)
    loader.SetBackgroundWeightExpression(weight_string)

    # Set cuts
    loader.PrepareTrainingAndTestTree(
        cut_string, cut_string,
        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V:VerboseLevel=Info"
    )
    
    # Force the dataset to be built so that the correlation matrices are filled
    loader.GetDefaultDataSetInfo().GetDataSet().GetEventCollection()
    
    # Retrieve the signal correlation matrix
    sig_th2 = loader.GetCorrelationMatrix("Signal")

    n_bins = sig_th2.GetNbinsX()
    sig_corr = np.zeros((n_bins, n_bins))
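    # ROOT histogram bins are 1-indexed, hence the +1 offsets below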
    
    for x in range(n_bins):
        for y in range(n_bins):
            sig_corr[x, y] = sig_th2.GetBinContent(x + 1, y + 1)
    
    return sig_corr
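
# Usage sketch (added for illustration, not part of the original example): assumes this module
# defines get_correlation_matrix as above and that matplotlib is installed; the variable names
# passed in are hypothetical placeholders.
if __name__ == "__main__":
    import matplotlib.pyplot as plt

    corr = get_correlation_matrix(2017, ["AK4HT", "NJets_MultiLepCalc"])  # hypothetical variables
    plt.imshow(corr, cmap="RdBu")
    plt.colorbar(label="linear correlation")
    plt.savefig("correlation_matrix.png")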
Example #3
def runJob():
    TMVA.Tools.Instance()
    TMVA.PyMethodBase.PyInitialize()

    output = TFile.Open('TMVA_SSSF.root', 'RECREATE')
    factory = TMVA.Factory(
        'TMVAClassification', output,
        '!V:!Silent:Color:DrawProgressBar:AnalysisType=Classification')
    # factory = TMVA.Factory('TMVAClassification', output,   '!V:!Silent:Color:DrawProgressBar:Transformations=D,G:AnalysisType=Classification')

    dataloader = TMVA.DataLoader('datasetSSSF04Feb')
    for br in config.mvaVariables:
        dataloader.AddVariable(br)

    for sampleName, sample in config.samples.items():
        if config.structure[sampleName]['isData'] == 1:
            continue

        print sampleName
        sample['tree'] = TChain("Events")
        for f in sample['name']:
            sample['tree'].Add(f)

        if config.structure[sampleName]['isSignal'] == 1:
            dataloader.AddSignalTree(sample['tree'], 1.0)
        else:
            dataloader.AddBackgroundTree(sample['tree'], 1.0)
        # output_dim += 1
    dataloader.PrepareTrainingAndTestTree(
        TCut(config.cut), 'SplitMode=Random:NormMode=NumEvents:!V')
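    # SplitMode=Random shuffles events before the train/test split; NormMode=NumEvents rescales weights so each class averages a weight of one per event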

    # factory.BookMethod(dataloader, TMVA.Types.kBDT, "BDTG4C2",   "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=200:MaxDepth=2" );
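    # the four BDTG bookings below differ only in nCuts, the number of grid points scanned when optimising each node split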
    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDTG4C1",
        "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=100:MaxDepth=2"
    )
    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDTG4C05",
        "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=50:MaxDepth=2"
    )
    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDTG4500",
        "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=500:MaxDepth=2"
    )
    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDTG4750",
        "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=750:MaxDepth=2"
    )

    # Run training, test and evaluation
    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()

    output.Close()
Example #4
def TMVANN(filenameSTRING, outputDOTrootSTRING, sigtreeSTRINGS, bkgtreeSTRING,
           variablesSTRING):
    NNfile = R.TFile(outputDOTrootSTRING, "recreate")
    NNfile.Close()
    for sigtreeSTRING in sigtreeSTRINGS:
        from ROOT import TMVA
        file = R.TFile(filenameSTRING)  #importing the datasetsignalslowlevel
        signaltree = file.Get(sigtreeSTRING)  #setting signaltree
        backgroundtree = file.Get(bkgtreeSTRING)  #setting backgroundtree)
        sigweights = file.Get(sigtreeSTRING + weights)
        bkgweights = file.Get(bkgtreeSTRING + weights)
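        # NOTE: `weights` is assumed to be a module-level string defined elsewhere in this
        # script; it is appended to the tree names here and reused as the weight expression below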
        TMVA.Tools.Instance()

        NNfile = R.TFile(
            outputDOTrootSTRING,
            "update")  #Writing the root file required for the TMVA factory

        TMVAfactory = TMVA.Factory(
            "TMVANN", NNfile,
            "V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification"
        )
        TMVAfactory.SetVerbose(False)  #Setting extra info (verbose) to false

        datasetsignalslowlevel = TMVA.DataLoader(
            "datasetsignalslowlevel")  #Instantiating a datasetsignalslowlevel
        datasetsignalslowlevel.AddSignalTree(signaltree, 1.)  #adding signal
        datasetsignalslowlevel.AddBackgroundTree(backgroundtree,
                                                 1.)  #adding background
        print(sigweights)
        datasetsignalslowlevel.SetSignalWeightExpression(weights)
        datasetsignalslowlevel.SetBackgroundWeightExpression(weights)

        for i in variablesSTRING:  #adding our training variables to the TMVA
            datasetsignalslowlevel.AddVariable(i)

        signalcut = R.TCut("")  #Variables are already cut
        backgroundcut = R.TCut("")
        datasetsignalslowlevel.PrepareTrainingAndTestTree(
            signalcut, backgroundcut,
            "nTrain_Signal= 0:nTrain_Background=0:Splitmode=Random:NormMode=NumEvents:!V"
        )
        TMVAfactory.BookMethod(
            datasetsignalslowlevel, TMVA.Types.kMLP,
            "LowLevelNN_3layer25,20,10_100Epoch_tanhNeuron" + sigtreeSTRING,
            "H:!V:NeuronType=tanh:VarTransform=N:NCycles=100:HiddenLayers=25,20,10:TestRate=5"
        )

        TMVAfactory.TrainAllMethods()
        TMVAfactory.TestAllMethods()
        TMVAfactory.EvaluateAllMethods()
        NNfile.Close()
    NNfile = R.TFile(outputDOTrootSTRING, "update")
Example #5
def runJob():
    TMVA.Tools.Instance()
    TMVA.PyMethodBase.PyInitialize()

    dataloader = TMVA.DataLoader('dataset_8Feb')
    output = TFile.Open('TMVA16.root', 'RECREATE')
    factory = TMVA.Factory(
        'TMVAClassification', output,
        '!V:!Silent:Color:DrawProgressBar:AnalysisType=Classification')

    for br in config.mvaVariables:
        dataloader.AddVariable(br)

    for sampleName, sample in config.samples.items():
        if config.structure[sampleName]['isData'] == 1:
            continue

        sample['tree'] = TChain("Events")
        for f in sample['name']:
            sample['tree'].Add(f)

        if config.structure[sampleName]['isSignal'] == 1:
            dataloader.AddSignalTree(sample['tree'], 1.0)
        else:
            dataloader.AddBackgroundTree(sample['tree'], 1.0)
        # output_dim += 1
    dataloader.PrepareTrainingAndTestTree(
        TCut(config.cut),
        'SplitMode=Random::SplitSeed=10:NormMode=EqualNumEvents')

    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDT",
        "!H:!V:NTrees=500:MinNodeSize=0.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.1:SeparationType=GiniIndex:nCuts=500"
    )
    # factory.BookMethod(dataloader, TMVA.Types.kBDT, "BDT8","!H:!V:NTrees=1500:MinNodeSize=0.5%:MaxDepth=1:BoostType=AdaBoost:AdaBoostBeta=0.75:SeparationType=GiniIndex:nCuts=1000" );

    # factory.BookMethod(dataloader, TMVA.Types.kBDT, "BDT2", "!H:!V:NTrees=1200:MinNodeSize=0.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.1:SeparationType=GiniIndex:nCuts=800" );
    # factory.BookMethod(dataloader, TMVA.Types.kBDT, "BDT3", "!H:!V:NTrees=800:MinNodeSize=0.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.2:SeparationType=GiniIndex:nCuts=500" );
    # factory.BookMethod(dataloader, TMVA.Types.kBDT, "BDT4", "!H:!V:NTrees=700:MinNodeSize=0.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=500" );
    # factory.BookMethod(dataloader, TMVA.Types.kBDT, "BDTG4D3",   "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=500:MaxDepth=3" );
    # factory.BookMethod(dataloader, TMVA.Types.kBDT, "BDTG4C3", "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=300:MaxDepth=2" );
    # factory.BookMethod(dataloader, TMVA.Types.kBDT, "BDTG4SK01",   "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.01:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=500:MaxDepth=2" );
    # factory.BookMethod(dataloader, TMVA.Types.kBDT, "BDTG4F07"    ,   "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.7:nCuts=500:MaxDepth=2" );
    # factory.BookMethod(dataloader, TMVA.Types.kBDT, "BDTG4SK01F07",   "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.01:UseBaggedBoost:GradBaggingFraction=0.7:nCuts=500:MaxDepth=2" );

    # Run training, test and evaluation
    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()

    output.Close()
Example #6
def getCorrelationMatrix(sigFile, bkgFile, weightStr, cutStr,
                         varList):  # gets the correlation matrix as np array
    varNames = []
    loader = TMVA.DataLoader("dataset")
    for var in varList:
        if var[0] == "NJets_MultiLepCalc":
            loader.AddVariable(var[0], var[1], var[2], "I")
        else:
            loader.AddVariable(var[0], var[1], var[2], "F")
        varNames.append(var[0])

    # open the root files
    input_sig = TFile.Open(sigFile)
    signal = input_sig.Get("ljmet")
    input_bkg = TFile.Open(bkgFile)
    background = input_bkg.Get("ljmet")

    # load in the trees
    loader.AddSignalTree(signal)
    loader.fTreeS = signal
    loader.AddBackgroundTree(background)
    loader.fTreeB = background

    # set weights
    loader.SetSignalWeightExpression(weightStr)
    loader.SetBackgroundWeightExpression(weightStr)

    # set cuts
    loader.PrepareTrainingAndTestTree(
        cutStr, cutStr,
        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V"
    )

    # force the dataset to be built so that the correlation matrix can be retrieved
    loader.GetDefaultDataSetInfo().GetDataSet().GetEventCollection()

    # retrieve the signal correlation matrix
    sig_th2 = loader.GetCorrelationMatrix("Signal")
    # bkg_th2 = loader.GetCorrelationMatrix("Background")

    # convert to numpy array
    n_bins = sig_th2.GetNbinsX()
    sig_corr = np.zeros((n_bins, n_bins))

    for x in range(n_bins):
        for y in range(n_bins):
            sig_corr[x, y] = sig_th2.GetBinContent(x + 1, y + 1)

    return sig_corr, varNames
Example #7
def TMVANN (filenameSTRING,outputDOTrootSTRING,sigtreeSTRINGS,bkgtreeSTRING,variablesSTRING):
    for sigtreeSTRING in sigtreeSTRINGS:
        from ROOT import TMVA
        file = R.TFile(filenameSTRING)          #importing the dataset
        signaltree = file.Get(sigtreeSTRING)    #setting signaltree
        backgroundtree = file.Get(bkgtreeSTRING) #setting backgroundtree)
        sigweights = file.Get(sigtreeSTRING+weights)
        bkgweights = file.Get(bkgtreeSTRING+weights)
        TMVA.Tools.Instance()

        NNfile = R.TFile(outputDOTrootSTRING,"recreate")      #Writing the root file required for the TMVA factory

        TMVAfactory = TMVA.Factory("TMVANN",NNfile,"V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification")
        TMVAfactory.SetVerbose(False)  #Setting extra info (verbose) to false

        dataset = TMVA.DataLoader("dataset")     #Instantiating a dataset
        dataset.AddSignalTree(signaltree,1.)     #adding signal
        dataset.AddBackgroundTree(backgroundtree ,1.) #adding background
        print(sigweights)
        #dataset.SetSignalWeightExpression(weights)
        #dataset.SetBackgroundWeightExpression(weights)

        for i in variablesSTRING:                #adding our training variables to the TMVA
            dataset.AddVariable(i)

        signalcut = R.TCut("")  #Variables are already cut
        backgroundcut = R.TCut("")
        dataset.PrepareTrainingAndTestTree(signalcut,backgroundcut,"nTrain_Signal= 0:nTrain_Background=0:Splitmode=Random:NormMode=NumEvents:!V")
        #using all signal and background points to train, random selection, normalised to summed event weights = number of events for each tree, no verbose

        #Booking some methods
        #TMVAfactory.BookMethod(dataset,TMVA.Types.kMLP,"ArtificialNeuralNetwork_1Layer_500Epoch_tanhNeuron","H:!V:NeuronType=tanh:VarTransform=N:NCycles=500:HiddenLayers=8:TestRate=5")         #Artificial Neural Network: 1 hidden layer of 8 nodes, 500 epochs

        TMVAfactory.BookMethod(dataset,TMVA.Types.kFisher, "FisherMethod","H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=60:NsmoothMVAPdf=10")    #Fisher Method
        TMVAfactory.BookMethod(dataset,TMVA.Types.kLikelihood, "BayesLikelihood","H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=60")    #Bayes likelihood
        TMVAfactory.BookMethod(dataset,TMVA.Types.kMLP,"ArtificialNeuralNetwork_4Layer8Node_500Epoch_tanhNeuron","H:!V:NeuronType=tanh:VarTransform=N:NCycles=500:HiddenLayers=8,8,8,8:TestRate=5")         #Artificial Neural Network: 4 hidden layers of 8 nodes, 500 epochs
        TMVAfactory.BookMethod(dataset,TMVA.Types.kMLP,"ArtificialNeuralNetwork_1Layer6Node_500Epoch_reluNeuron","H:!V:NeuronType=tanh:VarTransform=N:NCycles=500:HiddenLayers=6:TestRate=5")         #Artificial Neural Network: 1 hidden layer of 6 nodes, 500 epochs (NeuronType is tanh despite the name)
        TMVAfactory.BookMethod(dataset,TMVA.Types.kMLP,"ArtificialNeuralNetwork_1Layer3Node_100Epoch_reluNeuron","H:!V:NeuronType=tanh:VarTransform=N:NCycles=100:HiddenLayers=3:TestRate=5")         #Artificial Neural Network: 1 hidden layer of 3 nodes, 100 epochs (NeuronType is tanh despite the name)
        TMVAfactory.BookMethod(dataset,TMVA.Types.kMLP,"ArtificialNeuralNetwork_2Layer5Node_500Epoch_tanhNeuron","H:!V:NeuronType=tanh:VarTransform=N:NCycles=500:HiddenLayers=5,5:TestRate=5")         #Artificial Neural Network: 2 hidden layers of 5 nodes, 500 epochs





        TMVAfactory.TrainAllMethods()
        TMVAfactory.TestAllMethods()
        TMVAfactory.EvaluateAllMethods()
        NNfile.Close()
        print ('TMVANN Ran & made ROOT file ' + outputDOTrootSTRING+sigtreeSTRING)
Example #8
 def _dataLoader(self, sigTreeNames, bkgTreeNames):
     self._data_loader = TMVA.DataLoader(self._options['factory']['name'])
     for value in self._variables.values():
         self._data_loader.AddVariable(value['name'], value['type'])
     #----
     for sigTreeName in sigTreeNames:
         self._data_loader.AddSignalTree(
             self._trees[sigTreeName + "_Train"], 1.0, "train")
         self._data_loader.AddSignalTree(self._trees[sigTreeName + "_Test"],
                                         1.0, "test")
     for bkgTreeName in bkgTreeNames:
         self._data_loader.AddBackgroundTree(
             self._trees[bkgTreeName + "_Train"], 1.0, "train")
         self._data_loader.AddBackgroundTree(
             self._trees[bkgTreeName + "_Test"], 1.0, "test")
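     # the third argument assigns each tree explicitly to the training or test sample instead of letting TMVA split it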
     self._data_loader.SetSignalWeightExpression(
         self._options['factory']['weight'])
     self._data_loader.SetBackgroundWeightExpression(
         self._options['factory']['weight'])
     #----
     self._data_loader.PrepareTrainingAndTestTree(
         TCut(self._cuts['sig']), TCut(self._cuts['bkg']),
         self._options['prepareTrees'])
Example #9
)
#Locate and add data files
file_VBF_HH_2016 = "../inputsamples/2016/SKIM_VBFHHTo4B_CV_1_C2V_1_C3_1_13TeV-madgraph.root"
file_VBF_HH_2017 = "../inputsamples/2017/SKIM_VBFHHTo4B_CV_1_C2V_1_C3_1_13TeV-madgraph.root"
file_GGF_HH_2016 = "../inputsamples/2016/SKIM_GluGluToHHTo4B_node_SM_13TeV-madgraph.root"
file_GGF_HH_2017 = "../inputsamples/2017/SKIM_GluGluToHHTo4B_node_SM_13TeV-madgraph_correctedcfg.root"
ch_sig = TChain("bbbbTree")
ch_bkg = TChain("bbbbTree")
ch_sig.AddFile(file_VBF_HH_2016)
ch_sig.AddFile(file_VBF_HH_2017)
#ch_sig.AddFile(file_VBF_HH_BSM_2016)
#ch_sig.AddFile(file_VBF_HH_BSM_2017)
ch_bkg.AddFile(file_GGF_HH_2016)
ch_bkg.AddFile(file_GGF_HH_2017)
#Load data to TMVA
dataloader = TMVA.DataLoader('GGFKiller')
dataloader.AddVariable("abs_H1_eta:=abs(H1_eta)")
dataloader.AddVariable("abs_H2_eta:=abs(H2_eta)")
dataloader.AddVariable("H1_pt")
dataloader.AddVariable("H2_pt")
dataloader.AddVariable("JJ_j1_pt")
dataloader.AddVariable("JJ_j2_pt")
dataloader.AddVariable("abs_JJ_eta:=abs(JJ_eta)")
dataloader.AddVariable("h1h2_deltaEta")
dataloader.AddVariable("h1j1_deltaR")
dataloader.AddVariable("h1j2_deltaR")
dataloader.AddVariable("h2j1_deltaR")
dataloader.AddVariable("h2j2_deltaR")
dataloader.AddVariable("abs_j1etaj2eta:=abs(j1etaj2eta)")
dataloader.AddVariable("abs_costh_HH_b1_cm:=abs(costh_HH_b1_cm)")
dataloader.AddVariable("abs_costh_HH_b2_cm:=abs(costh_HH_b2_cm)")
Example #10
def main():

    NTupName   = "JetTree"
    alg        = "TruthRawTrim"
    cuts       =  ["eta>-1.2","eta<1.2","pt>300","pt<5000","m>61","m<85"]
    vars       = ["Tau21","T2jet"]

    #NTupName   = "varTree"    
    verbose    = True
    
    #alg        = "AK10LCTRIMF5R20"
    spectators = ["m"]
    #cuts       = ["eta>-1.2","eta<1.2","pt>200","pt<350","m>61","m<85","TruthRecoMatch==1"]
    #vars       = ["TauWTA2TauWTA1","ZCUT12","Dip23","TJetVol","ActiveArea","PullC10","Angularity"]
    methods    = "Likelihood"

    print "Starting and getting arguments:"
    allargs = sys.argv[1:]    
    if len(allargs)<5:
        print "You input these args"
        print allargs
        print "Not enough args, please try again"
        return 1
    else:
        alg        = allargs[0]
        spectators = allargs[1].split(",")
        cuts       = allargs[2].split(",")
        vars       = allargs[3].split(",")
        methods    = allargs[4]
    
    print "Running with args:"
    print "  alg        = ",alg        
    print "  spectators = ",spectators 
    print "  cuts       = ",cuts       
    print "  vars       = ",vars       
    print "  methods    = ",methods    
    

    # Print methods
    mlist = methods.replace(' ',',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()



    #===============================
    #Read training and test data
    #===============================
    #InputDir = "../gen_20170529/"
    InputDir="../Ana_EventGeneration/"
    #InputDir="~/Downloads/"
    print "Getting inputs from: ",InputDir
    #s1 = TFile(InputDir+"ntuple_ttbar_2000.root");
    #b1 = TFile(InputDir+"ntuple_dijet_800_1400.root");
    s1 = TFile(InputDir+"ntuple_tt_test10000.root");
    b1 = TFile(InputDir+"ntuple_dijet_test10000.root");

    # Output file
    OutFileName="testout.root"
    outputFile = TFile( OutFileName, 'RECREATE' )
    
    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in 
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory( "TMVAClassification", outputFile, 
                            "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" )

    # Set verbosity
    factory.SetVerbose( verbose )

    # data loader
    dataloader=TMVA.DataLoader("dataset")

#     weight=""
#     weight+="pass_selection*EventWeight*CrossSection*("
#     weight+=alg+"_pt>"+pt1+" && "
#     weight+=alg+"_pt<"+pt2
#     if m1!="0":
#         weight+=" && "+alg+"_m>"+m1+" && "
#         weight+=alg+"_m<"+m2
#     weight+=")"
#     
#     #Get signal and background histograms
#     if variable=="mass":
#         histname = "akt10"+alg+"_trim_"+variable
#     else:
#         histname = alg+"_"+variable
    
    #======================================
    #Predefined cuts - for instance on M(j1)
    #======================================
    mycuts = "1.0"
    mycutb = "1.0"

    for cut in cuts:
        placecut=cut
        if cut[:2]=="pt" or cut[:3]=="eta" or cut[:4]=="m":
            placecut = "* ("+alg+"_"+cut+")"
        else:
            placecut="* ("+cut+") "
        mycuts += placecut
        mycutb += placecut
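    # mycuts/mycutb end up as "1.0 * (cut1) * (cut2) * ...": an event failing any condition multiplies to zero and is rejected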

    
    print "MyCutsSig: ",mycuts
    print "MyCutsBkg: ",mycutb

    #===================================
    #Spectator variables from tree
    #=====================================
    for spec in spectators:
        dataloader.AddSpectator( spec, 'F' )
        
    #===================================
    #MVA variables from tree
    #=====================================
    for var in vars:
        dataloader.AddVariable( var , 'F' )

    #===============================
    #Read training and test data
    #===============================
    print "Getting trees ... ",NTupName
    st1 = s1.Get(NTupName)
    bt1 = b1.Get(NTupName)

    #=========================================
    # global event weights per tree (see below for setting event-wise weights)
    #=========================================
    ws1 = 1.0
    wb1 = 1.0

    #=========================================
    # You can add an arbitrary number of signal or background trees
    #=========================================
    dataloader.AddSignalTree    ( st1, ws1 );
    #SCHSU
    #dataloader.SetSignalWeightExpression("EventWeight*CrossSection");

    dataloader.AddBackgroundTree( bt1, wb1 );
    #dataloader.SetBackgroundWeightExpression("EventWeight*CrossSection");
    
    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
    mycutSig = TCut(mycuts)
    mycutBkg = TCut(mycutb)
    
    dataloader.PrepareTrainingAndTestTree( mycutSig, mycutBkg,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" )

    # --------------------------------------------------------------------------------------------------

    # ---- Book MVA methods
    #
    # please lookup the various method configuration options in the corresponding cxx files, eg:
    # src/MethodCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
    # it is possible to preset ranges in the option string in which the cut optimisation should be done:
    # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

    # Cut optimisation
    if "Cuts" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kCuts, "Cuts",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" )

    if "CutsD" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsD",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" )

    if "CutsPCA" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsPCA",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" )

    if "CutsGA" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsGA",
                            "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" )

    if "CutsSA" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsSA",
                            "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" )

    # Likelihood ("naive Bayes estimator")
    if "Likelihood" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "Likelihood",
                            "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" )

    # Decorrelated likelihood
    if "LikelihoodD" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodD",
                            "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" )

    # PCA-transformed likelihood
    if "LikelihoodPCA" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodPCA",
                            "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ) 

    # Use a kernel density estimator to approximate the PDFs
    if "LikelihoodKDE" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodKDE",
                            "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ) 

    # Use a variable-dependent mix of splines and kernel density estimator
    if "LikelihoodMIX" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodMIX",
                            "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ) 

    # Test the multi-dimensional probability density estimator
    # here are the options strings for the MinMax and RMS methods, respectively:
    #      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
    #      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
    if "PDERS" in mlist:
# DEFAULT
#         factory.BookMethod( dataloader, TMVA.Types.kPDERS, "PDERS",
#                             "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" )
# CHOOSE RIGID VOLUME SO IT DOESNT TAKE SO LONG
        factory.BookMethod( dataloader, TMVA.Types.kPDERS, "PDERS",
                            "!H:!V:NormTree=T:VolumeRangeMode=Unscaled:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" )


    if "PDERSD" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kPDERS, "PDERSD",
                            "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" )

    if "PDERSPCA" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kPDERS, "PDERSPCA",
                             "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" )

    # Multi-dimensional likelihood estimator using self-adapting phase-space binning
    if "PDEFoam" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kPDEFoam, "PDEFoam",
                            "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" )

    if "PDEFoamBoost" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kPDEFoam, "PDEFoamBoost",
                            "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" )

    # K-Nearest Neighbour classifier (KNN)
    if "KNN" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kKNN, "KNN",
                            "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" )

    # H-Matrix (chi2-squared) method
    if "HMatrix" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kHMatrix, "HMatrix", "!H:!V" )

    # Linear discriminant (same as Fisher discriminant)
    if "LD" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" )

    # Fisher discriminant (same as LD)
    if "Fisher" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" )

    # Fisher with Gauss-transformed input variables
    if "FisherG" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFisher, "FisherG", "H:!V:VarTransform=Gauss" )

    # Composite classifier: ensemble (tree) of boosted Fisher classifiers
    if "BoostedFisher" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFisher, "BoostedFisher", 
                            "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" )

    # Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
    if "FDA_MC" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_MC",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );

    if "FDA_GA" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_GA",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

    if "FDA_SA" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_SA",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

    if "FDA_MT" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_MT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

    if "FDA_GAMT" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_GAMT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

    if "FDA_MCMT" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_MCMT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

    # TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
    if "MLP" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" )

    if "MLPBFGS" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" )

    if "MLPBNN" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ) # BFGS training with bayesian regulators

    # CF(Clermont-Ferrand)ANN
    if "CFMlpANN" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ) # n_cycles:#nodes:#nodes:...  

    # Tmlp(Root)ANN
    if "TMlpANN" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ) # n_cycles:#nodes:#nodes:...

    # Support Vector Machine
    if "SVM" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" )

    # Boosted Decision Trees
    if "BDTG" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDTG",
                            "!H:!V:NTrees=1000:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" )                        

    if "BDT" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDT",
                           "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" )

    if "BDTB" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDTB",
                           "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" )

    if "BDTD" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDTD",
                           "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" )

    # RuleFit -- TMVA implementation of Friedman's method
    if "RuleFit" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kRuleFit, "RuleFit",
                            "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" )

    # --------------------------------------------------------------------------------------------------
            
    # ---- Now you can tell the factory to train, test, and evaluate the MVAs. 

    # Train MVAs
    factory.TrainAllMethods()
    
    # Test MVAs
    factory.TestAllMethods()
    
    # Evaluate MVAs
    factory.EvaluateAllMethods()    
    
    # Save the output.
    outputFile.Close()
    
    print "=== wrote root file %s\n" % OutFileName
    print "=== TMVAClassification is done!\n"
Example #11
output = TFile.Open('BinaryClassificationKeras.root', 'RECREATE')
factory = TMVA.Factory(
    'TMVAClassification', output,
    '!V:!Silent:Color:DrawProgressBar:Transformations=G:AnalysisType=Classification'
)

# Load data
if not isfile('tmva_class_example.root'):
    call(['curl', '-O', 'http://root.cern.ch/files/tmva_class_example.root'])

data = TFile.Open('tmva_class_example.root')
signal = data.Get('TreeS')
background = data.Get('TreeB')

dataloader = TMVA.DataLoader('BinaryClassificationKeras')
for branch in signal.GetListOfBranches():
    dataloader.AddVariable(branch.GetName())

dataloader.AddSignalTree(signal, 1.0)
dataloader.AddBackgroundTree(background, 1.0)
dataloader.PrepareTrainingAndTestTree(
    TCut(''),
    'nTrain_Signal=4000:nTrain_Background=4000:SplitMode=Random:NormMode=NumEvents:!V'
)
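# train on 4000 signal and 4000 background events; the remaining events form the test sample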

# Generate model

# Define model
model = Sequential()
model.add(Dense(64, init='glorot_normal', activation='relu', input_dim=4))
Example #12
def add_variables(data_loader, good_variables):

    for variable in good_variables:
        if variable != "Bc_M":
            data_loader.AddVariable(variable, "F")

    return data_loader


if __name__ == '__main__':
    decay_name = decay_names[decay_nb]
    output_file = TFile("~/TMVA/TMVAoutput" + decay_name + str(run) + ".root",
                        "RECREATE")
    factory = TMVA.Factory("TMVA_" + decay_name, output_file,
                           "DrawProgressBar=True")
    data_loader = TMVA.DataLoader("dataloader")

    moca_tree = dhand.combine_trees(run, decay_name, True)
    data_tree = dhand.combine_trees(run, decay_name, False)
    branches_to_keep = dhand.branch_selection(data_tree, branches, [])
    moca_tree = dhand.activate_branches(moca_tree, branches_to_keep)
    data_tree = dhand.activate_branches(data_tree, branches_to_keep)

    add_variables(data_loader, branches_to_keep)
    sgcut_test = TCut("runNumber%5==" + kfold +
                      "&& (Bc_M > 5200 && Bc_M < 5400)")
    sgcut_train = TCut("runNumber%5!=" + kfold +
                       "&& (Bc_M > 5200 && Bc_M < 5400)")
    bgcut_test = TCut("runNumber%5==" + kfold + "&& Bc_M > 5400")
    bgcut_train = TCut("runNumber%5!=" + kfold + "&& Bc_M > 5400")
Example #13
    trainfilename="proctrain.csv.root" 


    trainfile = TFile.Open(trainfilename,"read")
    traintree = trainfile.Get(traintree_name)
    
    TMVA.Tools.Instance()
    
    
    # create the tmva output file, which will be full of details about the training
    fout = TFile("tmvatest.root","RECREATE")


    # use the default factory
    factory = TMVA.Factory("TMVAClassification", fout)                                
    dataloader = TMVA.DataLoader("dataset")


    # build the list of variables
    al=traintree.GetListOfBranches()
    varlist=[]
    for i in range(al.GetEntries()):
        varlist+=[al[i].GetName()]

        
    if debug:
        print "all variables of ",trainfile, " ", varlist
        print "now stripping EventId Weight and Label "

    # these three variables should not be used for training
    mva_input_list=[e for e in varlist if not e in ['EventId','Weight','Label']] 
Example #14
def main():  # runs the program
    checkRootVer()  # check that ROOT version is correct

    try:  # retrieve command line options
        shortopts = "d:o:v:w:y:h?"  # possible command line options
        longopts = [
            "dataset=", "option=", "where=", "year=", "verbose", "help",
            "usage"
        ]
        opts, args = getopt.getopt(
            sys.argv[1:], shortopts,
            longopts)  # associates command line inputs to variables

    except getopt.GetoptError:  # output error if command line argument invalid
        print("ERROR: unknown options in argument %s" % sys.argv[1:])
        usage()
        sys.exit(1)

    myArgs = np.array(
        [  # Stores the command line arguments   
            ['-d', '--dataset', 'dataset', 'dataset'],
            ['-w', '--where', 'where', 'lpc'], ['-y', '--year', 'year', 2017],
            ['-o', '--option', 'option', 0],
            ['-v', '--verbose', 'verbose', True]
        ],
        dtype="object")

    for opt, arg in opts:
        if opt in myArgs[:, 0]:
            index = np.where(
                myArgs[:,
                       0] == opt)[0][0]  # np.where returns a tuple of arrays
            myArgs[index, 3] = str(
                arg)  # override the variables with the command line argument
        elif opt in myArgs[:, 1]:
            index = np.where(myArgs[:, 1] == opt)[0][0]
            myArgs[index, 3] = arg
        if opt in ("-?", "-h", "--help",
                   "--usage"):  # provides command line help
            usage()
            sys.exit(0)

    # Initialize some containers
    bkg_list = []
    bkg_trees_list = []
    sig_list = []
    sig_trees_list = []

    # Initialize some variables after reading in arguments
    option_index = np.where(myArgs[:, 2] == 'option')[0][0]
    dataset_index = np.where(myArgs[:, 2] == 'dataset')[0][0]
    verbose_index = np.where(myArgs[:, 2] == 'verbose')[0][0]
    where_index = np.where(myArgs[:, 2] == 'where')[0][0]
    year_index = np.where(myArgs[:, 2] == 'year')[0][0]

    DATASETPATH = myArgs[dataset_index][3]
    DATASET = DATASETPATH.split("/")[0]
    OPTION = str(myArgs[option_index][3])  # compared against "0"/"1" below
    VERBOSE = myArgs[verbose_index][3]
    WHERE = myArgs[where_index][3]
    YEAR = int(myArgs[year_index][3])  # compared against 2017/2018 below

    if WHERE == "lpc":
        if YEAR == 2017:
            inputDir = varsList.inputDirLPC2017
        elif YEAR == 2018:
            inputDir = varsList.inputDirLPC2018
    else:
        if YEAR == 2017:
            inputDir = varsList.inputDirBRUX2017
        elif YEAR == 2018:
            inputDir = varsList.inputDirBRUX2018

    if OPTION == "0":
        print("Using Option 0: default varList")
        varList = varsList.varList["DNN"]

    elif OPTION == "1":
        print("Using Option 1: selected data from {}".format(DATASETPATH))
        varsListHPO = open(DATASETPATH + "/varsListHPO.txt", "r").readlines()
        varList = []
        START = False
        for line in varsListHPO:
            if START == True:
                varList.append(str(line.strip()))
            if "Variable List:" in line:
                START = True

    numVars = len(varList)
    outf_key = str("Keras_" + str(numVars) + "vars")
    OUTF_NAME = DATASET + "/weights/TMVA_" + outf_key + ".root"
    outputfile = TFile(OUTF_NAME, "RECREATE")

    # initialize and set-up TMVA factory

    factory = TMVA.Factory(
        "Training", outputfile,
        "!V:!ROC:Silent:Color:!DrawProgressBar:Transformations=I;:AnalysisType=Classification"
    )

    factory.SetVerbose(bool(myArgs[verbose_index, 3]))
    (TMVA.gConfig().GetIONames()).fWeightFileDir = "weights/" + outf_key

    # initialize and set-up TMVA loader

    loader = TMVA.DataLoader(DATASET)

    if OPTION == "0":
        for var in varList:
            if var[0] == "NJets_MultiLepCalc":
                loader.AddVariable(var[0], var[1], var[2], 'I')
            else:
                loader.AddVariable(var[0], var[1], var[2], "F")
    if OPTION == "1":
        for var in varList:
            if var == "NJets_MultiLepCalc":
                loader.AddVariable(var, "", "", "I")
            else:
                loader.AddVariable(var, "", "", "F")

    # add signal files
    if YEAR == 2017:
        for i in range(len(varsList.sig2017_2)):
            sig_list.append(TFile.Open(inputDir + varsList.sig2017_2[i]))
            sig_trees_list.append(sig_list[i].Get("ljmet"))
            sig_trees_list[i].GetEntry(0)
            loader.AddSignalTree(sig_trees_list[i])

    elif YEAR == 2018:
        for i in range(len(varsList.sig2018_2)):
            sig_list.append(TFile.Open(inputDir + varsList.sig2018_2[i]))
            sig_trees_list.append(sig_list[i].Get("ljmet"))
            sig_trees_list[i].GetEntry(0)
            loader.AddSignalTree(sig_trees_list[i])

    # add background files
    if YEAR == 2017:
        for i in range(len(varsList.bkg2017_2)):
            bkg_list.append(TFile.Open(inputDir + varsList.bkg2017_2[i]))
            bkg_trees_list.append(bkg_list[i].Get("ljmet"))
            bkg_trees_list[i].GetEntry(0)

            if bkg_trees_list[i].GetEntries() == 0:
                continue
            loader.AddBackgroundTree(bkg_trees_list[i])

    elif YEAR == 2018:
        for i in range(len(varsList.bkg2018_2)):
            bkg_list.append(TFile.Open(inputDir + varsList.bkg2018_2[i]))
            bkg_trees_list.append(bkg_list[i].Get("ljmet"))
            bkg_trees_list[i].GetEntry(0)

            if bkg_trees_list[i].GetEntries() == 0:
                continue
            loader.AddBackgroundTree(bkg_trees_list[i])

    loader.SetSignalWeightExpression(weightStrS)
    loader.SetBackgroundWeightExpression(weightStrB)

    mycutSig = TCut(cutStrS)
    mycutBkg = TCut(cutStrB)

    loader.PrepareTrainingAndTestTree(
        mycutSig, mycutBkg,
        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V"
    )

    ######################################################
    ######################################################
    ######                                          ######
    ######            K E R A S   D N N             ######
    ######                                          ######
    ######################################################
    ######################################################
    HIDDEN = 0
    NODES = 0
    LRATE = 0.
    PATTERN = ""
    REGULATOR = ""
    ACTIVATION = ""
    BATCH_SIZE = 0
    # modify this when implementing hyper parameter optimization:
    model_name = 'TTTT_' + str(numVars) + 'vars_model.h5'

    EPOCHS = 100
    PATIENCE = 20

    # edit these based on hyper parameter optimization results
    if OPTION == "0":
        HIDDEN = 3
        NODES = 100
        LRATE = 0.01
        PATTERN = 'static'
        REGULATOR = 'none'
        ACTIVATION = 'relu'
        BATCH_SIZE = 256
    if OPTION == "1":
        datasetDir = os.listdir(DATASETPATH)
        for file in datasetDir:
            if "params" in file: optFileName = file
        optFile = open(DATASETPATH + "/" + optFileName, "r").readlines()
        START = False
        for line in optFile:
            if START == True:
                if "Hidden" in line: HIDDEN = int(line.split(":")[1].strip())
                if "Initial" in line: NODES = int(line.split(":")[1].strip())
                if "Batch" in line:
                    BATCH_SIZE = 2**int(line.split(":")[1].strip())
                if "Learning" in line:
                    LRATE = float(line.split(":")[1].strip())
                if "Pattern" in line: PATTERN = str(line.split(":")[1].strip())
                if "Regulator" in line:
                    REGULATOR = str(line.split(":")[1].strip())
                if "Activation" in line:
                    ACTIVATION = str(line.split(":")[1].strip())
            if "Optimized Parameters:" in line: START = True
    kerasSetting = '!H:!V:VarTransform=G:FilenameModel=' + model_name + \
                   ':SaveBestOnly=true' + \
                   ':NumEpochs=' + str(EPOCHS) + \
                   ':BatchSize=' + str(BATCH_SIZE) + \
                   ':TriesEarlyStopping=' + str(PATIENCE)
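    # PyKeras options: FilenameModel points at the saved Keras model, SaveBestOnly keeps only the
    # best checkpoint, NumEpochs/BatchSize control the training loop, TriesEarlyStopping is the
    # early-stopping patience in epochs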

    model = build_model(HIDDEN, NODES, LRATE, REGULATOR, PATTERN, ACTIVATION,
                        numVars)
    model.save(model_name)
    model.summary()

    factory.BookMethod(loader, TMVA.Types.kPyKeras, 'PyKeras', kerasSetting)

    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()

    outputfile.Close()

    print("Finished training in " + str(
        (time.time() - START_TIME) / 60.0) + " minutes.")

    ROC = factory.GetROCIntegral(DATASET, 'PyKeras')
    print('ROC value is: {}'.format(ROC))
    if OPTION == "1":
        varsListHPOtxt = open(DATASETPATH + "/varsListHPO.txt", "a")
        varsListHPOtxt.write("ROC Value: {}".format(ROC))
Example #15
def main():

    try:
        # retrieve command line options
        shortopts  = "m:i:n:d:k:l:t:o:vh?"
        longopts   = ["methods=", "inputfile=", "nTrees=", "maxDepth=", "mass=", "varListKey=", "inputtrees=", "outputfile=", "verbose", "help", "usage"]
        opts, args = getopt.getopt( sys.argv[1:], shortopts, longopts )

    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    infname     = DEFAULT_INFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname    = DEFAULT_OUTFNAME
    methods     = DEFAULT_METHODS
    nTrees      = DEFAULT_NTREES
    mDepth      = DEFAULT_MDEPTH
    varListKey  = DEFAULT_VARLISTKEY
    verbose     = True
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-d", "--maxDepth"):
        	mDepth = a
        elif o in ("-l", "--varListKey"):
        	varListKey = a
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-n", "--nTrees"):
            nTrees = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            a.strip()
            trees = a.rsplit( ' ' )
            trees.sort()
            trees.reverse()
            if len(trees)-trees.count('') != 2:
                print "ERROR: need to give two trees (each one for signal and background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True

    varList = varsList.varList[varListKey]
    nVars = str(len(varList))+'vars'
    Note=methods+'_'+varListKey+'_'+nVars+'_mDepth'+mDepth
    outfname = "dataset/weights/TMVA_"+Note+".root"
    # Print methods
    mlist = methods.replace(' ',',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()
			
    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut
    
    # check ROOT version, give alarm if 5.18 
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)
        
    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile( outfname, 'RECREATE' )
    
    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in 
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory( "TMVAClassification", outputFile, 
                            "!V:!Silent:Color:DrawProgressBar:Transformations=I;:AnalysisType=Classification" )

    loader = TMVA.DataLoader("dataset")
    
    # Set verbosity
#     factory.SetVerbose( verbose )
    
    # If you wish to modify default settings 
    # (please check "src/Config.h" to see all available global options)
    #    gConfig().GetVariablePlotting()).fTimesRMS = 8.0
    (TMVA.gConfig().GetIONames()).fWeightFileDir = "weights/"+Note

    # Define the input variables that shall be used for the classifier training
    # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    # [all types of expressions that can also be parsed by TTree::Draw( "expression" )]


    for iVar in varList:
        if iVar[0]=='NJets_JetSubCalc': loader.AddVariable(iVar[0],iVar[1],iVar[2],'I')
        else: loader.AddVariable(iVar[0],iVar[1],iVar[2],'F')

    # You can add so-called "Spectator variables", which are not used in the MVA training, 
    # but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the 
    # input variables, the response values of all trained MVAs, and the spectator variables

    inputDir = varsList.inputDir
    infname = "TTTT_TuneCP5_13TeV-amcatnlo-pythia8_hadd.root"
    iFileSig = TFile.Open(inputDir+infname)
    sigChain = iFileSig.Get("ljmet")

    loader.AddSignalTree(sigChain)
    bkg_list = []
    bkg_trees_list = []
    hist_list = []
    weightsList = []
    bkgList = varsList.bkg
    
    for i in range(len(bkgList)):
        bkg_list.append(TFile.Open(inputDir+bkgList[i]))
        print inputDir+bkgList[i]
        bkg_trees_list.append(bkg_list[i].Get("ljmet"))
        bkg_trees_list[i].GetEntry(0)

        if bkg_trees_list[i].GetEntries() == 0:
            continue
        loader.AddBackgroundTree( bkg_trees_list[i], 1)

    signalWeight = 1 #0.0159/sigChain.GetEntries() #xs (pb)


    # ====== register trees ====================================================
    # To give different trees for training and testing, do as follows:
    #    loader.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" )
    #    loader.AddSignalTree( signalTestTree,     signalTestWeight,  "Test" )
    
    # Use the following code instead of the above two or four lines to add signal and background 
    # training and test events "by hand"
    # NOTE that in this case one should not give expressions (such as "var1+var2") in the input 
    #      variable definition, but simply compute the expression before adding the event
    #
    #    # --- begin ----------------------------------------------------------
    #    
    # ... *** please lookup code in TMVA/macros/TMVAClassification.C ***
    #    
    #    # --- end ------------------------------------------------------------
    #
    # ====== end of register trees ==============================================    
            
    # Set individual event weights (the variables must exist in the original TTree)
    #    for signal    : loader.SetSignalWeightExpression    ("weight1*weight2");
    #    for background: loader.SetBackgroundWeightExpression("weight1*weight2");
    loader.SetSignalWeightExpression( weightStrS )
    loader.SetBackgroundWeightExpression( weightStrB )

    # Apply additional cuts on the signal and background sample. 
    # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )
    mycutSig = TCut( cutStrS )
    mycutBkg = TCut( cutStrB ) 

    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
    loader.PrepareTrainingAndTestTree( mycutSig, mycutBkg,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" )

    # --------------------------------------------------------------------------------------------------

    # ---- Book MVA methods
    #
    # please lookup the various method configuration options in the corresponding cxx files, eg:
    # src/MethodCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
    # it is possible to preset ranges in the option string in which the cut optimisation should be done:
    # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

    # Cut optimisation

# bdtSetting for "BDT" 
    bdtSetting = '!H:!V:NTrees=%s:MaxDepth=%s' %(nTrees,mDepth)
    bdtSetting += ':MinNodeSize=2.5%:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20'
    bdtSetting += ':IgnoreNegWeightsInTraining=True'
# bdtSetting for "BDTMitFisher" 
    bdtFSetting = '!H:!V:NTrees=%s' %nTrees
    bdtFSetting += ':MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20'
    bdtFSetting += ':IgnoreNegWeightsInTraining=True'
# bdtSetting for "BDTG" 
    bdtGSetting = '!H:!V:NTrees=%s:MaxDepth=%s' %(nTrees,mDepth)
    bdtGSetting += ':MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20'
    bdtGSetting += ':Pray' #Pray takes into account the effect of negative bins in BDTG
    #bdtGSetting += ':IgnoreNegWeightsInTraining=True'
# bdtSetting for "BDTB" 
    bdtBSetting = '!H:!V:NTrees=%s' %nTrees
    bdtBSetting += ':MinNodeSize=2.5%:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20'
    bdtBSetting += ':IgnoreNegWeightsInTraining=True'
# bdtSetting for "BDTD" 
    bdtDSetting = '!H:!V:NTrees=%s' %nTrees
    bdtDSetting += ':MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate'
    bdtDSetting += ':IgnoreNegWeightsInTraining=True'
# Note: explicitly setting *nEventsMin* overrides the recommended option *MinNodeSize*


#BOOKING AN ALGORITHM
#     if methods=="BDT": factory.BookMethod( TMVA.Types.kBDT, "BDT",bdtSetting)
    if methods=="BDT": factory.BookMethod( loader, TMVA.Types.kBDT, "BDT",bdtSetting)    
    if methods=="BDTG": factory.BookMethod( TMVA.Types.kBDT, "BDTG",bdtGSetting)
    if methods=="BDTMitFisher": factory.BookMethod( TMVA.Types.kBDT, "BDTMitFisher",bdtFSetting)
    if methods=="BDTB": factory.BookMethod( TMVA.Types.kBDT, "BDTB",bdtBSetting)
    if methods=="BDTD": factory.BookMethod( TMVA.Types.kBDT, "BDTD",bdtDSetting)
    # --------------------------------------------------------------------------------------------------
            
    # ---- Now you can tell the factory to train, test, and evaluate the MVAs. 

    # Train MVAs
    print "train all method"
    factory.TrainAllMethods()

    print "test all method"
    # Test MVAs
    factory.TestAllMethods()
    
    # Evaluate MVAs
    factory.EvaluateAllMethods()    

    # Save the output.
    outputFile.Close()
    # save plots:
    os.chdir('dataset/weights/'+Note)

    if not gROOT.IsBatch(): TMVA.TMVAGui( outfname )
    print "DONE"
plt.style.use('ggplot')
RNG = np.random.RandomState(1)

# Create an example regression dataset
X = np.linspace(0, 6, 100)[:, np.newaxis]
y = np.sin(X).ravel() + \
    np.sin(6 * X).ravel() + \
    RNG.normal(0, 0.1, X.shape[0])

# Fit a regression model
output = TFile('tmva_output.root', 'recreate')
factory = TMVA.Factory('regressor', output, 'AnalysisType=Regression:'
                       '!V:Silent:!DrawProgressBar')

if ROOT_VERSION >= '6.07/04':
    data = TMVA.DataLoader('.')
else:
    data = factory
data.AddVariable('x', 'F')
data.AddTarget('y', 'F')

add_regression_events(data, X, y)
add_regression_events(data, X, y, test=True)
# The following line is necessary if events have been added individually:
data.PrepareTrainingAndTestTree(TCut('1'), '')

if ROOT_VERSION >= '6.07/04':
    BookMethod = factory.BookMethod
else:
    BookMethod = TMVA.Factory.BookMethod
BookMethod(
Exemple #17
def main():
    print "\n", "=" * 80
    print "\tDESY 2017 - classification with TMVA"
    print "=" * 80

    # summary root file
    summaryFilename = 'TMVA.root'

    # results directory
    resultsDir = 'results'
    os.system('mkdir -p %s' % resultsDir)

    #------------------------------------------------------------------
    # get signal file and associated Root tree
    sigFilename = "../public/ntuple_HZZ4L.root"
    sigFile, sigTree = getTree(sigFilename)

    # get background file and associated Root tree
    bkgFilename = "../public/ntuple_ZZ4L.root"
    bkgFile, bkgTree = getTree(bkgFilename)

    #------------------------------------------------------------------
    # create a factory for booking machine learning methods
    #------------------------------------------------------------------
    outputFile = TFile("TMVA.root", "recreate")
    options = '''
    !V
    Color
    !Silent
    DrawProgressBar
    AnalysisType=Classification
    Transformations=I;D
    '''
    factory = TMVA.Factory("Z1massZ2mass", outputFile, formatOptions(options))

    #------------------------------------------------------------------
    # set up data set for training and testing
    #------------------------------------------------------------------
    dataLoader = TMVA.DataLoader(resultsDir)

    # define variables to be used
    dataLoader.AddVariable("Z1mass", 'D')
    dataLoader.AddVariable("Z2mass", 'D')

    # define from which trees data are to be taken
    # from and the global weights to be assigned to
    # the training data
    sigWeight = 1.0
    dataLoader.AddSignalTree(sigTree, sigWeight)
    dataLoader.SetSignalWeightExpression("weight")

    bkgWeight = 1.0
    dataLoader.AddBackgroundTree(bkgTree, bkgWeight)
    dataLoader.SetBackgroundWeightExpression("weight")

    # you can apply cuts, if needed
    cut = TCut("")
    options = '''
    SplitMode=Random
    NormMode=EqualNumEvents
    nTrain_Signal=2500
    nTest_Signal=2500
    nTrain_Background=2500
    nTest_Background=2500
    !V 
    '''
    dataLoader.PrepareTrainingAndTestTree(
        cut,  # signal cut
        cut,  # background cut
        formatOptions(options))

    #------------------------------------------------------------------
    # ok, almost done, define machine learning methods to be run
    #------------------------------------------------------------------

    options = '''
    !H
    !V
    VarTransform=None
    '''
    factory.BookMethod(dataLoader, TMVA.Types.kFisher, "Fisher",
                       formatOptions(options))

    options = '''
    !H
    !V
    BoostType=AdaBoost
    NTrees=200
    nEventsMin=100
    nCuts=50
    '''
    factory.BookMethod(dataLoader, TMVA.Types.kBDT, "BDT",
                       formatOptions(options))

    options = '''
    !H
    !V
    NCycles=500
    VarTransform=N
    HiddenLayers=5
    TrainingMethod=BFGS
    '''
    factory.BookMethod(dataLoader, TMVA.Types.kMLP, "MLP",
                       formatOptions(options))

    #------------------------------------------------------------------
    # ok, let's go!
    #------------------------------------------------------------------
    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()

    outputFile.Close()
        'BDTF': 0,
        'MLP': 0,
        'MLPBFGS': 0,
        'MLPBNN': 0,
        'CFMlpANN': 0,
        'TMlpANN': 0
    }

    factory = t.Factory(
        'vbf_bdt_combined_james_current', outputFile,
        '!V:!Silent:Color:DrawProgressBar:Transformations=I;D;G,D:AnalysisType=Classification'
    )
    factory.Print()

    t.gConfig().GetIONames().fWeightFileDir = outputWeightsDir
    dataloader = t.DataLoader(".")

    dataloader.AddVariable('dijetDEta', 'dijetDEta', 'dijetDEta', 'F')
    dataloader.AddVariable('dijetDPhi', 'dijetDPhi', 'dijetDPhi', 'F')
    dataloader.AddVariable('llgJJDPhi', 'llgJJDPhi', 'llgJJDPhi', 'F')
    dataloader.AddVariable('jPhotonDRMin', 'jPhotonDRMin', 'jPhotonDRMin', 'F')
    dataloader.AddVariable('ptt', 'ptt', 'ptt', 'F')
    dataloader.AddVariable('jetOnePt', 'jetOnePt', 'jetOnePt', 'F')
    dataloader.AddVariable('jetTwoPt', 'jetTwoPt', 'jetTwoPt', 'F')
    dataloader.AddVariable('kin_bdt_james', 'kin_bdt_james', 'kin_bdt_james',
                           'F')
    dataloader.AddVariable('vbfPtBalance', 'vbfPtBalance', 'vbfPtBalance', 'F')
    dataloader.AddVariable('photonZepp', 'photonZepp', 'photonZepp', 'F')

    # trees for training
    inputFile = r.TFile('{0}/output_combined.root'.format(inputFilesDir))
Exemple #19
#print("Input file: {}".format(INPUTFILE))
READ = False
with open(dataset + "/optimize_" + outf_key + "/varsListHPO.txt") as file:
    for line in file.readlines():
        if READ == True:
            varList.append(str(line).strip())
        if "Variable List:" in line: READ = True

numVars = len(varList)

outputfile = TFile(
    dataset + "/weights/TMVAOptimization_" + str(numVars) + "vars.root",
    "RECREATE")

loader = TMVA.DataLoader(dataset + "/optimize_" + outf_key)

for var in varList:
    loader.AddVariable(var, "", "", "F")

# add signal to loader
if year == 2017:
    for i in range(len(varsList.sig2017_1)):
        sig_list.append(TFile.Open(inputDir + varsList.sig2017_1[i]))
        sig_trees_list.append(sig_list[i].Get("ljmet"))
        sig_trees_list[i].GetEntry(0)
        loader.AddSignalTree(sig_trees_list[i], 1)
elif year == 2018:
    for i in range(len(varsList.sig2018_1)):
        sig_list.append(TFile.Open(inputDir + varsList.sig2018_1[i]))
        sig_trees_list.append(sig_list[i].Get("ljmet"))
Exemple #20
import ROOT
from ROOT import TMVA, TFile, TTree, TCut, TString

# In[2]:

outputFile = TFile("TMVA.root", 'RECREATE')
ROOT.TMVA.Tools.Instance()

factory = TMVA.Factory(
    'TMVAClassification', outputFile,
    '!V:!Silent:Color:DrawProgressBar:Transformations=G:AnalysisType=Classification'
)

# In[3]:

loader = TMVA.DataLoader("dataset_cv")
loader.AddVariable("Pt", "P_{T}", "GeV", 'D')
loader.AddVariable("metE", "E^{miss}_{T}", "GeV", 'D')
loader.AddVariable("dPhi", "d#phi", 'D')
loader.AddVariable("Mt", "M_{T}", "GeV", 'D')

# In[4]:

f1 = ROOT.TFile.Open('/eos/user/g/gtolkach/signal_event_with_cuts_plus.root')
f2 = ROOT.TFile.Open(
    '/eos/user/g/gtolkach/beackground_event_with_cuts_plus.root')

signal = f1.Get('NOMINAL')
background = f2.Get('NOMINAL')

# In[5]:
Exemple #21
)

print ' '
print 'Copying data DecayTree to TreeB ...'
TreeB = tdata.CopyTree("")
print 'Data DecayTree copied to TreeB'
print ' '

print ' '
print 'Copying MC DecayTree to TreeS ...'
TreeS = tmc.CopyTree("")
print 'MC DecayTree copied to TreeS'
print ' '

dataloader = TMVA.DataLoader(
    "dataset1"
)  # xml weights in dataset1/weights/TMVAClassification_BDT.weights.xml

dataloader.AddVariable("Bs_PT", "D")
dataloader.AddVariable("Kst_PT", "D")
dataloader.AddVariable("Kstb_PT", "D")
dataloader.AddVariable("max_Kp_Km_PT   := max(Kp_PT,Km_PT)", "D")
dataloader.AddVariable("min_Kp_Km_PT   := min(Kp_PT,Km_PT)", "D")
dataloader.AddVariable("max_pip_pim_PT := max(pip_PT,pim_PT)", "D")
dataloader.AddVariable("min_pip_pim_PT := min(pip_PT,pim_PT)", "D")
dataloader.AddVariable("Bs_DIRA_OWNPV", "D")
dataloader.AddVariable("Bs_ENDVERTEX_CHI2", "D")
dataloader.AddVariable("Bs_LOKI_ETA", "D")
dataloader.AddVariable("Kst_LOKI_ETA", "D")
dataloader.AddVariable("Kstb_LOKI_ETA", "D")
dataloader.AddVariable("max_Kp_Km_ETA    := max(Kp_LOKI_ETA,Km_LOKI_ETA)", "D")
Exemple #22
def main():

    usage = 'usage: %prog [options]'
    parser = optparse.OptionParser(usage)
    parser.add_option(
        '-s',
        '--signal_sample',
        dest='input_file_name_signal',
        help='signal sample path',
        default=
        'samples/samples-NJet_geq_3/MVATraining/ttHnobb_TrainMVANoCutJetN.root',
        type='string')
    parser.add_option(
        '-x',
        '--bckg1_sample',
        dest='input_file_name_ttJets',
        help='background sample 1 path',
        default=
        'samples/samples-NJet_geq_3/MVATraining/ttJets_TrainMVANoCutJetN.root',
        type='string')
    parser.add_option(
        '-y',
        '--bckg2_sample',
        dest='input_file_name_ttW',
        help='background sample 2 path',
        default=
        'samples/samples-NJet_geq_3/MVATraining/ttWJets_TrainMVANoCutJetN.root',
        type='string')
    parser.add_option(
        '-z',
        '--bckg3_sample',
        dest='input_file_name_ttZ',
        help='background sample 3 path',
        default=
        'samples/samples-NJet_geq_3/MVATraining/ttZJets_TrainMVANoCutJetN.root',
        type='string')
    parser.add_option('-a',
                      '--activation',
                      dest='activation_function',
                      help='activation function',
                      default='relu',
                      type='string')
    parser.add_option('-l',
                      '--hidden_layers',
                      dest='number_of_hidden_layers',
                      help='number of hidden layers',
                      default='2',
                      type='int')
    parser.add_option('-t',
                      '--var_transform',
                      dest='var_transform_name',
                      help='transformation used on input variables',
                      default='None',
                      type='string')
    parser.add_option('-j',
                      '--json',
                      dest='json',
                      help='json file with list of variables',
                      default=None,
                      type='string')
    parser.add_option('-r',
                      '--learning_rate',
                      dest='learning_rate',
                      help='learning rate',
                      default=0.008,
                      type='float')
    parser.add_option('-n',
                      '--num_epochs',
                      dest='num_epochs',
                      help='number of epochs',
                      default=10,
                      type='int')

    (opt, args) = parser.parse_args()

    number_of_hidden_layers = opt.number_of_hidden_layers
    activation_function = opt.activation_function
    var_transform_name = opt.var_transform_name
    num_epochs = opt.num_epochs
    jsonFile = open(opt.json, 'r')
    new_variable_list = json.load(jsonFile, encoding='utf-8').items()
    learning_rate = opt.learning_rate
    layer_nodes = 40

    # Setup TMVA interface to use Keras
    TMVA.Tools.Instance()
    TMVA.PyMethodBase.PyInitialize()

    if ',' in var_transform_name:
        var_transform_name_list = var_transform_name.split(',')
        new_var_transform_name = '+'.join(var_transform_name_list)
        print 'new_var_transform_name: ', new_var_transform_name
    else:
        print 'var_transform_name = ', var_transform_name
        new_var_transform_name = var_transform_name
        print 'new_var_transform_name: ', new_var_transform_name

    num_inputs = 0
    for key, value in new_variable_list:
        num_inputs = num_inputs + 1
    print 'num inputs = ', str(num_inputs)
    classifier_parent_dir = 'DNN_noCutJetN_%sVars_%sHLs_%s_%s-VarTrans_%s-learnRate_%s-epochs-%s-nodes' % (
        str(num_inputs), str(number_of_hidden_layers), activation_function,
        new_var_transform_name, str(learning_rate), num_epochs,
        str(layer_nodes))
    classifier_samples_dir = classifier_parent_dir + "/outputs"
    if not os.path.exists(classifier_samples_dir):
        os.makedirs(classifier_samples_dir)

    output_file_name = '%s/%s.root' % (classifier_samples_dir,
                                       classifier_parent_dir)
    output_file = TFile.Open(output_file_name, 'RECREATE')

    # 'AnalysisType' defines what kind of analysis is being done, e.g. multiclass, Classification, ...
    # VarTransform: Decorrelation, PCA-transformation, Gaussianisation, Normalisation (for all classes if none is specified).
    # When a transformation is specified in the factory object, it is only used for informative purposes (not for the classifier inputs).
    # The output contains distributions showing how the variables would look if transformed.
    factory_name = 'Factory_%s' % (classifier_parent_dir)
    factory_string = '!V:!Silent:Color:DrawProgressBar:Transformations=%s:AnalysisType=multiclass' % var_transform_name
    factory = TMVA.Factory(factory_name, output_file, factory_string)

    #Load data
    input_file_name_signal = opt.input_file_name_signal
    data_signal = TFile.Open(input_file_name_signal)
    signal = data_signal.Get('syncTree')

    input_file_name_ttJets = opt.input_file_name_ttJets
    data_bckg_ttJets = TFile.Open(input_file_name_ttJets)
    background_ttJets = data_bckg_ttJets.Get('syncTree')

    input_file_name_ttW = opt.input_file_name_ttW
    data_bckg_ttW = TFile.Open(input_file_name_ttW)
    background_ttW = data_bckg_ttW.Get('syncTree')

    input_file_name_ttZ = opt.input_file_name_ttZ
    data_bckg_ttZ = TFile.Open(input_file_name_ttZ)
    background_ttZ = data_bckg_ttZ.Get('syncTree')

    # Declare a dataloader interface
    dataloader_name = classifier_parent_dir
    dataloader = TMVA.DataLoader(dataloader_name)

    # Can add selection cuts via:
    # dataloader.AddTree(background_ttJets, 'Background_1', 'myvar > cutBarrelOnly && myEventTypeVar=1', backgroundWeight)

    ### Global event weights ###
    signalWeight = 1.
    backgroundWeight0 = 1.
    backgroundWeight1 = 1.
    backgroundWeight2 = 1.
    dataloader.AddTree(signal, 'ttH', signalWeight)
    dataloader.AddTree(background_ttW, 'ttW', backgroundWeight0)
    dataloader.AddTree(background_ttZ, 'ttZ', backgroundWeight1)
    dataloader.AddTree(background_ttJets, 'ttJets', backgroundWeight2)

    branches = {}
    for key, value in new_variable_list:
        dataloader.AddVariable(str(key))
        branches[key] = array('f', [-999])
        print 'variable: ', key
        branchName = ''
        branchName = key
    dataloader.AddSpectator('nEvent', 'F')

    # Nominal event weight:
    # event weight = puWgtNom * trigWgtNom * lepSelEffNom * genWgt * xsecWgt (* 0 or 1 depending on if it passes event selection)

    dataloader.SetWeightExpression("EventWeight", "ttH")
    dataloader.SetWeightExpression("EventWeight", "ttW")
    dataloader.SetWeightExpression("EventWeight", "ttZ")
    dataloader.SetWeightExpression("EventWeight", "ttJets")

    # NormMode: Overall renormalisation of event-by-event weights used in training.
    # "NumEvents" = average weight of 1 per event, independantly renormalised for signal and background.
    # "EqualNumEvents" = average weight of 1 per signal event, sum of weights in background equal to sum of weights for signal.
    #dataloader.PrepareTrainingAndTestTree(TCut(''), 'V:NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:NormMode=EqualNumEvents')
    dataloader.PrepareTrainingAndTestTree(
        TCut(''), 'V:SplitMode=Random:NormMode=EqualNumEvents')

    # Generate model:
    model = Sequential()

    # Add layers to DNN
    '''
    Dense:
    # Number of nodes
    init= # Initialisation
    activation= # Activation
    input_dim= # Shape of inputs (Number of inputs). Argument only needed for first layer.
    '''

    # first hidden layer
    model.add(
        Dense(layer_nodes,
              init='glorot_normal',
              activation=activation_function,
              input_dim=len(new_variable_list)))

    # Randomly set a fraction of the input units (given by the argument) to 0 at each update during training (helps prevent overfitting).
    #model.add(Dropout(0.2))

    # Hidden layers
    for x in xrange(number_of_hidden_layers):
        model.add(Dense(layer_nodes, activation=activation_function))

    # Output layer
    # softmax ensures output values are in range 0-1. Can be used as predicted probabilities.
    # 'softmax' activation function used in final layer so that the outputs represent probabilities (output is normalised to 1).
    model.add(Dense(4, activation='softmax'))

    # Set loss and optimizer
    # categorical_crossentropy = logarithmic (cross-entropy) loss for multi-class targets
    # (binary_crossentropy is the two-class variant)
    model.compile(loss='categorical_crossentropy',
                  optimizer=SGD(lr=learning_rate),
                  metrics=[
                      'accuracy',
                  ])

    # Store model in file
    model.save('model.h5')
    model.summary()

    # Book methods
    # Choose classifier and define hyperparameters e.g number of epochs, model filename (as chosen above) etc.
    # VarTransform: Decorrelate, PCA, Gauss, Norm, None.
    # Transformations used in booking are used for actual training.
    logs_dir = classifier_parent_dir + '/logs'
    #factory_string_bookMethod = 'H:!V:VarTransform=%s:FilenameModel=model.h5:NumEpochs=%s:BatchSize=100:Tensorboard=%s' % (var_transform_name, num_epochs, logs_dir)
    factory_string_bookMethod = 'H:!V:VarTransform=%s:FilenameModel=model.h5:NumEpochs=%s:BatchSize=100' % (
        var_transform_name, num_epochs)
    factory.BookMethod(dataloader, TMVA.Types.kPyKeras, "DNN",
                       factory_string_bookMethod)

    # Run training, testing and evaluation
    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()
Exemple #23
    output = TFile.Open('./outputMVA/' + signalID + out + '.root', 'RECREATE')
    factory = TMVA.Factory(
        'TMVAClassification', output,
        '!V:!Silent:Color:DrawProgressBar:Transformations=G:AnalysisType=Classification'
    )

    # Load data
    path = '/home/myamatan/data3/angularAna/MVA_workspace/sampleMake/output/'
    dataS = TFile.Open(path + signalName + '.root')
    signal = dataS.Get('coll')
    dataB = TFile.Open(path + bkgName + '.root')
    background = dataB.Get('coll')

    #dataloader = TMVA.DataLoader('weights')
    dataloader = TMVA.DataLoader('test')
    nSF = 1
    nTrainSig = 40000 * nSF
    nTrainBkg = 40000 * nSF
    nInput = 10
    if gp in {'VBFH', 'VBFHVT'}:

        if regime == "Resolved":
            dataloader.AddVariable('Mvbfjj')
            dataloader.AddVariable('Ptvbfjj')
            dataloader.AddVariable('dEtavbfjj')
            dataloader.AddVariable('PtBalance')
            dataloader.AddVariable('MaxEta')
            dataloader.AddVariable('EtaStar')
            #dataloader.AddVariable('dEta_vbfjet1_Vqq')
            #dataloader.AddVariable('dEta_vbfjet2_Vqq')
Exemple #24
0
    def __init__(self,options):
        TMVA.Tools.Instance()
        TMVA.PyMethodBase.PyInitialize()
        gROOT.LoadMacro( "./TMVAGui.C" )
        self._lOutput     = TFile.Open('TMVA.root', 'RECREATE')
        self._lFactory    = TMVA.Factory('TMVAClassification', self._lOutput,'!V:!Silent:Color:DrawProgressBar:AnalysisType=Classification')
        self._lDataLoader = TMVA.DataLoader("dataset")

        for i0 in options.lVars:
            self._lDataLoader.AddVariable(i0,'F')

        self._lDataLoader.Print("all")
        #define signal and background tree based on LV flag
        if options.new:
           self._lInputFile  = TFile.Open(options.infile+'.root')
           self._lInputTree  = self._lInputFile.Get("Events")
           self._lSigFile       = TFile.Open("sig.root","RECREATE")
           self._lBkgFile       = TFile.Open("bkg.root","RECREATE")
           self._lSigFile.cd()
           self._lSigTree       = self._lInputTree.CopyTree("LV && std::abs(eta) < 3.0 && std::abs(eta) > 1.7 && pt > 5")
           self._lSigTree.Show(53)
           self._lSigTree.Write()
           self._lSigFile.Close() 
           self._lBkgFile.cd()
           self._lBkgTree       = self._lInputTree.CopyTree("!LV")# && std::abs(eta) < 3.0 && std::abs(eta) > 1.7 && pt > 5")
           self._lBkgTree.Write()
           self._lBkgFile.Close()
        
        self._lSigFile    = TFile.Open("sig.root","READ")
        self._lBkgFile    = TFile.Open("bkg.root","READ")
        self._lSigTree    = self._lSigFile.Get("Events")
        self._lBkgTree    = self._lBkgFile.Get("Events")

        self._lDataLoader.AddSignalTree    (self._lSigTree,1.0)
        self._lDataLoader.AddBackgroundTree(self._lBkgTree,1.0)

        nSig = self._lSigTree.GetEntries()
        nBkg = self._lBkgTree.GetEntries()

        nSigTrain = nSig*0.8
        nBkgTrain = nSig*0.8*1.2

        self._lDataLoader.PrepareTrainingAndTestTree(TCut(""),TCut(""),"nTrain_Signal=%i:nTrain_Background=%i:nTest_Signal=%i:nTest_Background=%i:SplitMode=Random:NormMode=NumEvents:!V"%(nSigTrain,nBkgTrain,nSigTrain,nBkgTrain)) 

        Methods =  {
        #'Variable': TMVA.Types.kVariable,
        
        #'Cuts': TMVA.Types.kCuts,
        'Likelihood': TMVA.Types.kLikelihood,
        #'BDT': TMVA.Types.kBDT
        #'PyRandomForest': TMVA.Types.kPyRandomForest,
        #'MaxMethod': TMVA.Types.kMaxMethod
        }
        '''
        'PDERS': TMVA.Types.kPDERS,
        'HMatrix': TMVA.Types.kHMatrix,
        'Fisher': TMVA.Types.kFisher,
        'KNN': TMVA.Types.kKNN,
        'CFMlpANN': TMVA.Types.kCFMlpANN,
        'TMlpANN': TMVA.Types.kTMlpANN,
        'BDT': TMVA.Types.kBDT,
        'DT': TMVA.Types.kDT,
        'RuleFit': TMVA.Types.kRuleFit,
        'SVM': TMVA.Types.kSVM,
        'MLP': TMVA.Types.kMLP,
        'BayesClassifier': TMVA.Types.kBayesClassifier,
        'FDA': TMVA.Types.kFDA,
        'Boost': TMVA.Types.kBoost,
        'PDEFoam': TMVA.Types.kPDEFoam,
        'LD': TMVA.Types.kLD,
        'Plugins': TMVA.Types.kPlugins,
        'Category': TMVA.Types.kCategory,
        'DNN': TMVA.Types.kDNN,
        'PyRandomForest': TMVA.Types.kPyRandomForest,
        'PyAdaBoost': TMVA.Types.kPyAdaBoost,
        'PyGTB': TMVA.Types.kPyGTB,
        'PyKeras': TMVA.Types.kPyKeras,
        'C50': TMVA.Types.kC50,
        'RSNNS': TMVA.Types.kRSNNS,
        'RSVM': TMVA.Types.kRSVM,
        'RXGB': TMVA.Types.kRXGB,
        'MaxMethod': TMVA.Types.kMaxMethod
        '''

        for m,t in Methods.iteritems():
           self._lFactory.BookMethod( self._lDataLoader, t, m, "" )

        self._lFactory.BookMethod(self._lDataLoader, TMVA.Types.kBDT, 'BDT', '!H:!V:NTrees=300:MinNodeSize=2.5%:MaxDepth=3:BoostType=Grad:SeparationType=GiniIndex:nCuts=100:UseBaggedBoost=True:PruneMethod=NoPruning')
        #self._lFactory.BookMethod(self._lDataLoader, TMVA.Types.kBDT, 'BDT2', '!H:!V:NTrees=300:MinNodeSize=2.5%:MaxDepth=4:BoostType=AdaBoost:SeparationType=CrossEntropy:nCuts=100:PruneMethod=NoPruning')

        #self._lFactory.BookMethod(self._lDataLoader, TMVA.Types.kBDT, 'BDT3', '!H:!V:NTrees=300:MinNodeSize=2.5%:MaxDepth=4:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=100:PruneMethod=NoPruning')
        #self._lFactory.BookMethod( self._lDataLoader, TMVA.Types.kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" )

        model = Sequential()
        model.add(Dense(len(options.lVars), input_dim=len(options.lVars),activation='tanh'))
        model.add(Dense(30,  activation='tanh'))
        model.add(Dense(20,  activation='relu'))
        model.add(Dense(10,  activation='tanh'))
        model.add(Dense(5,   activation='relu'))
        model.add(Dense(2,   activation='sigmoid'))
 
        model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy',])
        model.save('model.h5')
        model.summary()

        self._lFactory.BookMethod(self._lDataLoader, TMVA.Types.kPyKeras, 'PyKeras', 'H:!V:FilenameModel=model.h5:NumEpochs=10:BatchSize=500')

        frozen_graph = freeze_session(K.get_session(),
                              output_names=[out.op.name for out in model.outputs])
        tf.train.write_graph(frozen_graph, "h5_files", "tf_model.pb", as_text=False)
 

        self._lFactory.Print("v")
    '!V:!Silent:Color:DrawProgressBar:AnalysisType=Classification')

# Load data
dataBs = TFile.Open('bankBsJpsiPhi16.root')
dataBsD0 = TFile.Open('bankBsJpsiPhiDGamma016.root')
dataBu = TFile.Open('bankBuJpsiK16.root')
dataBd = TFile.Open('bankBdJpsiKx16.root')
dataBdNR = TFile.Open('bankBdKxMuMu16.root')

treeBs = dataBs.Get('PDsecondTree')
treeBsD0 = dataBsD0.Get('PDsecondTree')
treeBu = dataBu.Get('PDsecondTree')
treeBd = dataBd.Get('PDsecondTree')
treeBdNR = dataBdNR.Get('PDsecondTree')

dataloader = TMVA.DataLoader('dataset')

# add variables
dataloader.AddVariable('muoPt', 'F')
dataloader.AddVariable('abs(muoEta)', 'F')
dataloader.AddVariable('muoSegmComp', 'F')
dataloader.AddVariable('muoChi2LM', 'F')
dataloader.AddVariable('muoChi2LP', 'F')
dataloader.AddVariable('muoGlbTrackTailProb', 'F')
dataloader.AddVariable('muoIValFrac', 'F')
dataloader.AddVariable('muoLWH', 'I')
dataloader.AddVariable('muoTrkKink', 'F')
dataloader.AddVariable('muoGlbKinkFinderLOG', 'F')
dataloader.AddVariable('muoTimeAtIpInOutErr', 'F')
dataloader.AddVariable('muoOuterChi2', 'F')
dataloader.AddVariable('muoInnerChi2', 'F')
Exemple #26
    def _make_dataloader(self):
        '''Make the DataLoader for training.'''

        # Load the data.
        self.dataloader = TMVA.DataLoader(self.name)

        # Add training variables.
        for var in self.variables:
            if not isinstance(var, (tuple, list)):
                var = (var, )
            try:
                self.dataloader.AddVariable(*var)
            except:
                print 'Failed to call dataloader.AddVariable with args', var
                raise

        # Add spectator variables.
        for var in self.spectators:
            if not isinstance(var, (tuple, list)):
                var = (var, )
            try:
                self.dataloader.AddSpectator(*var)
            except:
                print 'Failed to call dataloader.AddSpectator with args', var
                raise

        # Register trees.
        # If we have explicit cuts for training and testing, we need to copy the TTrees first,
        # applying these cuts.
        if self.trainingcut:
            pwd = ROOT.gROOT.CurrentDirectory()
            self.tmpfile = ROOT.TFile.Open(
                os.path.abspath('DataLoader_' + random_string() + '.root'),
                'recreate')
            self.tmpfile.cd()
            signal_usedleaves, background_usedleaves = self.used_leaves()
            usedleaves = {
                'Signal': signal_usedleaves,
                'Background': background_usedleaves
            }
            aliases = {
                'Signal': get_aliases(self.signaltree),
                'Background': get_aliases(self.backgroundtree)
            }
            addtreeargs = []
            for name in 'Signal', 'Background':
                lname = name.lower()
                namecut = getattr(self, lname + 'cut')
                for tname, ttype, cut in ('Training', TMVA.Types.kTraining,
                                          self.trainingcut), (
                                              'Testing', TMVA.Types.kTesting,
                                              self.testingcut):
                    classname = self.name + '_' + name + '_' + tname + '_'
                    cut = AND(*filter(None, [namecut, cut]))
                    tree = getattr(self, lname + 'tree')
                    seltree, copyfriends = copy_tree(
                        tree,
                        selection=cut,
                        keepbranches=usedleaves[name],
                        rename=(
                            lambda name: classname + name.replace('/', '_')),
                        write=True,
                        returnfriends=True)
                    addtreeargs.append((seltree.GetName(), name,
                                        getattr(self, lname + 'globalweight'),
                                        ROOT.TCut(''), ttype))
                weight = getattr(self, lname + 'weight')
                if weight:
                    self.dataloader.SetWeightExpression(weight, name)

            fname = self.tmpfile.GetName()
            self.tmpfile.Close()
            self.tmpfile = ROOT.TFile.Open(fname)
            for args in addtreeargs:
                tree = self.tmpfile.Get(args[0])
                _aliases = aliases['Signal'] if 'Signal' in args else aliases[
                    'Background']
                for name, alias in _aliases.items():
                    tree.SetAlias(name, alias)
                self.dataloader.AddTree(tree, *args[1:])
            self.dataloader.GetDataSetInfo().SetSplitOptions(
                str(self.splitoptions))
            if pwd:
                pwd.cd()

        else:
            self.dataloader.AddSignalTree(self.signaltree,
                                          self.signalglobalweight)
            self.dataloader.AddBackgroundTree(self.backgroundtree,
                                              self.backgroundglobalweight)

            # Set weight expressions.
            if self.signalweight:
                self.dataloader.SetSignalWeightExpression(self.signalweight)
            if self.backgroundweight:
                self.dataloader.SetBackgroundWeightExpression(
                    self.backgroundweight)

            # Prepare the training.
            self.dataloader.PrepareTrainingAndTestTree(
                ROOT.TCut(self.signalcut), ROOT.TCut(self.backgroundcut),
                str(self.splitoptions))
        return True
def main():

    try:
        # retrieve command line options
        shortopts  = "m:i:n:d:k:l:t:o:vh?"
        longopts   = ["methods=", "inputfile=", "nTrees=", "maxDepth=", "mass=", "varListKey=", "inputtrees=", "outputfile=", "verbose", "help", "usage"]
        opts, args = getopt.getopt( sys.argv[1:], shortopts, longopts )

    except getopt.GetoptError:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    infname     = DEFAULT_INFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname    = DEFAULT_OUTFNAME
    methods     = DEFAULT_METHODS
    nTrees      = DEFAULT_NTREES
    mDepth      = DEFAULT_MDEPTH
    mass        = DEFAULT_MASS
    varListKey  = DEFAULT_VARLISTKEY
    verbose     = True
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-d", "--maxDepth"):
        	mDepth = a
        elif o in ("-k", "--mass"):
        	mass = a
        elif o in ("-l", "--varListKey"):
        	varListKey = a
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-n", "--nTrees"):
            nTrees = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            a = a.strip()
            trees = a.rsplit( ' ' )
            trees.sort()
            trees.reverse()
            if len(trees)-trees.count('') != 2:
                print "ERROR: need to give two trees (each one for signal and background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True

    varList = varsList.varList[varListKey]
    nVars = str(len(varList))+'vars'
    Note=''+methods+'_'+varListKey+'_'+nVars+'_mDepth'+mDepth
    outfname = "dataset/weights/TMVA_"+Note+".root"
    # Print methods
    mlist = methods.replace(' ',',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()
			
    # Import ROOT classes

    
    # check ROOT version, give alarm if 5.18 
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)
    

    # Output file
    outputFile = TFile( outfname, 'RECREATE' )
    factory = TMVA.Factory( "TMVAClassification", outputFile, 
                            "!V:!Silent:Color:DrawProgressBar:Transformations=I;:AnalysisType=Classification" )

    factory.SetVerbose( verbose )
    (TMVA.gConfig().GetIONames()).fWeightFileDir = "weights/"+Note

    dataloader = TMVA.DataLoader('dataset')

    for iVar in varList:
        if iVar[0]=='NJets_singleLepCalc': dataloader.AddVariable(iVar[0],iVar[1],iVar[2],'I')
        else: dataloader.AddVariable(iVar[0],iVar[1],iVar[2],'F')

    inputDir = varsList.inputDir
    infname = "TTTT_TuneCP5_PSweights_13TeV-amcatnlo-pythia8_hadd.root"
    iFileSig = TFile.Open(inputDir+infname)
    sigChain = iFileSig.Get("ljmet")
    dataloader.AddSignalTree(sigChain)
    bkg_list = []
    bkg_trees_list = []
    hist_list = []
    weightsList = []
    for i in range(len(varsList.bkg)):
        bkg_list.append(TFile.Open(inputDir+varsList.bkg[i]))
        print inputDir+varsList.bkg[i]
        bkg_trees_list.append(bkg_list[i].Get("ljmet"))
        bkg_trees_list[i].GetEntry(0)

        if bkg_trees_list[i].GetEntries() == 0:
            continue
        dataloader.AddBackgroundTree( bkg_trees_list[i], 1)

    signalWeight = 1 

    dataloader.SetSignalWeightExpression( weightStrS )
    dataloader.SetBackgroundWeightExpression( weightStrB )

    mycutSig = TCut( cutStrS )
    mycutBkg = TCut( cutStrB ) 

    dataloader.PrepareTrainingAndTestTree( mycutSig, mycutBkg, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" )

    kerasSetting = 'H:!V:VarTransform=G:FilenameModel=model.h5:NumEpochs=10:BatchSize=1028'

    model = Sequential()
    # input_dim must match the number of variables added to the dataloader above
    model.add(Dense(100, activation='relu', input_dim=len(varList)))
    model.add(Dense(100, activation='relu'))
    model.add(Dense(100, activation='relu'))
    model.add(Dense(100, activation='relu'))
    model.add(Dense(2, activation='sigmoid'))

    # Set loss and optimizer
    model.compile(loss='categorical_crossentropy', optimizer=Adam(), metrics=['accuracy',])

    # Store model to file
    model.save('model.h5')
    model.summary()

    if methods=="Keras": factory.BookMethod(dataloader, TMVA.Types.kPyKeras, "PyKeras",kerasSetting)
    
    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()    
    
    outputFile.Close()
    # save plots:
    print "DONE"
Exemple #28
def main():

    try:
        # retrieve command line options
        shortopts  = "m:i:t:o:vh?"
        longopts   = ["methods=", "inputfile=", "inputtrees=", "outputfile=", "verbose", "help", "usage"]
        opts, args = getopt.getopt( sys.argv[1:], shortopts, longopts )

    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    infname     = DEFAULT_INFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname    = DEFAULT_OUTFNAME
    methods     = DEFAULT_METHODS
    verbose     = False
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            a = a.strip()
            trees = a.rsplit( ' ' )
            trees.sort()
            trees.reverse()
            if len(trees)-trees.count('') != 2:
                print "ERROR: need to give two trees (each one for signal and background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True

    # Print methods
    mlist = methods.replace(' ',',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut
    
    # check ROOT version, give alarm if 5.18 
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)
        
    # Import TMVA classes from ROOT
    from ROOT import TMVA
    TMVA.Tools.Instance()

    # Output file
    outputFile = TFile( outfname, 'RECREATE' )
    
    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in 
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory( "TMVAClassification", outputFile, 
                            "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" )

    # Set verbosity
    factory.SetVerbose( verbose )

    dataloader = TMVA.DataLoader("dataset")

    # If you wish to modify default settings 
    # (please check "src/Config.h" to see all available global options)
    #    (TMVA.gConfig().GetVariablePlotting()).fTimesRMS = 8.0
    #    (TMVA.gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"

    # Define the input variables that shall be used for the classifier training
    # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    # [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
    dataloader.AddVariable( "myvar1 := var1+var2", 'F' )
    dataloader.AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' )
    dataloader.AddVariable( "var3",                "Variable 3", "units", 'F' )
    dataloader.AddVariable( "var4",                "Variable 4", "units", 'F' )

    # You can add so-called "Spectator variables", which are not used in the MVA training, 
    # but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the 
    # input variables, the response values of all trained MVAs, and the spectator variables
    #dataloader.AddSpectator( "spec1 := var1*2",  "Spectator 1", "units", 'F' );
    #dataloader.AddSpectator( "spec2 := var1*3",  "Spectator 2", "units", 'F' );

    # Read input data
    if gSystem.AccessPathName( infname ) != 0: gSystem.Exec( "wget http://root.cern.ch/files/" + infname )
        
    input = TFile.Open( infname )

    # Get the signal and background trees for training
    signal      = input.Get( treeNameSig )
    background  = input.Get( treeNameBkg )
    
    # Global event weights (see below for setting event-wise weights)
    signalWeight     = 1.0
    backgroundWeight = 1.0

    # ====== register trees ====================================================
    #
    # the following method is the preferred one:
    # you can add an arbitrary number of signal or background trees
    dataloader.AddSignalTree    ( signal,     signalWeight     )
    dataloader.AddBackgroundTree( background, backgroundWeight )

    # To give different trees for training and testing, do as follows:
    #    dataloader.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" )
    #    dataloader.AddSignalTree( signalTestTree,     signalTestWeight,  "Test" )
    
    # Use the following code instead of the above two or four lines to add signal and background 
    # training and test events "by hand"
    # NOTE that in this case one should not give expressions (such as "var1+var2") in the input 
    #      variable definition, but simply compute the expression before adding the event
    #
    #    # --- begin ----------------------------------------------------------
    #    
    # ... *** please lookup code in TMVA/macros/TMVAClassification.C ***
    #    
    #    # --- end ------------------------------------------------------------
    #
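    #    Hedged sketch (the original defers to TMVAClassification.C): TMVA's DataLoader
    #    offers Add{Signal,Background}{Training,Test}Event, taking a std::vector<double>
    #    of precomputed variable values plus an event weight, e.g.
    #       from ROOT import std
    #       ev = std.vector('double')()
    #       for value in (0.1, 0.2, 0.3, 0.4):   # one entry per input variable (placeholder values)
    #           ev.push_back(value)
    #       dataloader.AddSignalTrainingEvent(ev, signalWeight)
    #       dataloader.AddBackgroundTestEvent(ev, backgroundWeight)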
    # ====== end of register trees ==============================================    
            
    # Set individual event weights (the variables must exist in the original TTree)
    #    for signal    : dataloader.SetSignalWeightExpression    ("weight1*weight2");
    #    for background: dataloader.SetBackgroundWeightExpression("weight1*weight2");
    dataloader.SetBackgroundWeightExpression( "weight" )

    # Apply additional cuts on the signal and background sample. 
    # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )
    mycutSig = TCut( "" ) 
    mycutBkg = TCut( "" ) 
    
    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
    dataloader.PrepareTrainingAndTestTree( mycutSig, mycutBkg,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" )

    # --------------------------------------------------------------------------------------------------

    # ---- Book MVA methods
    #
    # please lookup the various method configuration options in the corresponding cxx files, eg:
    # src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
    # it is possible to preset ranges in the option string in which the cut optimisation should be done:
    # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

    # Cut optimisation
    if "Cuts" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kCuts, "Cuts",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" )

    if "CutsD" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsD",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" )

    if "CutsPCA" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsPCA",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" )

    if "CutsGA" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsGA",
                            "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" )

    if "CutsSA" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsSA",
                            "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" )

    # Likelihood ("naive Bayes estimator")
    if "Likelihood" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "Likelihood",
                            "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" )

    # Decorrelated likelihood
    if "LikelihoodD" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodD",
                            "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" )

    # PCA-transformed likelihood
    if "LikelihoodPCA" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodPCA",
                            "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ) 

    # Use a kernel density estimator to approximate the PDFs
    if "LikelihoodKDE" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodKDE",
                            "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ) 

    # Use a variable-dependent mix of splines and kernel density estimator
    if "LikelihoodMIX" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodMIX",
                            "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ) 

    # Test the multi-dimensional probability density estimator
    # here are the options strings for the MinMax and RMS methods, respectively:
    #      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
    #      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
    if "PDERS" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kPDERS, "PDERS",
                            "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" )

    if "PDERSD" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kPDERS, "PDERSD",
                            "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" )

    if "PDERSPCA" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kPDERS, "PDERSPCA",
                             "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" )

    # Multi-dimensional likelihood estimator using self-adapting phase-space binning
    if "PDEFoam" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kPDEFoam, "PDEFoam",
                            "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" )

    if "PDEFoamBoost" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kPDEFoam, "PDEFoamBoost",
                            "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" )

    # K-Nearest Neighbour classifier (KNN)
    if "KNN" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kKNN, "KNN",
                            "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" )

    # H-Matrix (chi2-squared) method
    if "HMatrix" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kHMatrix, "HMatrix", "!H:!V" )

    # Linear discriminant (same as Fisher discriminant)
    if "LD" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" )

    # Fisher discriminant (same as LD)
    if "Fisher" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" )

    # Fisher with Gauss-transformed input variables
    if "FisherG" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFisher, "FisherG", "H:!V:VarTransform=Gauss" )

    # Composite classifier: ensemble (tree) of boosted Fisher classifiers
    if "BoostedFisher" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFisher, "BoostedFisher", 
                            "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" )

    # Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
    if "FDA_MC" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_MC",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );

    if "FDA_GA" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_GA",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

    if "FDA_SA" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_SA",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

    if "FDA_MT" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_MT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

    if "FDA_GAMT" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_GAMT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

    if "FDA_MCMT" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_MCMT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

    # TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
    if "MLP" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" )

    if "MLPBFGS" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" )

    if "MLPBNN" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ) # BFGS training with bayesian regulators

    # CF(Clermont-Ferrand)ANN
    if "CFMlpANN" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ) # n_cycles:#nodes:#nodes:...  

    # Tmlp(Root)ANN
    if "TMlpANN" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ) # n_cycles:#nodes:#nodes:...

    # Support Vector Machine
    if "SVM" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" )

    # Boosted Decision Trees
    if "BDTG" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDTG",
                            "!H:!V:NTrees=1000:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" )                        

    if "BDT" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDT",
                           "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" )

    if "BDTB" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDTB",
                           "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" )

    if "BDTD" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDTD",
                           "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" )

    # RuleFit -- TMVA implementation of Friedman's method
    if "RuleFit" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kRuleFit, "RuleFit",
                            "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" )

    # --------------------------------------------------------------------------------------------------
            
    # ---- Now you can tell the factory to train, test, and evaluate the MVAs. 

    # Train MVAs
    factory.TrainAllMethods()
    
    # Test MVAs
    factory.TestAllMethods()
    
    # Evaluate MVAs
    factory.EvaluateAllMethods()    
    
    # Save the output.
    outputFile.Close()
    
    print "=== wrote root file %s\n" % outfname
    print "=== TMVAClassification is done!\n"
    
    # open the GUI for the result macros    
    TMVA.TMVAGui(outfname)
    
    # keep the ROOT thread running
    gApplication.Run() 
TMVA.Tools.Instance()
TMVA.PyMethodBase.PyInitialize()

output = TFile.Open('TMVA_CNN_PyKeras.root', 'RECREATE')
factory = TMVA.Factory(
    'TMVAClassification', output,
    '!V:!Silent:Color:DrawProgressBar:Transformations=None:AnalysisType=Classification'
)

############################Loading the data file
data = TFile.Open("/home/jui/Desktop/tmva/sample_images_32x32.root")
# print(data.ls())
signal = data.Get('sig_tree;2')
background = data.Get('bkg_tree;2')

dataloader = TMVA.DataLoader('dataset_evaltest')

imgSize = 1024

dataloader.AddVariablesArray("vars", imgSize)

dataloader.AddSignalTree(signal, 1.0)
dataloader.AddBackgroundTree(background, 1.0)
dataloader.PrepareTrainingAndTestTree(
    TCut(''),
    'nTrain_Signal=8000:nTrain_Background=8000:SplitMode=Random:NormMode=NumEvents:!CalcCorrelations:!V'
)

# Generate model

# Define model
Exemple #30
argv = []

inputFile_sig = TFile.Open(
    "/home/net3/afortman/projects/hotpot/oct_sim/efftesting/hazel_both_smearf_1M_35ns_e"
    + str(options.eff) + ".root")
inputFile_bkg = TFile.Open(
    "/home/net3/afortman/projects/hotpot/oct_sim/efftesting/hazel_bkg_smearf_1M_35ns_e"
    + str(options.eff) + ".root")
outputFile = TFile.Open("TMVAOutput_sigbkg_e" + str(options.eff) + ".root",
                        "RECREATE")

factory = TMVA.Factory(
    "TMVAClassification", outputFile,
    "!V:!Silent:Color:!DrawProgressBar:AnalysisType=Classification")

loader = TMVA.DataLoader("dataset_e" + str(options.eff))

#loader.AddVariable("EventNumHazel",'I')
#loader.AddVariable("EventNumGingko",'I')
#loader.AddVariable("trigger_gingko",'I')
#loader.AddVariable("iroad_x",'I')
#loader.AddVariable("iroad_u",'I')
#loader.AddVariable("iroad_v",'I')
loader.AddVariable("Hit_plane0", 'I')
loader.AddVariable("Hit_plane1", 'I')
loader.AddVariable("Hit_plane2", 'I')
loader.AddVariable("Hit_plane3", 'I')
loader.AddVariable("Hit_plane4", 'I')
loader.AddVariable("Hit_plane5", 'I')
loader.AddVariable("Hit_plane6", 'I')
loader.AddVariable("Hit_plane7", 'I')