Beispiel #1
0
def calculate_variables(filename,
                        configpath,
                        friendTrees,
                        outpath,
                        apply_selection=False,
                        split_feature=None):
    """Calculate the configured variables per event and write them to a tree.

    The ntuple in ``filename`` (together with the friend trees resolved by
    the config) is read event by event. ``config.calculate_variables`` fills
    the branch arrays of the output file at ``outpath`` and every filled set
    of arrays is stored as one tree entry.

    Args:
        filename: path of the input ntuple.
        configpath: passed to ``load.Config`` together with ``friendTrees``.
        friendTrees: passed to ``load.Config``.
        outpath: path of the output file.
        apply_selection: accepted but not used by this function.
        split_feature: if not None, name of an event attribute holding a
            count; the variables are then calculated and written once per
            index in ``range(count)`` instead of once per event.
    """
    print(" ===== EVALUATING FILE ===== ")
    print(filename)
    print(" =========================== ")

    config = load.Config(configpath, friendTrees, "Calculation")
    one_entry_per_event = split_feature is None

    # input ntuple first, output file second (same order on exit as nesting)
    with load.InputFile(filename, config.getFriendTrees(filename)) as ntuple:
        with load.OutputFile(outpath) as outfile:
            outfile.SetConfigBranches(config)

            branches_listed = False
            for event_number, event in enumerate(load.TreeIterator(ntuple)):
                if one_entry_per_event:
                    config.calculate_variables(event, outfile,
                                               outfile.sampleName)
                    outfile.FillTree()
                else:
                    # one tree entry per sub-object of the event
                    for sub_index in range(getattr(event, split_feature)):
                        config.calculate_variables(event, outfile,
                                                   outfile.sampleName,
                                                   sub_index)
                        outfile.FillTree()
                        outfile.ClearArrays()

                # list the output branches once, after the first event
                if not branches_listed:
                    print("writing variables to output tree:")
                    for name in [
                            branch.GetName()
                            for branch in outfile.tree.GetListOfBranches()
                    ]:
                        print(name)
                    branches_listed = True

                # dump the first events for a quick visual check
                if one_entry_per_event and event_number <= 10:
                    print(" === testevent ===")
                    for name in [
                            branch.GetName()
                            for branch in outfile.tree.GetListOfBranches()
                    ]:
                        values = outfile.branchArrays[name]
                        print(name,
                              ", ".join([str(value) for value in values]))
                    print(" =================" + "\n")

                outfile.ClearArrays()
Beispiel #2
0
def _fill_placeholder_row(output, row, entry, loIdx, hiIdx, value):
    """Write a placeholder row for an event without a usable permutation.

    Columns ``loIdx:hiIdx`` of ``output[row]`` are copied from the first row
    of the ``entry`` dataframe; every other column is set to ``value``.
    """
    output[row, :loIdx] = value
    output[row, loIdx:hiIdx] = entry.iloc[0].values[loIdx:hiIdx]
    output[row, hiIdx:] = value


def match_jets(filename,
               configpath,
               friendTrees,
               threshold,
               signal_only,
               outpath,
               apply_selection=False):
    """Pick the best-matching permutation per event and write it to ROOT.

    For each event the hypotheses module returns a dataframe of permutations
    (``entry``) and an error flag. The rows passing
    ``config.def_signal_selection`` are handed to ``findBest``; the row at
    the returned index is stored as signal, with the trailing
    ``<config.naming>_matchable`` column set to 1. Unless ``signal_only`` is
    set, a row chosen by ``config.get_random_index`` from the rows passing
    ``config.def_background_selection`` is stored as background.

    Placeholder rows (only written when ``apply_selection`` is False):
      * -99 if the hypotheses module reports an error, or (background only)
        if no background candidate other than the best permutation exists
      * -1 if ``findBest`` returns -1
    In placeholder rows the columns ``nBaseVariables:nAdditionalVariables``
    are still copied from the first permutation.

    Args:
        filename: path of the input ntuple.
        configpath: passed to ``load.Config`` together with ``friendTrees``.
        friendTrees: passed to ``load.Config``.
        threshold: forwarded to ``findBest``.
        signal_only: if true, only the signal tree is produced and written
            to ``outpath``; otherwise ``outpath`` is split into
            ``*_sig.root`` and ``*_bkg.root``.
        outpath: path of the output ROOT file.
        apply_selection: if true, events flagged with an error are skipped
            and the output is cut to the number of filled rows.

    If the input tree has no entries, a message is printed and nothing is
    written, because the output columns are only known after the first
    event has been read.
    """
    print(" ===== EVALUATING FILE ===== ")
    print(filename)
    print(" =========================== ")

    config = load.Config(configpath, friendTrees, "Matching")

    # open input file
    with load.InputFile(filename, config.getFriendTrees(filename)) as ntuple:

        # load hypotheses module
        hypotheses = Hypotheses(config)

        # initialize hypotheses combinatorics
        hypotheses.initPermutations()

        first = True
        fillIdx = 0
        # start loop over ntuple entries
        for event in load.TreeIterator(ntuple):
            entry, error = hypotheses.GetEntry(event, event.N_Jets)

            if first:
                # get list of all dataframe variables
                outputVariables = entry.columns.values
                outputVariables = np.append(outputVariables,
                                            config.naming + "_matchable")
                for v in outputVariables:
                    print(v)

                # setup empty arrays for event data storage
                outputSig = np.zeros(shape=(ntuple.GetEntries(),
                                            len(outputVariables)))
                if not signal_only:
                    outputBkg = np.zeros(shape=(ntuple.GetEntries(),
                                                len(outputVariables)))

                first = False

                # indices to fill basic variables regardless of matching status
                loIdxVars = hypotheses.nBaseVariables
                hiIdxVars = hypotheses.nAdditionalVariables

            if error:
                # for some reason no hypotheses are viable
                #   e.g. not enough jets
                if not apply_selection:
                    _fill_placeholder_row(outputSig, fillIdx, entry,
                                          loIdxVars, hiIdxVars, -99)
                    if not signal_only:
                        _fill_placeholder_row(outputBkg, fillIdx, entry,
                                              loIdxVars, hiIdxVars, -99)
                    fillIdx += 1
                continue

            # apply signal selection and get best permutation
            entry_signal_selection = entry_selection(
                entry, config.def_signal_selection)
            bestIndex = findBest(entry_signal_selection, threshold,
                                 config.match_variables)

            if bestIndex == -1:
                # no match was found
                # NOTE(review): with apply_selection set, the row is left at
                # its zero initialisation but still counted below - confirm
                # that this is intended.
                if not apply_selection:
                    _fill_placeholder_row(outputSig, fillIdx, entry,
                                          loIdxVars, hiIdxVars, -1)
                    if not signal_only:
                        _fill_placeholder_row(outputBkg, fillIdx, entry,
                                              loIdxVars, hiIdxVars, -1)
            else:
                outputSig[fillIdx, :-1] = entry.iloc[bestIndex].values
                outputSig[fillIdx, -1] = 1
                if not signal_only:
                    entry_background_selection = entry_selection(
                        entry, config.def_background_selection)
                    nCandidates = entry_background_selection.shape[0]
                    # a background row needs at least one candidate that is
                    # not the best permutation itself
                    onlyBestLeft = (
                        nCandidates == 1
                        and entry_background_selection.index[0] == bestIndex)
                    if nCandidates == 0 or onlyBestLeft:
                        # NOTE(review): with apply_selection set, the
                        # background row stays at zero - confirm intended.
                        if not apply_selection:
                            _fill_placeholder_row(outputBkg, fillIdx, entry,
                                                  loIdxVars, hiIdxVars, -99)
                    else:
                        randIndex = config.get_random_index(
                            entry_background_selection, bestIndex)
                        outputBkg[fillIdx, :-1] = entry.iloc[randIndex].values
                        outputBkg[fillIdx, -1] = 1

            if fillIdx <= 10:
                print("=== testevent ===")
                if not signal_only:
                    for name, sigval, bkgval in zip(outputVariables,
                                                    outputSig[fillIdx],
                                                    outputBkg[fillIdx]):
                        print(name, sigval, bkgval)
                else:
                    for name, sigval in zip(outputVariables,
                                            outputSig[fillIdx]):
                        print(name, sigval)
                print("=================" + "\n\n")

            fillIdx += 1

    if first:
        # the loop body never ran, so neither the column names nor the
        # output arrays exist
        print("no entries found in input file - no output written")
        return

    # cut output arrays to filled length
    if apply_selection:
        print("events that fulfilled the selection {}/{}".format(
            fillIdx, len(outputSig)))
        outputSig = outputSig[:fillIdx]
        if not signal_only:
            outputBkg = outputBkg[:fillIdx]

    # open output root file(s)
    if not signal_only:
        sigpath = outpath.replace(".root", "_sig.root")
        bkgpath = outpath.replace(".root", "_bkg.root")
    else:
        sigpath = outpath

    with load.OutputFile(sigpath) as outfile:
        # initialize branches
        outfile.SetBranches(outputVariables)
        # loop over events and fill tree
        for event in outputSig:
            outfile.FillTree(event)

    if not signal_only:
        with load.OutputFile(bkgpath) as outfile:
            # initialize branches
            outfile.SetBranches(outputVariables)
            # loop over events and fill tree
            for event in outputBkg:
                outfile.FillTree(event)
Beispiel #3
0
def evaluate_model(filename, modelconfigpath, configpath, friendTrees, outpath, apply_selection = False, write_input_vars = False):
    """Evaluate the configured DNN sets on an ntuple and store the results.

    Per event, the common (non-DNN-input) variables and - if
    ``write_input_vars`` is set - the DNN input variables are copied into an
    output array, while the DNN inputs are collected via
    ``modelconfig.fillInputData``. After the event loop every DNN set is
    evaluated in one go and its outputs and predicted index are written into
    the columns reserved for it. The result is saved both as ``.h5`` (key
    ``"data"``) and as a ROOT tree at ``outpath``.

    Args:
        filename: path of the input ntuple.
        modelconfigpath: passed to ``load.ModelConfig``.
        configpath: passed to ``load.Config`` together with ``friendTrees``.
        friendTrees: passed to ``load.Config``.
        outpath: path of the output ROOT file; the ``.h5`` file uses the
            same path with the extension replaced.
        apply_selection: if true, events flagged with an error by the entry
            loader are dropped; otherwise they are kept and filled with -1.
        write_input_vars: if true, the DNN input variables are written to
            the output as well.

    If the input tree has no entries, a message is printed and nothing is
    written. If entries exist but none is kept, empty outputs are written
    and the DNN evaluation is skipped.
    """
    print(" ===== EVALUATING FILE ===== ")
    print(filename)
    print(" =========================== ")

    modelconfig = load.ModelConfig(modelconfigpath)

    model_variables = modelconfig.getAllVariables()

    config = load.Config(configpath, friendTrees, "Evaluation")
    # keep only the additional variables that are not already DNN inputs
    additional_variables = [
        v for v in config.additional_variables if v not in model_variables
    ]
    config.additional_variables = additional_variables

    # get information about variables that should be written into new friendtrees
    idxCommonVars = len(additional_variables)
    commonVars = list(additional_variables)

    # '+=' extends the list in place. 'config.additional_variables' was
    # assigned this very list above, so (unless that assignment copies) the
    # config sees the model variables too. Do not replace this with a
    # rebinding 'a = a + b'.
    additional_variables += model_variables
    modelconfig.setVariableIndices(additional_variables)

    # open input file
    with load.InputFile(filename, config.getFriendTrees(filename)) as ntuple:

        # load entry module
        entry_loader = load.Entry(config)

        first = True
        fillIdx = 0
        # start loop over ntuple entries
        for event in load.TreeIterator(ntuple):
            entry, error = entry_loader.GetEntry(event)
            if first:
                # variables that are to be written to output file
                outputVariables = np.array(commonVars)

                # if 'write_input_vars' is activated also write dnn inputs to new file
                if write_input_vars:
                    for outVar in model_variables:
                        outputVariables = np.append(outputVariables, outVar)
                idxBaseVars = len(outputVariables)

                # append output values of dnn
                outputVariables = modelconfig.setOutputVariables(outputVariables)

                # remove brackets
                outputVariables = [v.replace("[", "_").replace("]", "") for v in outputVariables]

                # print variables
                print("variables to be written to output file:")
                for v in outputVariables:
                    print(v)
                print("=======================")

                # setup empty array for event data storage
                outputData = np.zeros(shape = (ntuple.GetEntries(), len(outputVariables)))

                # setup input array for dnn evaluation
                modelconfig.setInputData(ntuple.GetEntries())

                first = False

            if error:
                # if selection is not fulfilled
                # fill default values of -1 into entry
                if not apply_selection:
                    outputData[fillIdx, :] = -1
                    modelconfig.setEmptyEntry(fillIdx)
                    fillIdx += 1
                continue

            # fill output data array

            # common variables
            outputData[fillIdx, :idxCommonVars] = entry[0, :idxCommonVars]

            # dnn input variables
            if write_input_vars:
                outputData[fillIdx, idxCommonVars:idxBaseVars] = entry[0, idxCommonVars:idxBaseVars]

            # dnn input variables into input array
            modelconfig.fillInputData(fillIdx, entry, event)
            fillIdx += 1

    if first:
        # the loop body never ran, so the output layout was never set up
        print("no entries found in input file - no output written")
        return

    # cut outputData to filled length
    if apply_selection:
        print("events that fulfilled the selection: {}/{}".format(fillIdx, len(outputData)))
        outputData = outputData[:fillIdx]
        modelconfig.removeTrailingEntries(fillIdx)

    # get dnn output; skipped without rows, because reshape(0, -1) below
    # cannot infer the second dimension of an empty array
    nRows = len(outputData)
    if nRows > 0:
        for dnnSet in modelconfig.dnnsets:
            dnnOutput, maxIndex = dnnSet.evaluate(nRows)

            # fill dnn output
            outputData[:, dnnSet.idxOutLo:dnnSet.idxOutHi] = dnnOutput
            # fill predicted index
            outputData[:, dnnSet.idxOutHi:dnnSet.idxPrediction] = maxIndex.reshape(nRows, -1)

    # test print of (at most) the first ten rows
    for row in outputData[:10]:
        print("=== testevent ===")
        for name, value in zip(outputVariables, row):
            print(name, value)
        print("=================" + "\n\n")

    print("\nsaving information ...")
    # save information as h5 file
    df = pd.DataFrame(outputData, columns = outputVariables)
    df.to_hdf(outpath.replace(".root", ".h5"), key = "data", mode = "w")
    del df

    # open output root file
    with load.OutputFile(outpath) as outfile:
        # initialize branches
        outfile.SetBranches(outputVariables)
        # loop over events and fill tree
        for event in outputData:
            outfile.FillTree(event)
Beispiel #4
0
def evaluate_reconstruction(filename,
                            modelname,
                            configpath,
                            friendTrees,
                            outpath,
                            apply_selection=False):
    """Pick the permutation preferred by the DNN per event and store it.

    For each event the hypotheses module returns a dataframe of permutations
    (``entry``) and an error flag. The rows passing
    ``config.def_dnn_reco_selection`` are handed to ``model.findBest``; the
    chosen row is stored together with three extra columns, named
    ``config.naming`` plus
      * ``_DNNOutput``: the value returned by ``model.findBest``
      * ``_squaredDNNOutput``: that value squared
      * ``_transformedDNNOutput``: ``log(x / (1 - x))`` of that value

    Placeholder rows copy the first permutation and set the three extra
    columns to
      * (-1, -99, -99) if the hypotheses module reports an error (only
        written when ``apply_selection`` is False)
      * (-9, -9, -9) if no permutation passes the reco selection

    The result is saved as ``.h5`` (key ``"data"``) and as a ROOT tree.

    Args:
        filename: path of the input ntuple.
        modelname: passed to ``load.Model``.
        configpath: passed to ``load.Config`` together with ``friendTrees``.
        friendTrees: passed to ``load.Config``.
        outpath: path of the output ROOT file; the ``.h5`` file uses the
            same path with the extension replaced.
        apply_selection: if true, events flagged with an error are skipped
            and the output is cut to the number of filled rows.

    If the input tree has no entries, a message is printed and nothing is
    written, because the output columns are only known after the first
    event has been read.
    """
    print(" ===== EVALUATING FILE ===== ")
    print(filename)
    print(" =========================== ")

    # load the DNN model
    model = load.Model(modelname)

    # set variables needed for dnn training
    model.setVariables()

    config = load.Config(configpath, friendTrees, "Reconstruction")

    # open input file
    with load.InputFile(filename, config.getFriendTrees(filename)) as ntuple:

        # load hypotheses module
        hypotheses = Hypotheses(config)

        # initialize hypotheses combinatorics
        hypotheses.initPermutations()

        first = True
        fillIdx = 0
        # start loop over ntuple entries
        for event in load.TreeIterator(ntuple):
            entry, error = hypotheses.GetEntry(event, event.N_Jets)

            if first:
                # check if all variables for DNN evaluation are present in dataframe
                check_entry(entry, model.variables)

                # all dataframe variables plus the three DNN output columns
                outputVariables = entry.columns.values
                for suffix in ("_DNNOutput", "_squaredDNNOutput",
                               "_transformedDNNOutput"):
                    outputVariables = np.append(outputVariables,
                                                config.naming + suffix)
                for v in outputVariables:
                    print(v)

                # setup empty array for event data storage
                outputData = np.zeros(shape=(ntuple.GetEntries(),
                                             len(outputVariables)))

                first = False

            if error:
                print("hypothesis not viable")
                # for some reason no hypotheses are viable
                #   e.g. not enough jets
                if not apply_selection:
                    outputData[fillIdx, :-3] = entry.iloc[0].values
                    # fill dummy output values of DNN
                    outputData[fillIdx, -3] = -1.
                    outputData[fillIdx, -2] = -99.
                    outputData[fillIdx, -1] = -99.
                    fillIdx += 1
                continue

            # restrict to the permutations passing the reco selection
            entry_reco_selection = entry_selection(
                entry, config.def_dnn_reco_selection)
            if entry_reco_selection.shape[0] == 0:
                # NOTE(review): unlike the error case above, this placeholder
                # row is written even when apply_selection is set - confirm
                # that this is intended.
                outputData[fillIdx, :-3] = entry.iloc[0].values
                # fill dummy output values of DNN
                outputData[fillIdx, -3] = -9.
                outputData[fillIdx, -2] = -9.
                outputData[fillIdx, -1] = -9.
                fillIdx += 1
                continue

            # get best permutation; bestIndex is used positionally on the
            # selected rows
            bestIndex, outputValue = model.findBest(entry_reco_selection)

            # fill output data array
            outputData[fillIdx, :-3] = entry_reco_selection.iloc[bestIndex].values
            # fill output values of DNN
            outputData[fillIdx, -3] = outputValue
            outputData[fillIdx, -2] = outputValue**2
            outputData[fillIdx, -1] = np.log(outputValue / (1. - outputValue))

            if fillIdx <= 10:
                print("=== testevent ===")
                for name, value in zip(outputVariables, outputData[fillIdx]):
                    print(name, value)
                print("=================" + "\n\n")

            fillIdx += 1

    if first:
        # the loop body never ran, so neither the column names nor the
        # output array exist
        print("no entries found in input file - no output written")
        return

    # cut outputData to filled length
    if apply_selection:
        print("events that fulfilled the selection: {}/{}".format(
            fillIdx, len(outputData)))
        outputData = outputData[:fillIdx]

    # save information as h5 file
    df = pd.DataFrame(outputData, columns=outputVariables)
    df.to_hdf(outpath.replace(".root", ".h5"), key="data", mode="w")
    del df

    # open output root file
    with load.OutputFile(outpath) as outfile:
        # initialize branches
        outfile.SetBranches(outputVariables)
        # loop over events and fill tree
        for event in outputData:
            outfile.FillTree(event)
Beispiel #5
0
def convert_database(filename, configpath, outpath, friendTrees, database):
    """Copy database values of matching events into a new tree.

    For every event of the input ntuple the index table (read with
    ``pd.read_hdf``) is searched for a row with the same run, lumi and event
    id. If one is found, the database tree is moved to the index label of
    the first matching row and the value of every database branch is copied
    into the output branch arrays. If none is found, a message is printed
    and the arrays are written as they are.

    Args:
        filename: path of the input ntuple.
        configpath: passed to ``load.Config`` together with ``friendTrees``.
        outpath: path of the output file.
        friendTrees: passed to ``load.Config``.
        database: forwarded to ``config.getDataBase`` to pick the database
            ROOT file and its index file.
    """
    print(" ===== EVALUATING FILE ===== ")
    print(filename)
    print(" =========================== ")

    config = load.Config(configpath, friendTrees, "Database")

    # figure out the correct database to load
    dbfile, indexfile = config.getDataBase(filename, database)

    print("loading database....")
    # opening database root file
    rf = ROOT.TFile(dbfile)
    try:
        db = rf.Get(config.treename)

        # opening db file with indices
        idf = pd.read_hdf(indexfile)
        print(idf)

        # collect branches to write
        branches = [b.GetName() for b in db.GetListOfBranches()]

        # open input file
        with load.InputFile(filename, config.getFriendTrees(filename)) as ntuple:
            # open output root file
            with load.OutputFile(outpath) as outfile:
                outfile.SetBranchList(branches + ["Evt_Run", "Evt_Lumi", "Evt_ID"])

                # start loop over ntuple entries
                first = True
                for i, event in enumerate(load.TreeIterator(ntuple)):
                    config.calculate_variables(event, outfile)

                    # search for corresponding event in database
                    dbevt = idf.loc[(idf[config.run] == event.Evt_Run)
                                    & (idf[config.lumi] == event.Evt_Lumi)
                                    & (idf[config.evtid] == event.Evt_ID)]

                    # test for "no match" explicitly instead of catching
                    # every exception, so that real errors (wrong column
                    # name, interrupt, ...) are not reported as a missing
                    # event
                    if len(dbevt.index) > 0:
                        # jumping to indexed event in tree
                        db.GetEvent(dbevt.index[0])
                        # filling branches; plain attribute lookup instead
                        # of eval() on names read from the database file
                        for b in branches:
                            outfile.branchArrays[b][0] = getattr(db, b)
                    else:
                        print("event ({}, {}, {}) is not in database - filling defaults".format(event.Evt_Run, event.Evt_Lumi, event.Evt_ID))

                    if first:
                        print("writing variables to output tree:")
                        for b in list(outfile.tree.GetListOfBranches()):
                            print(b.GetName())
                        first = False

                    outfile.FillTree()
                    if i <= 10:
                        print(" === testevent ===")
                        for b in list(outfile.tree.GetListOfBranches()):
                            print(b.GetName(), ", ".join([str(entry) for entry in list(outfile.branchArrays[b.GetName()])]))
                        print(" =================" + "\n")
                    outfile.ClearArrays()
    finally:
        # release the database file also when an error occurs
        rf.Close()