Python evaluate_reader Exemples, root_numpy.tmva.evaluate_reader Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : _tmvaReader.py Projet : arogozhnikov/rep

def tmva_process(info, data):
    """
    Build a TMVA reader for a trained model and evaluate it on ``data``.

    :param rep.estimators.tmva._AdditionalInformationPredict info: additional information
        (``model_type``, ``method_name`` and ``xml_file`` are read from it)
    :param pandas.DataFrame data: data to predict; one reader variable is added per column
    :return: the result of ``evaluate_reader`` for the booked method

    """
    import ROOT

    reader = ROOT.TMVA.Reader()
    for column in data.columns:
        # every variable is registered with its own one-element float buffer
        reader.AddVariable(column, array.array('f', [0.]))

    kind, transform = info.model_type
    reader.BookMVA(info.method_name, info.xml_file)

    # ``aux`` is forwarded only for classification models whose transform
    # string contains 'sig_eff'; the number is read from the piece that
    # follows the first '=' in that string.
    extra = {}
    if kind == 'classification' and transform is not None and 'sig_eff' in transform:
        efficiency = float(transform.strip().split('=')[1])
        assert 0.0 <= efficiency <= 1., 'signal efficiency must be in [0, 1], not {}'.format(
            efficiency)
        extra['aux'] = efficiency

    return evaluate_reader(reader, info.method_name, data, **extra)

Exemple #2

0

Afficher le fichier

def tmva_process(info, data):
    """
    Set up a TMVA reader from the stored model and predict ``data`` with it.

    :param rep.estimators.tmva._AdditionalInformationPredict info: additional information
        (``model_type``, ``method_name`` and ``xml_file`` are read from it)
    :param pandas.DataFrame data: data to predict; its columns become the reader variables
    :return: the result of ``evaluate_reader`` for the booked method

    """
    import ROOT

    reader = ROOT.TMVA.Reader()
    for name in data.columns:
        reader.AddVariable(name, array.array('f', [0.]))

    model_type, sigmoid_function = info.model_type
    reader.BookMVA(info.method_name, info.xml_file)

    uses_signal_efficiency = (model_type == 'classification'
                              and sigmoid_function is not None
                              and 'sig_eff' in sigmoid_function)
    if not uses_signal_efficiency:
        # plain evaluation, no auxiliary value
        return evaluate_reader(reader, info.method_name, data)

    # the efficiency is the piece following the first '=' of the setting string
    signal_efficiency = float(sigmoid_function.strip().split('=')[1])
    assert 0.0 <= signal_efficiency <= 1., 'signal efficiency must be in [0, 1], not {}'.format(
        signal_efficiency)
    return evaluate_reader(reader,
                           info.method_name,
                           data,
                           aux=signal_efficiency)

Exemple #3

0

Afficher le fichier

Fichier : EvalTMVA.py Projet : vananiashvili/4topAnalysis

 def ImportXml(self, train, test, ModelName):
     """Book the trained TMVA model ``ModelName`` and score both samples.

     One reader variable is registered per entry of ``self.DataSet.LVariables``
     and the weights are read from
     ``./dataset/weights/TMVAClassification_<ModelName>.weights.xml``.

     Returns the classification scores as a ``(test, train)`` pair, i.e. the
     scores of ``test.Events`` come first.
     """
     reader = ROOT.TMVA.Reader()
     # Register every input variable with the reader used for evaluation
     for name in self.DataSet.LVariables:
         reader.AddVariable(name, array('f', [0.]))

     prefix = './dataset/weights/TMVAClassification_'
     suffix = '.weights.xml'
     reader.BookMVA(ModelName, prefix + ModelName + suffix)

     test_scores = evaluate_reader(reader, ModelName, test.Events)
     train_scores = evaluate_reader(reader, ModelName, train.Events)
     return test_scores, train_scores

Exemple #4

0

Afficher le fichier

def plot_clf_results_tmva(reader,
                          x_train,
                          y_train,
                          w_train,
                          x_test,
                          y_test,
                          w_test,
                          nbins=30,
                          figname="BDTOutput_tmva.png",
                          verbose="False"):
    """Evaluate the booked "BDT" method on train and test data and plot the outputs.

    For each of the two samples the weights are normalised to unit sum, the
    events are split into ``y > 0.5`` and ``y < 0.5`` and both parts are
    evaluated with ``evaluate_reader``. The four decision arrays and the four
    matching weight arrays (train first, then test; ``y > 0.5`` before
    ``y < 0.5``) are handed to ``plot_clf_results`` together with ``nbins``,
    ``figname`` and ``verbose``.

    The caller's weight arrays are left untouched.

    NOTE(review): the default ``verbose="False"`` is a non-empty string and
    therefore truthy; it is kept as-is to preserve the interface - confirm
    whether ``False`` was intended.
    """
    decisions = []
    weights = []
    for x, y, w in ((x_train, y_train, w_train), (x_test, y_test, w_test)):
        # Normalise into a fresh array: the former in-place ``w *= ...``
        # rescaled the caller's weights as a side effect and raised for
        # integer-typed weight arrays.
        w = w * (1. / np.sum(w))
        above = y > 0.5
        below = y < 0.5
        dsig = evaluate_reader(reader, "BDT", x[above])
        dbkg = evaluate_reader(reader, "BDT", x[below])
        decisions += [dsig, dbkg]
        weights += [w[above], w[below]]

    plot_clf_results(tuple(decisions), tuple(weights), nbins, figname, verbose)

Exemple #5

0

Afficher le fichier

def get_tmva_test_results(directory, variables, name=''):
    """Score the TMVA test tree with the trained BDT.

    Books ``<directory>weights/TMVA_BDT.weights.xml`` as method "BDT", reads
    the ``TestTree`` of ``<directory>tmva_output.root`` and evaluates the
    reader on the ``variables`` branches. ``directory`` is concatenated
    directly, without inserting a path separator.

    Returns ``(y_test, y_decision, w_test, name)`` where ``y_test`` is
    ``1 - classID``, ``y_decision`` the reader output and ``w_test`` the
    ``weight`` branch.
    """
    # TMVA reader: every variable is registered as a float
    bdt_reader = r.TMVA.Reader()
    for variable in variables:
        bdt_reader.AddVariable(variable, array('f', [0]))
    bdt_reader.BookMVA('BDT', directory + 'weights/TMVA_BDT.weights.xml')

    # Testing dataset
    tree_file = directory + 'tmva_output.root'
    features = rec2array(root2array(tree_file, 'TestTree', variables))
    labels = 1 - root2array(tree_file, 'TestTree', 'classID')
    event_weights = root2array(tree_file, 'TestTree', 'weight')

    decisions = evaluate_reader(bdt_reader, "BDT", features)
    return (labels, decisions, event_weights, name)

Exemple #6

0

Afficher le fichier

Fichier : training.py Projet : cms-ttH/ttH-TauRoast

def evaluate(config, tree, names, transform=None):
    """Score ``tree`` with every MVA listed in ``names`` and attach the results.

    For each name the setup is loaded using the second underscore-separated
    token of the name, and the setup's variables (optionally passed through
    ``transform``) are read from ``tree.raw()``.

    * Names starting with "sklearn" additionally load a pickled
      ``(bdt, label)`` pair from ``<mvadir>/<name>.pkl``; column 1 of
      ``predict_proba`` is stored under ``name``.
    * For every name a TMVA reader is booked from ``<mvadir>/<name>.xml`` and
      its output stored under ``name`` with "sklearn" replaced by "tmvalike".

    If ``mapping.root`` can be opened, a ``tmvalike_likelihood`` column is
    added by looking up each ``(tmvalike_tt, tmvalike_ttZ)`` pair in the
    ``hTargetBinning`` histogram. The columns are then combined into one
    structured array that is passed to ``tree.mva(array2tree(...))``.

    Raises ``KeyError`` if the likelihood step runs without ``tmvalike_tt``
    or ``tmvalike_ttZ`` among the produced columns.
    """
    output = []
    dtype = []
    for name in names:
        setup = load(config, name.split("_")[1])
        data = rec2array(tree2array(
            tree.raw(),
            list(transform(setup["variables"])) if transform else setup["variables"]))
        if name.startswith("sklearn"):
            fn = os.path.join(config["mvadir"], name + ".pkl")
            # NOTE: unpickling executes arbitrary code - the model file must
            # come from a trusted location.
            with open(fn, 'rb') as fd:
                bdt, _ = pickle.load(fd)
            scores = []
            if len(data) > 0:
                scores = bdt.predict_proba(data)[:, 1]
            output += [scores]
            dtype += [(name, 'float64')]

        # The TMVA evaluation runs for every name, sklearn-prefixed or not.
        fn = os.path.join(config["mvadir"], name + ".xml")
        reader = r.TMVA.Reader("Silent")
        for var in setup['variables']:
            reader.AddVariable(var, array('f', [0.]))
        reader.BookMVA("BDT", fn)
        scores = evaluate_reader(reader, "BDT", data)
        output += [scores]
        dtype += [(name.replace("sklearn", "tmvalike"), 'float64')]

    f = r.TFile(os.path.join(config.get("mvadir", config.get("indir", config["outdir"])), "mapping.root"), "READ")
    if f.IsOpen():
        likelihood = f.Get("hTargetBinning")

        def lh(values):
            return likelihood.GetBinContent(likelihood.FindBin(*values))
        indices = {v: n for n, (v, _) in enumerate(dtype)}
        tt = output[indices['tmvalike_tt']]
        ttZ = output[indices['tmvalike_ttZ']]
        if len(tt) == 0:
            output += [[]]
        else:
            output += [np.apply_along_axis(lh, 1, np.array([tt, ttZ]).T)]
        dtype += [('tmvalike_likelihood', 'float64')]
        f.Close()

    # zip() is lazy on Python 3; np.array needs a real sequence of row tuples
    # to build the structured array, so materialise it first.
    data = np.array(list(zip(*output)), dtype)
    tree.mva(array2tree(data))

Exemple #7

0

Afficher le fichier

Fichier : root_tmva.py Projet : jemrobinson/bbyy_jet_classifier

    def test(self, test_data, classification_variables, training_sample):
        """
        Definition:
        -----------
            Testing method for RootTMVA; it books the BDT weights found in the "weights" sub-folder
            and evaluates them on the given data
        Args:
        -----
            test_data = dictionary; only its "X" entry is read here and handed to the reader
            classification_variables = list of names of variables used for classification
            training_sample = string naming the training sample; used as a directory component
                of the weights path (e.g. "SM_merged" or "X350_hh")

        Returns:
        --------
            yhat = the reader output for the "BDT" method
            yhat_class = array of len(yhat) zeros, set to 1 wherever yhat >= 0
        """
        logger = logging.getLogger("root_tmva")
        logger.info("Evaluating performance...")

        # -- Construct reader and add variables to it:
        logger.info("Construct TMVA reader and add variables to it")
        reader = TMVA.Reader()
        for variable in classification_variables:
            reader.AddVariable(variable, array.array("f", [0]))

        # -- Load TMVA results
        weights_file = os.path.join(
            self.output_directory,
            training_sample,
            self.name,
            "weights",
            "TMVAClassification_BDT.weights.xml",
        )
        reader.BookMVA("BDT", weights_file)

        yhat = evaluate_reader(reader, "BDT", test_data["X"])

        # -- add binary classification labels
        labels = np.zeros(len(yhat))
        labels[yhat >= 0] = 1
        return yhat, labels

Exemple #8

0

Afficher le fichier

Fichier : plot_multiclass.py Projet : vvolkl/root_numpy

# Train an MLP
# The BookMethod callable depends on the ROOT version: bound to `factory` for
# ROOT >= 6.07/04, otherwise the unbound TMVA.Factory.BookMethod. Either way
# `data` is passed as the first argument.
if ROOT_VERSION >= '6.07/04':
    BookMethod = factory.BookMethod
else:
    BookMethod = TMVA.Factory.BookMethod
BookMethod(data, 'MLP', 'MLP',
           'NeuronType=tanh:NCycles=200:HiddenLayers=N+2,2:'
           'TestRate=5:EstimatorType=MSE')
factory.TrainAllMethods()

# Classify the test dataset with the MLP
# Two input variables, f0 and f1, are registered before the weights are booked.
reader = TMVA.Reader()
for n in range(2):
    reader.AddVariable('f{0}'.format(n), array('f', [0.]))
reader.BookMVA('MLP', 'weights/classifier_MLP.weights.xml')
class_proba = evaluate_reader(reader, 'MLP', X_test)

# Plot the decision boundaries
plot_colors = "rgb"
plot_step = 0.02
class_names = "ABC"
cmap = plt.get_cmap('Paired')

fig = plt.figure(figsize=(5, 5))
fig.patch.set_alpha(0)
# Grid covering the first two columns of X, padded by 1 on each side.
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, plot_step),
                     np.arange(y_min, y_max, plot_step))

# Evaluate the MLP on every grid point (one row per point, two columns).
Z = evaluate_reader(reader, 'MLP', np.c_[xx.ravel(), yy.ravel()])

Exemple #9

0

Afficher le fichier

Fichier : testRootNumpy.py Projet : BetterWang/cmssw

# The BookMethod callable depends on the ROOT version: bound to `factory` for
# ROOT >= 6.07/04, otherwise the unbound TMVA.Factory.BookMethod. Either way
# `data` is passed as the first argument.
if ROOT_VERSION >= '6.07/04':
    BookMethod = factory.BookMethod
else:
    BookMethod = TMVA.Factory.BookMethod
# Two regression BDTs that differ only in the number of trees (1 vs. 300).
BookMethod(data, 'BDT', 'BDT1',
                   'nCuts=20:NTrees=1:MaxDepth=4:BoostType=AdaBoostR2:'
                   'SeparationType=RegressionVariance')
BookMethod(data, 'BDT', 'BDT2',
                   'nCuts=20:NTrees=300:MaxDepth=4:BoostType=AdaBoostR2:'
                   'SeparationType=RegressionVariance')
factory.TrainAllMethods()

# Predict the regression target
# One reader with the single input variable 'x' serves both booked methods.
reader = TMVA.Reader()
reader.AddVariable('x', array('f', [0.]))
reader.BookMVA('BDT1', 'weights/regressor_BDT1.weights.xml')
reader.BookMVA('BDT2', 'weights/regressor_BDT2.weights.xml')
y_1 = evaluate_reader(reader, 'BDT1', X)
y_2 = evaluate_reader(reader, 'BDT2', X)

# Plot the results
plt.figure()
plt.scatter(X, y, c="k", label="training samples")
plt.plot(X, y_1, c="g", label="1 tree", linewidth=2)
plt.plot(X, y_2, c="r", label="300 trees", linewidth=2)
plt.xlabel("data")
plt.ylabel("target")
plt.title("Boosted Decision Tree Regression")
plt.legend()
# Written to a file named 'RootNumpy'; no extension is given here.
plt.savefig('RootNumpy')

Exemple #10

0

Afficher le fichier

Fichier : plot_twoclass.py Projet : tibristo/BosonTagger

# Book an AdaBoost BDT; all options are passed as a single ':'-separated string.
factory.BookMethod('BDT', 'BDT',
                   'NTrees=100:nEventsMin=30:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.2:SeparationType=GiniIndex:nCuts=10:PruneStrength=2:PruneMethod=ExpectedError')
# Disabled alternative booking, kept for reference:
#factory.BookMethod('kBDT', 'BDT',
#                   'Fisher:VarTransform=None:CreateMVAPdfs:'
#                   'PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:'
#                   'NsmoothMVAPdf=10')

# NOTE: print statements - this excerpt is Python 2 code.
print 'added all the events and booked the method'
factory.TrainAllMethods()
print 'trained'
# Classify the test dataset with the classifier
# One float variable f0..f<n_vars-1> is registered per input feature.
reader = TMVA.Reader()
for n in range(n_vars):
    reader.AddVariable('f{0}'.format(n), array('f', [0.]))
reader.BookMVA('BDT', 'weights/classifier_BDT.weights.xml')
twoclass_output = evaluate_reader(reader, 'BDT', X_test)

plot_colors = "br"
plot_step = 0.02
class_names = "AB"
cmap = plt.get_cmap('bwr')

plt.figure(figsize=(10, 5))

# Plot the decision boundaries
plt.subplot(121)
# Axis limits: range of the first two columns of X, padded by 1 on each side.
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
'''
xx, yy = np.meshgrid(np.arange(x_min, x_max, plot_step),
                     np.arange(y_min, y_max, plot_step))

Exemple #11

0

Afficher le fichier

# The following line is necessary if events have been added individually:
factory.PrepareTrainingAndTestTree(TCut('1'), 'NormMode=EqualNumEvents')

# Train a classifier
# Fisher discriminant; all options are passed as a single ':'-separated string.
factory.BookMethod('Fisher', 'Fisher',
                   'Fisher:VarTransform=None:CreateMVAPdfs:'
                   'PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:'
                   'NsmoothMVAPdf=10')
factory.TrainAllMethods()

# Classify the test dataset with the classifier
# One float variable f0..f<n_vars-1> is registered per input feature.
reader = TMVA.Reader()
for n in range(n_vars):
    reader.AddVariable('f{0}'.format(n), array('f', [0.]))
reader.BookMVA('Fisher', 'weights/classifier_Fisher.weights.xml')
twoclass_output = evaluate_reader(reader, 'Fisher', X_test)

plot_colors = "br"
plot_step = 0.02
class_names = "AB"
cmap = plt.get_cmap('bwr')

plt.figure(figsize=(10, 5))

# Plot the decision boundaries
plt.subplot(121)
# Grid covering the first two columns of X, padded by 1 on each side.
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, plot_step),
                     np.arange(y_min, y_max, plot_step))

Exemple #12

0

Afficher le fichier

Fichier : evaluate_and_store.py Projet : radicaleid/btagging

def main(weights, picklename, filename, treename='bTag_AntiKt2PV0TrackJets'):
    '''
    evaluate the tmva method after transforming input data into right format
    Args:
    -----
        weights:    .xml file out of mv2 training containing bdt parameters
        picklename: name of the output pickle to store new mv2 values
        filename:   .root file with ntuples used to evaluate the tmva method
        treename:   (optional) name of the TTree to consider 
    Returns:
    --------
        0 once the output pickle has been written
    Raises:
    -------
        nothing explicitly; a KeyError propagates if the XML lists a variable
        label that has no entry in the name mapping below
    '''
    print('Parsing XML file...')
    # -- Load XML file
    tree = ET.parse(weights)
    root = tree.getroot()

    # -- Get list of variable names from XML file
    var_list = [
        var.attrib['Label']
        for var in root.findall('Variables')[0].findall('Variable')
    ]

    print('Loading .root file for evaluation...')
    # -- Get ntuples:
    df = pup.root2panda(
        filename,
        treename,
        branches=[
            'jet_pt', 'jet_eta', 'jet_phi', 'jet_m', 'jet_ip2d_pu',
            'jet_ip2d_pc', 'jet_ip2d_pb', 'jet_ip3d_pu', 'jet_ip3d_pc',
            'jet_ip3d_pb', 'jet_sv1_vtx_x', 'jet_sv1_vtx_y', 'jet_sv1_vtx_z',
            'jet_sv1_ntrkv', 'jet_sv1_m', 'jet_sv1_efc', 'jet_sv1_n2t',
            'jet_sv1_sig3d', 'jet_jf_n2t', 'jet_jf_ntrkAtVx', 'jet_jf_nvtx',
            'jet_jf_nvtx1t', 'jet_jf_m', 'jet_jf_efc', 'jet_jf_sig3d',
            'jet_jf_deta', 'jet_jf_dphi', 'PVx', 'PVy', 'PVz'
        ])

    # -- Insert default values, calculate MV2 variables from the branches in df
    df = transformVars(df)

    # -- Map ntuple names to var_list
    names_mapping = {
        'pt': 'jet_pt',
        'abs(eta)': 'abs(jet_eta)',
        'ip2': 'jet_ip2',
        'ip2_c': 'jet_ip2_c',
        'ip2_cu': 'jet_ip2_cu',
        'ip3': 'jet_ip3',
        'ip3_c': 'jet_ip3_c',
        'ip3_cu': 'jet_ip3_cu',
        'sv1_ntkv': 'jet_sv1_ntrkv',
        'sv1_mass': 'jet_sv1_m',
        'sv1_efrc': 'jet_sv1_efc',
        'sv1_n2t': 'jet_sv1_n2t',
        'sv1_Lxy': 'jet_sv1_Lxy',
        'sv1_L3d': 'jet_sv1_L3d',
        'sv1_sig3': 'jet_sv1_sig3d',
        'sv1_dR': 'jet_sv1_dR',
        'jf_n2tv': 'jet_jf_n2t',
        'jf_ntrkv': 'jet_jf_ntrkAtVx',
        'jf_nvtx': 'jet_jf_nvtx',
        'jf_nvtx1t': 'jet_jf_nvtx1t',
        'jf_mass': 'jet_jf_m',
        'jf_efrc': 'jet_jf_efc',
        'jf_dR': 'jet_jf_dR',
        'jf_sig3': 'jet_jf_sig3d'
    }

    print('Initializing TMVA...')
    # -- TMVA: Initialize reader, add empty variables and weights from training
    reader = TMVA.Reader()
    for var_name in var_list:
        reader.AddVariable(var_name, array('f', [0]))
    reader.BookMVA('BDTG akt2', weights)

    print('Creating feature matrix...')
    # -- Get features for each event and store them in X_test
    # Each event row is normalised entry by entry, stacked and transposed, and
    # the resulting rows are appended to the flat buffer.
    X_buf = []
    for event in df[[names_mapping[var] for var in var_list]].values:
        X_buf.extend(
            np.array([normalize_type(jet) for jet in event]).T.tolist())
    X_test = np.array(X_buf)

    print('Evaluating!')
    # -- TMVA: Evaluate!
    twoclass_output = evaluate_reader(reader, 'BDTG akt2', X_test)

    # -- Reshape the MV2 output into event-jet format
    reorganized = match_shape(twoclass_output, df['jet_pt'])

    # cPickle exists only on Python 2; fall back to pickle elsewhere.
    try:
        import cPickle as pickle
    except ImportError:
        import pickle
    print('Saving new MV2 weights in {}'.format(picklename))
    # Context manager so the output file is flushed and closed deterministically.
    with open(picklename, 'wb') as pickle_file:
        pickle.dump(reorganized, pickle_file)

    # -- Write the new branch to the tree (currently de-activated)
    #add_branch(reorganized, filename, treename, 'jet_mv2c20_new')

    print('Done. Success!')
    return 0

Exemple #13

0

Afficher le fichier

Fichier : collections.py Projet : Pmeiring/NtupleTools

def cl3d_fixtures(clusters, tcs):
    """Add derived columns to a cluster frame (kept for backward compatibility).

    An empty ``clusters`` frame is returned untouched. Otherwise the frame is
    modified in place and returned:

    * columns ``clusters_id`` / ``clusters_n`` are renamed to ``clusters`` /
      ``nclu``;
    * ``layer_energy`` and/or ``hoe`` are computed per row when missing, from
      the rows of ``tcs`` whose ``id`` is listed in the cluster's ``clusters``
      entry;
    * ``ptem`` and ``eem`` are set to ``pt`` and ``energy`` divided by
      ``1 + hoe``.

    ``tcs`` is only accessed when one of the two columns has to be computed.
    """
    # for backward compatibility
    if clusters.empty:
        return clusters

    clusters.rename(columns={'clusters_id': 'clusters', 'clusters_n': 'nclu'},
                    inplace=True)

    do_compute_hoe = 'hoe' not in clusters.columns
    do_compute_layer_energy = 'layer_energy' not in clusters.columns

    def compute_layer_energy3(cluster, do_layer_energy=True, do_hoe=False):
        # Energy-weighted histogram of the component layers in 14 bins of
        # width 2 over [0, 28].
        components = tcs[tcs.id.isin(cluster.clusters)]
        hist, _ = np.histogram(components.layer.values,
                               bins=range(0, 29, 2),
                               weights=components.energy.values)
        results = []
        if do_layer_energy:
            results.append(hist)
        if do_hoe:
            em_energy = np.sum(hist)
            # -1 marks clusters whose histogram sums to zero
            hoe = -1
            if em_energy != 0:
                hoe = max(0, cluster.energy - em_energy) / em_energy
            results.append(hoe)
        return results

    if do_compute_hoe or do_compute_layer_energy:
        # Column order must match the order in which the helper appends its
        # results: layer_energy first, then hoe.
        new_columns = []
        if do_compute_layer_energy:
            new_columns.append('layer_energy')
        if do_compute_hoe:
            new_columns.append('hoe')
        clusters[new_columns] = clusters.apply(
            lambda cl: compute_layer_energy3(cl, do_compute_layer_energy,
                                             do_compute_hoe),
            result_type='expand',
            axis=1)

    clusters['ptem'] = clusters.pt / (1 + clusters.hoe)
    clusters['eem'] = clusters.energy / (1 + clusters.hoe)
    # NOTE(review): the BDT evaluation below is switched off with a literal
    # False and never runs - confirm whether it should be removed or restored.
    if False:
        clusters['bdt_pu'] = rnptmva.evaluate_reader(
            classifiers.mva_pu_classifier_builder(), 'BDT',
            clusters[['pt', 'eta', 'maxlayer', 'hoe', 'emaxe', 'szz']])

        clusters['bdt_pi'] = rnptmva.evaluate_reader(
            classifiers.mva_pi_classifier_builder(), 'BDT',
            clusters[['pt', 'eta', 'maxlayer', 'hoe', 'emaxe', 'szz']])
    return clusters

Exemple #14

0

Afficher le fichier

    print 'Training takes ', stop - start, 's'
# NOTE: print statements - this excerpt is Python 2 code.
if not args.quiet:
    print 'Training done'
    print 'Outputs in directory : ', args.outdir

# evaluate training results
if args.evaluate:

    factory.TestAllMethods()
    factory.EvaluateAllMethods()

    # Rebuild a reader from the weights file and register every variable
    # as a float.
    reader = TMVA.Reader()
    for v in var:
        #vtype = 'i' if v in ['nJet','tau0_decaymode','tau1_decaymode','ntags','ntags_loose'] else 'f'

        reader.AddVariable(v, array('f', [0]))

    # args.outdir is concatenated directly, without inserting a path separator.
    reader.BookMVA('BDT', args.outdir + 'weights/TMVA_BDT.weights.xml')
    y_decision = evaluate_reader(reader, "BDT", x_test)
    # Classifier-output plot for the train and test samples.
    util.plot_clf_results_tmva(reader,
                               x_train,
                               y_train,
                               w_train,
                               x_test,
                               y_test,
                               w_test,
                               figname=args.outdir + "bdtoutput.png",
                               verbose=(not args.quiet))
    # ROC curve from the test-sample decisions computed above.
    util.plot_roc((y_test, y_decision, w_test),
                  figname=args.outdir + 'roc.png')

Exemple #15

0

Afficher le fichier

Fichier : testRootNumpy.py Projet : archiesharma/muonPhase2

# The BookMethod callable depends on the ROOT version: bound to `factory` for
# ROOT >= 6.07/04, otherwise the unbound TMVA.Factory.BookMethod. Either way
# `data` is passed as the first argument.
if ROOT_VERSION >= '6.07/04':
    BookMethod = factory.BookMethod
else:
    BookMethod = TMVA.Factory.BookMethod
# Two regression BDTs that differ only in the number of trees (1 vs. 300).
BookMethod(
    data, 'BDT', 'BDT1', 'nCuts=20:NTrees=1:MaxDepth=4:BoostType=AdaBoostR2:'
    'SeparationType=RegressionVariance')
BookMethod(
    data, 'BDT', 'BDT2', 'nCuts=20:NTrees=300:MaxDepth=4:BoostType=AdaBoostR2:'
    'SeparationType=RegressionVariance')
factory.TrainAllMethods()

# Predict the regression target
# One reader with the single input variable 'x' serves both booked methods.
reader = TMVA.Reader()
reader.AddVariable('x', array('f', [0.]))
reader.BookMVA('BDT1', 'weights/regressor_BDT1.weights.xml')
reader.BookMVA('BDT2', 'weights/regressor_BDT2.weights.xml')
y_1 = evaluate_reader(reader, 'BDT1', X)
y_2 = evaluate_reader(reader, 'BDT2', X)

# Plot the results
plt.figure()
plt.scatter(X, y, c="k", label="training samples")
plt.plot(X, y_1, c="g", label="1 tree", linewidth=2)
plt.plot(X, y_2, c="r", label="300 trees", linewidth=2)
plt.xlabel("data")
plt.ylabel("target")
plt.title("Boosted Decision Tree Regression")
plt.legend()
# Written to a file named 'RootNumpy'; no extension is given here.
plt.savefig('RootNumpy')

Exemple #16

0

Afficher le fichier

Fichier : test_tmva.py Projet : sznajder/hepaccelerate-cms

# Input variables registered on the 2-jet reader; each gets the buffer
# returned by arr() (defined outside this excerpt).
reader2j.AddVariable("hmmphics", arr())
reader2j.AddVariable("j1pt", arr())
reader2j.AddVariable("j1eta", arr())
reader2j.AddVariable("j2pt", arr())
reader2j.AddVariable("detajj", arr())
reader2j.AddVariable("dphijj", arr())
reader2j.AddVariable("mjj", arr())
reader2j.AddVariable("met", arr())
reader2j.AddVariable("zepen", arr())
reader2j.AddVariable("njets", arr())
reader2j.AddVariable("drmj", arr())
reader2j.AddVariable("m1ptOverMass", arr())
reader2j.AddVariable("m2ptOverMass", arr())
reader2j.AddVariable("m1eta", arr())
reader2j.AddVariable("m2eta", arr())
# Spectators are registered separately from the input variables.
reader2j.AddSpectator("hmerr", arr())
reader2j.AddSpectator("weight", arr())
reader2j.AddSpectator("hmass", arr())
reader2j.AddSpectator("nbjets", arr())
reader2j.AddSpectator("bdtucsd_inclusive", arr())
reader2j.AddSpectator("bdtucsd_01jet", arr())
reader2j.AddSpectator("bdtucsd_2jet", arr())

# NOTE(review): from here on the code uses `reader`, not `reader2j`, and
# AddVariable is called with a name only (no buffer argument). `reader` and
# `n_vars` are not defined in this excerpt - confirm this is intended.
for n in range(n_vars):
    reader.AddVariable('f{0}'.format(n), )
reader.BookMVA('TMVAClassification_BDTG.weights.2jet_bveto',
               'TMVAClassification_BDTG.weights.2jet_bveto.xml')

# Single all-zero event with 24 float columns used as a smoke-test input.
X = np.zeros((1, 24), dtype='f')
Z = evaluate_reader(reader, 'TMVAClassification_BDTG.weights.2jet_bveto', X)

Exemple #17

0

Afficher le fichier

Fichier : evaluate_and_store.py Projet : mickypaganini/TrackJetBTagging

def main(weights, picklename, filename, treename = 'bTag_AntiKt2PV0TrackJets'):
    '''
    evaluate the tmva method after transforming input data into right format
    Args:
    -----
        weights:    .xml file out of mv2 training containing bdt parameters
        picklename: name of the output pickle to store new mv2 values
        filename:   .root file with ntuples used to evaluate the tmva method
        treename:   (optional) name of the TTree to consider 
    Returns:
    --------
        0 once the output pickle has been written
    Raises:
    -------
        nothing explicitly; a KeyError propagates if the XML lists a variable
        label that has no entry in the name mapping below
    '''
    print('Parsing XML file...')
    # -- Load XML file
    tree = ET.parse(weights)
    root = tree.getroot()

    # -- Get list of variable names from XML file
    var_list = [var.attrib['Label'] for var in root.findall('Variables')[0].findall('Variable')]

    print('Loading .root file for evaluation...')
    # -- Get ntuples:
    df = pup.root2panda(filename, treename, branches = ['jet_pt', 'jet_eta','jet_phi', 'jet_m', 'jet_ip2d_pu', 
        'jet_ip2d_pc', 'jet_ip2d_pb', 'jet_ip3d_pu', 'jet_ip3d_pc','jet_ip3d_pb',
        'jet_sv1_vtx_x', 'jet_sv1_vtx_y', 'jet_sv1_vtx_z', 'jet_sv1_ntrkv',
        'jet_sv1_m','jet_sv1_efc','jet_sv1_n2t','jet_sv1_sig3d',
        'jet_jf_n2t','jet_jf_ntrkAtVx','jet_jf_nvtx','jet_jf_nvtx1t','jet_jf_m',
        'jet_jf_efc','jet_jf_sig3d', 'jet_jf_deta', 'jet_jf_dphi', 'PVx', 'PVy', 'PVz' ])

    # -- Insert default values, calculate MV2 variables from the branches in df
    df = transformVars(df)

    # -- Map ntuple names to var_list
    names_mapping = {
        'pt':'jet_pt',
        'abs(eta)':'abs(jet_eta)',
        'ip2':'jet_ip2',
        'ip2_c':'jet_ip2_c',
        'ip2_cu':'jet_ip2_cu',
        'ip3':'jet_ip3',
        'ip3_c':'jet_ip3_c',
        'ip3_cu':'jet_ip3_cu',
        'sv1_ntkv':'jet_sv1_ntrkv',
        'sv1_mass':'jet_sv1_m',
        'sv1_efrc':'jet_sv1_efc',
        'sv1_n2t':'jet_sv1_n2t',
        'sv1_Lxy':'jet_sv1_Lxy',
        'sv1_L3d':'jet_sv1_L3d',
        'sv1_sig3':'jet_sv1_sig3d',
        'sv1_dR': 'jet_sv1_dR',
        'jf_n2tv':'jet_jf_n2t',
        'jf_ntrkv':'jet_jf_ntrkAtVx',
        'jf_nvtx':'jet_jf_nvtx',
        'jf_nvtx1t':'jet_jf_nvtx1t',
        'jf_mass':'jet_jf_m',
        'jf_efrc':'jet_jf_efc',
        'jf_dR':'jet_jf_dR',
        'jf_sig3':'jet_jf_sig3d' 
    }

    print('Initializing TMVA...')
    # -- TMVA: Initialize reader, add empty variables and weights from training
    reader = TMVA.Reader()
    for var_name in var_list:
        reader.AddVariable(var_name, array('f', [0] ) )
    reader.BookMVA('BDTG akt2', weights)

    print('Creating feature matrix...')
    # -- Get features for each event and store them in X_test
    # Each event row is normalised entry by entry, stacked and transposed, and
    # the resulting rows are appended to the flat buffer.
    X_buf = []
    for event in df[[names_mapping[var] for var in var_list]].values:
        X_buf.extend(np.array([normalize_type(jet) for jet in event]).T.tolist())
    X_test = np.array(X_buf)

    print('Evaluating!')
    # -- TMVA: Evaluate!
    twoclass_output = evaluate_reader(reader, 'BDTG akt2', X_test)

    # -- Reshape the MV2 output into event-jet format
    reorganized = match_shape(twoclass_output, df['jet_pt'])

    # cPickle exists only on Python 2; fall back to pickle elsewhere.
    try:
        import cPickle as pickle
    except ImportError:
        import pickle
    print('Saving new MV2 weights in {}'.format(picklename))
    # Context manager so the output file is flushed and closed deterministically.
    with open(picklename, 'wb') as pickle_file:
        pickle.dump(reorganized, pickle_file)

    # -- Write the new branch to the tree (currently de-activated)
    #add_branch(reorganized, filename, treename, 'jet_mv2c20_new')

    print('Done. Success!')
    return 0