Esempio n. 1
0
def simple_test_xgboost():
    """Train a small binary XGBoost model and compare its TMVA export to ``bdt.eval``.

    Uses at most the first 1000 rows and first 5 columns of the module-level
    ``data`` pair (``data[0]`` features, ``data[1]`` targets). The target is
    binarised as ``data_y > 5``. The fitted model is wrapped in
    ``BDTxgboost``, written to ``test.xml`` and loaded back with
    ``setup_tmva``; every row is then evaluated through both ``eval_tmva``
    and ``eval`` and rows whose outputs differ by more than 0.1 are printed.

    Returns:
        The sum over all rows of ``|(predA1 - predB1) / predA1|``, where
        ``predA1`` is the ``eval_tmva`` output and ``predB1`` the first
        element of the ``eval`` output.
    """
    data_x, data_y = data[0][:1000, :5], data[1][:1000]
    data_y_binary = (data_y > 5).astype(np.int32)

    print("Binary classification")
    print("training model")
    model = xgboost.XGBClassifier(n_estimators=10)
    model.fit(data_x, data_y_binary)

    # Newer xgboost releases expose the trained booster through
    # get_booster(); fall back to the legacy booster() accessor otherwise.
    if hasattr(model, "get_booster"):
        booster = model.get_booster()
    else:
        booster = model.booster()
    for tree in booster.get_dump():
        print(tree)

    features = ["f{0}".format(i) for i in range(data_x.shape[1])]
    target_names = [
        "cls{0}".format(i) for i in range(len(np.unique(data_y_binary)))
    ]

    bdt = BDTxgboost(model, features, target_names)
    bdt.to_tmva("test.xml")
    bdt.setup_tmva("test.xml")

    d1 = 0.0
    for irow in range(data_x.shape[0]):
        row = data_x[irow, :]
        predA1 = bdt.eval_tmva(row)
        predB1 = bdt.eval(row)[0]
        # Compare the magnitude of the difference: the threshold test must
        # sit outside np.abs(), otherwise negative deviations are never seen.
        if np.abs(predA1 - predB1) > 0.1:
            print("large deviance for row", irow, predA1, predB1, list(row))
        d1 += np.abs((predA1 - predB1) / predA1)
    return d1
Esempio n. 2
0
    def test_classify_binary(self):
        """Check that the TMVA export of a binary XGBoost model agrees with ``bdt.eval``.

        Fits a 10-estimator ``XGBClassifier`` on ``self.data_x`` /
        ``self.data_y_binary``, exports it to ``xgb_binary.xml`` through
        ``BDTxgboost`` and loads it back with ``setup_tmva``. For each row the
        relative deviation ``|(predA - predB) / predA|`` between ``eval_tmva``
        and ``eval`` must stay below 0.05, and the sum over all rows must stay
        below 0.01.
        """
        print("TestBDTxgboost test_classify_binary")

        classifier = xgboost.XGBClassifier(n_estimators=10)
        classifier.fit(self.data_x, self.data_y_binary)

        xml_path = "xgb_binary.xml"
        bdt = BDTxgboost(classifier, self.features, ["cls0", "cls1"])
        bdt.to_tmva(xml_path)
        bdt.setup_tmva(xml_path)

        total_dev = 0.0
        n_rows = self.data_x.shape[0]
        for row_idx in range(n_rows):
            tmva_pred = bdt.eval_tmva(self.data_x[row_idx, :])
            native_pred = bdt.eval(self.data_x[row_idx, :])

            # Relative deviation of the native output from the TMVA output.
            row_dev = np.abs((tmva_pred - native_pred) / tmva_pred)
            self.assertTrue(row_dev < 0.05)
            total_dev += row_dev

        self.assertTrue(total_dev < 0.01)
Esempio n. 3
0
def simple_test_xgboost():
    """Train a small binary XGBoost model and compare its TMVA export to ``bdt.eval``.

    Uses at most the first 1000 rows and first 5 columns of the module-level
    ``data`` pair (``data[0]`` features, ``data[1]`` targets), binarises the
    target as ``data_y > 5``, exports the fitted model to ``test.xml`` via
    ``BDTxgboost`` and accumulates the relative deviation between
    ``eval_tmva`` and ``eval`` over all rows.

    NOTE(review): the accumulated deviation ``d1`` is not returned or checked
    in the visible code - confirm whether the function continues beyond this
    point or a ``return d1`` is intended.
    """
    data_x, data_y = data[0][:1000, :5], data[1][:1000]
    data_y_binary = (data_y > 5).astype(np.int32)

    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print("Binary classification")
    print("training model")
    model = xgboost.XGBClassifier(n_estimators=10)
    model.fit(data_x, data_y_binary)

    features = ["f{0}".format(i) for i in range(data_x.shape[1])]
    target_names = [
        "cls{0}".format(i) for i in range(len(np.unique(data_y_binary)))
    ]

    bdt = BDTxgboost(model, features, target_names)
    bdt.to_tmva("test.xml")
    bdt.setup_tmva("test.xml")

    d1 = 0.0
    for irow in range(data_x.shape[0]):
        predA1 = bdt.eval_tmva(data_x[irow, :])
        predB1 = bdt.eval(data_x[irow, :])
        # Relative deviation of the native output from the TMVA output.
        d1 += np.abs((predA1 - predB1) / predA1)
# Script section: load a trained e/gamma ID xgboost model from disk and
# convert it to a TMVA xml weight file next to it.
geometry = opt.geometry
eta_region = opt.etaRegion
bdt_name = "%s_vs_%s_%s"%(opt.signal,opt.background,opt.bdtConfig)

# Set up global variables
modelDir = os.environ['CMSSW_BASE']+"/src/L1Trigger/egid_analysis/HGCal_L1T_egammaID/output/models/%s"%geometry

# Input variables used by each supported BDT configuration, keyed by bdt_name
egID_var_dict = {
    'electron_vs_neutrino_baseline': [
        'cl3d_coreshowerlength', 'cl3d_firstlayer', 'cl3d_maxlayer',
        'cl3d_srrmean',
    ],
    'electron_vs_neutrino_full': [
        'cl3d_coreshowerlength', 'cl3d_showerlength', 'cl3d_firstlayer',
        'cl3d_maxlayer', 'cl3d_szz', 'cl3d_srrmean', 'cl3d_srrtot',
        'cl3d_seetot', 'cl3d_spptot',
    ],
}
# Raises KeyError if bdt_name is not one of the configurations above.
egID_vars = egID_var_dict[ bdt_name ]

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Build file names: the model and the xml share one stem, which carries an
# extra "<region>eta" suffix when a specific eta region is selected.
if eta_region in ['low','high']:
    model_stem = "%s/egID_%s_%s_%seta"%(modelDir,modelAlgo,bdt_name,eta_region)
else:
    model_stem = "%s/egID_%s_%s"%(modelDir,modelAlgo,bdt_name)
modelStr = model_stem + ".model"
f_xml = model_stem + ".xml"

# Load models
egID_model = xg.Booster()
egID_model.load_model( modelStr )
# Parenthesised single-argument form prints identically on Python 2 and 3.
print("  --> Loaded model: %s"%modelStr)

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Convert to xml
from mlglue.tree import tree_to_tmva, BDTxgboost, BDTsklearn
target_names = ['background','signal']
bdt = BDTxgboost( egID_model, egID_vars, target_names, kind='binary', max_depth=6, learning_rate=0.3 )
bdt.to_tmva( f_xml )
print("  --> Converted to xml")
Esempio n. 5
0
# Script section: load the alternative diphoton xgboost model and convert it
# to a TMVA xml weight file under WeightFiles/<year>.

# Load models
altDiphoModel = xg.Booster()
# Alternative model locations (select one by swapping the active line):
#modelName = '/vols/cms/es811/Stage1categorisation/2016/models/altDiphoModel.model'
#modelName = '/vols/cms/es811/Stage1categorisation/2017/models/altDiphoModel.model'
#modelName = '/vols/cms/es811/Stage1categorisation/Pass1/2016/models/altDiphoModel.model'
modelName = '/vols/cms/es811/Stage1categorisation/Pass1/2017/models/altDiphoModel.model'
# Last path component, reused for the xml name and the log message.
modelFile = modelName.split('/')[-1]
xmlName = modelFile.replace('.model', '.xml')
altDiphoModel.load_model(modelName)
# Parenthesised single-argument form prints identically on Python 2 and 3.
print('Loaded model called %s' % modelFile)

# The output directory is chosen from the year found in the model path.
weightDir = 'WeightFiles/'
if '2016' in modelName:
    weightDir += '2016'
elif '2017' in modelName:
    weightDir += '2017'
else:
    # Same effect as exit(msg) without relying on the site-provided builtin.
    raise SystemExit('expected year 2016 or 2017 in path')
if not path.isdir(weightDir):
    # Create the directory tree directly instead of shelling out to mkdir -p;
    # a failure now raises OSError instead of being silently ignored.
    from os import makedirs
    makedirs(weightDir)

# Convert!
from mlglue.tree import tree_to_tmva, BDTxgboost, BDTsklearn
target_names = ['bkg', 'sig']
bdt = BDTxgboost(altDiphoModel,
                 diphoVars,
                 target_names,
                 kind='binary',
                 max_depth=6,
                 learning_rate=0.3)
bdt.to_tmva('%s/%s' % (weightDir, xmlName))
print('Created xml called %s' % xmlName)
Esempio n. 6
0
                   verbose=TrainVerbose)
    if len(clstst) > 0:
        clstst[ind].fit(eval_sets[ind][0][0],
                        eval_sets[ind][0][1],
                        eval_sets[ind][0][2],
                        eval_set=eval_sets[ind],
                        early_stopping_rounds=50,
                        eval_metric=["error"],
                        verbose=TrainVerbose)
    printlog("Training step " + str(ind + 1) + " of " + str(len(clses)) +
             " done.")

    if saveTraining and ind == saveSetindex:
        printlog("Saving training with features: " + str(saveSetnames))
        #features.append(["f{0}".format(i) for i in range(len(vset[ind]))])
        bdt = BDTxgboost(clses[ind], saveSetf, target_names)
        bdt.to_tmva(savefilename + ".xml")
        bdt.setup_tmva(savefilename + ".xml")
        printlog("Exported training step " + str(ind + 1) + " of " +
                 str(len(clses)) + " to \"" + savefilename + ".xml\"")


def insertcMVAdefault(dd):
    for ind in range(len(clses)):
        cond = (dd["Jet_JP"] == 0.) & (dd["Jet_JBP"] == 0.)
        for feat in [
                "Jet_CSV", "Jet_CSVIVF", "Jet_DeepCSVBDisc", "Jet_SoftMu",
                "Jet_SoftEl", "Jet_SoftMuasEta", "Jet_SoftMuSuppressed",
                "Jet_SoftMuSupPTEta", "Jet_SoftElasEta"
        ]:
            if feat in vset[ind]: