Ejemplo n.º 1
0
import ROOT as r
import math
import os.path
from LatexTable import Table
from config import path, bin_fmt, bins, systInfo, Muon, Electron
import utils
import math

# Script entry point: builds per-bin predictions for the Electron channel from
# MC, folds in every background systematic, and starts defining a LaTeX table
# with one column per bin.
if __name__ == "__main__":
    # Load the predict.C macro into the ROOT session before using utils.
    r.gROOT.LoadMacro("predict.C+")
    channel = Electron
    # No separate control channel is used in this script.
    ctrl_channel = None
    # Load MC pseudo-data in bins
    data = utils.getZeroMC(channel, ctrl_channel)
    # This also calculates statistical uncertainties
    results = utils.makePredictions(data)

    # Get the scaled/smeared systematic variations
    systs = utils.getLiterallyAllSystematicsBkg(channel, ctrl_channel)

    # Loop through systematics and calculate systematic uncertainty.
    # Each entry of systs unpacks to (name, scaled); the name is echoed as a
    # simple progress indicator.
    for name, scaled in systs:
        utils.addSystematic(name, data, results, scaled)
        print name

    # LaTeX table: a left-aligned "Bin" label column followed by one centred
    # column per entry of bins, keyed "bin0", "bin1", ... and titled with
    # utils.formatBin(i).
    l = Table()
    cols = [("value", "Bin", "l")] + [("bin%d" % i, utils.formatBin(i), "c") for i in range(len(bins))]

    l.defineColumns(cols)
Ejemplo n.º 2
0
# Script fragment: fits an Age scaler, retrains `model` on a reduced feature
# set, evaluates it and pickles the resulting bundle. The names X_test,
# X_train_over_scaled, y_train_over, X_test_scaled, y_test, model,
# ensembled_model, pd, os and MODEL_DIR are defined outside this excerpt.

# Presumably closes the progress message of a preceding step that is not
# visible here.
print('ok.\n')

# Fit a dedicated StandardScaler on the 'Age' column of X_test.
# NOTE(review): the scaler is fitted on the test set -- confirm this is intended.
print('Generating Age Scaler...', end="")
from sklearn.preprocessing import StandardScaler
age_scaler = StandardScaler()
# reshape(-1, 1) turns the 1-D column into a single-feature 2-D array for fit().
age_scaler.fit(X_test['Age'].values.reshape(-1,1))
print('ok.\n')

# Train the model on the most important features only. See the notebook for
# more information.
important_features = ['Geography_France', 'Geography_Germany', 'Geography_Spain', 'Gender', 'Age', 'IsActiveMember', 'NumOfProducts']

model.fit(X_train_over_scaled[important_features], y_train_over)

# NOTE(review): trainModels is imported but not used in this excerpt.
from utils import trainModels, makePredictions

# Evaluate on the test split restricted to the same feature subset;
# makePredictions returns the three metrics unpacked below.
recall, accu, roc_auc = makePredictions(model, X_test_scaled[important_features], y_test, verbose=True)

final_results = {'Reduced Voting Classifier': {'recall': recall, 'Accuracy': accu, 'ROC-AUC':roc_auc}}

# Bundle the encoders/scaler taken from ensembled_model, the new Age scaler,
# the retrained model, its metrics and the feature list into one Series and
# pickle it under MODEL_DIR.
print('Saving models...', end="")
reduced_model_data = pd.Series({
    'ohe': ensembled_model['ohe'],
    'oe': ensembled_model['oe'],
    'scaler_train': ensembled_model['scaler_train'],
    'scaler_test': age_scaler,
    'models': model,
    'models_results': final_results,
    'features': important_features
})

reduced_model_data.to_pickle(os.path.join(MODEL_DIR, 'reduced_model.pkl'))
Ejemplo n.º 3
0
def setupLimit(channel):
    """Gather the per-bin inputs of the limit calculation for one channel.

    Reads data, MC, background systematics and signal efficiencies for
    ``channel`` through ``utils``, writes two diagnostic plots
    (``limit/<name>_pred_obs.pdf`` and ``limit/R_<name>.pdf``) and returns
    everything in a single dictionary.

    The previous body read several attributes from a name ``ch`` that is not
    defined inside this function; they are now read from the ``channel``
    argument, so the function no longer depends on a same-named global.
    ``cfg`` is still expected to be available at module level.

    Args:
        channel: channel description object. Attributes read here are
            ``name``, ``bkgPrediction``, ``ctrlChannel``, ``bkgSamples``,
            ``ttPolarisationUncertainty``, ``lumi``, ``triggerEfficiency``,
            ``includeSignalSysts`` and, for the "QCDFit" prediction,
            ``ewkN``/``ewkErr`` (real data) or ``ewkRelErr`` (otherwise).

    Returns:
        dict with the keys "name", "NObserved", "NControl", "NControlMC",
        "bkgPredict", "R", "lumi", "triggerEfficiency", "lumiError" and
        "signal". "bkgPredict" is None unless the channel uses "QCDFit".
    """
    def makeLegend():
        """Return a legend at the fixed position shared by both plots."""
        return r.TLegend(0.7, 0.9, 0.9, 0.6)

    def plot(NObserved, results, NControl, R, bkgPredict):
        """Draw observed vs. predicted yields per bin and save them as a PDF."""
        r.gROOT.SetStyle("Plain")
        c = r.TCanvas("test")
        c.SetGrid()
        nbins = len(NObserved)
        obs = r.TH1D("obs", "obs", nbins, 0.5, nbins+0.5)
        pred = r.TH1D("pred", "pred", nbins, 0.5, nbins+0.5)
        pred2 = r.TH1D("pred2", "pred2", nbins, 0.5, nbins+0.5)
        for idx, b in enumerate(NObserved):
            obs.SetBinContent(idx+1, b)
            if bkgPredict:
                pred2.SetBinContent(idx+1, R["nominal"][idx]*bkgPredict[0][idx])
            pred.SetBinContent(idx+1, results[idx].predicted())
            # Error bar: sqrt(prediction) combined in quadrature with the
            # "MCStats" entry of R scaled by the control yield.
            perr = math.sqrt(results[idx].predicted())
            serr = R["MCStats"][idx]*NControl[idx]
            pred.SetBinError(idx+1,
                             math.sqrt(perr**2 + serr**2))
            obs.GetXaxis().SetBinLabel(idx+1, utils.formatBin(idx, False))
        pred.SetLineColor(r.kRed)
        pred2.SetLineColor(r.kGreen)
        obs.GetYaxis().SetRangeUser(0, 1.2*max(obs.GetMaximum(), pred.GetMaximum()))
        obs.SetStats(r.kFALSE)
        obs.Draw("hist")
        if bkgPredict:
            pred2.Draw("hist e same")
        pred.Draw("hist e same")
        leg = makeLegend()
        leg.AddEntry(pred, "Predicted", "L")
        if bkgPredict:
            leg.AddEntry(pred2, "Predicted (QCD Fit)", "L")
        leg.AddEntry(obs, "Observed", "L")
        leg.Draw()
        c.SaveAs("limit/%s_pred_obs.pdf" % channel.name)

    def plotA(path, *args, **kwargs):
        """Overlay one histogram per positional sequence and save to ``path``.

        Keyword arguments: ``cols`` (line colours, default black then red)
        and ``legend`` (labels; a legend is drawn only when non-empty).
        """
        c = r.TCanvas("test")
        c.SetGrid()
        r.gROOT.SetStyle("Plain")
        nbins = len(args[0])
        hists = [r.TH1D("h%d" % idx, "h%d" % idx, nbins, 0.5, nbins+0.5) for idx in range(len(args))]
        for hist, values in zip(hists, args):
            for idx in range(nbins):
                hist.SetBinContent(idx+1, values[idx])
                hist.SetBinError(idx+1, math.sqrt(values[idx]))
        cols = kwargs.get("cols", [r.kBlack, r.kRed])
        legs = kwargs.get("legend", [])
        legend = makeLegend() if len(legs) > 0 else None
        # Computed once, before any range is applied, and with its own loop
        # variable: under Python 2 a list comprehension leaks its variable, so
        # reusing "h" inside the loop below rebound it to the last histogram
        # before Draw()/AddEntry() were called.
        ymax = 1.2*max([other.GetMaximum() for other in hists])
        for hidx, h in enumerate(hists):
            h.SetLineColor(cols[hidx])
            h.GetYaxis().SetRangeUser(0, ymax)
            if hidx == 0:
                h.Draw("hist e")
            else:
                h.Draw("hist e same")
            if hidx < len(legs):
                legend.AddEntry(h, legs[hidx], "L")
        if legend:
            legend.Draw()
        c.SaveAs(path)

    ctrl_channel = bkgPredict = None
    # Get the background prediction per bin
    if channel.bkgPrediction == "OtherChannel":
        ctrl_channel = channel.ctrlChannel

    data = utils.getZeroData(channel, ctrl_channel)
    mc = utils.getZeroMC(channel, ctrl_channel)

    results = utils.makePredictions(data)

    systs = utils.getSystematicsBkg(channel, ctrl_channel)

    # R maps "nominal" and every systematic name to a per-bin list.
    R = {"nominal": [b.R() for b in data]}

    for name, scaled in systs:
        R[name] = utils.getSystematicShiftsR(mc, scaled[0], scaled[1])
    R["MCStats"] = utils.mcStatsSystematicBkg(channel.bkgSamples, channel, ctrl_channel)
    R["ttpol"] = [x*tt for x, tt in zip(R["nominal"], channel.ttPolarisationUncertainty)]
    NObserved = [b.observed() for b in data]
    NControl = [b.control() for b in data]
    NControlMC = [b.mcControl() for b in mc]
    # This is to fix a problem with this not being scaled when running in real data mode.
    # It's not pretty but it works (I hope!)
    if cfg.useRealData:
        scaleMC = channel.lumi/cfg.icfDefaultLumi
        NControlMC = [x*scaleMC for x in NControlMC]
    print("Extracting signal data...")
    susyEff = utils.getZeroMCSignal(channel)
    controlRegionEff = utils.getZeroMCSignalControlRegion(channel)
    effSysts = utils.getSystematicsSignalEff(channel)

    if channel.bkgPrediction == "QCDFit":
        if cfg.useRealData:
            bkgPredict = (channel.ewkN, channel.ewkErr, {})
        else:
            # Without real data the control yields stand in for the fit
            # result, with errors built from the relative uncertainties.
            bkgPredict = (NControl, [rel*control for rel, control in zip(channel.ewkRelErr, NControl)], {})

    # Attach control-region efficiencies and systematic efficiency shifts to
    # every signal point, keyed by (m0, m12).
    for (m0, m12), p in susyEff.iteritems():
        p["effShift"] = {}
        p["control_efficiencies"] = controlRegionEff[(m0, m12)]["efficiencies"]
        for name, scaled in effSysts:
            shift = utils.getSystematicShiftsEff(p, scaled[0], scaled[1])
            p["effShift"][name] = shift
        if "pdfunc" in channel.includeSignalSysts:
            # Flat 10% of each efficiency.
            p["effShift"]["pdfunc"] = [0.1*eff for eff in p["efficiencies"]]

    plot(NObserved, results, NControl, R, bkgPredict)
    plotA("limit/R_%s.pdf" % channel.name, R["nominal"], legend = ["R"])

    # This is an ad-hoc correction to the electron limit to account for poor
    # statistics. It is deliberately switched off by the trailing "and False".
    # An earlier, commented-out variant scaled every entry of R by 2.25.
    if channel == cfg.Electron and False:
        print("Correcting Electron channel top bin!")
        R["nominal"] = [x*2 for x in R["nominal"]]
        if not cfg.useRealData:
            NObserved[-1] = NObserved[-1]*2.25

    # For an expected limit the observation is replaced by the prediction.
    # This happens after plotting, so the plots above show the original values.
    if cfg.expectedLimit:
        if bkgPredict is None:
            NObserved = [Ri*cont for Ri, cont in zip(R["nominal"], NControl)]
        else:
            NObserved = [Ri*cont for Ri, cont in zip(R["nominal"], bkgPredict[0])]
    return {
        "name" : channel.name,
        "NObserved" : NObserved,
        "NControl" : NControl,
        "NControlMC" : NControlMC,
        "bkgPredict" : bkgPredict,
        "R" : R,
        "lumi": channel.lumi,
        "triggerEfficiency": channel.triggerEfficiency,
        "lumiError": cfg.lumiError,
        "signal" : susyEff,
        }
# Script fragment: builds a soft-voting ensemble from previously tuned models,
# fits it on the oversampled/scaled training data and records its test metrics.
# pd, os, MODEL_DIR, oversampled_models, VotingClassifier and the X_*/y_* data
# are defined outside this excerpt.

# NOTE(review): read_pickle unpickles arbitrary objects -- only load files
# that come from a trusted source.
tunned_models = pd.read_pickle(os.path.join(MODEL_DIR, 'tunned_models.pkl'))
print('ok.\n')

# (label, estimator) pairs. The Random Forest and XGB estimators come from the
# tuned models, the Logistic Regressor from oversampled_models.
chosen_models = [('Random Forest', tunned_models['models'][0][1]),
                 ('Logistic Regressor', oversampled_models['models'][1][1]),
                 ('XGB Classifier', tunned_models['models'][1][1])]

# Soft voting with weights 6/5/4 applied to the estimators in the order above.
vclf = VotingClassifier(estimators=chosen_models,
                        voting='soft',
                        weights=[6, 5, 4])
vclf = vclf.fit(X_train_over_scaled, y_train_over)

# NOTE(review): trainModels is imported but not used in this excerpt.
from utils import trainModels, makePredictions

# makePredictions returns the three metrics unpacked below.
recall, accu, roc_auc = makePredictions(vclf,
                                        X_test_scaled,
                                        y_test,
                                        verbose=True)
# The string literal below is a disabled StackingClassifier experiment; it is
# evaluated as a bare expression and has no effect.
"""stack_clf = StackingClassifier(chosen_models,
                               final_estimator=tunned_models['models'][0][1],
                               cv=2) 
stack_clf.fit(X_train_over_scaled, y_train_over)

makePredictions(stack_clf, X_test_scaled, y_test, verbose=True)"""

# Metrics of the ensemble, keyed by a display name.
final_results = {
    'Voting Classifier': {
        'recall': recall,
        'Accuracy': accu,
        'ROC-AUC': roc_auc
    }
}
Ejemplo n.º 5
0
def setupLimit(channel):
    """Gather the per-bin inputs of the limit calculation for one channel.

    Reads data, MC, background systematics and signal efficiencies for
    ``channel`` through ``utils``, writes two diagnostic plots
    (``limit/<name>_pred_obs.pdf`` and ``limit/R_<name>.pdf``) and returns
    everything in a single dictionary.

    The previous body read ``name``, ``lumi`` and ``triggerEfficiency`` from a
    name ``ch`` that is not defined inside this function; they are now read
    from the ``channel`` argument, so the function no longer depends on a
    same-named global. ``cfg`` is still expected to be available at module
    level.

    Args:
        channel: channel description object. Attributes read here are
            ``name``, ``bkgPrediction``, ``ctrlChannel``, ``bkgSamples``,
            ``lumi``, ``triggerEfficiency``, ``includeSignalSysts`` and, for
            the "QCDFit" prediction, ``ewkN`` and ``ewkErr``.

    Returns:
        dict with the keys "name", "NObserved", "NControl", "NControlMC",
        "bkgPredict", "R", "lumi", "triggerEfficiency", "lumiError" and
        "signal". "bkgPredict" is None unless the channel uses "QCDFit".
    """
    def makeLegend():
        """Return a legend at the fixed position shared by both plots."""
        return r.TLegend(0.7, 0.9, 0.9, 0.6)

    def plot(NObserved, results, NControl, R, bkgPredict):
        """Draw observed vs. predicted yields per bin and save them as a PDF."""
        r.gROOT.SetStyle("Plain")
        c = r.TCanvas("test")
        c.SetGrid()
        nbins = len(NObserved)
        obs = r.TH1D("obs", "obs", nbins, 0.5, nbins+0.5)
        pred = r.TH1D("pred", "pred", nbins, 0.5, nbins+0.5)
        pred2 = r.TH1D("pred2", "pred2", nbins, 0.5, nbins+0.5)
        for idx, b in enumerate(NObserved):
            obs.SetBinContent(idx+1, b)
            if bkgPredict:
                pred2.SetBinContent(idx+1, R["nominal"][idx]*bkgPredict[0][idx])
            pred.SetBinContent(idx+1, results[idx].predicted())
            # Error bar: sqrt(prediction) combined in quadrature with the
            # "MCStats" entry of R scaled by the control yield.
            perr = math.sqrt(results[idx].predicted())
            serr = R["MCStats"][idx]*NControl[idx]
            pred.SetBinError(idx+1,
                             math.sqrt(perr**2 + serr**2))
            obs.GetXaxis().SetBinLabel(idx+1, utils.formatBin(idx, False))
        pred.SetLineColor(r.kRed)
        pred2.SetLineColor(r.kGreen)
        obs.GetYaxis().SetRangeUser(0, 1.2*max(obs.GetMaximum(), pred.GetMaximum()))
        obs.SetStats(r.kFALSE)
        obs.Draw("hist")
        if bkgPredict:
            pred2.Draw("hist e same")
        pred.Draw("hist e same")
        leg = makeLegend()
        leg.AddEntry(pred, "Predicted", "L")
        if bkgPredict:
            leg.AddEntry(pred2, "Predicted (QCD Fit)", "L")
        leg.AddEntry(obs, "Observed", "L")
        leg.Draw()
        c.SaveAs("limit/%s_pred_obs.pdf" % channel.name)

    def plotA(path, *args, **kwargs):
        """Overlay one histogram per positional sequence and save to ``path``.

        Keyword arguments: ``cols`` (line colours, default black then red)
        and ``legend`` (labels; a legend is drawn only when non-empty).
        """
        c = r.TCanvas("test")
        c.SetGrid()
        r.gROOT.SetStyle("Plain")
        nbins = len(args[0])
        hists = [r.TH1D("h%d" % idx, "h%d" % idx, nbins, 0.5, nbins+0.5) for idx in range(len(args))]
        for hist, values in zip(hists, args):
            for idx in range(nbins):
                hist.SetBinContent(idx+1, values[idx])
                hist.SetBinError(idx+1, math.sqrt(values[idx]))
        cols = kwargs.get("cols", [r.kBlack, r.kRed])
        legs = kwargs.get("legend", [])
        legend = makeLegend() if len(legs) > 0 else None
        # Computed once, before any range is applied, and with its own loop
        # variable: under Python 2 a list comprehension leaks its variable, so
        # reusing "h" inside the loop below rebound it to the last histogram
        # before Draw()/AddEntry() were called.
        ymax = 1.2*max([other.GetMaximum() for other in hists])
        for hidx, h in enumerate(hists):
            h.SetLineColor(cols[hidx])
            h.GetYaxis().SetRangeUser(0, ymax)
            if hidx == 0:
                h.Draw("hist e")
            else:
                h.Draw("hist e same")
            if hidx < len(legs):
                legend.AddEntry(h, legs[hidx], "L")
        if legend:
            legend.Draw()
        c.SaveAs(path)

    ctrl_channel = bkgPredict = None
    # Get the background prediction per bin
    if channel.bkgPrediction == "OtherChannel":
        ctrl_channel = channel.ctrlChannel
    elif channel.bkgPrediction == "QCDFit":
        bkgPredict = (channel.ewkN, channel.ewkErr, {})
    data = utils.getZeroData(channel, ctrl_channel)
    mc = utils.getZeroMC(channel, ctrl_channel)

    results = utils.makePredictions(data)

    systs = utils.getSystematicsBkg(channel, ctrl_channel)

    # R maps "nominal" and every systematic name to a per-bin list.
    R = {"nominal": [b.R() for b in data]}

    for name, scaled in systs:
        R[name] = utils.getSystematicShiftsR(mc, scaled[0], scaled[1])
    R["MCStats"] = utils.mcStatsSystematicBkg(channel.bkgSamples, channel, ctrl_channel)
    NObserved = [b.observed() for b in data]
    NControl = [b.control() for b in data]
    NControlMC = [b.mcControl() for b in mc]
    print("Extracting signal data...")
    susyEff = utils.getZeroMCSignal(channel)
    controlRegionEff = utils.getZeroMCSignalControlRegion(channel)
    effSysts = utils.getSystematicsSignalEff(channel)

    # Attach control-region efficiencies and systematic efficiency shifts to
    # every signal point, keyed by (m0, m12).
    for (m0, m12), p in susyEff.iteritems():
        p["effShift"] = {}
        p["control_efficiencies"] = controlRegionEff[(m0, m12)]["efficiencies"]
        for name, scaled in effSysts:
            shift = utils.getSystematicShiftsEff(p, scaled[0], scaled[1])
            p["effShift"][name] = shift
        if "pdfunc" in channel.includeSignalSysts:
            # Flat 10% of each efficiency.
            p["effShift"]["pdfunc"] = [0.1*eff for eff in p["efficiencies"]]

    plot(NObserved, results, NControl, R, bkgPredict)
    plotA("limit/R_%s.pdf" % channel.name, R["nominal"], legend = ["R"])
    return {
        "name" : channel.name,
        "NObserved" : NObserved,
        "NControl" : NControl,
        "NControlMC" : NControlMC,
        "bkgPredict" : bkgPredict,
        "R" : R,
        "lumi": channel.lumi,
        "triggerEfficiency": channel.triggerEfficiency,
        "lumiError": cfg.lumiError,
        "signal" : susyEff,
        }