예제 #1
0
 def predict(self, x, theta_true):
     """
     Run the unbinned ML fit
     """
     
     # Data
     roodata = RooDataSet('data', 'data', RooArgSet(self.phistar))
     for xval in x:
         self.phistar.setVal(xval)
         roodata.add(RooArgSet(self.phistar))
     
     theta = RooRealVar('theta', 'theta', 0.5, self.theta_min, self.theta_max)
     
     # The combined pdf
     model = RooAddPdf('model', 'model',
                       RooArgList(self.pdfs['A'], self.pdfs['H']),
                       RooArgList(theta))
     
     with stdout_redirected_to('%s/minuit_output.log' % self.outdir):
         res = model.fitTo(roodata, Save(True))
         nll = res.minNll()
     
     fitted_theta = theta.getValV()
     
     # Get Lambda(theta_true | theta_best)
     with stdout_redirected_to():
         logl = model.createNLL(roodata)
     
     theta.setVal(theta_true)
     nll_theta_true = logl.getValV()
     nll_ratio = nll_theta_true - nll
     
     return fitted_theta, nll, nll_ratio
예제 #2
0
    def predict(self, x, theta_true):
        """
        Run an unbinned ML fit to make predictions
        """
        
        # Create RooDataSet
        xs = self.scaler.transform(x)
        preds = self.model.predict(xs)[:, 1]

        min_nn_output_local, max_nn_output_local = np.min(preds), np.max(preds)
        if min_nn_output_local < self.min_nn_output:
            self.min_nn_output = min_nn_output_local
        if max_nn_output_local > self.max_nn_output:
            self.max_nn_output = max_nn_output_local
        
        roodata = RooDataSet('data', 'data', RooArgSet(self.roopred))
        for pred in preds:
            self.roopred.setVal(pred)
            roodata.add(RooArgSet(self.roopred))

        
        # Fit
        theta = RooRealVar('theta', 'theta', 0.5, self.theta_min, self.theta_max)
        
        model = RooAddPdf('model', 'model',
                          RooArgList(self.pdfs['A'], self.pdfs['H']),
                          RooArgList(theta))
        
        
        with stdout_redirected_to('%s/minuit_output.log' % self.outdir):
            res = model.fitTo(roodata, Save(True))
            nll = res.minNll()

        fitstatus = res.status()
        fitstatus |= (not subprocess.call(['grep', 'p.d.f value is less than zero', 'output_MLE_unbinned/minuit_output.log']))

        fitted_theta = theta.getValV()
        
        # Get Lambda(theta_true | theta_best)
        logl = model.createNLL(roodata)
        
        theta.setVal(theta_true)
        nll_theta_true = logl.getValV()
        nll_ratio = nll_theta_true - nll
  
        return fitted_theta, nll, nll_ratio, fitstatus
예제 #3
0
def fit_mass(data,
             column,
             x,
             sig_pdf=None,
             bkg_pdf=None,
             n_sig=None,
             n_bkg=None,
             blind=False,
             nll_profile=False,
             second_storage=None,
             log_plot=False,
             pulls=True,
             sPlot=False,
             bkg_in_region=False,
             importance=3,
             plot_importance=3):
    """Fit a given pdf to a variable distribution


    Parameter
    ---------
    data : |hepds_type|
        The data containing the variable to fit to
    column : str
        The name of the column to fit the pdf to
    sig_pdf : RooFit pdf
        The signal Probability Density Function. The variable to fit to has
        to be named 'x'.
    bkg_pdf : RooFit pdf
        The background Probability Density Function. The variable to fit to has
        to be named 'x'.
    n_sig : None or numeric
        The number of signals in the data. If it should be fitted, use None.
    n_bkg : None or numeric
        The number of background events in the data.
        If it should be fitted, use None.
    blind : boolean or tuple(numberic, numberic)
        If False, the data is fitted. If a tuple is provided, the values are
        used as the lower (the first value) and the upper (the second value)
        limit of a blinding region, which will be omitted in plots.
        Additionally, no true number of signal will be returned but only fake.
    nll_profile : boolean
        If True, a Negative Log-Likelihood Profile will be generated. Does not
        work with blind fits.
    second_storage : |hepds_type|
        A second data-storage that will be concatenated with the first one.
    importance : |importance_type|
        |importance_docstring|
    plot_importance : |plot_importance_type|
        |plot_importance_docstring|

    Return
    ------
    tuple(numerical, numerical)
        Return the number of signals and the number of backgrounds in the
        signal-region. If a blind fit is performed, the signal will be a fake
        number. If no number of background events is required, -999 will be
        returned.
    """

    if not (isinstance(column, str) or len(column) == 1):
        raise ValueError("Fitting to several columns " + str(column) +
                         " not supported.")
    if type(sig_pdf) == type(bkg_pdf) == None:
        raise ValueError("sig_pdf and bkg_pdf are both None-> no fit possible")
    if blind is not False:
        lower_blind, upper_blind = blind
        blind = True

    n_bkg_below_sig = -999
    # create data
    data_name = data.name
    data_array, _t1, _t2 = data.make_dataset(second_storage, columns=column)
    del _t1, _t2

    # double crystalball variables
    min_x, max_x = min(data_array[column]), max(data_array[column])

    #    x = RooRealVar("x", "x variable", min_x, max_x)

    # create data
    data_array = np.array([i[0] for i in data_array.as_matrix()])
    data_array.dtype = [('x', np.float64)]
    tree1 = array2tree(data_array, "x")
    data = RooDataSet("data", "Data", RooArgSet(x), RooFit.Import(tree1))

    #    # TODO: export somewhere? does not need to be defined inside...
    #    mean = RooRealVar("mean", "Mean of Double CB PDF", 5280, 5100, 5600)#, 5300, 5500)
    #    sigma = RooRealVar("sigma", "Sigma of Double CB PDF", 40, 0.001, 200)
    #    alpha_0 = RooRealVar("alpha_0", "alpha_0 of one side", 5.715)#, 0, 150)
    #    alpha_1 = RooRealVar("alpha_1", "alpha_1 of other side", -4.019)#, -200, 0.)
    #    lambda_0 = RooRealVar("lambda_0", "Exponent of one side", 3.42)#, 0, 150)
    #    lambda_1 = RooRealVar("lambda_1", "Exponent of other side", 3.7914)#, 0, 500)
    #
    #    # TODO: export somewhere? pdf construction
    #    frac = RooRealVar("frac", "Fraction of crystal ball pdfs", 0.479, 0.01, 0.99)
    #
    #    crystalball1 = RooCBShape("crystallball1", "First CrystalBall PDF", x,
    #                              mean, sigma, alpha_0, lambda_0)
    #    crystalball2 = RooCBShape("crystallball2", "Second CrystalBall PDF", x,
    #                              mean, sigma, alpha_1, lambda_1)
    #    doubleCB = RooAddPdf("doubleCB", "Double CrystalBall PDF",
    #                         crystalball1, crystalball2, frac)

    #    n_sig = RooRealVar("n_sig", "Number of signals events", 10000, 0, 1000000)

    # test input
    if n_sig == n_bkg == 0:
        raise ValueError("n_sig as well as n_bkg is 0...")

    if n_bkg is None:
        n_bkg = RooRealVar("n_bkg", "Number of background events", 10000, 0,
                           500000)
    elif n_bkg >= 0:
        n_bkg = RooRealVar("n_bkg", "Number of background events", int(n_bkg))
    else:
        raise ValueError("n_bkg is not >= 0 or None")

    if n_sig is None:
        n_sig = RooRealVar("n_sig", "Number of signal events", 1050, 0, 200000)

        # START BLINDING
        blind_cat = RooCategory("blind_cat", "blind state category")
        blind_cat.defineType("unblind", 0)
        blind_cat.defineType("blind", 1)
        if blind:
            blind_cat.setLabel("blind")
            blind_n_sig = RooUnblindPrecision("blind_n_sig",
                                              "blind number of signals",
                                              "wasistdas", n_sig.getVal(),
                                              10000, n_sig, blind_cat)
        else:
            #            blind_cat.setLabel("unblind")
            blind_n_sig = n_sig

        print "n_sig value " + str(n_sig.getVal())
#        raw_input("blind value " + str(blind_n_sig.getVal()))

#        n_sig = blind_n_sig

# END BLINDING
    elif n_sig >= 0:
        n_sig = RooRealVar("n_sig", "Number of signal events", int(n_sig))
    else:
        raise ValueError("n_sig is not >= 0")

#    if not blind:
#        blind_n_sig = n_sig

#    # create bkg-pdf
#    lambda_exp = RooRealVar("lambda_exp", "lambda exp pdf bkg", -0.00025, -1., 1.)
#    bkg_pdf = RooExponential("bkg_pdf", "Background PDF exp", x, lambda_exp)

    if blind:
        comb_pdf = RooAddPdf("comb_pdf", "Combined DoubleCB and bkg PDF",
                             RooArgList(sig_pdf, bkg_pdf),
                             RooArgList(blind_n_sig, n_bkg))
    else:
        comb_pdf = RooAddPdf("comb_pdf", "Combined DoubleCB and bkg PDF",
                             RooArgList(sig_pdf, bkg_pdf),
                             RooArgList(n_sig, n_bkg))

    # create test dataset
#    mean_gauss = RooRealVar("mean_gauss", "Mean of Gaussian", 5553, -10000, 10000)
#    sigma_gauss = RooRealVar("sigma_gauss", "Width of Gaussian", 20, 0.0001, 300)
#    gauss1 = RooGaussian("gauss1", "Gaussian test dist", x, mean_gauss, sigma_gauss)
#    lambda_data = RooRealVar("lambda_data", "lambda exp data", -.002)
#    exp_data = RooExponential("exp_data", "data example exp", x, lambda_data)
#    frac_data = RooRealVar("frac_data", "Fraction PDF of data", 0.15)
#
#    data_pdf = RooAddPdf("data_pdf", "Data PDF", gauss1, exp_data, frac_data)
#    data = data_pdf.generate(RooArgSet(x), 30000)

#    data.printValue()
#    xframe = x.frame()
#    data_pdf.plotOn(xframe)
#    print "n_cpu:", meta_config.get_n_cpu()
#    input("test")
#    comb_pdf.fitTo(data, RooFit.Extended(ROOT.kTRUE), RooFit.NumCPU(meta_config.get_n_cpu()))
#     HACK to get 8 cores in testing
    c5 = TCanvas("c5", "RooFit pdf not fit vs " + data_name)
    c5.cd()
    x_frame1 = x.frame()
    #    data.plotOn(x_frame1)
    #    comb_pdf.pdfList()[1].plotOn(x_frame1)

    if __name__ == "__main__":
        n_cpu = 8
    else:
        n_cpu = meta_config.get_n_cpu()
        print "n_cpu = ", n_cpu
        # HACK
#        n_cpu = 8
    result_fit = comb_pdf.fitTo(data, RooFit.Minos(ROOT.kTRUE),
                                RooFit.Extended(ROOT.kTRUE),
                                RooFit.NumCPU(n_cpu))
    # HACK end
    if bkg_in_region:
        x.setRange("signal", bkg_in_region[0], bkg_in_region[1])
        bkg_pdf_fitted = comb_pdf.pdfList()[1]
        int_argset = RooArgSet(x)
        #        int_argset = x
        #        int_argset.setRange("signal", bkg_in_region[0], bkg_in_region[1])
        integral = bkg_pdf_fitted.createIntegral(int_argset,
                                                 RooFit.NormSet(int_argset),
                                                 RooFit.Range("signal"))
        bkg_cdf = bkg_pdf_fitted.createCdf(int_argset, RooFit.Range("signal"))
        bkg_cdf.plotOn(x_frame1)

        #        integral.plotOn(x_frame1)
        n_bkg_below_sig = integral.getVal(int_argset) * n_bkg.getVal()
        x_frame1.Draw()

    if plot_importance >= 3:
        c2 = TCanvas("c2", "RooFit pdf fit vs " + data_name)
        c2.cd()
        x_frame = x.frame()
        #        if log_plot:
        #            c2.SetLogy()
        #        x_frame.SetTitle("RooFit pdf vs " + data_name)
        x_frame.SetTitle(data_name)
        if pulls:
            pad_data = ROOT.TPad("pad_data", "Pad with data and fit", 0, 0.33,
                                 1, 1)
            pad_pulls = ROOT.TPad("pad_pulls", "Pad with data and fit", 0, 0,
                                  1, 0.33)
            pad_data.SetBottomMargin(0.00001)
            pad_data.SetBorderMode(0)
            if log_plot:
                pad_data.SetLogy()
            pad_pulls.SetTopMargin(0.00001)
            pad_pulls.SetBottomMargin(0.2)
            pad_pulls.SetBorderMode(0)
            pad_data.Draw()
            pad_pulls.Draw()
            pad_data.cd()
        else:
            if log_plot:
                c2.SetLogy()
    if blind:
        # HACK
        column = 'x'
        # END HACK
        x.setRange("lower", min_x, lower_blind)
        x.setRange("upper", upper_blind, max_x)
        range_str = "lower,upper"
        lower_cut_str = str(
            min_x) + "<=" + column + "&&" + column + "<=" + str(lower_blind)
        upper_cut_str = str(
            upper_blind) + "<=" + column + "&&" + column + "<=" + str(max_x)
        sideband_cut_str = "(" + lower_cut_str + ")" + "||" + "(" + upper_cut_str + ")"

        n_entries = data.reduce(
            sideband_cut_str).numEntries() / data.numEntries()
        #        raw_input("n_entries: " + str(n_entries))
        if plot_importance >= 3:
            data.plotOn(x_frame, RooFit.CutRange(range_str),
                        RooFit.NormRange(range_str))
            comb_pdf.plotOn(
                x_frame, RooFit.Range(range_str),
                RooFit.Normalization(n_entries, RooAbsReal.Relative),
                RooFit.NormRange(range_str))
            if pulls:
                #                pull_hist(pull_frame=x_frame, pad_data=pad_data, pad_pulls=pad_pulls)
                x_frame_pullhist = x_frame.pullHist()
    else:
        if plot_importance >= 3:
            data.plotOn(x_frame)
            comb_pdf.plotOn(x_frame)
            if pulls:
                pad_pulls.cd()
                x_frame_pullhist = x_frame.pullHist()
                pad_data.cd()

            comb_pdf.plotOn(x_frame,
                            RooFit.Components(sig_pdf.namePtr().GetName()),
                            RooFit.LineStyle(ROOT.kDashed))
            comb_pdf.plotOn(x_frame,
                            RooFit.Components(bkg_pdf.namePtr().GetName()),
                            RooFit.LineStyle(ROOT.kDotted))
#            comb_pdf.plotPull(n_sig)

    if plot_importance >= 3:
        x_frame.Draw()

        if pulls:
            pad_pulls.cd()
            x_frame.SetTitleSize(0.05, 'Y')
            x_frame.SetTitleOffset(0.7, 'Y')
            x_frame.SetLabelSize(0.04, 'Y')

            #            c11 = TCanvas("c11", "RooFit\ pulls" + data_name)
            #            c11.cd()
            #            frame_tmp = x_frame
            frame_tmp = x.frame()

            #            frame_tmp.SetTitle("significance")

            frame_tmp.SetTitle("Roofit\ pulls\ " + data_name)
            frame_tmp.addObject(x_frame_pullhist)

            frame_tmp.SetMinimum(-5)
            frame_tmp.SetMaximum(5)

            #            frame_tmp.GetYaxis().SetTitle("significance")
            frame_tmp.GetYaxis().SetNdivisions(5)
            frame_tmp.SetTitleSize(0.1, 'X')
            frame_tmp.SetTitleOffset(1, 'X')
            frame_tmp.SetLabelSize(0.1, 'X')
            frame_tmp.SetTitleSize(0.1, 'Y')
            frame_tmp.SetTitleOffset(0.5, 'Y')
            frame_tmp.SetLabelSize(0.1, 'Y')

            frame_tmp.Draw()

#    raw_input("")

    if not blind and nll_profile:

        #        nll_range = RooRealVar("nll_range", "Signal for nLL", n_sig.getVal(),
        #                               -10, 2 * n_sig.getVal())
        sframe = n_sig.frame(RooFit.Bins(20), RooFit.Range(1, 1000))
        # HACK for best n_cpu
        lnL = comb_pdf.createNLL(data, RooFit.NumCPU(8))
        # HACK end
        lnProfileL = lnL.createProfile(ROOT.RooArgSet(n_sig))
        lnProfileL.plotOn(sframe, RooFit.ShiftToZero())
        c4 = TCanvas("c4", "NLL Profile")
        c4.cd()

        #        input("press ENTER to show plot")
        sframe.Draw()

    if plot_importance >= 3:
        pass

    params = comb_pdf.getVariables()
    params.Print("v")

    #    print bkg_cdf.getVal()

    if sPlot:
        sPlotData = ROOT.RooStats.SPlot(
            "sPlotData",
            "sPlotData",
            data,  # variable fitted to, RooDataSet
            comb_pdf,  # fitted pdf
            ROOT.RooArgList(
                n_sig,
                n_bkg,
                #                                                NSigB0s
            ))
        sweights = np.array([
            sPlotData.GetSWeight(i, 'n_sig') for i in range(data.numEntries())
        ])
        return n_sig.getVal(), n_bkg_below_sig, sweights

    if blind:
        return blind_n_sig.getVal(), n_bkg_below_sig, comb_pdf
    else:
        return n_sig.getVal(), n_bkg_below_sig, comb_pdf
예제 #4
0
def rooFit601():

    print ">>> setup pdf and likelihood..."
    x = RooRealVar("x", "x", -20, 20)
    mean = RooRealVar("mean", "mean of g1 and g2", 0)
    sigma1 = RooRealVar("sigma1", "width of g1", 3)
    sigma2 = RooRealVar("sigma2", "width of g2", 4, 3.0,
                        6.0)  # intentional strong correlations
    gauss1 = RooGaussian("gauss1", "gauss1", x, mean, sigma1)
    gauss2 = RooGaussian("gauss2", "gauss2", x, mean, sigma2)
    frac = RooRealVar("frac", "frac", 0.5, 0.0, 1.0)
    model = RooAddPdf("model", "model", RooArgList(gauss1, gauss2),
                      RooArgList(frac))

    print ">>> generate to data..."
    data = model.generate(RooArgSet(x), 1000)  # RooDataSet

    print ">>> construct unbinned likelihood of model wrt data..."
    nll = model.createNLL(data)  # RooAbsReal

    print ">>> interactive minimization and error analysis with MINUIT interface object..."
    minuit = RooMinuit(nll)

    print ">>> set avtive verbosity for logging of MINUIT parameter space stepping..."
    minuit.setVerbose(kTRUE)

    print ">>> call MIGRAD to minimize the likelihood..."
    minuit.migrad()

    print "\n>>> parameter values and error estimates that are back propagated from MINUIT:"
    model.getParameters(RooArgSet(x)).Print("s")

    print "\n>>> disable verbose logging..."
    minuit.setVerbose(kFALSE)

    print ">>> run HESSE to calculate errors from d2L/dp2..."
    minuit.hesse()

    print ">>> value of and error on sigma2 (back propagated from MINUIT):"
    sigma2.Print()

    print "\n>>> run MINOS on sigma2 parameter only..."
    minuit.minos(RooArgSet(sigma2))

    print "\n>>> value of and error on sigma2 (back propagated from MINUIT after running MINOS):"
    sigma2.Print()

    print "\n>>> saving results, contour plots..."
    # Save a snapshot of the fit result. This object contains the initial
    # fit parameters, the final fit parameters, the complete correlation
    # matrix, the EDM, the minimized FCN , the last MINUIT status code and
    # the number of times the RooFit function object has indicated evaluation
    # problems (e.g. zero probabilities during likelihood evaluation)
    result = minuit.save()  # RooFitResult

    # Make contour plot of mx vs sx at 1,2,3 sigma
    frame1 = minuit.contour(frac, sigma2, 1, 2, 3)  # RooPlot
    frame1.SetTitle("RooMinuit contour plot")

    # Print the fit result snapshot
    result.Print("v")

    print "\n>>> change value of \"mean\" parameter..."
    mean.setVal(0.3)

    # Rerun MIGRAD,HESSE
    print ">>> rerun MIGRAD, HESSE..."
    minuit.migrad()
    minuit.hesse()

    print ">>> value on and error of frac:"
    frac.Print()

    print "\n>>> fix value of \"sigma\" parameter (setConstant)..."
    sigma2.setConstant(kTRUE)

    print ">>> rerun MIGRAD, HESSE..."
    minuit.migrad()
    minuit.hesse()
    frac.Print()
예제 #5
0
    def predict_and_plot(self, x): 
        """
        Do the same as predict(), but add plots
        
        Return -logL ratio, for external plotting
        """

        rcParams['xtick.major.pad'] = 12
        rcParams['ytick.major.pad'] = 12
        
        roodata = RooDataSet('data', 'data', RooArgSet(self.phistar))
        for xval in x:
            self.phistar.setVal(xval)
            roodata.add(RooArgSet(self.phistar))
        theta = RooRealVar('theta', 'theta', self.theta_min, self.theta_max)
        
        model = RooAddPdf('model', 'model',
                          RooArgList(self.pdfs['A'], self.pdfs['H']),
                          RooArgList(theta))
        
        #with stdout_redirected_to():
        print '\n\nPHISTAR FIT'
        model.fitTo(roodata)
        fitted_theta = theta.getValV()        
        
        # Histogram binning for data points
        nbins = 10
        
        xvals = np.linspace(0, 2*pi, 200)
        yvals_H = []
        yvals_A = []
        
        # Get points for pdf curves
        for xval in xvals:
            self.phistar.setVal(xval)
            yvals_H.append(self.pdfs['H'].getValV(RooArgSet(self.phistar)))
            yvals_A.append(self.pdfs['A'].getValV(RooArgSet(self.phistar)))
        
        yvals_H = np.array(yvals_H)
        yvals_A = np.array(yvals_A)

        # Plot pdfs by themselves
        #print 'integral H =', np.trapz(yvals_H, dx=1.0/200.0)
        fig = plt.figure()
        plt.plot(xvals, yvals_H, color=self.colors['H'], label=r'$p_{H}(\varphi^{*})$')
        plt.plot(xvals, yvals_A, color=self.colors['A'], label=r'$p_{A}(\varphi^{*})$')
        plt.fill_between(xvals, 0, yvals_H, color=self.colors['H'], alpha=0.2)
        plt.fill_between(xvals, 0, yvals_A, color=self.colors['A'], alpha=0.2)
        plt.xlim([0, 2*pi])
        plt.ylim([0, 0.295])
        plt.xlabel(r'$\varphi^*$')
        plt.ylabel('Probability density')
        plt.legend(loc='upper right')
        plt.tight_layout()
        fig.show()
        fig.savefig('phistar_pdfs.pdf')

        # Scale to event yield
        yvals_H *= roodata.numEntries()*(1-fitted_theta)/float(nbins)*2*pi
        yvals_A *= roodata.numEntries()*(fitted_theta)/float(nbins)*2*pi
        yvals_sum = yvals_H + yvals_A
        
        # Make plot
        fig, ax = plt.subplots(1)
        histentries, binedges = np.histogram(x, bins=nbins, range=(0, 2*pi))
        bincenters = (binedges[:-1] + binedges[1:])/2.0
        yerr = np.sqrt(histentries)
        plt.errorbar(bincenters, histentries, xerr=np.diff(binedges)*0.5, yerr=yerr, linestyle='None', ecolor='black', label='Data')
        plt.plot(xvals, yvals_H, color=self.colors['H'], label=r'$p_{H}(\varphi^{*})$')
        plt.plot(xvals, yvals_A, color=self.colors['A'], label=r'$p_{A}(\varphi^{*})$')
        plt.plot(xvals, yvals_sum, color=self.colors['model'], label=r'$p(\varphi^{*} \,|\, \alpha = %.2f)$' % fitted_theta)
        plt.fill_between(xvals, 0, yvals_H, color=self.colors['H'], alpha=0.2)
        plt.fill_between(xvals, 0, yvals_A, color=self.colors['A'], alpha=0.2)
        
        # Set correct legend order
        handles, labels = ax.get_legend_handles_labels()
        handles = [handles[3], handles[2], handles[0], handles[1]]
        labels = [labels[3], labels[2], labels[0], labels[1]]
        ax.legend(handles, labels, loc='upper right')
        
        plt.xlabel(r'$\varphi^{*}$')
        plt.ylabel('Events / %.2f' % ((2*pi)/nbins))
        
        axes = plt.gca()
        axes.set_xlim([0, 2*pi])
        axes.set_ylim([0, max(histentries)*1.8])
        
        plt.tight_layout()
        fig.show()
        fig.savefig('phistar_fit.pdf')
        
        # Create likelihood curve
        logl = model.createNLL(roodata)
        
        # Extract curve
        xvals = np.linspace(0, 1, 200)
        yvals = []
        ymin = 999.
        for xval in xvals:
            theta.setVal(xval)
            yvals.append(logl.getValV())
            if yvals[-1] < ymin:
                ymin = yvals[-1]
        
        # Shift minimum to zero
        yvals = np.array(yvals)
        yvals -= ymin
        
        # Return points for the NLL curve
        return xvals, yvals
예제 #6
0
    def predict_and_plot(self, x):
        """
        Do the same as predict(), but add plots
        
        Return -logL ratio, for external plotting
        """

        rcParams['xtick.major.pad'] = 12
        rcParams['ytick.major.pad'] = 12
        
        
        xs = self.scaler.transform(x)
        preds = self.model.predict(xs)[:, 1]
        
        roodata = RooDataSet('data', 'data', RooArgSet(self.roopred))
        for pred in preds:
            self.roopred.setVal(pred)
            roodata.add(RooArgSet(self.roopred))

        theta = RooRealVar('theta', 'theta', 0.5, self.theta_min, self.theta_max)
        model = RooAddPdf('model', 'model',
                          RooArgList(self.pdfs['A'], self.pdfs['H']),
                          RooArgList(theta))
        
        
        #with stdout_redirected_to():
        print '\n\nNEURAL NETWORK FIT'
        res = model.fitTo(roodata, PrintLevel(10))
        
        fitted_theta = theta.getValV()
            
        # Histogram binning for data points
        nbins = 14
        
        xvals = np.linspace(0, 1, 300)
        yvals_H = []
        yvals_A = []
        
        # Get points for pdf curves
        for xval in xvals:
            self.roopred.setVal(xval)
            yvals_H.append(self.pdfs['H'].getValV(RooArgSet(self.roopred)))
            yvals_A.append(self.pdfs['A'].getValV(RooArgSet(self.roopred)))
        
        yvals_H = np.array(yvals_H)
        yvals_A = np.array(yvals_A)
        
        # Plot pdfs by themselves
        fig = plt.figure()
        plt.plot(xvals, yvals_H, color=self.colors["H"], label=r'$p_{H}(y)$')
        plt.plot(xvals, yvals_A, color=self.colors["A"], label=r'$p_{A}(y)$')
        plt.fill_between(xvals, 0, yvals_H, color=self.colors["H"], alpha=0.2)
        plt.fill_between(xvals, 0, yvals_A, color=self.colors["A"], alpha=0.2)
        
        plt.xlim([0, 1])
        plt.ylim([0, 11])
        plt.xlabel(r'Network output ($y$)')
        plt.ylabel('Probability density')
        plt.legend(loc='upper right')
        plt.tight_layout()
        fig.show()
        fig.savefig('mle_nn_pdfs.pdf')
        
        # Scale to event yield
        yvals_H *= roodata.numEntries()*(1-fitted_theta)/float(nbins)
        yvals_A *= roodata.numEntries()*(fitted_theta)/float(nbins)
        yvals_sum = yvals_H + yvals_A
        
        
        # Make plot of fit to data
        fig, ax = plt.subplots(1)
        histentries, binedges = np.histogram(preds, bins=nbins, range=(0, 1))
        bincenters = (binedges[:-1] + binedges[1:])/2.0
        yerr = np.sqrt(histentries)
        plt.errorbar(bincenters, histentries, xerr=np.diff(binedges)*0.5, yerr=yerr, linestyle='None', ecolor='black', label='Data')
        plt.plot(xvals, yvals_H, color=self.colors["H"], label=r'$p_{H}(y)$')
        plt.plot(xvals, yvals_A, color=self.colors["A"], label=r'$p_{A}(y)$')
        plt.plot(xvals, yvals_sum, color=self.colors["model"], label=r'$p(y \,|\, \alpha = %.2f)$' % fitted_theta)
        plt.fill_between(xvals, 0, yvals_H, color=self.colors["H"], alpha=0.2)
        plt.fill_between(xvals, 0, yvals_A, color=self.colors["A"], alpha=0.2)
        
        # Set correct legend order
        handles, labels = ax.get_legend_handles_labels()
        handles = [handles[3], handles[2], handles[0], handles[1]]
        labels = [labels[3], labels[2], labels[0], labels[1]]
        ax.legend(handles, labels, loc='upper right')
        
        plt.xlabel(r'Network output ($y$)')
        plt.ylabel('Events / %.2f' % (1.0/nbins))
        
        axes = plt.gca()
        axes.set_xlim([0, 1])
        axes.set_ylim([0, max(histentries)*2.3])
        plt.tight_layout()
        fig.show()
        fig.savefig('mle_nn_fit.pdf')
        
        
        # Create likelihood curve
        logl = model.createNLL(roodata)
        
        xvals = np.linspace(0, 1, 200)
        yvals = []
        ymin = 999.
        for xval in xvals:
            theta.setVal(xval)
            yvals.append(logl.getValV())
            if yvals[-1] < ymin:
                ymin = yvals[-1]
        
        yvals = np.array(yvals)
        yvals -= ymin
        
        # Return points for the NLL curve
        return xvals, yvals