def gen_samples(nevents, fraction=0.9, slope=0.005):
    """Generate a pair of toy samples: "matched" and "unmatched".

    One Bernoulli draw is made per event; an event counts as matched with
    probability ``fraction``.  The matched sample receives that many signal
    values and the unmatched sample receives the remaining
    ``nevents - n_matched``.  Signal values are drawn from a normal
    distribution with mean 3096.916 and standard deviation 12.  Each sample
    additionally gets ``nevents // 2`` background values drawn by ``gen_toy``
    from ``slope * exp(-slope * x)`` on the range (2900, 3300).

    The observed matched fraction is printed as "matching efficiency".

    Parameters
    ----------
    nevents : int
        Number of signal events shared between the two samples.
    fraction : float, optional
        Probability that a signal event ends up in the matched sample.
    slope : float, optional
        Slope of the exponential background shape.

    Returns
    -------
    tuple of numpy.ndarray
        ``(tot_m, tot_u)``: matched and unmatched samples, each being the
        signal values followed by the background values.

    Raises
    ------
    ZeroDivisionError
        If ``nevents`` is 0 (the efficiency cannot be computed).
    """
    # np.math was only an alias of the stdlib math module; it was deprecated
    # in NumPy 1.25 and removed in NumPy 2.0, so use math.exp directly.
    import math

    def bkg_shape(x):
        # Keep the argument name ``x``: gen_toy may inspect the signature.
        return slope * math.exp(-slope * x)

    # A draw of 0 (probability ``fraction``) marks an event as matched.
    draws = np.random.binomial(1, 1 - fraction, nevents)
    n_matched = int(np.count_nonzero(draws == 0))

    bound = (2900, 3300)
    n_bkg = nevents // 2

    # The order of the random draws below is the same as in the original
    # implementation so that seeded runs stay reproducible.
    bkg_m = gen_toy(bkg_shape, n_bkg, bound)
    sig_m = np.random.normal(3096.916, 12, n_matched)
    tot_m = np.concatenate([sig_m, bkg_m])

    bkg_u = gen_toy(bkg_shape, n_bkg, bound)
    sig_u = np.random.normal(3096.916, 12, nevents - n_matched)
    tot_u = np.concatenate([sig_u, bkg_u])

    print("matching efficiency = ", n_matched / nevents)
    return tot_m, tot_u
# </code>
#
# If your cost function is more complex than simply adding them together, this is the way to do it.

# <markdowncell>

# ###Toy generation.
# This is an inverse-CDF implementation (not accept/reject). It has a large overhead but is fast element-wise. Anyone want to sign up for accept/reject?

# <codecell>

from probfit import gen_toy

# <codecell>

# The same parameter values are used to generate the toy and to seed the fit below.
toy_pars = dict(mass=1.87, gamma=0.01, c=1.045, m=-0.43, f_0=0.5)
toy = gen_toy(
    total_pdf,
    1000,
    (1.83, 1.91),
    quiet=False,
    **toy_pars
)

# <codecell>

hist(toy, bins=100, histtype='step');

# <codecell>

# Fit the generated toy with an unbinned likelihood, starting from the generation values.
ulh = UnbinnedLH(total_pdf, toy)
m = Minuit(ulh, **toy_pars)
m.migrad();
ulh.show(m)

# <markdowncell>

# ####Tips
# Template-fit example: signal and background shapes are taken from
# high-statistics histograms and combined into an extended binned likelihood
# that is built on a 1000-event subsample.
from iminuit import Minuit
from probfit import (
    BinnedLH,
    Extended,
    AddPdf,
    gen_toy,
)
from probfit.pdf import HistogramPdf
from probfit.plotting import draw_pdf
import numpy as np

bound = (0, 10)
np.random.seed(0)

# Large samples from which the templates are built.
bkg = gen_toy(lambda x: x**2, 100000, bound=bound)  # a parabola background
sig = 5 + np.random.randn(50000)  # a Gaussian signal

# fill histograms with large statistics; the background reuses the signal's
# bin edges so that both templates share the same binning
hsig, be = np.histogram(sig, bins=40, range=bound)
hbkg, be = np.histogram(bkg, bins=be, range=bound)

# randomize data, then keep only the first 1000 events for the fit
data = np.random.permutation(np.concatenate([sig, bkg]))
fitdata = data[:1000]

# Histogram templates, each wrapped with its own yield parameter (N1, N2).
psig, pbkg = HistogramPdf(hsig, be), HistogramPdf(hbkg, be)
epsig = Extended(psig, extname='N1')
epbkg = Extended(pbkg, extname='N2')
pdf = AddPdf(epbkg, epsig)

blh = BinnedLH(pdf, fitdata, bins=40, bound=bound, extended=True)

# Starting values and initial step sizes for the two yields.
start = dict(N1=330, N2=670, error_N1=20, error_N2=30)
m = Minuit(blh, **start)
# The minimisation is left disabled here; uncomment to run the fit before drawing.
#m.migrad()
blh.draw(m, parts=True)
# Fit of two histogram templates (Gaussian signal, parabolic background) to a
# 1000-event subsample using an extended binned likelihood.
from iminuit import Minuit
from probfit import BinnedLH, Extended, AddPdf, gen_toy
from probfit.pdf import HistogramPdf
import numpy as np

bound = (0, 10)
np.random.seed(0)

bkg = gen_toy(lambda x: x**2, 100000, bound=bound)  # a parabola background
sig = np.random.randn(50000) + 5  # a Gaussian signal

# fill histograms with large statistics
hsig, be = np.histogram(sig, bins=40, range=bound)
hbkg, be = np.histogram(bkg, bins=be, range=bound)

# randomize data: shuffle signal and background together and keep 1000 events
data = np.concatenate([sig, bkg])
data = np.random.permutation(data)
fitdata = data[:1000]

# Turn each template into a PDF and attach a yield parameter to it.
psig = HistogramPdf(hsig, be)
epsig = Extended(psig, extname='N1')
pbkg = HistogramPdf(hbkg, be)
epbkg = Extended(pbkg, extname='N2')
pdf = AddPdf(epbkg, epsig)

blh = BinnedLH(
    pdf,
    fitdata,
    bins=40,
    bound=bound,
    extended=True,
)
m = Minuit(
    blh,
    N1=330,
    N2=670,
    error_N1=20,
    error_N2=30,
)
# Minimisation is switched off in this example; uncomment to run it.
#m.migrad()
blh.draw(m, parts=True)
# <markdowncell>

# ##But... we can't normalize everything analytically, and how do we generate a toy sample from a PDF?
#
# When fitting a distribution to a PDF, one of the common problems we run into is normalization.
# Not every function is analytically integrable over the range of interest.
#
# Let's look at an example: the [Crystal Ball function](http://en.wikipedia.org/wiki/Crystal_Ball_function).
# It's simply a Gaussian with a power-law tail ... normally found in energy deposited in crystals ...
# impossible to normalize analytically, and the normalization depends on the shape parameters.

# <codecell>

numpy.random.seed(0)
bound = (-1, 2)
data = probfit.gen_toy(
    probfit.crystalball,
    10000,
    bound=bound,
    alpha=1.0,
    n=2.0,
    mean=1.0,
    sigma=0.3,
    quiet=False,
)
# quiet=False tells gen_toy to plot the original function, the toy histogram
# and the Poisson errors from both the original distribution and the toy

# <codecell>

# To fit this function as a distribution we need to normalize it
# so that it becomes a PDF over the range we consider here.
# We do this with the probfit.Normalized functor, which implements
# the trapezoid numerical integration method with a simple cache mechanism
normalized_crystalball = probfit.Normalized(probfit.crystalball, bound)
# this can also be done with a decorator
# @probfit.normalized(bound)
# def my_function(x, blah):
#     return something
pars = (1.0, 1, 2, 1, 0.3)
# Build a one-channel histimator model from toy signal/background histograms
# and draw a toy data set from the model PDF.
from histimator.models import HistiModel, HistiChannel, HistiSample
import numpy as np
from probfit import gen_toy, BinnedLH
from iminuit import Minuit

m = HistiModel('first model')

bound = (0, 10)
np.random.seed(0)
bkg = gen_toy(lambda x: x**2, 100, bound=bound)  # a parabola background
sig = 5 + np.random.randn(50)  # a Gaussian signal

# Both templates use the same 10-bin layout over ``bound``; each is the
# (counts, edges) pair returned by np.histogram.
hsig, hbkg = (
    np.histogram(values, bins=10, range=bound) for values in (sig, bkg)
)

signal = HistiSample("signal")
signal.SetHisto(hsig)
# NOTE(review): arguments look like (name, initial value, lower, upper) -- confirm against histimator.
signal.AddNorm("SigXSecOverSM", 0.5, 0, 3)

background = HistiSample("background1")
background.SetHisto(hbkg)
# NOTE(review): presumably a 0.9/1.1 down/up overall systematic -- confirm against histimator.
background.AddOverallSys("JES", 0.9, 1.1)

chan = HistiChannel("SR")
for sample in (signal, background):
    chan.AddSample(sample)
m.AddChannel(chan)

# Toy data drawn from the model PDF with both extra arguments fixed to 1.
data = gen_toy(lambda x: m.pdf(x, 1, 1), 150, bound)