def gen_samples(nevents, fraction=0.9, slope=0.005):
    """Generate toy "matched" and "unmatched" mass samples.

    ``nevents`` signal candidates are split at random: each one lands in
    the matched sample with probability ``fraction`` and in the unmatched
    sample otherwise.  Signal masses are drawn from a Gaussian with mean
    3096.916 and width 12.  Each sample additionally receives
    ``nevents // 2`` background events drawn with ``gen_toy`` from the
    shape ``slope * exp(-slope * x)`` on the range (2900, 3300).

    The observed matching efficiency (matched signal count / ``nevents``)
    is printed as a side effect.

    Parameters
    ----------
    nevents : int
        Total number of signal candidates to distribute.
    fraction : float
        Probability that a signal candidate is matched.
    slope : float
        Slope of the exponential background shape.

    Returns
    -------
    tuple of numpy.ndarray
        ``(tot_m, tot_u)``: matched and unmatched samples, each the
        concatenation of its signal and background masses.
    """
    # A trial equal to 0 (probability `fraction`) marks a matched candidate.
    trials = np.random.binomial(1, 1 - fraction, nevents)
    fract = np.count_nonzero(trials == 0)
    bound = (2900, 3300)

    def bkg_shape(x):
        # np.exp replaces np.math.exp: the np.math alias was removed in
        # NumPy 2.0, and np.exp accepts scalars and arrays alike.
        return slope * np.exp(-slope * x)

    # The order of the random draws below is kept as in the original so a
    # seeded run consumes the random stream in the same sequence.
    bkg_m = gen_toy(bkg_shape, nevents // 2, bound)
    sig_m = np.random.normal(3096.916, 12, fract)
    tot_m = np.concatenate([sig_m, bkg_m])

    bkg_u = gen_toy(bkg_shape, nevents // 2, bound)
    sig_u = np.random.normal(3096.916, 12, nevents - fract)
    tot_u = np.concatenate([sig_u, bkg_u])

    print("matching efficiency = ", fract / nevents)

    return tot_m, tot_u
# </code>
# 
# If your cost function is more complex than simply adding them together, this is the way to do it.

# <markdowncell>

# ###Toy generation.
# This is an inverse-CDF implementation (not accept/reject). It has a large overhead but is fast element-wise. Does anyone want to sign up to implement accept/reject?

# <codecell>

from probfit import gen_toy

# <codecell>

# Draw a toy sample from total_pdf on the range (1.83, 1.91); the PDF's
# parameter values are passed as keywords (1000 is presumably the number of
# events to generate -- confirm against gen_toy's signature).
# quiet=False asks gen_toy to also plot the original function and the toy.
toy = gen_toy(total_pdf, 1000, (1.83,1.91), mass=1.87, gamma=0.01, c=1.045, m=-0.43, f_0=0.5, quiet=False)

# <codecell>

# Histogram the generated toy values in 100 bins, drawn as a step outline.
hist(toy, bins=100, histtype='step');

# <codecell>

# Fit the toy back: build an unbinned likelihood for total_pdf on the toy
# sample and minimize it, starting from the same parameter values that were
# used for generation.
ulh = UnbinnedLH(total_pdf, toy)
m = Minuit(ulh, mass=1.87, gamma=0.01, c=1.045, m=-0.43, f_0=0.5)
m.migrad();
# Presumably displays the fit result on top of the data -- confirm.
ulh.show(m)

# <markdowncell>

# ####Tips
Example #3
0
from iminuit import Minuit
from probfit import BinnedLH, Extended, AddPdf, gen_toy
from probfit.pdf import HistogramPdf
from probfit.plotting import draw_pdf
import numpy as np

bound = (0, 10)
np.random.seed(0)

# High-statistics template samples: a parabola-shaped background from
# gen_toy first, then a unit-width Gaussian signal centred at 5.  The order
# of the draws is kept so the seeded random stream is consumed identically.
bkg = gen_toy(lambda x: x**2, 100000, bound=bound)
sig = 5 + np.random.randn(50000)
data = np.concatenate((sig, bkg))

# Template histograms; the background reuses the signal's bin edges.
hsig, be = np.histogram(sig, range=bound, bins=40)
hbkg, be = np.histogram(bkg, range=bound, bins=be)

# Shuffle the combined sample and keep its first 1000 entries for fitting.
data = np.random.permutation(data)
fitdata = data[:1000]

# Histogram-based PDFs, each made extended with its own yield parameter
# (N1 for the signal template, N2 for the background template).
psig = HistogramPdf(hsig, be)
pbkg = HistogramPdf(hbkg, be)
epsig = Extended(psig, extname='N1')
epbkg = Extended(pbkg, extname='N2')
pdf = AddPdf(epbkg, epsig)

blh = BinnedLH(pdf, fitdata, bound=bound, bins=40, extended=True)
m = Minuit(
    blh,
    N1=330,
    N2=670,
    error_N1=20,
    error_N2=30,
)
# The minimization call is left disabled in this example:
# m.migrad()
blh.draw(m, parts=True)
# </code>
# 
# If your cost function is more complex than simply adding them together, this is the way to do it.

# <markdowncell>

# ###Toy generation.
# This is an inverse-CDF implementation (not accept/reject). It has a large overhead but is fast element-wise. Does anyone want to sign up to implement accept/reject?

# <codecell>

from probfit import gen_toy

# <codecell>

# Draw a toy sample from total_pdf on the range (1.83, 1.91); the PDF's
# parameter values are passed as keywords (1000 is presumably the number of
# events to generate -- confirm against gen_toy's signature).
# quiet=False asks gen_toy to also plot the original function and the toy.
toy = gen_toy(total_pdf, 1000, (1.83,1.91), mass=1.87, gamma=0.01, c=1.045, m=-0.43, f_0=0.5, quiet=False)

# <codecell>

# Histogram the generated toy values in 100 bins, drawn as a step outline.
hist(toy, bins=100, histtype='step');

# <codecell>

# Fit the toy back: build an unbinned likelihood for total_pdf on the toy
# sample and minimize it, starting from the same parameter values that were
# used for generation.
ulh = UnbinnedLH(total_pdf, toy)
m = Minuit(ulh, mass=1.87, gamma=0.01, c=1.045, m=-0.43, f_0=0.5)
m.migrad();
# Presumably displays the fit result on top of the data -- confirm.
ulh.show(m)

# <markdowncell>

# ####Tips
Example #5
0
from iminuit import Minuit
from probfit import BinnedLH, Extended, AddPdf, gen_toy
from probfit.pdf import HistogramPdf
import numpy as np

bound = (0, 10)
np.random.seed(0)
bkg = gen_toy(lambda x: x**2, 100000, bound=bound)  # parabola-shaped background
sig = np.random.randn(50000) + 5  # Gaussian signal centred at 5
data = np.concatenate((sig, bkg))

# Build the template histograms from the full, high-statistics samples;
# the second histogram is filled on the bin edges returned by the first.
hsig, be = np.histogram(sig, bins=40, range=bound)
hbkg, be = np.histogram(bkg, bins=be, range=bound)

# Randomize the order of the combined sample, then fit only 1000 entries.
data = np.random.permutation(data)
fitdata = data[:1000]

# Turn each template into a PDF and attach a yield parameter to it.
psig = HistogramPdf(hsig, be)
pbkg = HistogramPdf(hbkg, be)
epsig = Extended(psig, extname='N1')
epbkg = Extended(pbkg, extname='N2')
pdf = AddPdf(epbkg, epsig)

# Extended binned likelihood of the summed PDF on the fit sample.
blh = BinnedLH(pdf, fitdata, bins=40, bound=bound, extended=True)
m = Minuit(blh, N1=330, N2=670, error_N1=20, error_N2=30)
# m.migrad() is left commented out in this example, so no minimization runs
# before the drawing call below.
blh.draw(m, parts=True)
Example #6
0
# <markdowncell>

# ##But... we can't normalize everything analytically, and how do we generate a toy sample from a PDF?
# 
# When fitting a distribution to a PDF, one of the common problems that we run into is normalization.
# Not every function is analytically integrable over the range of interest.
# 
# Let's look at an example: the [Crystal Ball function](http://en.wikipedia.org/wiki/Crystal_Ball_function).
# It is simply a Gaussian with a power-law tail, normally found in the energy deposited in crystals.
# It is impossible to normalize analytically, and the normalization depends on the shape parameters.

# <codecell>

# Seed NumPy's global random generator so the toy sample is reproducible.
numpy.random.seed(0)
bound = (-1, 2)
# Generate a toy sample from the (unnormalized) crystal ball shape on `bound`
# with the given shape parameters.
data = probfit.gen_toy(probfit.crystalball, 10000, bound=bound, alpha=1., n=2., mean=1., sigma=0.3, quiet=False)
# quiet=False tells gen_toy to plot the original function, the toy
# histogram, and the Poisson errors from both the original distribution
# and the toy.

# <codecell>

# To fit this function as a distribution we need to normalize it
# so that it becomes a PDF over the range we consider here.
# We do this with the probfit.Normalized functor, which implements
# the trapezoid numerical integration method with a simple cache mechanism.
normalized_crystalball = probfit.Normalized(probfit.crystalball, bound)
# This can also be done with a decorator:
# @probfit.normalized(bound)
# def my_function(x, blah):
#    return something
# NOTE(review): presumably (x, alpha, n, mean, sigma) for evaluating the
# crystal ball function -- confirm against where `pars` is used.
pars = 1.0, 1, 2, 1, 0.3
Example #7
0
from histimator.models import HistiModel, HistiChannel, HistiSample
import numpy as np

from probfit import gen_toy, BinnedLH
from iminuit import Minuit

m = HistiModel('first model')

bound = (0, 10)
np.random.seed(0)

# Small toy inputs: a parabola-shaped background from gen_toy, then a
# unit-width Gaussian signal centred at 5, each histogrammed in 10 bins
# over `bound`.  np.histogram's full return value is what gets stored.
bkg = gen_toy(lambda x: x**2, 100, bound=bound)
sig = 5 + np.random.randn(50)
hsig = np.histogram(sig, range=bound, bins=10)
hbkg = np.histogram(bkg, range=bound, bins=10)

# Signal sample carrying a normalization parameter.
signal = HistiSample("signal")
signal.SetHisto(hsig)
signal.AddNorm("SigXSecOverSM", 0.5, 0, 3)

# Background sample carrying one overall systematic.
background = HistiSample("background1")
background.SetHisto(hbkg)
background.AddOverallSys("JES", 0.9, 1.1)

# A single channel holding both samples, registered with the model.
chan = HistiChannel("SR")
chan.AddSample(signal)
chan.AddSample(background)
m.AddChannel(chan)

# Draw toy data from the model's pdf evaluated with both arguments set to 1.
data = gen_toy(lambda x: m.pdf(x, 1, 1), 150, bound)