Python Model 예제들, fastprof.Model Python 예제들

예제 #1

0

파일 보기

import numpy as np
import matplotlib.pyplot as plt
import math
import scipy

# Configuration: input files, POI settings and number of toys
model_filename = 'run/high_mass_gg_1300_100.json'
hypos_filename = 'run/hypos_high_mass_gg_1300.json'
poi = 'mu'
poi_init, poi_min, poi_max = 1, -3, 20
output_filename = 'samples/high_mass_gg_1300'
ntoys = 10000

# Build the fast model, then a dataset set to the expected parameters for
# the value 0 passed to expected_pars().
# (Alternative kept from the original: Data(fast_model).load(model_filename))
fast_model = Model.create(model_filename)
expected_at_zero = fast_model.expected_pars(0)
fast_data = Data(fast_model).set_expected(expected_at_zero)

# Read the list of hypothesis dictionaries from the JSON file
with open(hypos_filename) as hypos_file:
    hypo_dicts = json.load(hypos_file)

# POI value of each hypothesis, in file order
hypo_mus = [hypo[poi] for hypo in hypo_dicts]

print(fast_model)

np.random.seed(131071)

# Fill in a 'cl' entry for every hypothesis that does not already carry one
for hd in hypo_dicts:
    if 'cl' in hd:
        continue
    q_mu = QMu(hd[poi], hd['tmu'], hd['best_fit_val'])
    hd['cl'] = q_mu.asymptotic_cl()

예제 #2

0

파일 보기

import numpy as np
import matplotlib.pyplot as plt
from fastprof import Model, Data, Parameters, NPMinimizer, OptiMinimizer
import copy

np.set_printoptions(precision=4, suppress=False, linewidth=180)

np.random.seed(4)

# Hypothesis value and POI bounds shared by both minimizations below
test_mu = 0.1
mu_bounds = (0, 20)

# ==========================================
# Original model: generate data at the expected parameters, then compute tmu
model = Model().load('run/fastprof/HighMass_NW-1700-log200-noDSig.json')
pars0 = model.expected_pars(test_mu)
data = model.generate_data(pars0)

opti = OptiMinimizer(data, test_mu, mu_bounds)
t = opti.tmu(test_mu)

print(opti.hypo_pars)
# Print the approximate closure check, then the exact one
for closure_check in (model.closure_approx, model.closure_exact):
    print(closure_check(opti.hypo_pars, data))

# ==========================================
# Regularized model: reuse a shallow copy of the data, re-pointed at model2
model2 = model.regularize(2)
data2 = copy.copy(data)
data2.model = model2
data2.aux_betas = np.zeros(model2.nb)

opti2 = OptiMinimizer(data2, test_mu, mu_bounds)
t2 = opti2.tmu(test_mu)

print(opti2.hypo_pars)

예제 #3

0

파일 보기

import numpy as np
import matplotlib.pyplot as plt
from fastprof import Model, Data, Parameters, NPMinimizer, OptiMinimizer

np.random.seed(7)

model = Model().load('fastprof/models/HighMass_NW-700-log500-gammas-only.json')
pars0 = model.expected_pars(5)

# Start from the expected parameters and smear the gammas with unit-width
# Gaussian noise centred on their current values.
pars = model.expected_pars(5)
pars.gammas = np.random.normal(pars.gammas, 1)
print('Randomized gammas = ', pars.gammas)

# Build one expected dataset with linear_nps off and one with it on
model.linear_nps = False
data_exp = Data(model).set_expected(pars)
model.linear_nps = True
data_lin = Data(model).set_expected(pars)

# Profile at the value 5 against the dataset built with linear_nps on
npm = NPMinimizer(5, data_lin)
print(npm.profile())
min_pars = npm.min_pars

plt.ion()
plt.show()

fig1 = plt.figure(1)
# FigureCanvasBase.set_window_title was deprecated in Matplotlib 3.4 and
# later removed; the window title is set through the figure manager.
fig1.canvas.manager.set_window_title('True pars, data from linear model')
model.plot(pars, data_lin, residuals=True)
plt.xlim(150, 300)

fig2 = plt.figure(2)

예제 #4

0

파일 보기

파일: test_fasttoy.py 프로젝트: fastprof-hep/fastprof

# Inputs for this toy setup (two entries per array)
n = np.array([5, 12])
bkg = np.array([1.0, 10.0])

alpha = np.array([1])
beta = np.array([-1.6])

a = np.array([[0.2], [0.2]])
b = np.array([[0.2], [0.2]])
data = np.array([7, 12, 0, 1])

models = {}
n_np = 2

# One model per scanned mu; `mus` is defined outside this snippet
for mu in mus:
    head = np.array(data[:-n_np])
    sig = np.array([1.0, 0]) * mu  # specific to this case!
    entry_3 = np.array([data[3]])
    entry_2 = np.array([data[2]])
    models[mu] = Model(head, sig, bkg, entry_3, entry_2, a, b)

nlls = np.zeros(mus.shape[0])

# Load the same data into each model and record the profiled NLL
for i, mu in enumerate(mus):
    model = models[mu]
    head = np.array(data[:-n_np])
    entry_3 = np.array([data[3]])
    entry_2 = np.array([data[2]])
    model.set_all_data(head, entry_3, entry_2)
    nlls[i] = NPMinimizer().profile_nll(model)

plt.ion()
plt.plot(mus, nlls, 'b')

# Degree-4 interpolating spline of the NLL scan; roots of its derivative
# are the critical points of the interpolated curve
smooth_nll = InterpolatedUnivariateSpline(mus, nlls, k=4)
nll_slope = smooth_nll.derivative()
cr_pts = nll_slope.roots()
x = np.linspace(0, mus[-1], 100)

예제 #5

0

파일 보기

import numpy as np
import matplotlib.pyplot as plt
from fastprof import Model, ScanSampler, OptiSampler

# Two-entry model with one 'alphas' entry and one 'betas' entry
model = Model(
    sig=np.array([1.0, 0]),
    bkg=np.array([1.0, 10.0]),
    alphas=['acc_sys'],
    betas=['bkg_sys'],
    a=np.array([[0.2], [0.2]]),
    b=np.array([[0.2], [0.2]]),
)

gen_mu = 3.7
print('Will generate the following hypothesis: ', gen_mu)

scan_mus = np.linspace(0, 10, 21)
print('Will scan over the following hypotheses: ', scan_mus)

np.random.seed(131071)
# Alternative kept from the original:
#   dist = ScanSampler(model, scan_mus).generate(gen_mu, 10000)
sampler = OptiSampler(model, mu0=1, bounds=(0, 20), method='scalar')
dist = sampler.generate(gen_mu, 10000)
dist.sort()

# Histogram of the generated samples on a logarithmic y axis
plt.ion()
plt.yscale('log')
plt.hist(dist.samples[:], bins=50)

예제 #6

0

파일 보기

파일: check_model.py 프로젝트: fastprof-hep/fastprof

def _plot_pv_comparison(fig_num, title, pv_key, xlabel, poi_name, raster,
                        faster, marker):
    """Plot the `pv_key` p-values of `raster` and `faster` on figure `fig_num`.

    Both curves are drawn against the `poi_name` value of each hypothesis.
    Returns the figure so that the caller can save it.
    """
    fig = plt.figure(fig_num)
    plt.suptitle(title)
    plt.xlabel(xlabel)
    plt.ylabel(title)
    plt.plot([hypo[poi_name] for hypo in raster.plr_data],
             [full.pvs[pv_key] for full in raster.plr_data.values()],
             marker + 'r:',
             label='Full model')
    plt.plot([hypo[poi_name] for hypo in faster.plr_data],
             [fast.pvs[pv_key] for fast in faster.plr_data.values()],
             marker + 'g-',
             label='Fast model')
    plt.legend()
    return fig


def run(argv=None):
    """Recompute the results stored in a fits file with the fast model and compare.

    Loads the model and the raster of stored results, picks the dataset
    (data file, Asimov specification, or the model file itself), recomputes
    the raster with the selected test statistic, prints the comparison and,
    unless in batch mode, plots and saves the 'pv' and 'cls' p-values.

    Args:
        argv: command-line arguments to parse; None lets argparse read
            sys.argv.

    Raises:
        ValueError: for an invalid model, dataset or Asimov specification,
            an unknown test statistic, or more than one POI.
    """
    parser = make_parser()
    # Forward argv so that programmatic callers can supply their own arguments
    options = parser.parse_args(argv)
    if not options:
        parser.print_help()
        return

    model = Model.create(options.model_file)
    if model is None:
        raise ValueError('No valid model definition found in file %s.' %
                         options.model_file)
    if options.regularize is not None:
        model.set_gamma_regularization(options.regularize)
    if options.cutoff is not None: model.cutoff = options.cutoff
    if options.setrange is not None: process_setranges(options.setrange, model)

    raster = Raster('data', model=model, filename=options.fits_file)

    if options.data_file:
        data = Data(model).load(options.data_file)
        if data is None:
            raise ValueError('No valid dataset definition found in file %s.' %
                             options.data_file)
        print('Using dataset stored in file %s.' % options.data_file)
    elif options.asimov is not None:
        try:
            sets = process_setvals(options.asimov, model)
        except Exception as inst:
            print(inst)
            raise ValueError("ERROR : invalid POI specification string '%s'." %
                             options.asimov) from inst
        data = model.generate_expected(sets)
        print('Using Asimov dataset with parameters %s' % str(sets))
    else:
        print('Using dataset stored in file %s.' % options.model_file)
        data = Data(model).load(options.model_file)

    # Map each supported test-statistic name to its calculator class
    calculators = {'q~mu': QMuTildaCalculator, 'q_mu': QMuCalculator}
    if options.test_statistic not in calculators:
        raise ValueError('Unknown test statistic %s' % options.test_statistic)
    if len(raster.pois()) > 1:
        raise ValueError(
            'Currently not supporting more than 1 POI for this operation')
    calc = calculators[options.test_statistic](
        OptiMinimizer(niter=options.iterations).set_pois_from_model(model))

    calc.fill_all_pv(raster)
    faster = calc.recompute_raster(raster, data)
    raster.print(verbosity=options.verbosity, other=faster)
    if options.verbosity > 2: print(str(faster))

    # Plot results
    if not options.batch_mode:
        poi = raster.pois()[list(raster.pois())[0]]
        plt.ion()
        xlabel = model.poi(0).name
        fig1 = _plot_pv_comparison(1, '$CL_{s+b}$', 'pv', xlabel, poi.name,
                                   raster, faster, options.marker)
        fig2 = _plot_pv_comparison(2, '$CL_s$', 'cls', xlabel, poi.name,
                                   raster, faster, options.marker)
        fig1.savefig(options.output_file + '_clsb.pdf')
        fig2.savefig(options.output_file + '_cls.pdf')
        plt.show()

예제 #7

0

파일 보기

파일: dump_debug.py 프로젝트: fastprof-hep/fastprof

def run(argv=None):
    """Plot distributions read from a debug CSV file.

    Figure 1 shows histograms of the 'mu_hat', 'tmu', 'pv' and 'nfev'
    columns. Figure 2 shows, for every 'free_<par>' column that does not end
    in 'nll', histograms of the free-fit and hypothesis-fit values, optionally
    shifted by the values of a hypothesis taken from a raster file. Reference
    curves are overlaid when `options.reference` is set.

    Args:
        argv: command-line arguments to parse; None lets argparse read
            sys.argv.

    Raises:
        ValueError: if the model file or the hypothesis specification is
            invalid.
    """
    parser = make_parser()

    # Forward argv so that programmatic callers can supply their own arguments
    options = parser.parse_args(argv)
    if not options:
        parser.print_help()
        sys.exit(0)

    debug = pd.read_csv(options.filename[0])

    plt.ion()
    fig1, ax1 = plt.subplots(2, 2)

    debug.hist('mu_hat', ax=ax1[0, 0], bins=options.nbins)
    debug.hist('tmu',
               ax=ax1[0, 1],
               bins=np.linspace(0, options.tmu_range, options.nbins))
    debug.hist('pv', ax=ax1[1, 0], bins=options.nbins)
    debug.hist('nfev', ax=ax1[1, 1])

    if options.log_scale:
        ax1[0, 0].set_yscale('log')
        ax1[0, 1].set_yscale('log')
        ax1[0, 1].set_ylim(bottom=1)
        ax1[1, 0].set_yscale('log')

    if options.reference:
        # Reference curves are scaled by (number of entries) x (bin width)
        mu_hat = debug['mu_hat']
        xx = np.linspace(np.min(mu_hat), np.max(mu_hat), options.nbins)
        yy = [
            mu_hat.shape[0] * (xx[1] - xx[0]) *
            norm.pdf(x, np.mean(mu_hat), np.std(mu_hat)) for x in xx
        ]
        ax1[0, 0].plot(xx, yy)
        tmu = debug['tmu']
        xx = np.linspace(0, options.tmu_range, options.nbins)
        yy = [tmu.shape[0] * (xx[1] - xx[0]) * chi2.pdf(x, 1) for x in xx]
        ax1[0, 1].plot(xx, yy)
        # Flat reference for the p-value histogram
        xx = np.linspace(0, 1, options.nbins)
        yy = [mu_hat.shape[0] * (xx[1] - xx[0]) for x in xx]
        ax1[1, 0].plot(xx, yy)

    if options.hypo != '':
        model = Model.create(options.model_file)
        if model is None:
            raise ValueError('No valid model definition found in file %s.' %
                             options.model_file)
        try:
            filename, index = options.hypo.split(':')
            index = int(index)
            raster = Raster('data', model=model, filename=filename)
            hypo = list(raster.plr_data.keys())[index]
            print('Using hypothesis %s' % str(hypo.dict(pois_only=True)))
        except Exception as inst:
            print(inst)
            raise ValueError(
                'Invalid hypothesis spec, should be in the format <filename>:<index>'
            ) from inst

    z = options.np_range
    pars = [
        col[len('free_'):] for col in debug.columns
        if col.startswith('free_') and not col.endswith('nll')
    ]
    # squeeze=False keeps ax2 two-dimensional even with a single parameter,
    # so the ax2[row, i] indexing below always works.
    fig2, ax2 = plt.subplots(2, len(pars), figsize=(15, 5), sharey=True,
                             squeeze=False)
    fig2.subplots_adjust(left=0.05, right=0.98)
    if options.reference:
        xx = np.linspace(-z, z, options.nbins)
        yy = [
            mu_hat.shape[0] * (xx[1] - xx[0]) * norm.pdf(x, 0, 1) for x in xx
        ]
    for i, par in enumerate(pars):
        ax2[0, i].set_title(par)
        if 'aux_' + par in debug.columns:
            # NOTE(review): 'x - -aux' adds the aux column; kept exactly as in
            # the original, confirm that the sign is intended.
            free_delta = debug['free_' + par] - -debug['aux_' + par]
            hypo_delta = debug['hypo_' + par] - -debug['aux_' + par]
        else:
            free_delta = debug['free_' + par]
            hypo_delta = debug['hypo_' + par]
        if options.hypo != '':
            print('Shifting distributions of %s by %g' % (par, hypo[par]))
            # Build new Series instead of using -=, which could write the
            # shift back into the `debug` DataFrame columns.
            free_delta = free_delta - hypo[par]
            hypo_delta = hypo_delta - hypo[par]
        ax2[0, i].hist(free_delta, bins=np.linspace(-z, z, options.nbins))
        ax2[1, i].hist(hypo_delta, bins=np.linspace(-z, z, options.nbins))
        if options.reference:
            ax2[0, i].plot(xx, yy)
            ax2[1, i].plot(xx, yy)
        if options.log_scale: ax2[0, i].set_yscale('log')
        if options.log_scale: ax2[1, i].set_yscale('log')
    ax2[0, 0].set_ylabel('free fit')
    ax2[1, 0].set_ylabel('hypothesis fit')

    #free_g = sns.jointplot(free_delta, debug['mu_hat'], kind="kde", xlim=(-z,z), ax=ax2[1,i])
    #hypo_g = sns.jointplot(hypo_delta, debug['mu_hat'], kind="kde", xlim=(-z,z), ax=ax2[2,i])
    #free_g.ax_joint.axhline(0,c='r',ls='--')
    #free_g.ax_joint.axvline(0,c='r',ls='--')
    #hypo_g.ax_joint.axhline(0,c='r',ls='--')
    #hypo_g.ax_joint.axvline(0,c='r',ls='--')

    plt.show()

예제 #8

0

파일 보기

파일: test_unbinned_gen.py 프로젝트: fastprof-hep/fastprof

import numpy as np
import matplotlib.pyplot as plt
from fastprof import Model, ScanSampler, OptiSampler

model = Model.create('models/highMass-1164.json')

# NOTE(review): gen_mu is only printed here; the sampler below is configured
# with expected_pars(0.1) and mu0=0.1 -- confirm this is intended.
gen_mu = 3
print('Will generate the following hypothesis: ', gen_mu)

np.random.seed(131071)
test_hypo = model.expected_pars(0.1)
sampler = OptiSampler(
    model,
    test_hypo=test_hypo,
    mu0=0.1,
    poi_bounds=(0, 2),
    method='scalar',
)
# Generate with argument 1000 and save the result under the name 'test'
dist = sampler.generate(1000)
dist.save('test')

예제 #9

0

파일 보기

from fastprof import Model
import matplotlib.pyplot as plt

# Load the model and generate a dataset at the expected parameters for 10
model = Model.create('run/high_mass_gg_1300.json')
pars = model.expected_pars(10)
data = model.generate_data(pars)

# Plot the model and data, with two parameter variations passed as
# (name, value, colour) tuples, on a logarithmic y axis
plt.ion()
plt.figure(1)
shown_variations = [('dEff', 5, 'r'), ('xi', -10, 'g')]
model.plot(pars, data=data, variations=shown_variations)
plt.yscale('log')

예제 #10

0

파일 보기

파일: fit_fast.py 프로젝트: fastprof-hep/fastprof

def run(argv=None):
    """Fit the fast model to a dataset and optionally test one hypothesis.

    Loads the model, picks the dataset (data file, Asimov specification, or
    the model file itself), runs the minimization and prints the best fit.
    When a hypothesis is given, also prints tmu and, for single-POI models,
    the q~mu results. Finally plots the model at the best-fit parameters.

    Args:
        argv: command-line arguments to parse; None lets argparse read
            sys.argv.

    Raises:
        ValueError: for an invalid model, dataset, Asimov or hypothesis
            specification.
    """
    parser = make_parser()
    # Forward argv so that programmatic callers can supply their own arguments
    options = parser.parse_args(argv)
    if not options:
        parser.print_help()
        return

    model = Model.create(options.model_file)
    if model is None:
        raise ValueError('No valid model definition found in file %s.' %
                         options.model_file)
    if options.regularize is not None:
        model.set_gamma_regularization(options.regularize)
    if options.cutoff is not None: model.cutoff = options.cutoff
    if options.setrange is not None: process_setranges(options.setrange, model)

    if options.data_file:
        data = Data(model).load(options.data_file)
        if data is None:
            raise ValueError('No valid dataset definition found in file %s.' %
                             options.data_file)
        print('Using dataset stored in file %s.' % options.data_file)
    elif options.asimov is not None:
        try:
            sets = process_setvals(options.asimov, model)
        except Exception as inst:
            print(inst)
            raise ValueError("ERROR : invalid POI specification string '%s'." %
                             options.asimov) from inst
        data = model.generate_expected(sets)
        print('Using Asimov dataset with parameters %s' % str(sets))
    else:
        data = Data(model).load(options.model_file)

    if options.hypo is not None:
        try:
            sets = process_setvals(options.hypo, model)
        except Exception as inst:
            print(inst)
            raise ValueError("ERROR : invalid POI specification string '%s'." %
                             options.hypo) from inst
        hypo_pars = model.expected_pars(sets)

    opti = OptiMinimizer().set_pois_from_model(model)
    min_nll = opti.minimize(data)
    min_pars = opti.min_pars
    print('\n== Best-fit: nll = %g @ at parameter values =' % min_nll)
    print(min_pars)

    if options.hypo is not None:
        tmu = opti.tmu(hypo_pars, data, hypo_pars)
        print('\n== Profile-likelihood ratio tmu = %g for hypothesis' % tmu,
              hypo_pars.dict(pois_only=True))
        print('-- Profiled NP values :\n' + str(opti.hypo_pars))
        if len(model.pois) == 1:
            print('\n== Computing the q~mu test statistic')
            # NOTE(review): `asimov` is not used below; the call is kept in
            # case generate_expected() has side effects -- confirm.
            asimov = model.generate_expected(0, NPMinimizer(data))
            calc = QMuTildaCalculator(opti)
            plr_data = calc.compute_fast_q(hypo_pars, data)
            print('best-fit %s = % g' %
                  (model.poi(0).name, opti.free_pars.pois[0]))
            print('tmu         = % g' % plr_data.test_statistics['tmu'])
            print('q~mu        = % g' % plr_data.test_statistics['q~mu'])
            print('pv          = % g' % plr_data.pvs['pv'])
            print('cls         = % g' % plr_data.pvs['cls'])
    plt.ion()
    plt.figure(1)
    model.plot(min_pars, data=data)
    if options.log_scale: plt.yscale('log')

예제 #11

0

파일 보기

def run(argv=None):
    """Scan tmu over a list of hypotheses and report the 1-sigma interval.

    Hypotheses are given either as a range '[poi:]min:max:num[:log]' or as a
    '/'-separated list of POI assignments. The tmu values are computed for
    each hypothesis, the minimum is located (refined by a fit when it is not
    at an edge of the scan), the tmu=1 crossings are printed when exactly two
    are found, and the scan is plotted.

    Args:
        argv: command-line arguments to parse; None lets argparse read
            sys.argv.

    Raises:
        ValueError: for an invalid model, dataset, Asimov or hypothesis
            specification, or an unknown POI name.
    """
    parser = make_parser()
    # Forward argv so that programmatic callers can supply their own arguments
    options = parser.parse_args(argv)
    if not options:
        parser.print_help()
        return

    # Define the model
    model = Model.create(options.model_file)
    if model is None:
        raise ValueError('No valid model definition found in file %s.' %
                         options.model_file)
    if options.regularize is not None:
        model.set_gamma_regularization(options.regularize)
    if options.cutoff is not None: model.cutoff = options.cutoff
    if options.setrange is not None: process_setranges(options.setrange, model)

    # Define the data
    if options.data_file:
        data = Data(model).load(options.data_file)
        if data is None:
            raise ValueError('No valid dataset definition found in file %s.' %
                             options.data_file)
        print('Using dataset stored in file %s.' % options.data_file)
    elif options.asimov is not None:
        try:
            sets = process_setvals(options.asimov, model)
        except Exception as inst:
            print(inst)
            raise ValueError("ERROR : invalid POI specification string '%s'." %
                             options.asimov) from inst
        data = model.generate_expected(sets)
        print('Using Asimov dataset with parameters %s' % str(sets))
    else:
        data = Data(model).load(options.model_file)

    # Parse the hypothesis values.
    # str.find() returns -1 (truthy) when ':' is absent and 0 (falsy) when it
    # is the first character, so a membership test is needed here.
    if ':' in options.hypos:
        try:
            hypo_specs = options.hypos.split(':')
            poi_name = None
            if hypo_specs[-1] == 'log':
                hi = float(hypo_specs[-3])
                lo = float(hypo_specs[-4])
                # Points are lo**e for e from 1 to log_lo(hi), i.e. from lo
                # to hi. The base is read from index -4 because index 0 holds
                # the POI name when one is given.
                hypos = np.logspace(1,
                                    math.log(hi) / math.log(lo),
                                    int(hypo_specs[-2]) + 1, True, lo)
                if len(hypo_specs) == 5: poi_name = hypo_specs[0]
            else:
                hypos = np.linspace(float(hypo_specs[-3]),
                                    float(hypo_specs[-2]),
                                    int(hypo_specs[-1]) + 1)
                if len(hypo_specs) == 4: poi_name = hypo_specs[0]
        except Exception as inst:
            print(inst)
            raise ValueError(
                "Could not parse list of hypothesis values '%s' : expected min:max:num[:log] format"
                % options.hypos) from inst
        if poi_name is not None:
            if poi_name not in model.pois:
                raise ValueError("Unknown POI '%s' in hypothesis definitions" %
                                 poi_name)
        else:
            poi_name = model.poi(0).name
        hypo_sets = [{poi_name: hypo} for hypo in hypos]
    else:
        try:
            hypo_sets = [
                process_setvals(spec, model, match_nps=False)
                for spec in options.hypos.split('/')
            ]
        except Exception as inst:
            print(inst)
            raise ValueError(
                "Could not parse list of hypothesis values '%s' : expected /-separated list of POI assignments"
                % options.hypos) from inst
        # poi_name is needed below for the scan values and the axis labels;
        # default to the first POI, as the range format does.
        # NOTE(review): confirm this is the desired POI for multi-POI lists.
        poi_name = model.poi(0).name
    hypos = [model.expected_pars(sets) for sets in hypo_sets]

    # Compute the tmu values
    calc = TMuCalculator(
        OptiMinimizer(niter=options.iterations).set_pois_from_model(model))
    raster = calc.compute_fast_results(hypos, data)
    hypos = [hypo[poi_name] for hypo in raster.plr_data.keys()]
    tmus = [
        plr_data.test_statistics['tmu']
        for plr_data in raster.plr_data.values()
    ]
    #print(raster)

    # Find the minimal tmu
    min_index = np.argmin(tmus)
    if min_index == 0:
        print(
            'Found minimum at the lower edge of the scan, returning this value'
        )
        min_hypo = hypos[min_index]
    elif min_index == len(tmus) - 1:
        # The last valid index is len(tmus) - 1; comparing against len(tmus)
        # could never match.
        print(
            'Found minimum at the upper edge of the scan, returning this value'
        )
        min_hypo = hypos[min_index]
    else:
        calc.minimizer.minimize(data, list(raster.plr_data.keys())[min_index])
        min_hypo = calc.minimizer.min_pars[poi_name]

    # Compute the tmu=1 crossings and uncertainties
    crossings = raster.interpolate(hypos, tmus, 1)
    if len(crossings) == 2:
        print('1-sigma interval : %g + %g - %g' %
              (min_hypo, crossings[1] - min_hypo, min_hypo - crossings[0]))

    # Plot the result
    plt.ion()
    plt.figure(1)
    plt.plot(hypos, tmus)
    plt.ylim(0, None)
    plt.xlabel(poi_name)
    plt.ylabel('t_mu(%s)' % poi_name)

예제 #12

0

파일 보기

import numpy as np
import matplotlib.pyplot as plt
from fastprof import Model, Data, Parameters, NPMinimizer, OptiMinimizer

filename = 'run/high_mass_gg_1300.json'

# The same file provides both the model definition and the dataset
model = Model.create(filename)
ws_data = Data(model).load(filename)

# Alternative ways of building the data, kept from the original:
#n = np.array([6, 4, 2, 0, 0, 0, 0, 0, 1, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0 ])
#aux_a = np.array([-1])
#aux_b = np.array([])
#data = Data(model, n, aux_a, aux_b)
# data.set_expected(Parameters(mu))

print(model)

# Run the minimization and print the resulting parameters
opti = OptiMinimizer(ws_data)
opti.minimize()
min_pars = opti.min_pars
print(min_pars)

mu = 1
print('mu = ', mu)

# Profile at the fixed value `mu` and print the parameters with their NLL
mini = NPMinimizer(mu, ws_data)
mini.profile()
min1 = mini.min_pars
nll_at_min1 = model.nll(min1, ws_data)
print(min1, nll_at_min1)
print('mu = ', min_pars.mu)

예제 #13

0

파일 보기

test_mus = np.linspace(0,5,6)
n_np = 2
bkg = np.array([1.0, 10.0])  # specific to this case!
sig = np.array([1.0, 0])  # specific to this case!
a = np.array([[0.2], [0.2]]) # specific to this case!
b = np.array([[0.2], [0.2]]) # specific to this case!
#pyhf_data = np.array([ 6.00000000e+00,  9.00000000e+00,  1.29864346e-01, -5.74496450e-01 ])
pyhf_data = np.array([ 2,  10,  0, 0 ])

# Use a context manager so the specification file is closed after reading
with open('fastprof/models/test1.json', 'r') as spec_file:
  spec = json.load(spec_file)
ws = pyhf.Workspace(spec)
pyhf_model = ws.model()

# For each tested mu, profile with the fast model and with pyhf, then print
# both sets of parameters, yields and NLL values for comparison
for mu in test_mus :
  print('-------------------------------------')
  print('Testing the following hypothesis: ', mu)
  model = Model(sig, bkg, alphas = ['acc_sys'], betas = ['bkg_sys'], a=a, b=b)
  data = Data(model).set_from_pyhf_data(pyhf_data, pyhf_model)
  #print(model)
  npm = NPMinimizer(mu, data)
  nll = npm.profile_nll()
  pars, val  = pyhf.infer.mle.fixed_poi_fit(mu, pyhf_data, pyhf_model, return_fitted_val=True)
  print('fast pars |', npm.min_pars.array())
  print('fast nexp |', model.s_exp(npm.min_pars), model.b_exp(npm.min_pars), model.n_exp(npm.min_pars))
  print(nll)
  # Wrap the first three pyhf fit results in a Parameters object so they can
  # be evaluated with the fast model
  pyhf_pars = Parameters(pars[0], pars[1], pars[2])
  print('pyhf data |', pyhf_data)
  print('pyhf pars |', pyhf_pars.array())
  print('pyhf nexp |', model.s_exp(pyhf_pars), model.b_exp(pyhf_pars), model.n_exp(pyhf_pars))
  print(model.nll(pyhf_pars, data))

예제 #14

0

파일 보기

파일: test_fastprof.py 프로젝트: fastprof-hep/fastprof

from fastprof import Model, Data, NPMinimizer, OptiMinimizer
import numpy as np

mu = 3.7
print('mu = ', mu)

# Inputs: n, the signal array scaled by mu, and the background array
n = np.array([7, 12])
unit_sig = np.array([1.0, 0])
sig = unit_sig * mu
bkg = np.array([1.0, 10.0])

# Auxiliary values passed to Data after n
aux_a = np.array([1])
aux_b = np.array([-1.6])

# Arrays passed to the model as `a` and `b`
a = np.array([[0.2], [0.2]])
b = np.array([[0.2], [0.2]])

model = Model(sig, bkg, alphas=['acc_sys'], betas=['bkg_sys'], a=a, b=b)
data = Data(model, n, aux_a, aux_b)

print(model)

# Profile at the fixed mu; profile() is unpacked into three values
minimizer = NPMinimizer(mu, data)
best_a, best_b, best_c = minimizer.profile()
print('hat(a) =', best_a)
print('hat(b) =', best_b)
print('hat(c) =', best_c)

# Derivative checks, disabled in the original:
#der_a, der_b = fastprof.derivatives(model, best_a, best_b)
#der_a, der_b = model.derivatives(best_a, best_b)
#print('dl/da =', der_a)
#print('dl/db =', der_b)

예제 #15

0

파일 보기

def run(argv=None):
  """Plot a model, optionally with data, residuals and NP variations.

  Loads the model and dataset, chooses the parameter values (from
  `options.setval`, from a fit to the data, or with all POIs set to 0), draws
  the model, and then draws the requested nuisance-parameter variations:
  either a single number (the same +/- variation for every NP, one panel per
  NP) or a comma-separated list of var=val[:color] items.

  Args:
    argv: command-line arguments to parse; None lets argparse read sys.argv.

  Raises:
    ValueError: for an invalid model, dataset, Asimov, POI, axis-range or
      variations specification.
    KeyError: if the requested channel is not in the model.
  """
  parser = make_parser()
  # Forward argv so that programmatic callers can supply their own arguments
  options = parser.parse_args(argv)
  if not options:
    parser.print_help()
    return

  model = Model.create(options.model_file)
  if model is None: raise ValueError('No valid model definition found in file %s.' % options.model_file)
  if options.channel is not None and options.channel not in model.channels(): raise KeyError('Channel %s not found in model.' % options.channel)

  if options.data_file is not None:
    data = Data(model).load(options.data_file)
    if data is None: raise ValueError('No valid dataset definition found in file %s.' % options.data_file)
    print('Using dataset stored in file %s.' % options.data_file)
  elif options.asimov is not None:
    try:
      sets = process_setvals(options.asimov, model)
      data = model.generate_expected(sets)
    except Exception as inst:
      print(inst)
      raise ValueError("Cannot define an Asimov dataset from options '%s'." % options.asimov) from inst
    print('Using Asimov dataset with POIs %s.' % str(sets))
  else:
    data = Data(model).load(options.model_file)

  if options.setval is not None:
    try:
      poi_dict = process_setvals(options.setval, model, match_nps=False)
    except Exception as inst:
      print(inst)
      raise ValueError("ERROR : invalid POI specification string '%s'." % options.setval) from inst
    pars = model.expected_pars(poi_dict)
    if options.profile:
      mini = OptiMinimizer()
      mini.profile_nps(pars, data)
      print('Minimum: nll = %g @ parameter values : %s' % (mini.min_nll, mini.min_pars))
      pars = mini.min_pars
  elif data is not None and options.profile:
    mini = OptiMinimizer().set_pois_from_model(model)
    mini.minimize(data)
    pars = mini.min_pars
  else:
    pars = model.expected_pars([0]*model.npois)

  xmin = None
  xmax = None
  ymin = None
  ymax = None
  if options.x_range is not None:
    try:
      xmin, xmax = [float(p) for p in options.x_range.split(',')]
    except Exception as inst:
      print(inst)
      raise ValueError('Invalid X-axis range specification %s, expected x_min,x_max' % options.x_range) from inst

  if options.y_range is not None:
    try:
      ymin, ymax = [float(p) for p in options.y_range.split(',')]
    except Exception as inst:
      print(inst)
      raise ValueError('Invalid Y-axis range specification %s, expected y_min,y_max' % options.y_range) from inst

  plt.ion()
  if not options.residuals:
    plt.figure(1, figsize=(8, 8), dpi=96)
    model.plot(pars, data=data, labels=options.variations is None)
    if options.plot_without is not None or options.plot_alone is not None:
      model.plot(pars, only=options.plot_alone, exclude=options.plot_without, labels=options.variations is None)
    if options.log_scale: plt.yscale('log')
    if xmin is not None: plt.xlim(xmin, xmax)
    if ymin is not None: plt.ylim(ymin, ymax)
  else:
    # Two stacked panels: the model on top, the residuals below
    fig1, ax1 = plt.subplots(nrows=2, ncols=1, figsize=(8, 8), dpi=96)
    model.plot(pars, data=data, canvas=ax1[0])
    if options.log_scale: ax1[0].set_yscale('log')
    if xmin is not None: ax1[0].set_xlim(xmin, xmax)
    if ymin is not None: ax1[0].set_ylim(ymin, ymax)
    model.plot(pars, data=data, canvas=ax1[1], residuals=options.residuals, labels=options.variations is None)
  if options.output_file is not None: plt.savefig(options.output_file)

  variations = None
  colors_pos = ['purple', 'green', 'darkblue', 'lime']
  colors_neg = ['darkred', 'red', 'orange', 'magenta']
  if options.variations is not None:
    # First try the single-number format: the same variation for all NPs
    try:
      var_val = float(options.variations)
      variations = 'all'
    except ValueError:
      pass
    if variations is None:
      # Otherwise parse a comma-separated list of var=val[:color] items
      variations = []
      try:
        for spec in options.variations.split(','):
          specfields = spec.split(':')
          varval = specfields[0]
          color = specfields[1] if len(specfields) == 2 else None
          var, val = varval.split('=')
          try:
            val = float(val)
          except ValueError as err:
            raise ValueError('Invalid numerical value %s.' % val) from err
          # Validation and append must run for every spec, i.e. inside the
          # loop; otherwise only the last item would be recorded.
          if var not in model.nps:
            raise KeyError('Parameter %s is not defined in the model.' % var)
          colors = colors_pos if val > 0 else colors_neg
          if color is None: color = colors[len(variations) % len(colors)]
          variations.append((var, val, color,))
      except Exception as inst:
        print(inst)
        raise ValueError('Invalid variations specification %s : should be a comma-separated list of var=val[:color] items, or a single number' % options.variations) from inst

  if variations == 'all':
    # One panel per NP on a roughly square grid; unused panels are removed
    n1 = math.ceil(math.sqrt(model.nnps))
    n2 = math.ceil(model.nnps/n1)
    fig_nps, ax_nps = plt.subplots(nrows=n1, ncols=n2, figsize=(18, 12), dpi=96)
    for i in range(len(model.nps), n1*n2): fig_nps.delaxes(ax_nps.flatten()[i])
    for par, ax in zip(model.nps, ax_nps.flatten()):
      model.plot(pars, data=data, variations=[(par, var_val, 'r'), (par, -var_val, 'g')], canvas=ax)
      if options.plot_without is not None or options.plot_alone is not None:
        model.plot(pars, variations=[(par, var_val, 'r'), (par, -var_val, 'g')], canvas=ax, only=options.plot_alone, exclude=options.plot_without)
      if options.log_scale: ax.set_yscale('log')
      if xmin is not None: ax.set_xlim(xmin, xmax)
      if ymin is not None: ax.set_ylim(ymin, ymax)
  elif variations is not None:
    plt.figure(1)
    model.plot(pars, variations=variations)
    if options.plot_without is not None or options.plot_alone is not None:
      model.plot(pars, variations=variations, only=options.plot_alone, exclude=options.plot_without)
    if options.log_scale: plt.yscale('log')
  if options.output_file is not None:
    # Saved under <name>_variations<ext>, next to the main output file
    split_name = os.path.splitext(options.output_file)
    plt.savefig(split_name[0] + '_variations' + split_name[1])

예제 #16

0

파일 보기

def run(argv=None):
    """Plot linearity checks of the model against validation data.

    For every requested bin, one figure is drawn with one panel per nuisance
    parameter. Each panel compares the relative yield variations stored in
    the validation data with the model prediction, computed with
    `use_linear_nps` switched on and off. Optional 'inversion' panels show
    minus the squared relative variation. Each figure is saved unless
    `options.output_file` is the empty string.

    Args:
        argv: command-line arguments to parse; None lets argparse read
            sys.argv.

    Raises:
        ValueError: for an invalid bin list, y-range or model file.
        KeyError: if the requested channel or sample is not found.
    """
    parser = make_parser()

    # Forward argv so that programmatic callers can supply their own arguments
    options = parser.parse_args(argv)
    if not options:
        parser.print_help()
        sys.exit(0)

    try:
        bins = [int(b) for b in options.bins.split(',')]
    except Exception as inst:
        print(inst)
        raise ValueError(
            'Invalid bin specification %s : the format should be bin1,bin2,...'
            % options.bins) from inst

    if options.yrange != '':
        try:
            y_min, y_max = [float(p) for p in options.yrange.split(',')]
        except Exception as inst:
            print(inst)
            raise ValueError(
                'Invalid range specification %s, expected y_min,y_max' %
                options.yrange) from inst

    model = Model().load(options.model_file)
    if model is None:
        raise ValueError('No valid model definition found in file %s.' %
                         options.model_file)
    if options.cutoff is not None: model.cutoff = options.cutoff

    # Use the named channel/sample if given, otherwise the first one
    if options.channel is not None:
        channel = model.channel(options.channel)
        if not channel:
            raise KeyError('Channel %s not found in model.' % options.channel)
    else:
        channel = list(model.channels.values())[0]

    if options.sample is not None:
        sample = channel.sample(options.sample)
        if not sample:
            raise KeyError('Sample %s not found in channel %s.' %
                           (options.sample, channel.name))
    else:
        sample = list(channel.samples.values())[0]

    if options.validation_file is not None:
        validation_file = options.validation_file
    else:
        # Default: <model name>_validation<ext> next to the model file
        split_name = os.path.splitext(options.model_file)
        validation_file = split_name[0] + '_validation' + split_name[1]

    data = ValidationData(model, validation_file)
    print('Validating for POI value %s = %g' % (model.poi(0).name, data.poi))
    plt.ion()
    # Roughly square grid with one panel per nuisance parameter
    nplots = model.nnps
    nc = math.ceil(math.sqrt(nplots))
    nr = math.ceil(nplots / nc)

    cont_x = np.linspace(data.points[0], data.points[-1], 100)

    pars = model.expected_pars(data.poi)
    channel_offset = model.channel_offsets[channel.name]
    sample_index = model.sample_indices[sample.name]
    # Nominal yields of the selected sample in the bins of the chosen channel
    nexp0 = model.n_exp(pars)[sample_index,
                              channel_offset:channel_offset + channel.nbins()]
    ax_vars = []
    ax_invs = []

    def nexp_var(pars, par, x):
        """Return the selected yields after setting parameter `par` to `x`."""
        return model.n_exp(pars.set(
            par, x))[sample_index,
                     channel_offset:channel_offset + channel.nbins()]

    for b in bins:
        fig = plt.figure(figsize=(8, 8), dpi=96)
        fig.suptitle('Linearity checks for sample %s, bin [%g, %g]' %
                     (sample.name, channel.bins[b]['lo_edge'],
                      channel.bins[b]['hi_edge']))
        gs = gridspec.GridSpec(nrows=nr,
                               ncols=nc,
                               wspace=0.3,
                               hspace=0.3,
                               top=0.9,
                               bottom=0.05,
                               left=0.1,
                               right=0.95)
        for i, par in enumerate(model.nps.keys()):
            if options.inversion_plots:
                # Split the cell into a variation panel and an inversion panel
                sgs = gridspec.GridSpecFromSubplotSpec(2,
                                                       1,
                                                       subplot_spec=gs[i // nc,
                                                                       i % nc],
                                                       wspace=0.1,
                                                       hspace=0.1)
            else:
                sgs = [gs[i // nc, i % nc]]
            pars = model.expected_pars(data.poi)
            model.use_linear_nps = True
            vars_lin = [
                nexp_var(pars, par, x)[b] / nexp0[b] - 1 for x in cont_x
            ]
            rvar_lin = [
                -((nexp_var(pars, par, x)[b] - nexp0[b]) / nexp0[b])**2
                for x in cont_x
            ]
            model.use_linear_nps = False
            vars_nli = [
                nexp_var(pars, par, x)[b] / nexp0[b] - 1 for x in cont_x
            ]
            rvar_nli = [
                -((nexp_var(pars, par, x)[b] - nexp0[b]) / nexp0[b])**2
                for x in cont_x
            ]

            ax_var = fig.add_subplot(sgs[0])
            ax_var.set_title(par)
            ax_var.plot(
                data.points,
                data.variations[channel.name][par][sample_index, b, :] - 1,
                'ko')
            # The original passed the format 's' together with marker='o';
            # the keyword wins (with a Matplotlib warning), so the effective
            # style is round markers without a line, i.e. format 'o'.
            ax_var.plot([0], [0], 'o', color='purple')
            ax_var.plot(sample.pos_vars[par],
                        sample.pos_imps[par][:, b],
                        'o',
                        color='red')
            ax_var.plot(sample.neg_vars[par],
                        sample.neg_imps[par][:, b],
                        'o',
                        color='red')
            ax_var.plot(cont_x, vars_lin, 'r--')
            ax_vars.append(ax_var)
            if not options.no_nli: ax_var.plot(cont_x, vars_nli, 'b')
            if options.yrange: ax_var.set_ylim(y_min, y_max)
            if options.inversion_plots:
                ax_inv = fig.add_subplot(sgs[1], sharex=ax_var)
                ax_inv.plot(cont_x, rvar_lin, 'r--')
                ax_inv.plot(cont_x, rvar_nli, 'b')
                if options.inv_range: ax_inv.set_ylim(-options.inv_range, 0)
                ax_invs.append(ax_inv)
        # FigureCanvasBase.set_window_title was deprecated in Matplotlib 3.4
        # and later removed; the title is set through the figure manager.
        fig.canvas.manager.set_window_title(
            'Linearity checks for sample %s, bin  %g' % (sample.name, b))

        if options.output_file != '':
            if options.output_file is not None:
                output_file = options.output_file
            else:
                split_name = os.path.splitext(options.model_file)
                output_file = split_name[0] + '-%s-bin_%d.png' % (
                    options.sample, b)
            plt.savefig(output_file)

예제 #17

0

파일 보기

def run(argv = None) :
  """Compute upper limits on the first POI from toy sampling distributions.

  The fast model and a dataset are loaded, asymptotic results are computed for
  each hypothesis, toys are generated for both the tested (CLs+b) and the
  POI=0 (CLb) generation hypotheses, and the sampling-based p-values are used
  to extract limits. Results are printed, optionally plotted, and written to
  `<output_file>_results.json`.

  Args:
    argv: list of command-line arguments passed to the parser; `None` is
      forwarded unchanged to `parse_args`.

  Raises:
    ValueError: if the model, hypotheses, dataset or test statistic
      specification is invalid.
  """
  parser = make_parser()
  options = parser.parse_args(argv)
  # NOTE(review): if this is an argparse Namespace it is always truthy, so this
  # branch looks unreachable -- kept as-is, confirm before removing.
  if not options :
    parser.print_help()
    sys.exit(0)

  model = Model.create(options.model_file)
  if model is None : raise ValueError('No valid model definition found in file %s.' % options.model_file)
  if options.regularize is not None : model.set_gamma_regularization(options.regularize)
  if options.cutoff is not None : model.cutoff = options.cutoff

  try :
    hypos = [ Parameters(process_setvals(spec, model), model=model) for spec in options.hypos.split(':') ]
  except Exception as inst :
    print(inst)
    raise ValueError("Could not parse list of hypothesis values '%s' : expected colon-separated list of variable assignments" % options.hypos) from inst

  # Dataset selection: explicit data file, then Asimov specification, then the data stored in the model file
  if options.data_file :
    data = Data(model).load(options.data_file)
    if data is None : raise ValueError('No valid dataset definition found in file %s.' % options.data_file)
    print('Using dataset stored in file %s.' % options.data_file)
  elif options.asimov is not None :
    try:
      sets = [ v.replace(' ', '').split('=') for v in options.asimov.split(',') ]
      data = model.generate_expected(sets)
    except Exception as inst :
      print(inst)
      raise ValueError("Cannot define an Asimov dataset from options '%s'." % options.asimov) from inst
    print('Using Asimov dataset with POIs %s.' % str(sets))
  else :
    data = Data(model).load(options.model_file)
    # The dataset is read from the model file here, so that is the file to report on failure
    if data is None : raise ValueError('No valid dataset definition found in file %s.' % options.model_file)
    print('Using dataset stored in file %s.' % options.model_file)

  # Parameter bounds, specified as name1:[min]:[max],name2:[min]:[max],... (an empty field means no bound)
  gen_bounds = []
  if options.bounds :
    bound_specs = options.bounds.split(',')
    try :
      for spec in bound_specs :
        fields = spec.split(':')
        gen_bounds.append(ParBound(fields[0], float(fields[1]) if fields[1] != '' else None, float(fields[2]) if fields[2] != '' else None))
    except Exception :
      print('ERROR: could not parse parameter bound specification "%s", expected in the form name1:[min]:[max],name2:[min]:[max],...' % options.bounds)
      raise  # bare raise keeps the original traceback

  if options.test_statistic == 'q~mu' :
    if len(model.pois) > 1 : raise ValueError('Currently not supporting more than 1 POI for this operation')
    calc = QMuTildaCalculator(OptiMinimizer().set_pois_from_model(model))
  elif options.test_statistic == 'q_mu' :
    if len(model.pois) > 1 : raise ValueError('Currently not supporting more than 1 POI for this operation')
    calc = QMuCalculator(OptiMinimizer().set_pois_from_model(model))
  else :
    raise ValueError('Unknown test statistic %s' % options.test_statistic)

  # Check the fastprof CLs against the ones in the reference: in principle this should match well,
  # otherwise it means what we generate isn't exactly comparable to the observation, which would be a problem...
  if options.ntoys > 0 :
    print('Check CL computed from fast model against those of the full model (a large difference would require to correct the sampling distributions) :')
  faster = calc.compute_fast_results(hypos, data)
  faster.print(verbosity = options.verbosity)
  if options.ntoys == 0 : return

  if options.seed is not None : np.random.seed(options.seed)
  niter = options.iterations
  samplers_clsb = []
  samplers_cl_b = []

  print('Running with POI %s, bounds %s, and %d iteration(s).' % (str(calc.minimizer.init_pois.dict(pois_only=True)), str(calc.minimizer.bounds), niter))

  # One pair of samplers per hypothesis: toys generated at the tested hypothesis (CLs+b)
  # and at the same hypothesis with the first POI set to 0 (CLb)
  for fast_plr_data in faster.plr_data.values() :
    test_hypo = fast_plr_data.hypo
    gen_hypo = test_hypo
    tmu_A0 = fast_plr_data.test_statistics['tmu_A0']
    gen0_hypo = gen_hypo.clone().set(model.poi(0).name, 0)
    clsb = OptiSampler(model, test_hypo, print_freq=options.print_freq, bounds=gen_bounds, debug=options.debug, niter=niter, tmu_Amu=tmu_A0, tmu_A0=tmu_A0, gen_hypo=gen_hypo)
    cl_b = OptiSampler(model, test_hypo, print_freq=options.print_freq, bounds=gen_bounds, debug=options.debug, niter=niter, tmu_Amu=tmu_A0, tmu_A0=tmu_A0, gen_hypo=gen0_hypo)
    clsb.minimizer.set_pois_from_model(model)
    cl_b.minimizer.set_pois_from_model(model)
    samplers_clsb.append(clsb)
    samplers_cl_b.append(cl_b)

  opti_samples = CLsSamples(
    Samples(samplers_clsb, options.output_file),
    Samples(samplers_cl_b, options.output_file + '_clb')
  ).generate_and_save(options.ntoys, break_locks=options.break_locks)

  if options.truncate_dist : opti_samples.cut(None, options.truncate_dist)

  poi = faster.pois()[list(faster.pois())[0]]

  for plr_data in faster.plr_data.values() :
    plr_data.pvs['sampling_pv' ] = opti_samples.clsb.pv(plr_data.hypo, plr_data.pvs['pv'], with_error=True)
    plr_data.pvs['sampling_clb'] = opti_samples.cl_b.pv(plr_data.hypo, plr_data.pvs['pv'], with_error=True)
    plr_data.pvs['sampling_cls'] = opti_samples.pv     (plr_data.hypo, plr_data.pvs['pv'], with_error=True)

  if options.bands :
    sampling_bands = opti_samples.bands(options.bands)
    for band in np.linspace(-options.bands, options.bands, 2*options.bands + 1) :
      for plr_data, band_point in zip(faster.plr_data.values(), sampling_bands[band]) : plr_data.pvs['sampling_cls_%+d' % band] = band_point

  def limit(rast, key, description, with_error=False) :
    """Print and return the crossing of p-value `key` with 1 - CL (a tuple if `with_error`)."""
    limit_result = rast.contour(key, 1 - options.cl, with_error=with_error)
    limit_value = limit_result if not with_error else limit_result[0]
    error_str = ''
    if with_error :
      # Error taken as half the difference between the second and third entries of the result
      limit_error = (limit_result[1] - limit_result[2])/2 if limit_result[1] is not None and limit_result[2] is not None else None
      error_str = '+/- %g' % limit_error if limit_error is not None else ''
    if limit_value is not None : print(description + ' : UL(%g%%) = %g %s (N = %s)' % (100*options.cl, limit_value, error_str, str(model.n_exp(model.expected_pars(limit_value)).sum(axis=1))) )
    return limit_result

  faster.print(keys=[ 'sampling_pv', 'sampling_cls', 'sampling_clb' ], verbosity=1)

  limit_asy_full_clsb = limit(faster, 'pv'          , 'Asymptotics, fast model, CLsb')
  limit_sampling_clsb = limit(faster, 'sampling_pv' , 'Sampling   , fast model, CLsb', with_error=True)
  limit_asy_full_cls  = limit(faster, 'cls'         , 'Asymptotics, fast model, CLs ')
  limit_sampling_cls  = limit(faster, 'sampling_cls', 'Sampling   , fast model, CLs ', with_error=True)

  if options.bands :
    limit_sampling_cls_bands = {}
    for band in np.linspace(-options.bands, options.bands, 2*options.bands + 1) :
      limit_sampling_cls_bands[band] = limit(faster, 'sampling_cls_%+d' % band, 'Expected limit band, fast model, %+d sigma band' % band)

  # Plot results
  if not options.batch_mode :
    # x-axis values and PLR entries are shared by every curve below
    poi_values = [ hypo[poi.name] for hypo in faster.plr_data ]
    plr_values = list(faster.plr_data.values())

    plt.ion()
    fig1 = plt.figure(1)
    plt.suptitle('$CL_{s+b}$')
    plt.xlabel(model.poi(0).name)
    plt.ylabel('$CL_{s+b}$')
    plt.fill_between(poi_values,
                     [ plr_data.pvs['sampling_pv'][0] + plr_data.pvs['sampling_pv'][1] for plr_data in plr_values ],
                     [ plr_data.pvs['sampling_pv'][0] - plr_data.pvs['sampling_pv'][1] for plr_data in plr_values ], facecolor='b', alpha=0.5)
    plt.plot(poi_values, [ plr_data.pvs['pv']             for plr_data in plr_values ], options.marker + 'r:' , label = 'Asymptotics')
    plt.plot(poi_values, [ plr_data.pvs['sampling_pv'][0] for plr_data in plr_values ], options.marker + 'b-' , label = 'Sampling')

    plt.legend(loc=1) # 1 -> upper right
    plt.axhline(y=1 - options.cl, color='k', linestyle='dotted')

    fig2 = plt.figure(2)
    plt.suptitle('$CL_s$')
    plt.xlabel(model.poi(0).name)
    plt.ylabel('$CL_s$')
    if options.bands :
      opti_samples.plot_bands(options.bands)
    plt.fill_between(poi_values,
                     [ plr_data.pvs['sampling_cls'][0] + plr_data.pvs['sampling_cls'][1] for plr_data in plr_values ],
                     [ plr_data.pvs['sampling_cls'][0] - plr_data.pvs['sampling_cls'][1] for plr_data in plr_values ], facecolor='b', alpha=0.5)
    plt.plot(poi_values, [ plr_data.pvs['cls']             for plr_data in plr_values ], options.marker + 'r:' , label = 'Asymptotics')
    plt.plot(poi_values, [ plr_data.pvs['sampling_cls'][0] for plr_data in plr_values ], options.marker + 'b-' , label = 'Sampling')
    plt.legend(loc=1) # 1 -> upper right
    plt.axhline(y=1 - options.cl, color='k', linestyle='dotted')
    fig1.savefig(options.output_file + '_clsb.pdf')
    fig2.savefig(options.output_file + '_cls.pdf')
    fig2.savefig(options.output_file + '_cls.png')
    plt.show()

  # Summary of the limits, written next to the sample files
  jdict = {}
  jdict['cl'] = options.cl
  jdict['poi_name'] = model.poi(0).name
  jdict['poi_unit'] = model.poi(0).unit
  jdict['limit_sampling_CLs']    = limit_sampling_cls[0]
  jdict['limit_sampling_CLs_up'] = limit_sampling_cls[1]
  jdict['limit_sampling_CLs_dn'] = limit_sampling_cls[2]
  jdict['limit_asymptotics_CLs'] = limit_asy_full_cls
  jdict['limit_sampling_CLsb']    = limit_sampling_clsb[0]
  jdict['limit_sampling_CLsb_up'] = limit_sampling_clsb[1]
  jdict['limit_sampling_CLsb_dn'] = limit_sampling_clsb[2]
  jdict['limit_asymptotics_CLsb'] = limit_asy_full_clsb

  if options.bands :
    for band in np.linspace(-options.bands, options.bands, 2*options.bands + 1) :
      jdict['limit_sampling_CLs_expected_band_%+d' % band] = limit_sampling_cls_bands[band]

  with open(options.output_file + '_results.json', 'w') as fd:
    json.dump(jdict, fd, ensure_ascii=True, indent=3)

예제 #18

0

파일 보기

파일: dump_samples.py 프로젝트: fastprof-hep/fastprof

def run(argv=None):
    """Histogram a file of stored samples, optionally converted to a test statistic.

    The samples are read with `np.load` from the first `filename` option. When
    the `t_value` option is 'q_mu' or 'q~mu', each sample is converted with
    the matching calculator's `asymptotic_ts` for the hypothesis selected by
    the `hypo` option; otherwise the raw samples are histogrammed. A reference
    curve is overlaid when the `reference` option is set, and the figure is
    saved when `output_file` is non-empty.

    Args:
        argv: list of command-line arguments passed to the parser; `None`
            is forwarded unchanged to `parse_args`.

    Raises:
        ValueError: on an invalid X-axis range, model file or hypothesis
            specification, or when a test-statistic conversion is requested
            without a hypothesis.
    """
    parser = make_parser()

    # Forward argv so that programmatic callers are not silently ignored
    options = parser.parse_args(argv)
    # NOTE(review): if this is an argparse Namespace it is always truthy, so
    # this branch looks unreachable -- kept as-is, confirm before removing.
    if not options:
        parser.print_help()
        sys.exit(0)

    samples = np.load(options.filename[0])

    if options.x_range:
        try:
            x_min, x_max = [float(p) for p in options.x_range.split(',')]
        except Exception as inst:
            print(inst)
            raise ValueError(
                'Invalid X-axis range specification %s, expected x_min,x_max' %
                options.x_range) from inst
    elif options.t_value == '':
        # Default range when the raw samples are plotted
        x_min, x_max = 0, 1
    else:
        # Default range when plotting a converted test statistic
        x_min, x_max = -10, 10

    plr_data = None
    if options.hypo != '':
        model = Model.create(options.model_file)
        if model is None:
            raise ValueError('No valid model definition found in file %s.' %
                             options.model_file)
        try:
            filename, index = options.hypo.split(':')
            index = int(index)
            raster = Raster('data', model=model, filename=filename)
            plr_data = list(raster.plr_data.values())[index]
            hypo = list(raster.plr_data.keys())[index]
            print('Using hypothesis %s' % str(hypo.dict(pois_only=True)))
        except Exception as inst:
            print(inst)
            raise ValueError(
                'Invalid hypothesis spec, should be in the format <filename>:<index>'
            ) from inst

    plt.ion()
    if options.log_scale: plt.yscale('log')
    plt.suptitle(options.filename[0])

    use_test_statistic = options.t_value in ('q_mu', 'q~mu')
    if use_test_statistic:
        if plr_data is None:
            raise ValueError(
                'A signal hypothesis must be provided (--hypo option) to convert to %s values'
                % options.t_value)
        calculator = QMuCalculator if options.t_value == 'q_mu' else QMuTildaCalculator
        q = calculator.make_q(plr_data)
        data = np.array([q.asymptotic_ts(pv) for pv in samples])
        plt.hist(data[:], bins=options.nbins, range=[x_min, x_max])
    else:
        plt.hist(samples[:], bins=options.nbins, range=[x_min, x_max])
    plt.show()

    if options.reference:
        xx = np.linspace(x_min, x_max, options.nbins + 1)
        dx = xx[1] - xx[0]
        bin_norm = len(samples)
        if use_test_statistic:
            # Expected bin contents: asymptotic PDF integrated over each bin
            yy = [
                bin_norm * quad(q.asymptotic_pdf, x, x + dx)[0]
                for x in xx[:-1]
            ]
        else:
            # Flat reference: the same expected content in every bin
            yy = [bin_norm * dx for x in xx[:-1]]
        plt.plot(xx[:-1] + dx / 2, yy)
        plt.ylim(1E-1)

    if options.output_file != '': plt.savefig(options.output_file)