def plotdata_from_histoColumn(h):
    """Build a plotutil.plotdata instance from a histogram database blob.

    ``h`` is a raw byte string of doubles laid out as
    ``(xmin, xmax, <skipped>, bin contents ..., <skipped>)``; the two skipped
    entries are not plotted.
    # NOTE(review): the skipped entries at index 2 and -1 are presumably
    # underflow/overflow -- confirm against the database writer.
    """
    values = array.array('d')
    values.fromstring(h)
    pdata = plotutil.plotdata()
    xmin, xmax = values[0], values[1]
    nbins = len(values) - 4
    binwidth = (xmax - xmin) / nbins
    # lower bin edges for each of the nbins bins:
    pdata.x = []
    for bin_index in range(nbins):
        pdata.x.append(xmin + bin_index * binwidth)
    pdata.y = values[3:-1]
    pdata.color = '#000000'
    return pdata
def plotdata_from_histoColumn(h):
    """Decode a histogram blob ``h`` into a black plotutil.plotdata object.

    The blob is a packed sequence of doubles: the first two entries are the
    histogram range (xmin, xmax), entries ``[3:-1]`` are the bin contents;
    entry 2 and the last entry are excluded from the plot data.
    """
    raw = array.array('d')
    raw.fromstring(h)
    lo = raw[0]
    hi = raw[1]
    n_bins = len(raw) - 4
    width = (hi - lo) / n_bins
    result = plotutil.plotdata()
    result.x = [lo + k * width for k in range(n_bins)]
    result.y = raw[3:-1]
    result.color = '#000000'
    return result
def nll_scan(model, input, n, npoints=100, range = [0.0, 3.0], adaptive_startvalues = True, parameter = 'beta_signal',
             signal_process_groups = None, nuisance_constraint = None, nuisance_prior_toys = None, signal_prior = 'flat', options = None):
    """
    Evaluate the profile likelihood function in ``parameter`` at the values specified by ``npoints`` and ``range``.

    For the parameters ``model``, ``n``, ``signal_process_groups``, ``nuisance_constraint``, ``nuisance_prior_toys``,
    ``signal_prior``, and ``options`` refer to :ref:`common_parameters`.

    Parameters:

    * ``parameter`` - the model parameter the profile likelihood function is defined in; all other parameters will be "minimized out"
    * ``npoints``, ``range`` - the number of points and the range for ``parameter`` to evaluate the profile likelihood function at:
      ``parameter`` is fixed in turn to values ``range[0] + (range[1] - range[0]) / npoints * i`` -- where ``i`` runs from 0 to
      ``npoints - 1`` --, and all other parameters are "minimized out".
    * ``adaptive_startvalues`` - if ``True``, the minimizer will use the result of the previous fit as start value for the fit at the
      next scan point. This usually increases convergence speed, but might cause problems for some models in which the minimization
      stops too early. This can lead to an artificial structure of the profile likelihood function. If set to ``False``, the fit
      always starts at the same point (which corresponds to the a priori most probable parameter value).

    The return value is a nested dictionary: the first-level key is the signal process group id (see :ref:`what_is_signal` for a
    definition). The value is a list of length ``n`` of :class:`~theta_auto.plotutil.plotdata` instances, containing the negative
    profile log-likelihood values in the scan. In addition to those equidistant points of the scan, the point at the minimum of
    the negative log-likelihood is added, with a y-value of exactly 0.0.
    """
    # NOTE: the default for ``range`` is a mutable list shared across calls; it is
    # never mutated here, only forwarded to NllScanProducer.
    if signal_process_groups is None: signal_process_groups = model.signal_process_groups
    if options is None: options = Options()
    result = {}
    for spid, signal_processes in signal_process_groups.iteritems():
        r = Run(model, signal_processes, signal_prior = signal_prior, input = input, n = n,
                producers = [NllScanProducer(model, signal_processes, nuisance_constraint, npoints = npoints, range = range,
                                             parameter = parameter, signal_prior = signal_prior, adaptive_startvalues = adaptive_startvalues)],
                nuisance_prior_toys = nuisance_prior_toys)
        r.run_theta(options)
        res = r.get_products(['nllscan__nll', 'nllscan__maxl'])
        histos = map(histogram_from_dbblob, res['nllscan__nll'])
        result[spid] = []
        for h, maxl in zip(histos, res['nllscan__maxl']):
            pd = plotutil.plotdata()
            pd.set_histogram(h)
            # insert the position of the minimum with y = 0.0, keeping pd.x sorted.
            # Bug fix: test the index bound BEFORE indexing; the old order
            # (pd.x[imin] < maxl and imin < len(pd.x)) raised an IndexError
            # whenever maxl was >= all scan points.
            imin = 0
            while imin < len(pd.x) and pd.x[imin] < maxl:
                imin += 1
            pd.x.insert(imin, maxl)
            pd.y.insert(imin, 0.0)
            result[spid].append(pd)
    return result
def nll_scan(model, input, n, npoints=100, range=[0.0, 3.0], adaptive_startvalues=True, parameter='beta_signal',
             signal_process_groups=None, nuisance_constraint=None, nuisance_prior_toys=None, signal_prior='flat', options=None):
    """
    Evaluate the profile likelihood function in ``parameter`` at the values specified by ``npoints`` and ``range``.

    For the parameters ``model``, ``n``, ``signal_process_groups``, ``nuisance_constraint``, ``nuisance_prior_toys``,
    ``signal_prior``, and ``options`` refer to :ref:`common_parameters`.

    Parameters:

    * ``parameter`` - the model parameter the profile likelihood function is defined in; all other parameters will be "minimized out"
    * ``npoints``, ``range`` - the number of points and the range for ``parameter`` to evaluate the profile likelihood function at:
      ``parameter`` is fixed in turn to values ``range[0] + (range[1] - range[0]) / npoints * i`` -- where ``i`` runs from 0 to
      ``npoints - 1`` --, and all other parameters are "minimized out".
    * ``adaptive_startvalues`` - if ``True``, the minimizer will use the result of the previous fit as start value for the fit at the
      next scan point. This usually increases convergence speed, but might cause problems for some models in which the minimization
      stops too early. This can lead to an artificial structure of the profile likelihood function. If set to ``False``, the fit
      always starts at the same point (which corresponds to the a priori most probable parameter value).

    The return value is a nested dictionary: the first-level key is the signal process group id (see :ref:`what_is_signal` for a
    definition). The value is a list of length ``n`` of :class:`~theta_auto.plotutil.plotdata` instances, containing the negative
    profile log-likelihood values in the scan. In addition to those equidistant points of the scan, the point at the minimum of
    the negative log-likelihood is added, with a y-value of exactly 0.0.
    """
    # NOTE: the mutable-list default for ``range`` is shared between calls but is
    # only read here, never mutated.
    if signal_process_groups is None: signal_process_groups = model.signal_process_groups
    if options is None: options = Options()
    result = {}
    for spid, signal_processes in signal_process_groups.iteritems():
        r = Run(model, signal_processes, signal_prior=signal_prior, input=input, n=n,
                producers=[
                    NllScanProducer(model, signal_processes, nuisance_constraint, npoints=npoints, range=range,
                                    parameter=parameter, signal_prior=signal_prior, adaptive_startvalues=adaptive_startvalues)
                ],
                nuisance_prior_toys=nuisance_prior_toys)
        r.run_theta(options)
        res = r.get_products(['nllscan__nll', 'nllscan__maxl'])
        histos = map(histogram_from_dbblob, res['nllscan__nll'])
        result[spid] = []
        for h, maxl in zip(histos, res['nllscan__maxl']):
            pd = plotutil.plotdata()
            pd.set_histogram(h)
            # insert the minimum at y = 0.0 at its sorted position in pd.x.
            # Fixed: bounds check must come before the element access, otherwise an
            # IndexError occurs when maxl lies at/beyond the last scan point.
            imin = 0
            while imin < len(pd.x) and pd.x[imin] < maxl:
                imin += 1
            pd.x.insert(imin, maxl)
            pd.y.insert(imin, 0.0)
            result[spid].append(pd)
    return result
def model_plots(model, all_nominal_templates = False, shape_templates = False):
    """Write overview plots for ``model`` plus an html report fragment.

    Creates png files in ``<config.workdir>/plots`` (directory is created if
    missing) and writes the html snippet ``model_plots.thtml`` to
    ``config.workdir``:

    * per observable: a stack plot of all processes (backgrounds stacked with
      fill colors, signals overlaid), plus the data histogram with sqrt(N)
      error bars if the model has data
    * if ``all_nominal_templates``: one plot per (observable, process) nominal
      template; per observable also a plot of all signal templates and a plot
      of template normalization versus the number extracted from the signal
      process name
    * if ``shape_templates``: nominal/plus/minus comparison plots for every
      shape uncertainty, arranged in an html table with lightbox links
    """
    plotdir = os.path.join(config.workdir, 'plots')
    observables = sorted(list(model.observables.keys()))
    processes = sorted(list(model.processes))
    #TODO: more / better colors
    background_colors = ['#edd400', '#f57900', '#c17d11', '#73d216', '#3465a4', '#75507b', '#d3d7cf', '#555753']
    signal_colors = ['#ef2929', '#cc0000', '#a40000']
    if not os.path.exists(plotdir): os.mkdir(plotdir)
    f = open(os.path.join(config.workdir, 'model_plots.thtml'), 'w')
    print >> f, "<h2>Stackplots</h2>"
    print >> f, "<p>Everything normalized to expectation, i.e., to the normalization in the template input file, possibly scaled via the python script file.</p>"
    print >> f, "<p>Color Code:</p><ul>"
    # color legend: colors are assigned to processes in sorted order, cycling
    # through the signal / background palettes independently. The same
    # assignment order is repeated below for the actual plots so legend and
    # plots agree.
    i_bkg_col = 0
    i_signal_col = 0
    for p in processes:
        if p in model.signal_processes:
            color = signal_colors[i_signal_col]
            i_signal_col = (i_signal_col + 1) % len(signal_colors)
        else:
            color = background_colors[i_bkg_col]
            i_bkg_col = (i_bkg_col + 1) % len(background_colors)
        print >> f, '<li><span style="background: %s;"> </span> %s</li>' % (color, p)
    print >>f, '</ul>'
    # one stack plot per observable:
    for o in observables:
        background_pds = []
        signal_pds = []
        i_bkg_col = 0
        i_signal_col = 0
        for p in processes:
            hf = model.get_histogram_function(o, p)
            if hf is None: continue
            pd = plotutil.plotdata()
            pd.histo_triple(hf.get_nominal_histo())
            xmin, xmax, data = hf.get_nominal_histo()
            # NOTE(review): xmin/xmax/binwidth keep the values of the LAST
            # template processed; this assumes all templates of an observable
            # share the same binning, and raises NameError if an observable
            # has no template at all -- confirm.
            binwidth = (xmax - xmin) / len(data)
            if p in model.signal_processes:
                pd.color = signal_colors[i_signal_col]
                i_signal_col = (i_signal_col + 1) % len(signal_colors)
                signal_pds.append(pd)
            else:
                # backgrounds get a colored fill with a thin black outline:
                pd.fill_color = background_colors[i_bkg_col]
                pd.lw = 1
                pd.color = '#000000'
                i_bkg_col = (i_bkg_col + 1) % len(background_colors)
                background_pds.append(pd)
        data_histo = model.get_data_histogram(o)
        data_pd = None
        if data_histo is not None:
            # data shown as black circles with Poisson (sqrt(N)) error bars:
            data_pd = plotutil.plotdata()
            data_pd.color = '#000000'
            data_pd.histo_triple(data_histo)
            data_pd.yerrors = map(math.sqrt, data_pd.y)
            data_pd.circle = 'o'
        plotutil.make_stack(background_pds)
        plots = background_pds + signal_pds
        if data_pd is not None: plots.append(data_pd)
        plotutil.plot(plots, o, '$N / %.4g$' % binwidth, os.path.join(plotdir, '%s_stack.png' % o), xmin=xmin, xmax=xmax)
        print >> f, "<p>Observable '%s':<br /><img src=\"plots/%s_stack.png\" /></p>" % (o, o)
    if all_nominal_templates:
        print >> f, "<h2>All 'nominal' Templates</h2>"
        print >> f, "<p>Everything normalized to expectation, i.e., to the normalization in the template input file, possibly scaled via the python script file.</p>"
        for o in observables:
            # one plot per (observable, process) nominal template:
            for p in processes:
                hf = model.get_histogram_function(o,p)
                if hf is None: continue
                xmin, xmax, data = hf.get_nominal_histo()
                binwidth = (xmax - xmin) / len(data)
                pd = plotutil.plotdata()
                pd.x = [xmin + i*binwidth for i in range(len(data))]
                pd.y = data[:]
                pd.color = signal_colors[0]
                xlabel = o
                plotutil.plot([pd], xlabel, '$N / %.4g$' % binwidth, os.path.join(plotdir, '%s_%s.png' % (o, p)), xmin=xmin, xmax=xmax)
                print >> f, '<p>Observable "%s", Process "%s":<br/><img src="plots/%s_%s.png"/></p>' % (o, p, o, p)
            # make also one plot with all signal processes, and normalization versus ordering:
            pd_norm = plotutil.plotdata()
            pd_norm.x = []
            pd_norm.y = []
            pd_norm.as_histo = False
            pd_norm.color = '#000000'
            plots = []
            i_signal_col = 0
            # x_to_y maps the number extracted from the signal process name
            # (e.g. a mass hypothesis) to the total template normalization:
            x_to_y = {}
            for p in processes:
                if p not in model.signal_processes: continue
                hf = model.get_histogram_function(o,p)
                if hf is None: continue
                xmin, xmax, data = hf.get_nominal_histo()
                x_to_y[utils.extract_number(p)] = sum(data)
                binwidth = (xmax - xmin) / len(data)
                pd = plotutil.plotdata()
                pd.x = [xmin + i*binwidth for i in range(len(data))]
                pd.y = data[:]
                pd.color = signal_colors[i_signal_col]
                plots.append(pd)
                i_signal_col = (i_signal_col + 1) % len(signal_colors)
            for x in sorted(x_to_y.keys()):
                pd_norm.x.append(x)
                pd_norm.y.append(x_to_y[x])
            plotutil.plot(plots, o, '$N / %.4g$' % binwidth, os.path.join(plotdir, '%s_signals.png' % o), xmin=xmin, xmax=xmax)
            plotutil.plot([pd_norm], 'signal process', '$N$', os.path.join(plotdir, '%s_norm_vs_signals.png' % o))
            print >> f, '<p>Observable "%s", all signals: <br/><img src="plots/%s_signals.png"/></p>' % (o, o)
            print >> f, '<p>Observable "%s", signal normalization: <br/><img src="plots/%s_norm_vs_signals.png"/></p>' % (o, o)
    # (end if all_nominal_templates)
    if not shape_templates:
        f.close()
        return
    # shape comparison for morphed templates:
    color_nominal, color_plus, color_minus = '#333333', '#aa3333', '#3333aa'
    print >> f, "<h2>Shape Uncertainty Plots</h2>"
    print >> f, "<p>Color Code:</p><ul>"
    print >> f, "<li><span style=\"background: %s;\"> </span> nominal</li><li><span style=\"background: %s;\"> </span> plus</li><li><span style=\"background: %s;\"> </span> minus</li>" % (color_nominal, color_plus, color_minus)
    print >> f, "</ul>"
    print >> f, "<p>Processes not appearing in the tables do not have any shape uncertainty for this observable.</p>"
    print >> f, "<p>Click on an image to enlarge. If you have javascript, the image will be displayed on this page and you can click through all shape uncertainties of that observable \
(instead of clicking, you can also use the left/right key on the keyboard).</p>"
    for o in observables:
        print >> f, '<h3>Observable \'%s\'</h3>' % o
        # save the triples (o,p,u) for which there is a plot:
        opus = []
        for p in model.get_processes(o):
            hf = model.get_histogram_function(o,p)
            for u in hf.syst_histos:
                xmin, xmax, data_nominal = hf.nominal_histo
                xmin, xmax, data_plus = hf.syst_histos[u][0]
                xmin, xmax, data_minus = hf.syst_histos[u][1]
                binwidth = (xmax - xmin) / len(data_nominal)
                pd = plotutil.plotdata(color = color_nominal, legend = 'nominal')
                pd.x = [xmin + i*binwidth for i in range(len(data_nominal))]
                pd.y = data_nominal
                pd_plus = plotutil.plotdata(color = color_plus, legend = 'plus variation')
                pd_plus.x = pd.x
                pd_plus.y = data_plus
                pd_plus.color = color_plus
                pd_minus = plotutil.plotdata(color = color_minus, legend = 'minus variation')
                pd_minus.x = pd.x
                pd_minus.y = data_minus
                name = '%s__%s__%s' % (o,p,u)
                plotutil.plot((pd, pd_plus, pd_minus), o, '$N / %.4g$' % binwidth, os.path.join(plotdir, name + '.png'), xmin=xmin, xmax = xmax)
                opus.append((o,p,u))
        #make a table for this observable:
        # NOTE: in Python 2 the comprehension variables leak and rebind o/p/u
        # here; this is harmless because all opus entries carry the current o.
        t = table()
        t.add_column('process', 'process / uncertainty')
        us = sorted(list(set([u for o,p,u in opus])))
        ps = sorted(list(set([p for o,p,u in opus])))
        for u in us: t.add_column(u)
        for p in ps:
            t.set_column('process', p)
            for u in us:
                if (o,p,u) in opus:
                    t.set_column(u, '<a href="plots/%s__%s__%s.png" rel="lightbox[%s]"><img src="plots/%s__%s__%s.png" width="200"/></a>' % (o,p,u,o,o,p,u))
                else:
                    t.set_column(u, '---')
            t.add_row()
        print >>f, t.html()
        if len(opus)==0: print >> f, '<p>no shape uncertainties for this observable</p>'
    f.close()
def model_plots_at(model, par_values, signal_stacked = False):
    """Write stack plots of the model templates evaluated at ``par_values``.

    ``par_values`` is a dict mapping parameter name -> value; the (morphed)
    templates at that parameter point are obtained via
    ``get_shifted_templates``. One stack plot per observable is written to
    ``<config.workdir>/plots`` and the html summary is appended to the report
    as a new section. If ``signal_stacked`` is ``True``, the signal templates
    are stacked on top of the backgrounds; otherwise they are overlaid.
    The png filenames include a hash of ``par_values`` so plots for different
    parameter points do not overwrite each other.
    """
    plotdir = os.path.join(config.workdir, 'plots')
    processes = sorted(list(model.processes))
    #TODO: more / better colors
    # hash of the parameter point, used as a filename suffix:
    h = str(hash(str(par_values)))
    background_colors = ['#edd400', '#f57900', '#c17d11', '#73d216', '#3465a4', '#75507b', '#d3d7cf', '#555753']
    signal_colors = ['#ef2929', '#cc0000', '#a40000']
    if not os.path.exists(plotdir): os.mkdir(plotdir)
    text = '<p>Templates evaluated at:<p><ul>'
    for p in par_values:
        text+='<li>%s = %.2g</li>\n' % (p, par_values[p])
    text += '</ul>'
    text += "<p>Everything normalized to expectation, i.e., to the normalization in the template input file, possibly scaled via the python script file.</p>"
    text += "<p>Color Code:</p><ul>"
    # color legend; the same assignment order is used again for the plots below:
    i_bkg_col = 0
    i_signal_col = 0
    for p in processes:
        if p in model.signal_processes:
            color = signal_colors[i_signal_col]
            i_signal_col = (i_signal_col + 1) % len(signal_colors)
        else:
            color = background_colors[i_bkg_col]
            i_bkg_col = (i_bkg_col + 1) % len(background_colors)
        text += '<li><span style="background: %s;"> </span> %s</li>' % (color, p)
    text += '</ul>'
    templates = get_shifted_templates(model, par_values)
    for o in templates:
        background_pds = []
        signal_pds = []
        i_bkg_col = 0
        i_signal_col = 0
        for p in templates[o]:
            pd = plotutil.plotdata()
            pd.histo_triple(templates[o][p])
            # NOTE(review): xmin/xmax/binwidth carry the values of the last
            # template seen -- assumes uniform binning per observable; confirm.
            xmin, xmax, data = templates[o][p]
            binwidth = (xmax - xmin) / len(data)
            if p in model.signal_processes:
                pd.color = signal_colors[i_signal_col]
                i_signal_col = (i_signal_col + 1) % len(signal_colors)
                signal_pds.append(pd)
            else:
                pd.fill_color = background_colors[i_bkg_col]
                pd.lw = 1
                pd.color = '#000000'
                i_bkg_col = (i_bkg_col + 1) % len(background_colors)
                background_pds.append(pd)
        data_histo = model.get_data_histogram(o)
        data_pd = None
        if data_histo is not None:
            # data as black circles with sqrt(N) error bars:
            xmin, xmax, data = data_histo
            data_pd = plotutil.plotdata()
            data_pd.color = '#000000'
            data_pd.histo_triple(data_histo)
            data_pd.yerrors = map(math.sqrt, data_pd.y)
            data_pd.circle = 'o'
        # 'plots' aliases background_pds on purpose: if signal_stacked, the
        # signals are appended BEFORE make_stack so they get stacked together
        # with the backgrounds; otherwise the backgrounds are stacked first and
        # the signals are merely overlaid.
        plots = background_pds
        if signal_stacked:
            plots.extend(signal_pds)
            plotutil.make_stack(background_pds)
        else:
            plotutil.make_stack(background_pds)
            plots.extend(signal_pds)
        if data_pd is not None: plots.append(data_pd)
        plotutil.plot(plots, o, '$N / %.4g$' % binwidth, os.path.join(plotdir, '%s_stack%s.png' % (o, h)), xmin=xmin, xmax=xmax)
        text += "<p>Observable '%s':<br /><img src=\"plots/%s_stack%s.png\" /></p>" % (o, o, h)
    config.report.new_section('Model Plots at parameter values', text)