예제 #1
0
def create_count_plot_and_array(ax: plt.Figure, data: pd.DataFrame, x: str,
                                palette, x_label: str) -> np.ndarray:
    """Draw a seaborn count plot and return the rendered figure as an array.

    The plot is drawn on ``ax`` via ``sns.countplot``, its axis labels and
    x tick labels are set, the owning figure is converted with
    ``from_fig_to_array`` and then closed.

    Args:
        ax: object passed to ``sns.countplot`` as its ``ax`` argument.
        data: frame holding the column ``x`` and a column named 'intervals'.
        x: name of the column whose values are counted.
        palette: forwarded unchanged to ``sns.countplot``.
        x_label: text used for the x-axis label.

    Returns:
        Whatever ``from_fig_to_array`` produces for the plot's figure.
    """
    axes = sns.countplot(ax=ax, data=data, x=x, palette=palette)
    axes.set_xlabel(x_label, fontsize='xx-large')
    axes.set_ylabel('Count', fontsize='xx-large')

    # NOTE(review): tick labels always come from the 'intervals' column,
    # whatever `x` is -- presumably callers pass x='intervals'; confirm.
    tick_labels = data['intervals'].unique()
    tick_positions = range(len(tick_labels))
    axes.set_xticks(tick_positions)
    axes.set_xticklabels(labels=tick_labels, rotation=45, size=15)

    figure = axes.figure
    rendered = from_fig_to_array(figure)
    # Close the figure once rendered so it is not kept open by pyplot.
    plt.close(figure)
    return rendered
예제 #2
0
  def plot(self, pars : Parameters, data : 'Data' = None, channel : Channel = None, only : list = None, exclude : list = None,
           variations : list = None, residuals : bool = False, canvas : plt.Figure = None, labels : bool = True) :
    """Plot the expected event yields and optionally data as well

      The plot is performed for a single channel, which must be a
      `BinnedRangeChannel` or a `SingleBinChannel`.
      The event yields are plotted as a histogram, as a function of the channel
      observable.
      The `variations` arg allows to plot yield variations for selected NP values. The
      format is [ ('par1', val1), ... ] , which will plot the yields for the case where
      NP par1 is set to val1 (while other NPs remain at nominal), etc. An optional
      third element in each entry gives the line color (default: 'r').

      Args:
         pars       : parameter values for which to compute the expected yields
         data       : observed dataset to plot alongside the expected yields
         channel    : key of the channel to plot, looked up in `self.channels`.
                      If `None`, plot the first channel.
         only       : sample name (or list of names) to plot exclusively; takes
                      precedence over `exclude` if both are given
         exclude    : sample name (or list of names) to exclude from the plot
         variations : list of NP variations to plot, as a list of (str, float) pairs
                      providing the NP name and the value to set.
         residuals  : if True and `data` is provided, plot the model-data differences
                      (the data points are then drawn at 0)
         canvas     : the object on which to plot; it must provide the matplotlib
                      Axes plotting methods (hist, errorbar, ...). If None,
                      plt.gca() is used.
         labels     : if True (default), add labels to the legend

      Raises:
         KeyError   : if `channel` is not a key of `self.channels`
         ValueError : if the channel type is unsupported, or if a name in `only` or
                      `exclude` is not a sample of the channel
    """
    if canvas is None : canvas = plt.gca()
    if not isinstance(only, list)    and only    is not None : only = [ only ]
    if not isinstance(exclude, list) and exclude is not None : exclude = [ exclude ]
    if channel is None :
      channel = list(self.channels.values())[0]
      print("Plotting channel '%s'" % channel.name)
    else :
      if channel not in self.channels : raise KeyError('ERROR: Channel %s is not defined.' % channel)
      channel = self.channels[channel]
    if isinstance(channel, BinnedRangeChannel) :
      grid = [ b['lo_edge'] for b in channel.bins ]
      grid.append(channel.bins[-1]['hi_edge'])
    elif isinstance(channel, SingleBinChannel) :
      grid = [0,1]
    else :
      raise ValueError("Channel '%s' is of an unsupported type" % channel.name)
    # Bin centers, used as the x positions of both histogram entries and data points
    xvals = [ (lo + hi)/2 for lo, hi in zip(grid[:-1], grid[1:]) ]
    start = self.channel_offsets[channel.name]
    stop  = start + channel.nbins()
    # Fetch the observed counts up front: they are needed for the residuals
    # computation below, before the data points themselves are drawn.
    counts = data.counts[start:stop] if data is not None else None
    nexp = self.n_exp(pars)[:, start:stop]
    tot_exp = nexp.sum(axis=0)
    # Row indices (in nexp) of the samples named in `only` or, failing that, `exclude`
    selected = only if only is not None else exclude
    samples = []
    if selected is not None :
      sample_names = list(channel.samples)
      for sample_name in selected :
        if sample_name not in channel.samples : raise ValueError('Sample %s is not defined.' % sample_name)
        samples.append(sample_names.index(sample_name))
    # `subtract` is what gets removed from the total yields before plotting
    if only is not None :
      subtract = tot_exp - nexp[samples,:].sum(axis=0)
      line_style = '--'
      title = ','.join(only)
    elif exclude is not None :
      subtract = nexp[samples,:].sum(axis=0)
      line_style = '--'
      title = 'Model excluding ' + ','.join(exclude)
    else :
      subtract = np.zeros(nexp.shape[1])
      line_style = '-'
      title = 'Model'
    yvals = tot_exp - subtract
    if residuals and data is not None : yvals = yvals - counts
    canvas.hist(xvals, weights=yvals, bins=grid, histtype='step',color='b', linestyle=line_style, label=title if labels else None)
    if data is not None :
      yerrs = [ math.sqrt(n) if n > 0 else 0 for n in counts ]
      # In residuals mode the model is shown relative to data, so data sits at 0
      data_yvals = counts if not residuals else np.zeros(channel.nbins())
      canvas.errorbar(xvals, data_yvals, xerr=[0]*channel.nbins(), yerr=yerrs, fmt='ko', label='Data' if labels else None)
    canvas.set_xlim(grid[0], grid[-1])
    if variations is not None :
      # NOTE(review): in residuals mode the variations are drawn as absolute
      # yields (data counts are not subtracted), unlike the nominal histogram
      # above -- confirm this is intended.
      for v in variations :
        vpars = pars.clone()
        vpars.set(v[0], v[1])
        col = 'r' if len(v) < 3 else v[2]
        var_nexp = self.n_exp(vpars)[:, start:stop]
        var_tot = var_nexp.sum(axis=0)
        if only is None and exclude is None :
          var_subtract = np.zeros(var_nexp.shape[1])
        else :
          var_subtract = var_nexp[samples,:].sum(axis=0)
          if only is not None : var_subtract = var_tot - var_subtract
        canvas.hist(xvals, weights=var_tot - var_subtract, bins=grid, histtype='step',color=col, linestyle=line_style, label='%s=%+g' %(v[0], v[1]) if labels else None)
    if labels : canvas.legend()
    canvas.set_title(self.name)
    if isinstance(channel, BinnedRangeChannel) :
      canvas.set_xlabel('$' + channel.obs_name + '$' + ((' ['  + channel.obs_unit + ']') if channel.obs_unit != '' else ''))
      canvas.set_ylabel('Events / bin')
    elif isinstance(channel, SingleBinChannel) :
      canvas.set_xlabel(channel.name)
      canvas.set_ylabel('Events')