Example #1
0
def plot_flatness_by_particle(labels,
                              predictions_dict,
                              spectator,
                              spectator_name,
                              predictions_dict_comparison=None,
                              names_algorithms=('MVA', 'Baseline'),
                              weights=None,
                              bins_number=30,
                              ignored_sideband=0.1,
                              thresholds=None,
                              cuts_values=False,
                              ncol=1):
    """
    Plot efficiency versus a spectator variable, one subplot per particle.

    For every ``(name, label)`` pair of ``names_labels_correspondence`` the
    samples whose true label equals ``label`` are selected, and the
    efficiency of the ``label`` prediction on those samples is drawn against
    ``spectator`` for each threshold. When ``predictions_dict_comparison`` is
    given, its curves are drawn on the same subplot. Subplots are laid out on
    a 3 x 2 grid.

    Parameters
    ----------
    labels : array_like
        True label of every sample.
    predictions_dict : mapping
        ``predictions_dict[label]`` holds the predictions for ``label``; the
        value must support boolean-mask indexing.
    spectator : array_like
        Spectator variable values; must support boolean-mask indexing.
    spectator_name : string
        Name of the spectator variable, used in the x-axis label.
    predictions_dict_comparison : mapping or None
        Optional second set of predictions with the same layout as
        ``predictions_dict``. Skipped when None.
    names_algorithms : sequence of string
        Legend names for ``predictions_dict`` and
        ``predictions_dict_comparison``, in that order.
    weights : array_like or None
        Optional per-sample weights; must support boolean-mask indexing.
    bins_number : int
        Forwarded to ``get_efficiencies``.
    ignored_sideband : float
        Forwarded to ``get_efficiencies``.
    thresholds : iterable
        Efficiencies in percent. Each one is converted into a cut at the
        ``1 - eff / 100`` weighted quantile of the selected predictions.
        When ``cuts_values`` is true they are used directly as cut values.
    cuts_values : boolean
        If true, ``thresholds`` already contains cut values.
    ncol : int
        Number of columns of the legend.
    """
    plt.figure(figsize=(22, 20))
    for n, (name, label) in enumerate(names_labels_correspondence.items()):
        plt.subplot(3, 2, n + 1)
        mask = labels == label
        # Identical for both algorithms, so select the weights only once.
        sample_weight = None if weights is None else weights[mask]
        legend_entries = []
        for preds, name_algo in zip(
            [predictions_dict, predictions_dict_comparison], names_algorithms):
            if preds is None:
                continue
            probs = preds[label][mask]
            if cuts_values:
                # The original referenced an undefined name ``cut_values``
                # here; the cut values are the thresholds themselves.
                thresholds_values = thresholds
            else:
                thresholds_values = [
                    weighted_quantile(probs,
                                      quantiles=1 - efficiency / 100.,
                                      sample_weight=sample_weight)
                    for efficiency in thresholds
                ]
            eff = get_efficiencies(
                probs,
                spectator[mask],
                sample_weight=sample_weight,
                bins_number=bins_number,
                errors=True,
                ignored_sideband=ignored_sideband,
                thresholds=thresholds_values)
            # Rescale entries 1 and 2 of every curve to percent. Iterating
            # over the unique thresholds prevents a repeated threshold from
            # being multiplied by 100 more than once.
            for thr in set(thresholds_values):
                curve = eff[thr]
                eff[thr] = (curve[0], 100 * numpy.array(curve[1]),
                            100 * numpy.array(curve[2]), curve[3])
            plot_fig = ErrorPlot(eff)
            plot_fig.xlabel = '{} {}'.format(name, spectator_name)
            plot_fig.ylabel = 'Efficiency'
            plot_fig.title = name
            plot_fig.ylim = (0, 100)
            plot_fig.plot(fontsize=22)
            plt.xticks(fontsize=12)
            plt.yticks(fontsize=12)
            # Algorithm name first, threshold second: "MVA Eff 20%".
            legend_entries.extend(
                '{} Eff {}%'.format(name_algo, thr) for thr in thresholds)
        # Nothing was drawn when both prediction sets are None.
        if legend_entries:
            plt.legend(legend_entries,
                       loc='best',
                       fontsize=12,
                       framealpha=0.5,
                       ncol=ncol)
Example #2
0
def plot_flatness_particle(labels, predictions_dict, spectator, spectator_name, particle_name,
                           weights=None, bins_number=30, ignored_sideband=0.1,
                           thresholds=None, cuts_values=False):
    """
    Plot the efficiency of every classifier output on one particle type.

    The samples whose true label is ``names_labels_correspondence[particle_name]``
    are selected once. Then, for every ``(name, label)`` pair of
    ``names_labels_correspondence``, the efficiency of the ``label``
    prediction on those samples is drawn against ``spectator`` in its own
    subplot (3 x 2 grid).

    Parameters
    ----------
    labels : array_like
        True label of every sample.
    predictions_dict : mapping
        ``predictions_dict[label]`` holds the predictions for ``label``; the
        value must support boolean-mask indexing.
    spectator : array_like
        Spectator variable values; must support boolean-mask indexing.
    spectator_name : string
        Name of the spectator variable, used in the x-axis label.
    particle_name : string
        Key of ``names_labels_correspondence`` selecting the plotted samples.
    weights : array_like or None
        Optional per-sample weights; must support boolean-mask indexing.
    bins_number : int
        Forwarded to ``get_efficiencies``.
    ignored_sideband : float
        Forwarded to ``get_efficiencies``.
    thresholds : iterable
        Efficiencies in percent. Each one is converted into a cut at the
        ``1 - eff / 100`` weighted quantile of the ``label`` prediction on
        the samples whose true label is ``label``. When ``cuts_values`` is
        true they are used directly as cut values.
    cuts_values : boolean
        If true, ``thresholds`` already contains cut values and no legend is
        drawn.
    """
    plt.figure(figsize=(18, 22))
    # The plotted sample selection does not depend on the subplot.
    mask = labels == names_labels_correspondence[particle_name]
    spectator_selected = spectator[mask]
    sample_weight = None if weights is None else weights[mask]
    for n, (name, label) in enumerate(names_labels_correspondence.items()):
        plt.subplot(3, 2, n + 1)
        probs = predictions_dict[label][mask]
        if cuts_values:
            # The original referenced an undefined name ``cut_values`` here;
            # the cut values are the thresholds themselves.
            thresholds_values = thresholds
        else:
            # Cuts are defined on the signal of this classifier output, i.e.
            # on the samples whose true label is ``label``.
            mask_signal = labels == label
            probs_signal = predictions_dict[label][mask_signal]
            signal_weight = None if weights is None else weights[mask_signal]
            thresholds_values = [weighted_quantile(probs_signal, quantiles=1 - efficiency / 100.,
                                                   sample_weight=signal_weight)
                                 for efficiency in thresholds]
        eff = get_efficiencies(probs, spectator_selected,
                               sample_weight=sample_weight,
                               bins_number=bins_number, errors=True, ignored_sideband=ignored_sideband,
                               thresholds=thresholds_values)
        # Rescale entries 1 and 2 of every curve to percent. Iterating over
        # the unique thresholds prevents a repeated threshold from being
        # multiplied by 100 more than once.
        for thr in set(thresholds_values):
            curve = eff[thr]
            eff[thr] = (curve[0], 100 * numpy.array(curve[1]), 100 * numpy.array(curve[2]), curve[3])
        plot_fig = ErrorPlot(eff)
        plot_fig.xlabel = '{} {}'.format(particle_name, spectator_name)
        plot_fig.ylabel = 'Efficiency'
        plot_fig.title = 'MVA {}'.format(name)
        plot_fig.ylim = (0, 100)
        plot_fig.plot(fontsize=22)
        plt.xticks(fontsize=12)
        plt.yticks(fontsize=12)
        if not cuts_values:
            plt.legend(['Signal Eff {}%'.format(thr) for thr in thresholds], loc='best', fontsize=18, framealpha=0.5)
Example #3
0
def plot_flatness_by_particle(labels, predictions_dict, spectator, spectator_name, predictions_dict_comparison=None,
                              names_algorithms=('MVA', 'Baseline'),
                              weights=None, bins_number=30, ignored_sideband=0.1,
                              thresholds=None, cuts_values=False, ncol=1):
    """
    Plot efficiency versus a spectator variable, one subplot per particle.

    For every ``(name, label)`` pair of ``names_labels_correspondence`` the
    samples whose true label equals ``label`` are selected, and the
    efficiency of the ``label`` prediction on those samples is drawn against
    ``spectator`` for each threshold. When ``predictions_dict_comparison`` is
    given, its curves are drawn on the same subplot. Subplots are laid out on
    a 3 x 2 grid.

    Parameters
    ----------
    labels : array_like
        True label of every sample.
    predictions_dict : mapping
        ``predictions_dict[label]`` holds the predictions for ``label``; the
        value must support boolean-mask indexing.
    spectator : array_like
        Spectator variable values; must support boolean-mask indexing.
    spectator_name : string
        Name of the spectator variable, used in the x-axis label.
    predictions_dict_comparison : mapping or None
        Optional second set of predictions with the same layout as
        ``predictions_dict``. Skipped when None.
    names_algorithms : sequence of string
        Legend names for ``predictions_dict`` and
        ``predictions_dict_comparison``, in that order.
    weights : array_like or None
        Optional per-sample weights; must support boolean-mask indexing.
    bins_number : int
        Forwarded to ``get_efficiencies``.
    ignored_sideband : float
        Forwarded to ``get_efficiencies``.
    thresholds : iterable
        Efficiencies in percent. Each one is converted into a cut at the
        ``1 - eff / 100`` weighted quantile of the selected predictions.
        When ``cuts_values`` is true they are used directly as cut values.
    cuts_values : boolean
        If true, ``thresholds`` already contains cut values.
    ncol : int
        Number of columns of the legend.
    """
    plt.figure(figsize=(22, 20))
    for n, (name, label) in enumerate(names_labels_correspondence.items()):
        plt.subplot(3, 2, n + 1)
        mask = labels == label
        # Identical for both algorithms, so select the weights only once.
        sample_weight = None if weights is None else weights[mask]
        legend_entries = []
        for preds, name_algo in zip([predictions_dict, predictions_dict_comparison], names_algorithms):
            if preds is None:
                continue
            probs = preds[label][mask]
            if cuts_values:
                # The original referenced an undefined name ``cut_values``
                # here; the cut values are the thresholds themselves.
                thresholds_values = thresholds
            else:
                thresholds_values = [weighted_quantile(probs, quantiles=1 - efficiency / 100.,
                                                       sample_weight=sample_weight)
                                     for efficiency in thresholds]
            eff = get_efficiencies(probs, spectator[mask],
                                   sample_weight=sample_weight,
                                   bins_number=bins_number, errors=True, ignored_sideband=ignored_sideband,
                                   thresholds=thresholds_values)
            # Rescale entries 1 and 2 of every curve to percent. Iterating
            # over the unique thresholds prevents a repeated threshold from
            # being multiplied by 100 more than once.
            for thr in set(thresholds_values):
                curve = eff[thr]
                eff[thr] = (curve[0], 100 * numpy.array(curve[1]), 100 * numpy.array(curve[2]), curve[3])
            plot_fig = ErrorPlot(eff)
            plot_fig.xlabel = '{} {}'.format(name, spectator_name)
            plot_fig.ylabel = 'Efficiency'
            plot_fig.title = name
            plot_fig.ylim = (0, 100)
            plot_fig.plot(fontsize=22)
            plt.xticks(fontsize=12)
            plt.yticks(fontsize=12)
            # Algorithm name first, threshold second: "MVA Eff 20%".
            legend_entries.extend('{} Eff {}%'.format(name_algo, thr) for thr in thresholds)
        # Nothing was drawn when both prediction sets are None.
        if legend_entries:
            plt.legend(legend_entries, loc='best', fontsize=12, framealpha=0.5, ncol=ncol)
Example #4
0
def plot_flatness_particle(labels,
                           predictions_dict,
                           spectator,
                           spectator_name,
                           particle_name,
                           weights=None,
                           bins_number=30,
                           ignored_sideband=0.1,
                           thresholds=None,
                           cuts_values=False):
    """
    Plot the efficiency of every classifier output on one particle type.

    The samples whose true label is
    ``names_labels_correspondence[particle_name]`` are selected once. Then,
    for every ``(name, label)`` pair of ``names_labels_correspondence``, the
    efficiency of the ``label`` prediction on those samples is drawn against
    ``spectator`` in its own subplot (3 x 2 grid).

    Parameters
    ----------
    labels : array_like
        True label of every sample.
    predictions_dict : mapping
        ``predictions_dict[label]`` holds the predictions for ``label``; the
        value must support boolean-mask indexing.
    spectator : array_like
        Spectator variable values; must support boolean-mask indexing.
    spectator_name : string
        Name of the spectator variable, used in the x-axis label.
    particle_name : string
        Key of ``names_labels_correspondence`` selecting the plotted samples.
    weights : array_like or None
        Optional per-sample weights; must support boolean-mask indexing.
    bins_number : int
        Forwarded to ``get_efficiencies``.
    ignored_sideband : float
        Forwarded to ``get_efficiencies``.
    thresholds : iterable
        Efficiencies in percent. Each one is converted into a cut at the
        ``1 - eff / 100`` weighted quantile of the ``label`` prediction on
        the samples whose true label is ``label``. When ``cuts_values`` is
        true they are used directly as cut values.
    cuts_values : boolean
        If true, ``thresholds`` already contains cut values and no legend is
        drawn.
    """
    plt.figure(figsize=(18, 22))
    # The plotted sample selection does not depend on the subplot.
    mask = labels == names_labels_correspondence[particle_name]
    spectator_selected = spectator[mask]
    sample_weight = None if weights is None else weights[mask]
    for n, (name, label) in enumerate(names_labels_correspondence.items()):
        plt.subplot(3, 2, n + 1)
        probs = predictions_dict[label][mask]
        if cuts_values:
            # The original referenced an undefined name ``cut_values`` here;
            # the cut values are the thresholds themselves.
            thresholds_values = thresholds
        else:
            # Cuts are defined on the signal of this classifier output, i.e.
            # on the samples whose true label is ``label``.
            mask_signal = labels == label
            probs_signal = predictions_dict[label][mask_signal]
            signal_weight = None if weights is None else weights[mask_signal]
            thresholds_values = [
                weighted_quantile(probs_signal,
                                  quantiles=1 - efficiency / 100.,
                                  sample_weight=signal_weight)
                for efficiency in thresholds
            ]
        eff = get_efficiencies(
            probs,
            spectator_selected,
            sample_weight=sample_weight,
            bins_number=bins_number,
            errors=True,
            ignored_sideband=ignored_sideband,
            thresholds=thresholds_values)
        # Rescale entries 1 and 2 of every curve to percent. Iterating over
        # the unique thresholds prevents a repeated threshold from being
        # multiplied by 100 more than once.
        for thr in set(thresholds_values):
            curve = eff[thr]
            eff[thr] = (curve[0], 100 * numpy.array(curve[1]),
                        100 * numpy.array(curve[2]), curve[3])
        plot_fig = ErrorPlot(eff)
        plot_fig.xlabel = '{} {}'.format(particle_name, spectator_name)
        plot_fig.ylabel = 'Efficiency'
        plot_fig.title = 'MVA {}'.format(name)
        plot_fig.ylim = (0, 100)
        plot_fig.plot(fontsize=22)
        plt.xticks(fontsize=12)
        plt.yticks(fontsize=12)
        if not cuts_values:
            plt.legend(['Signal Eff {}%'.format(thr) for thr in thresholds],
                       loc='best',
                       fontsize=18,
                       framealpha=0.5)
Example #5
0
def flatness_eta_figure(proba,
                        proba_baseline,
                        eta,
                        track_name,
                        particle_name,
                        save_path=None,
                        show=False):
    """
    Plot signal efficiency vs pseudo rapidity figure.

    Curves are drawn for the model and for the baseline at cuts placed on
    the (100 - 20)-th, (100 - 50)-th and (100 - 80)-th percentiles of the
    respective predictions. The current figure is cleared and closed before
    returning.

    Parameters
    ----------
    proba : array_like
        Predicted probabilities with array shape = [n_samples].
    proba_baseline : array_like
        Baseline predicted probabilities with array shape = [n_samples].
    eta : array_like
        Pseudo rapidity values with array shape = [n_samples].
    track_name : string
        The track name.
    particle_name : string
        The particle name.
    save_path : string
        Path to a directory where the figure will be saved. If None the
        figure will not be saved.
    show : boolean
        If true the figure will be displayed.
    """

    # Single source for both the cuts and the legend labels.
    target_efficiencies = (20, 50, 80)
    percentiles = 100 - numpy.array(target_efficiencies)

    thresholds = numpy.percentile(proba, percentiles)
    thresholds_baseline = numpy.percentile(proba_baseline, percentiles)

    eff = get_efficiencies(proba,
                           eta,
                           bins_number=30,
                           errors=True,
                           ignored_sideband=0.005,
                           thresholds=thresholds)

    eff_baseline = get_efficiencies(proba_baseline,
                                    eta,
                                    bins_number=30,
                                    errors=True,
                                    ignored_sideband=0.005,
                                    thresholds=thresholds_baseline)

    def as_percent(curve):
        # Entries 1 and 2 of a curve are rescaled by 100; the others are
        # passed through unchanged.
        return (curve[0], 100. * curve[1], 100. * curve[2], curve[3])

    # Interleave model and baseline curves in threshold order so they line
    # up with the legend labels. Looking the curves up by threshold (rather
    # than by position in ``keys()``) works on Python 3, does not depend on
    # dict ordering, and never rescales the same curve twice when two
    # percentiles coincide. The descending integer keys are kept from the
    # original numbering.
    eff_total = OrderedDict()
    num = 2 * len(target_efficiencies)
    for thr, thr_baseline in zip(thresholds, thresholds_baseline):
        eff_total[num] = as_percent(eff[thr])
        eff_total[num - 1] = as_percent(eff_baseline[thr_baseline])
        num -= 2

    plot_fig = ErrorPlot(eff_total)
    plot_fig.ylim = (0, 100)

    plot_fig.plot(new_plot=True, figsize=(10, 7))
    labels = []
    for efficiency in target_efficiencies:
        labels.append('Eff model = {} %'.format(efficiency))
        labels.append('Eff baseline = {} %'.format(efficiency))
    plt.legend(labels, loc='best', prop={'size': 10}, framealpha=0.5, ncol=3)
    plt.xlabel(track_name + ' ' + particle_name + ' Pseudo Rapidity', size=15)
    plt.xticks(size=15)
    plt.ylabel('Efficiency / %', size=15)
    plt.yticks(size=15)
    plt.title('Flatness_SignalMVAEffVPseudoRapidity_' + track_name + ' ' +
              particle_name,
              size=15)

    if save_path is not None:
        plt.savefig(save_path + "/" + 'Flatness_SignalMVAEffVPseudoRapidity_' +
                    track_name + '_' + particle_name + ".png")

    if show:
        plt.show()

    plt.clf()
    plt.close()
Example #6
0
def flatness_eta_figure(proba, proba_baseline, eta, track_name, particle_name, save_path=None, show=False):

    """
    Plot signal efficiency vs pseudo rapidity figure.

    Curves are drawn for the model and for the baseline at cuts placed on
    the (100 - 20)-th, (100 - 50)-th and (100 - 80)-th percentiles of the
    respective predictions. The current figure is cleared and closed before
    returning.

    Parameters
    ----------
    proba : array_like
        Predicted probabilities with array shape = [n_samples].
    proba_baseline : array_like
        Baseline predicted probabilities with array shape = [n_samples].
    eta : array_like
        Pseudo rapidity values with array shape = [n_samples].
    track_name : string
        The track name.
    particle_name : string
        The particle name.
    save_path : string
        Path to a directory where the figure will be saved. If None the figure will not be saved.
    show : boolean
        If true the figure will be displayed.
    """

    # Single source for both the cuts and the legend labels.
    target_efficiencies = (20, 50, 80)
    percentiles = 100 - numpy.array(target_efficiencies)

    thresholds = numpy.percentile(proba, percentiles)
    thresholds_baseline = numpy.percentile(proba_baseline, percentiles)

    eff = get_efficiencies(proba,
                           eta,
                           bins_number=30,
                           errors=True,
                           ignored_sideband=0.005,
                           thresholds=thresholds)

    eff_baseline = get_efficiencies(proba_baseline,
                                    eta,
                                    bins_number=30,
                                    errors=True,
                                    ignored_sideband=0.005,
                                    thresholds=thresholds_baseline)

    def as_percent(curve):
        # Entries 1 and 2 of a curve are rescaled by 100; the others are
        # passed through unchanged.
        return (curve[0], 100. * curve[1], 100. * curve[2], curve[3])

    # Interleave model and baseline curves in threshold order so they line
    # up with the legend labels. Looking the curves up by threshold (rather
    # than by position in ``keys()``) works on Python 3, does not depend on
    # dict ordering, and never rescales the same curve twice when two
    # percentiles coincide. The descending integer keys are kept from the
    # original numbering.
    eff_total = OrderedDict()
    num = 2 * len(target_efficiencies)
    for thr, thr_baseline in zip(thresholds, thresholds_baseline):
        eff_total[num] = as_percent(eff[thr])
        eff_total[num - 1] = as_percent(eff_baseline[thr_baseline])
        num -= 2

    plot_fig = ErrorPlot(eff_total)
    plot_fig.ylim = (0, 100)

    plot_fig.plot(new_plot=True, figsize=(10,7))
    labels = []
    for efficiency in target_efficiencies:
        labels.append('Eff model = {} %'.format(efficiency))
        labels.append('Eff baseline = {} %'.format(efficiency))
    plt.legend(labels, loc='best',prop={'size':10}, framealpha=0.5, ncol=3)
    plt.xlabel(track_name + ' ' + particle_name + ' Pseudo Rapidity', size=15)
    plt.xticks(size=15)
    plt.ylabel('Efficiency / %', size=15)
    plt.yticks(size=15)
    plt.title('Flatness_SignalMVAEffVPseudoRapidity_' + track_name + ' ' + particle_name, size=15)

    if save_path is not None:
        plt.savefig(save_path + "/" + 'Flatness_SignalMVAEffVPseudoRapidity_' + track_name + '_' + particle_name + ".png")

    if show:
        plt.show()

    plt.clf()
    plt.close()