def test_compute_cut():
    """Check that ``compute_cut_for_efficiency`` yields cuts matching the requested efficiency.

    Unweighted case: the fraction of predictions strictly above the cut must
    equal the requested efficiency.  Weighted case: the requested efficiency
    must lie strictly between the weighted fractions obtained with the cut
    shifted by +1 and by -1.
    """
    # Fixed seed so that a failing run can be reproduced exactly.
    random = RandomState(42)
    # A permutation of 0..99: all predictions are distinct integers.
    predictions = random.permutation(100)
    labels = numpy.ones(100)
    for eff in [0.1, 0.5, 0.75, 0.99]:
        cut = compute_cut_for_efficiency(eff, labels, predictions)
        # numpy.mean gives a float ratio regardless of the division semantics in use;
        # isclose avoids relying on bit-exact float equality.
        passed_fraction = numpy.mean(predictions > cut)
        assert numpy.isclose(passed_fraction, eff), 'the cut was set wrongly'

    weights = random.exponential(size=100)
    total_weight = numpy.sum(weights)
    for eff in random.uniform(size=100):
        cut = compute_cut_for_efficiency(eff, labels, predictions, sample_weight=weights)
        # Predictions are one unit apart, so moving the cut by one unit in either
        # direction must bracket the target efficiency.
        lower = numpy.sum(weights[predictions > cut + 1]) / total_weight
        upper = numpy.sum(weights[predictions > cut - 1]) / total_weight
        assert lower < eff < upper
Ejemplo n.º 2
0
def compute_sde_on_groups(y_pred,
                          mask,
                          groups_indices,
                          target_efficiencies,
                          sample_weight=None,
                          power=2.):
    """Aggregate the weighted deviation of per-group efficiencies over cuts.

    Cuts on the predictions are obtained from ``compute_cut_for_efficiency``
    for ``target_efficiencies``.  For every cut the per-group efficiencies
    are passed to ``weighted_deviation``; the results are averaged over the
    cuts and the ``1 / power`` root of that average is returned.

    :param y_pred: predictions, normalised with ``column_or_1d``
    :param mask: per-event mask; forwarded to ``compute_cut_for_efficiency``
        and multiplied into the weights given to
        ``compute_divided_weight_by_indices``
    :param groups_indices: group description forwarded to the
        ``*_by_indices`` helpers
    :param target_efficiencies: efficiencies for which cuts are computed
    :param sample_weight: optional per-event weights, processed by
        ``check_sample_weight``
    :param power: exponent given to ``weighted_deviation``; its inverse is
        applied to the averaged result
    :return: ``(sum of deviations / number of cuts) ** (1 / power)``
    """
    predictions = column_or_1d(y_pred)
    weights = check_sample_weight(predictions, sample_weight=sample_weight)

    per_group_weights = compute_group_weights_by_indices(
        groups_indices, sample_weight=weights)
    masked_weights = weights * mask
    divided_weight = compute_divided_weight_by_indices(
        groups_indices, sample_weight=masked_weights)

    thresholds = compute_cut_for_efficiency(target_efficiencies,
                                            mask=mask,
                                            y_pred=predictions,
                                            sample_weight=weights)

    total_deviation = 0.
    for threshold in thresholds:
        efficiencies = compute_group_efficiencies_by_indices(
            predictions,
            groups_indices=groups_indices,
            cut=threshold,
            divided_weight=divided_weight)
        total_deviation += weighted_deviation(efficiencies,
                                              weights=per_group_weights,
                                              power=power)

    mean_deviation = total_deviation / len(thresholds)
    return mean_deviation**(1. / power)
Ejemplo n.º 3
0
def compute_sde_on_bins(y_pred,
                        mask,
                        bin_indices,
                        target_efficiencies,
                        power=2.,
                        sample_weight=None):
    """Aggregate the weighted deviation of per-bin efficiencies over cuts.

    Only the events selected by ``mask`` are used.  Cuts on the predictions
    are obtained from ``compute_cut_for_efficiency`` for
    ``target_efficiencies``; for every cut the per-bin efficiencies are
    passed to ``weighted_deviation``.  The results are averaged over the cuts
    and the ``1 / power`` root of that average is returned.

    :param y_pred: predictions, normalised with ``column_or_1d``
    :param mask: index used to select events from ``y_pred``,
        ``bin_indices`` and the weights
    :param bin_indices: per-event bin index (indexed with ``mask``)
    :param target_efficiencies: efficiencies for which cuts are computed
    :param power: exponent given to ``weighted_deviation``; its inverse is
        applied to the averaged result
    :param sample_weight: optional per-event weights, processed by
        ``check_sample_weight``
    :return: ``(sum of deviations / number of cuts) ** (1 / power)``
    """
    # Normalise the predictions exactly as compute_theil_on_bins and the
    # *_on_groups functions do, so that a column vector or a plain sequence
    # can be indexed with ``mask`` below.
    y_pred = column_or_1d(y_pred)
    sample_weight = check_sample_weight(y_pred, sample_weight=sample_weight)

    # ignoring events from other classes
    y_pred = y_pred[mask]
    bin_indices = bin_indices[mask]
    sample_weight = sample_weight[mask]

    bin_weights = compute_bin_weights(bin_indices=bin_indices,
                                      sample_weight=sample_weight)
    # The arrays are already filtered, hence the all-True mask.
    cuts = compute_cut_for_efficiency(target_efficiencies,
                                      mask=numpy.ones(len(y_pred), dtype=bool),
                                      y_pred=y_pred,
                                      sample_weight=sample_weight)

    result = 0.
    for cut in cuts:
        bin_efficiencies = compute_bin_efficiencies(
            y_pred,
            bin_indices=bin_indices,
            cut=cut,
            sample_weight=sample_weight)
        result += weighted_deviation(bin_efficiencies,
                                     weights=bin_weights,
                                     power=power)

    return (result / len(cuts))**(1. / power)
Ejemplo n.º 4
0
def compute_theil_on_bins(y_pred, mask, bin_indices, target_efficiencies,
                          sample_weight=None):
    """Average the ``theil`` value of per-bin efficiencies over cuts.

    Only the events selected by ``mask`` are used.  Cuts on the predictions
    are obtained from ``compute_cut_for_efficiency`` for
    ``target_efficiencies``; for every cut the per-bin efficiencies are
    passed to ``theil`` together with the bin weights, and the mean of those
    values over the cuts is returned.

    :param y_pred: predictions, normalised with ``column_or_1d``
    :param mask: index used to select events from ``y_pred``,
        ``bin_indices`` and the weights
    :param bin_indices: per-event bin index (indexed with ``mask``)
    :param target_efficiencies: efficiencies for which cuts are computed
    :param sample_weight: optional per-event weights, processed by
        ``check_sample_weight``; defaults to ``None`` to match
        ``compute_sde_on_bins``, which hands ``None`` to the same helper
    :return: sum of the ``theil`` values divided by the number of cuts
    """
    y_pred = column_or_1d(y_pred)
    sample_weight = check_sample_weight(y_pred, sample_weight=sample_weight)

    # ignoring events from other classes
    y_pred = y_pred[mask]
    bin_indices = bin_indices[mask]
    sample_weight = sample_weight[mask]

    bin_weights = compute_bin_weights(bin_indices=bin_indices,
                                      sample_weight=sample_weight)
    # The arrays are already filtered, hence the all-True mask.
    cuts = compute_cut_for_efficiency(target_efficiencies,
                                      mask=numpy.ones(len(y_pred), dtype=bool),
                                      y_pred=y_pred,
                                      sample_weight=sample_weight)
    result = 0.
    for cut in cuts:
        bin_efficiencies = compute_bin_efficiencies(
            y_pred,
            bin_indices=bin_indices,
            cut=cut,
            sample_weight=sample_weight)
        result += theil(bin_efficiencies, weights=bin_weights)
    return result / len(cuts)
Ejemplo n.º 5
0
def test_compute_cut():
    """Verify the cuts returned by ``compute_cut_for_efficiency``.

    Without weights, the share of predictions strictly above the cut has to
    match the requested efficiency.  With weights, the requested efficiency
    has to fall strictly between the weighted shares measured with the cut
    moved up by one and down by one.
    """
    # Seeded generator: any failure is reproducible.
    random = RandomState(42)
    # Distinct integer predictions 0..99 in random order.
    predictions = random.permutation(100)
    labels = numpy.ones(100)
    for eff in [0.1, 0.5, 0.75, 0.99]:
        cut = compute_cut_for_efficiency(eff, labels, predictions)
        # Float-safe comparison; numpy.mean never performs integer division.
        assert numpy.isclose(numpy.mean(predictions > cut),
                             eff), 'the cut was set wrongly'

    weights = random.exponential(size=100)
    total_weight = numpy.sum(weights)
    for eff in random.uniform(size=100):
        cut = compute_cut_for_efficiency(eff,
                                         labels,
                                         predictions,
                                         sample_weight=weights)
        # Neighbouring predictions differ by one, so a unit shift of the cut
        # in each direction must bracket the target efficiency.
        lower = numpy.sum(weights[predictions > cut + 1]) / total_weight
        upper = numpy.sum(weights[predictions > cut - 1]) / total_weight
        assert lower < eff < upper
Ejemplo n.º 6
0
def compute_theil_on_groups(y_pred, mask, groups_indices, target_efficiencies, sample_weight=None):
    """Average the ``theil`` value of per-group efficiencies over cuts.

    Cuts on the predictions are obtained from ``compute_cut_for_efficiency``
    for ``target_efficiencies``.  For every cut the per-group efficiencies
    are passed to ``theil`` together with the group weights, and the mean of
    those values over the cuts is returned.

    :param y_pred: predictions, normalised with ``column_or_1d``
    :param mask: per-event mask; forwarded to ``compute_cut_for_efficiency`` and
        multiplied into the weights given to ``compute_divided_weight_by_indices``
    :param groups_indices: group description forwarded to the ``*_by_indices`` helpers
    :param target_efficiencies: efficiencies for which cuts are computed
    :param sample_weight: optional per-event weights, processed by ``check_sample_weight``;
        defaults to ``None`` to match ``compute_sde_on_groups``, which hands ``None``
        to the same helper
    :return: sum of the ``theil`` values divided by the number of cuts
    """
    y_pred = column_or_1d(y_pred)
    sample_weight = check_sample_weight(y_pred, sample_weight=sample_weight)
    groups_weights = compute_group_weights_by_indices(groups_indices, sample_weight=sample_weight)
    # Weights of events outside the mask are zeroed by the multiplication when mask is 0/1 or boolean.
    divided_weight = compute_divided_weight_by_indices(groups_indices, sample_weight=sample_weight * mask)
    cuts = compute_cut_for_efficiency(target_efficiencies, mask=mask,
                                      y_pred=y_pred, sample_weight=sample_weight)

    result = 0.
    for cut in cuts:
        groups_efficiencies = compute_group_efficiencies_by_indices(y_pred, groups_indices=groups_indices,
                                                                    cut=cut, divided_weight=divided_weight)
        result += theil(groups_efficiencies, groups_weights)
    return result / len(cuts)
Ejemplo n.º 7
0
def compute_sde_on_groups(y_pred, mask, groups_indices, target_efficiencies, sample_weight=None, power=2.):
    """Combine, over several cuts, the weighted deviation of the group efficiencies.

    The cuts come from ``compute_cut_for_efficiency`` evaluated for ``target_efficiencies``.
    The deviations returned by ``weighted_deviation`` for each cut are summed, divided by
    the number of cuts and raised to ``1 / power``.

    :param y_pred: predictions, normalised with ``column_or_1d``
    :param mask: per-event mask; forwarded to ``compute_cut_for_efficiency`` and
        multiplied into the weights given to ``compute_divided_weight_by_indices``
    :param groups_indices: group description forwarded to the ``*_by_indices`` helpers
    :param target_efficiencies: efficiencies for which cuts are computed
    :param sample_weight: optional per-event weights, processed by ``check_sample_weight``
    :param power: exponent given to ``weighted_deviation``; its inverse is applied at the end
    :return: ``(sum of deviations / number of cuts) ** (1 / power)``
    """
    y_pred = column_or_1d(y_pred)
    sample_weight = check_sample_weight(y_pred, sample_weight=sample_weight)

    weights_of_groups = compute_group_weights_by_indices(groups_indices, sample_weight=sample_weight)
    weight_in_mask = sample_weight * mask
    divided_weight = compute_divided_weight_by_indices(groups_indices, sample_weight=weight_in_mask)

    def deviation_at(cut):
        # Weighted deviation of the group efficiencies measured at a single cut.
        efficiencies = compute_group_efficiencies_by_indices(
            y_pred, groups_indices=groups_indices, cut=cut, divided_weight=divided_weight)
        return weighted_deviation(efficiencies, weights=weights_of_groups, power=power)

    cuts = compute_cut_for_efficiency(
        target_efficiencies, mask=mask, y_pred=y_pred, sample_weight=sample_weight)

    accumulated = 0.
    for cut in cuts:
        accumulated += deviation_at(cut)

    averaged = accumulated / len(cuts)
    return averaged ** (1. / power)
Ejemplo n.º 8
0
def compute_theil_on_bins(y_pred, mask, bin_indices, target_efficiencies, sample_weight=None):
    """Average, over several cuts, the ``theil`` value of the per-bin efficiencies.

    Events not selected by ``mask`` are discarded first.  Cuts are then computed by
    ``compute_cut_for_efficiency`` for ``target_efficiencies``, and for each cut the
    efficiencies from ``compute_bin_efficiencies`` are passed to ``theil`` with the bin weights.

    :param y_pred: predictions, normalised with ``column_or_1d``
    :param mask: index used to select events from ``y_pred``, ``bin_indices`` and the weights
    :param bin_indices: per-event bin index (indexed with ``mask``)
    :param target_efficiencies: efficiencies for which cuts are computed
    :param sample_weight: optional per-event weights, processed by ``check_sample_weight``;
        defaults to ``None`` to match ``compute_sde_on_bins``, which hands ``None``
        to the same helper
    :return: sum of the ``theil`` values divided by the number of cuts
    """
    y_pred = column_or_1d(y_pred)
    sample_weight = check_sample_weight(y_pred, sample_weight=sample_weight)

    # ignoring events from other classes
    y_pred = y_pred[mask]
    bin_indices = bin_indices[mask]
    sample_weight = sample_weight[mask]

    bin_weights = compute_bin_weights(bin_indices=bin_indices, sample_weight=sample_weight)
    # Everything was filtered above, so every remaining event takes part in the cut computation.
    cuts = compute_cut_for_efficiency(target_efficiencies, mask=numpy.ones(len(y_pred), dtype=bool),
                                      y_pred=y_pred, sample_weight=sample_weight)
    result = 0.
    for cut in cuts:
        bin_efficiencies = compute_bin_efficiencies(y_pred, bin_indices=bin_indices,
                                                    cut=cut, sample_weight=sample_weight)
        result += theil(bin_efficiencies, weights=bin_weights)
    return result / len(cuts)
Ejemplo n.º 9
0
def compute_sde_on_bins(y_pred, mask, bin_indices, target_efficiencies, power=2., sample_weight=None):
    """Combine, over several cuts, the weighted deviation of the per-bin efficiencies.

    Events not selected by ``mask`` are discarded first.  Cuts are then computed by
    ``compute_cut_for_efficiency`` for ``target_efficiencies``; the values returned by
    ``weighted_deviation`` for each cut are summed, divided by the number of cuts and
    raised to ``1 / power``.

    :param y_pred: predictions, normalised with ``column_or_1d``
    :param mask: index used to select events from ``y_pred``, ``bin_indices`` and the weights
    :param bin_indices: per-event bin index (indexed with ``mask``)
    :param target_efficiencies: efficiencies for which cuts are computed
    :param power: exponent given to ``weighted_deviation``; its inverse is applied at the end
    :param sample_weight: optional per-event weights, processed by ``check_sample_weight``
    :return: ``(sum of deviations / number of cuts) ** (1 / power)``
    """
    # Same input normalisation as compute_theil_on_bins and the *_on_groups functions,
    # so that a column vector or a plain sequence can be indexed with ``mask`` below.
    y_pred = column_or_1d(y_pred)
    sample_weight = check_sample_weight(y_pred, sample_weight=sample_weight)

    # ignoring events from other classes
    y_pred = y_pred[mask]
    bin_indices = bin_indices[mask]
    sample_weight = sample_weight[mask]

    bin_weights = compute_bin_weights(bin_indices=bin_indices, sample_weight=sample_weight)
    # Everything was filtered above, so every remaining event takes part in the cut computation.
    cuts = compute_cut_for_efficiency(target_efficiencies, mask=numpy.ones(len(y_pred), dtype=bool),
                                      y_pred=y_pred, sample_weight=sample_weight)

    result = 0.
    for cut in cuts:
        bin_efficiencies = compute_bin_efficiencies(y_pred, bin_indices=bin_indices,
                                                    cut=cut, sample_weight=sample_weight)
        result += weighted_deviation(bin_efficiencies, weights=bin_weights, power=power)

    return (result / len(cuts)) ** (1. / power)
Ejemplo n.º 10
0
def compute_theil_on_groups(y_pred, mask, groups_indices, target_efficiencies,
                            sample_weight=None):
    """Average, over several cuts, the ``theil`` value of group efficiencies.

    The cuts come from ``compute_cut_for_efficiency`` evaluated for
    ``target_efficiencies``.  For each cut the efficiencies returned by
    ``compute_group_efficiencies_by_indices`` are passed to ``theil`` with the
    group weights; the values are summed and divided by the number of cuts.

    :param y_pred: predictions, normalised with ``column_or_1d``
    :param mask: per-event mask; forwarded to ``compute_cut_for_efficiency``
        and multiplied into the weights given to
        ``compute_divided_weight_by_indices``
    :param groups_indices: group description forwarded to the
        ``*_by_indices`` helpers
    :param target_efficiencies: efficiencies for which cuts are computed
    :param sample_weight: optional per-event weights, processed by
        ``check_sample_weight``; defaults to ``None`` to match
        ``compute_sde_on_groups``, which hands ``None`` to the same helper
    :return: sum of the ``theil`` values divided by the number of cuts
    """
    y_pred = column_or_1d(y_pred)
    sample_weight = check_sample_weight(y_pred, sample_weight=sample_weight)
    groups_weights = compute_group_weights_by_indices(
        groups_indices, sample_weight=sample_weight)
    divided_weight = compute_divided_weight_by_indices(
        groups_indices, sample_weight=sample_weight * mask)
    cuts = compute_cut_for_efficiency(target_efficiencies,
                                      mask=mask,
                                      y_pred=y_pred,
                                      sample_weight=sample_weight)

    result = 0.
    for cut in cuts:
        groups_efficiencies = compute_group_efficiencies_by_indices(
            y_pred,
            groups_indices=groups_indices,
            cut=cut,
            divided_weight=divided_weight)
        result += theil(groups_efficiencies, groups_weights)
    return result / len(cuts)