def test_compute_cut():
    """Check compute_cut_for_efficiency on unweighted and weighted samples.

    Unweighted case: the fraction of predictions strictly above the returned
    cut must equal the requested efficiency exactly.
    Weighted case: the requested efficiency must lie strictly between the
    weighted fractions obtained by shifting the cut up and down by one.
    """
    rng = RandomState()
    predictions = rng.permutation(100)
    labels = numpy.ones(100)

    for target in [0.1, 0.5, 0.75, 0.99]:
        threshold = compute_cut_for_efficiency(target, labels, predictions)
        passed_fraction = numpy.sum(predictions > threshold) / len(predictions)
        assert passed_fraction == target, 'the cut was set wrongly'

    weights = numpy.array(rng.exponential(size=100))
    total_weight = numpy.sum(weights)
    for target in rng.uniform(size=100):
        threshold = compute_cut_for_efficiency(target, labels, predictions,
                                               sample_weight=weights)
        # Moving the cut by one unit in either direction brackets the target.
        lower = numpy.sum(weights[predictions > threshold + 1]) / total_weight
        upper = numpy.sum(weights[predictions > threshold - 1]) / total_weight
        assert lower < target < upper
def compute_sde_on_groups(y_pred, mask, groups_indices, target_efficiencies, sample_weight=None, power=2.):
    """Average the weighted deviation of per-group efficiencies over several cuts.

    :param y_pred: predictions, passed through column_or_1d
    :param mask: per-event mask; used to choose the cuts and multiplied into
        the weights handed to compute_divided_weight_by_indices
    :param groups_indices: group description forwarded to the ``*_by_indices`` helpers
    :param target_efficiencies: efficiencies for which cuts are computed
    :param sample_weight: optional weights, normalised by check_sample_weight
    :param power: exponent passed to weighted_deviation; the averaged result
        is raised to ``1 / power``
    :return: ``(mean over cuts of weighted_deviation) ** (1 / power)``
    """
    predictions = column_or_1d(y_pred)
    weights = check_sample_weight(predictions, sample_weight=sample_weight)

    group_weights = compute_group_weights_by_indices(groups_indices, sample_weight=weights)
    # Weights multiplied by mask, so events with a zero/False mask carry no weight here.
    divided_weight = compute_divided_weight_by_indices(groups_indices, sample_weight=weights * mask)
    cuts = compute_cut_for_efficiency(target_efficiencies, mask=mask,
                                      y_pred=predictions, sample_weight=weights)

    accumulated = 0.
    for threshold in cuts:
        efficiencies = compute_group_efficiencies_by_indices(
            predictions, groups_indices=groups_indices, cut=threshold,
            divided_weight=divided_weight)
        accumulated += weighted_deviation(efficiencies, weights=group_weights, power=power)

    mean_deviation = accumulated / len(cuts)
    return mean_deviation ** (1. / power)
def compute_sde_on_bins(y_pred, mask, bin_indices, target_efficiencies, power=2., sample_weight=None):
    """Average the weighted deviation of per-bin efficiencies over several cuts.

    Only events selected by ``mask`` take part in the computation.

    :param y_pred: predictions; normalised with column_or_1d, as the sibling
        ``compute_theil_on_bins`` does, so lists and column vectors are
        handled the same way across these functions
    :param mask: index/boolean mask applied to predictions, bins and weights
    :param bin_indices: bin index of each event (indexed with ``mask``)
    :param target_efficiencies: efficiencies for which cuts are computed
    :param power: exponent passed to weighted_deviation; the averaged result
        is raised to ``1 / power``
    :param sample_weight: optional weights, normalised by check_sample_weight
    :return: ``(mean over cuts of weighted_deviation) ** (1 / power)``
    """
    y_pred = column_or_1d(y_pred)
    sample_weight = check_sample_weight(y_pred, sample_weight=sample_weight)

    # ignoring events from other classes
    y_pred = y_pred[mask]
    bin_indices = bin_indices[mask]
    sample_weight = sample_weight[mask]

    bin_weights = compute_bin_weights(bin_indices=bin_indices, sample_weight=sample_weight)
    # Everything left after masking is selected, so the cut search uses an all-true mask.
    cuts = compute_cut_for_efficiency(target_efficiencies,
                                      mask=numpy.ones(len(y_pred), dtype=bool),
                                      y_pred=y_pred, sample_weight=sample_weight)

    result = 0.
    for cut in cuts:
        bin_efficiencies = compute_bin_efficiencies(
            y_pred, bin_indices=bin_indices, cut=cut, sample_weight=sample_weight)
        result += weighted_deviation(bin_efficiencies, weights=bin_weights, power=power)
    return (result / len(cuts)) ** (1. / power)
def compute_theil_on_bins(y_pred, mask, bin_indices, target_efficiencies, sample_weight):
    """Average ``theil`` of per-bin efficiencies over several cuts.

    Only events selected by ``mask`` take part in the computation.

    :param y_pred: predictions, passed through column_or_1d
    :param mask: index/boolean mask applied to predictions, bins and weights
    :param bin_indices: bin index of each event (indexed with ``mask``)
    :param target_efficiencies: efficiencies for which cuts are computed
    :param sample_weight: weights, normalised by check_sample_weight
    :return: mean over cuts of ``theil(bin_efficiencies, weights=bin_weights)``
    """
    predictions = column_or_1d(y_pred)
    weights = check_sample_weight(predictions, sample_weight=sample_weight)

    # Drop the events that the mask does not select.
    predictions = predictions[mask]
    selected_bins = bin_indices[mask]
    weights = weights[mask]

    bin_weights = compute_bin_weights(bin_indices=selected_bins, sample_weight=weights)
    # After masking, every remaining event counts, hence the all-true mask.
    keep_all = numpy.ones(len(predictions), dtype=bool)
    cuts = compute_cut_for_efficiency(target_efficiencies, mask=keep_all,
                                      y_pred=predictions, sample_weight=weights)

    accumulated = 0.
    for threshold in cuts:
        efficiencies = compute_bin_efficiencies(
            predictions, bin_indices=selected_bins, cut=threshold, sample_weight=weights)
        accumulated += theil(efficiencies, weights=bin_weights)
    return accumulated / len(cuts)
def test_compute_cut():
    """Exercise compute_cut_for_efficiency with and without sample weights.

    First the unweighted cut is checked to reproduce the requested efficiency
    exactly; then, for random weights and random target efficiencies, the
    target is checked to fall strictly between the weighted fractions
    obtained with the cut shifted by +1 and by -1.
    """
    generator = RandomState()
    predictions = generator.permutation(100)
    labels = numpy.ones(100)
    n_events = len(predictions)

    for efficiency in [0.1, 0.5, 0.75, 0.99]:
        cut_value = compute_cut_for_efficiency(efficiency, labels, predictions)
        n_passed = numpy.sum(predictions > cut_value)
        assert n_passed / n_events == efficiency, 'the cut was set wrongly'

    weights = numpy.array(generator.exponential(size=100))
    weight_sum = numpy.sum(weights)
    for efficiency in generator.uniform(size=100):
        cut_value = compute_cut_for_efficiency(
            efficiency, labels, predictions, sample_weight=weights)
        above_raised_cut = predictions > cut_value + 1
        above_lowered_cut = predictions > cut_value - 1
        lower = numpy.sum(weights[above_raised_cut]) / weight_sum
        upper = numpy.sum(weights[above_lowered_cut]) / weight_sum
        assert lower < efficiency < upper
def compute_theil_on_groups(y_pred, mask, groups_indices, target_efficiencies, sample_weight):
    """Average ``theil`` of per-group efficiencies over several cuts.

    :param y_pred: predictions, passed through column_or_1d
    :param mask: per-event mask; used to choose the cuts and multiplied into
        the weights handed to compute_divided_weight_by_indices
    :param groups_indices: group description forwarded to the ``*_by_indices`` helpers
    :param target_efficiencies: efficiencies for which cuts are computed
    :param sample_weight: weights, normalised by check_sample_weight
    :return: mean over cuts of ``theil(groups_efficiencies, groups_weights)``
    """
    predictions = column_or_1d(y_pred)
    weights = check_sample_weight(predictions, sample_weight=sample_weight)

    groups_weights = compute_group_weights_by_indices(groups_indices, sample_weight=weights)
    # Weights multiplied by mask, so events with a zero/False mask carry no weight here.
    divided_weight = compute_divided_weight_by_indices(groups_indices, sample_weight=weights * mask)
    cuts = compute_cut_for_efficiency(target_efficiencies, mask=mask,
                                      y_pred=predictions, sample_weight=weights)

    accumulated = 0.
    for threshold in cuts:
        efficiencies = compute_group_efficiencies_by_indices(
            predictions, groups_indices=groups_indices, cut=threshold,
            divided_weight=divided_weight)
        accumulated += theil(efficiencies, groups_weights)
    return accumulated / len(cuts)
def compute_sde_on_groups(y_pred, mask, groups_indices, target_efficiencies, sample_weight=None, power=2.):
    """Combine per-group efficiency deviations over a set of cuts.

    For each cut derived from ``target_efficiencies`` the group efficiencies
    are computed and fed to weighted_deviation; the values are averaged over
    the cuts and the average is raised to ``1 / power``.

    :param y_pred: predictions, passed through column_or_1d
    :param mask: per-event mask; used for the cut search and multiplied into
        the weights given to compute_divided_weight_by_indices
    :param groups_indices: group description forwarded to the ``*_by_indices`` helpers
    :param target_efficiencies: efficiencies for which cuts are computed
    :param sample_weight: optional weights, normalised by check_sample_weight
    :param power: exponent used by weighted_deviation and in the final root
    :return: ``(mean over cuts of weighted_deviation) ** (1 / power)``
    """
    preds = column_or_1d(y_pred)
    event_weights = check_sample_weight(preds, sample_weight=sample_weight)

    per_group_weight = compute_group_weights_by_indices(
        groups_indices, sample_weight=event_weights)
    divided_weight = compute_divided_weight_by_indices(
        groups_indices, sample_weight=event_weights * mask)
    cuts = compute_cut_for_efficiency(
        target_efficiencies, mask=mask, y_pred=preds, sample_weight=event_weights)

    deviation_sum = 0.
    for cut in cuts:
        per_group_efficiency = compute_group_efficiencies_by_indices(
            preds, groups_indices=groups_indices, cut=cut, divided_weight=divided_weight)
        deviation_sum += weighted_deviation(
            per_group_efficiency, weights=per_group_weight, power=power)

    averaged = deviation_sum / len(cuts)
    return averaged ** (1. / power)
def compute_theil_on_bins(y_pred, mask, bin_indices, target_efficiencies, sample_weight):
    """Mean of ``theil`` over cuts, computed on per-bin efficiencies.

    Predictions, bin indices and weights are all restricted to ``mask``
    before anything else is computed.

    :param y_pred: predictions, passed through column_or_1d
    :param mask: index/boolean mask selecting the events to use
    :param bin_indices: bin index of each event (indexed with ``mask``)
    :param target_efficiencies: efficiencies for which cuts are computed
    :param sample_weight: weights, normalised by check_sample_weight
    :return: mean over cuts of ``theil(bin_efficiencies, weights=bin_weights)``
    """
    preds = column_or_1d(y_pred)
    event_weights = check_sample_weight(preds, sample_weight=sample_weight)

    # Restrict every per-event array to the masked events.
    preds = preds[mask]
    bins = bin_indices[mask]
    event_weights = event_weights[mask]

    per_bin_weight = compute_bin_weights(bin_indices=bins, sample_weight=event_weights)
    cuts = compute_cut_for_efficiency(
        target_efficiencies,
        mask=numpy.ones(len(preds), dtype=bool),  # all remaining events are selected
        y_pred=preds,
        sample_weight=event_weights)

    theil_sum = 0.
    for cut in cuts:
        per_bin_efficiency = compute_bin_efficiencies(
            preds, bin_indices=bins, cut=cut, sample_weight=event_weights)
        theil_sum += theil(per_bin_efficiency, weights=per_bin_weight)
    return theil_sum / len(cuts)
def compute_sde_on_bins(y_pred, mask, bin_indices, target_efficiencies, power=2., sample_weight=None):
    """Combine per-bin efficiency deviations over a set of cuts.

    Predictions, bin indices and weights are restricted to ``mask``; for each
    cut derived from ``target_efficiencies`` the bin efficiencies are fed to
    weighted_deviation, the values are averaged over the cuts, and the
    average is raised to ``1 / power``.

    :param y_pred: predictions; normalised with column_or_1d so this function
        accepts the same input forms as ``compute_theil_on_bins`` and the
        ``*_on_groups`` functions
    :param mask: index/boolean mask selecting the events to use
    :param bin_indices: bin index of each event (indexed with ``mask``)
    :param target_efficiencies: efficiencies for which cuts are computed
    :param power: exponent used by weighted_deviation and in the final root
    :param sample_weight: optional weights, normalised by check_sample_weight
    :return: ``(mean over cuts of weighted_deviation) ** (1 / power)``
    """
    y_pred = column_or_1d(y_pred)
    sample_weight = check_sample_weight(y_pred, sample_weight=sample_weight)

    # ignoring events from other classes
    y_pred = y_pred[mask]
    bin_indices = bin_indices[mask]
    sample_weight = sample_weight[mask]

    bin_weights = compute_bin_weights(bin_indices=bin_indices, sample_weight=sample_weight)
    # All events left after masking are selected, hence the all-true mask.
    cuts = compute_cut_for_efficiency(target_efficiencies,
                                      mask=numpy.ones(len(y_pred), dtype=bool),
                                      y_pred=y_pred, sample_weight=sample_weight)

    result = 0.
    for cut in cuts:
        bin_efficiencies = compute_bin_efficiencies(
            y_pred, bin_indices=bin_indices, cut=cut, sample_weight=sample_weight)
        result += weighted_deviation(bin_efficiencies, weights=bin_weights, power=power)
    return (result / len(cuts)) ** (1. / power)
def compute_theil_on_groups(y_pred, mask, groups_indices, target_efficiencies, sample_weight):
    """Mean of ``theil`` over cuts, computed on per-group efficiencies.

    :param y_pred: predictions, passed through column_or_1d
    :param mask: per-event mask; used for the cut search and multiplied into
        the weights given to compute_divided_weight_by_indices
    :param groups_indices: group description forwarded to the ``*_by_indices`` helpers
    :param target_efficiencies: efficiencies for which cuts are computed
    :param sample_weight: weights, normalised by check_sample_weight
    :return: mean over cuts of ``theil(groups_efficiencies, groups_weights)``
    """
    preds = column_or_1d(y_pred)
    event_weights = check_sample_weight(preds, sample_weight=sample_weight)

    per_group_weight = compute_group_weights_by_indices(
        groups_indices, sample_weight=event_weights)
    divided_weight = compute_divided_weight_by_indices(
        groups_indices, sample_weight=event_weights * mask)
    cuts = compute_cut_for_efficiency(
        target_efficiencies, mask=mask, y_pred=preds, sample_weight=event_weights)

    theil_sum = 0.
    for cut in cuts:
        per_group_efficiency = compute_group_efficiencies_by_indices(
            preds, groups_indices=groups_indices, cut=cut, divided_weight=divided_weight)
        theil_sum += theil(per_group_efficiency, per_group_weight)
    return theil_sum / len(cuts)