def compute_theil_on_bins(y_pred, mask, bin_indices, target_efficiencies, sample_weight=None):
    """Average the Theil index of per-bin efficiencies over a set of cuts.

    Events not selected by ``mask`` are dropped first. For every cut returned
    by ``compute_cut_for_efficiency`` the per-bin efficiencies are computed
    and passed to ``theil`` together with the bin weights; the mean of these
    values over all cuts is returned.

    :param y_pred: predictions, flattened with ``column_or_1d``
    :param mask: selector applied to ``y_pred``, ``bin_indices`` and the
        weights (presumably a boolean array marking events of one class)
    :param bin_indices: bin index of every event, indexed with ``mask``
    :param target_efficiencies: forwarded to ``compute_cut_for_efficiency``
    :param sample_weight: per-event weights; ``None`` is handed to
        ``check_sample_weight``, the same way ``compute_sde_on_bins`` does
    :return: mean over cuts of ``theil(bin_efficiencies, weights=bin_weights)``
    :raises ZeroDivisionError: if no cuts are produced
    """
    y_pred = column_or_1d(y_pred)
    sample_weight = check_sample_weight(y_pred, sample_weight=sample_weight)

    # ignoring events from other classes
    y_pred = y_pred[mask]
    bin_indices = bin_indices[mask]
    sample_weight = sample_weight[mask]

    bin_weights = compute_bin_weights(bin_indices=bin_indices, sample_weight=sample_weight)
    # The arrays are already restricted to `mask`, so every remaining event
    # takes part in the cut search.
    cuts = compute_cut_for_efficiency(target_efficiencies,
                                      mask=numpy.ones(len(y_pred), dtype=bool),
                                      y_pred=y_pred,
                                      sample_weight=sample_weight)

    result = 0.
    for cut in cuts:
        bin_efficiencies = compute_bin_efficiencies(y_pred, bin_indices=bin_indices, cut=cut,
                                                    sample_weight=sample_weight)
        result += theil(bin_efficiencies, weights=bin_weights)
    return result / len(cuts)
def compute_sde_on_bins(y_pred, mask, bin_indices, target_efficiencies, power=2., sample_weight=None):
    """Combine the weighted deviation of per-bin efficiencies over a set of cuts.

    Events not selected by ``mask`` are dropped first. For every cut returned
    by ``compute_cut_for_efficiency`` the per-bin efficiencies are computed
    and passed to ``weighted_deviation``; the mean of these values over all
    cuts is raised to ``1 / power``.

    :param y_pred: predictions, flattened with ``column_or_1d`` (as in
        ``compute_theil_on_bins``)
    :param mask: selector applied to ``y_pred``, ``bin_indices`` and the
        weights (presumably a boolean array marking events of one class)
    :param bin_indices: bin index of every event, indexed with ``mask``
    :param target_efficiencies: forwarded to ``compute_cut_for_efficiency``
    :param power: exponent passed to ``weighted_deviation`` and inverted for
        the final root
    :param sample_weight: per-event weights or ``None``, normalised by
        ``check_sample_weight``
    :return: ``(mean over cuts of weighted_deviation) ** (1 / power)``
    :raises ZeroDivisionError: if no cuts are produced
    """
    # Same input normalisation as compute_theil_on_bins, so both metrics
    # accept the same kinds of `y_pred`.
    y_pred = column_or_1d(y_pred)
    sample_weight = check_sample_weight(y_pred, sample_weight=sample_weight)

    # ignoring events from other classes
    y_pred = y_pred[mask]
    bin_indices = bin_indices[mask]
    sample_weight = sample_weight[mask]

    bin_weights = compute_bin_weights(bin_indices=bin_indices, sample_weight=sample_weight)
    # The arrays are already restricted to `mask`, so every remaining event
    # takes part in the cut search.
    cuts = compute_cut_for_efficiency(target_efficiencies,
                                      mask=numpy.ones(len(y_pred), dtype=bool),
                                      y_pred=y_pred,
                                      sample_weight=sample_weight)

    result = 0.
    for cut in cuts:
        bin_efficiencies = compute_bin_efficiencies(y_pred, bin_indices=bin_indices, cut=cut,
                                                    sample_weight=sample_weight)
        result += weighted_deviation(bin_efficiencies, weights=bin_weights, power=power)
    return (result / len(cuts)) ** (1. / power)
def compute_theil_on_bins(y_pred, mask, bin_indices, target_efficiencies, sample_weight=None):
    """Average the Theil index of per-bin efficiencies over a set of cuts.

    Events not selected by ``mask`` are dropped first. For every cut returned
    by ``compute_cut_for_efficiency`` the per-bin efficiencies are computed
    and passed to ``theil`` together with the bin weights; the mean of these
    values over all cuts is returned.

    :param y_pred: predictions, flattened with ``column_or_1d``
    :param mask: selector applied to ``y_pred``, ``bin_indices`` and the
        weights (presumably a boolean array marking events of one class)
    :param bin_indices: bin index of every event, indexed with ``mask``
    :param target_efficiencies: forwarded to ``compute_cut_for_efficiency``
    :param sample_weight: per-event weights; ``None`` is handed to
        ``check_sample_weight``, the same way ``compute_sde_on_bins`` does
    :return: mean over cuts of ``theil(bin_efficiencies, weights=bin_weights)``
    :raises ZeroDivisionError: if no cuts are produced
    """
    y_pred = column_or_1d(y_pred)
    sample_weight = check_sample_weight(y_pred, sample_weight=sample_weight)

    # ignoring events from other classes
    y_pred = y_pred[mask]
    bin_indices = bin_indices[mask]
    sample_weight = sample_weight[mask]

    bin_weights = compute_bin_weights(bin_indices=bin_indices, sample_weight=sample_weight)
    # The arrays are already restricted to `mask`, so every remaining event
    # takes part in the cut search.
    cuts = compute_cut_for_efficiency(target_efficiencies,
                                      mask=numpy.ones(len(y_pred), dtype=bool),
                                      y_pred=y_pred,
                                      sample_weight=sample_weight)

    result = 0.
    for cut in cuts:
        bin_efficiencies = compute_bin_efficiencies(y_pred, bin_indices=bin_indices, cut=cut,
                                                    sample_weight=sample_weight)
        result += theil(bin_efficiencies, weights=bin_weights)
    return result / len(cuts)
def compute_sde_on_bins(y_pred, mask, bin_indices, target_efficiencies, power=2., sample_weight=None):
    """Combine the weighted deviation of per-bin efficiencies over a set of cuts.

    Events not selected by ``mask`` are dropped first. For every cut returned
    by ``compute_cut_for_efficiency`` the per-bin efficiencies are computed
    and passed to ``weighted_deviation``; the mean of these values over all
    cuts is raised to ``1 / power``.

    :param y_pred: predictions, flattened with ``column_or_1d`` (as in
        ``compute_theil_on_bins``)
    :param mask: selector applied to ``y_pred``, ``bin_indices`` and the
        weights (presumably a boolean array marking events of one class)
    :param bin_indices: bin index of every event, indexed with ``mask``
    :param target_efficiencies: forwarded to ``compute_cut_for_efficiency``
    :param power: exponent passed to ``weighted_deviation`` and inverted for
        the final root
    :param sample_weight: per-event weights or ``None``, normalised by
        ``check_sample_weight``
    :return: ``(mean over cuts of weighted_deviation) ** (1 / power)``
    :raises ZeroDivisionError: if no cuts are produced
    """
    # Same input normalisation as compute_theil_on_bins, so both metrics
    # accept the same kinds of `y_pred`.
    y_pred = column_or_1d(y_pred)
    sample_weight = check_sample_weight(y_pred, sample_weight=sample_weight)

    # ignoring events from other classes
    y_pred = y_pred[mask]
    bin_indices = bin_indices[mask]
    sample_weight = sample_weight[mask]

    bin_weights = compute_bin_weights(bin_indices=bin_indices, sample_weight=sample_weight)
    # The arrays are already restricted to `mask`, so every remaining event
    # takes part in the cut search.
    cuts = compute_cut_for_efficiency(target_efficiencies,
                                      mask=numpy.ones(len(y_pred), dtype=bool),
                                      y_pred=y_pred,
                                      sample_weight=sample_weight)

    result = 0.
    for cut in cuts:
        bin_efficiencies = compute_bin_efficiencies(y_pred, bin_indices=bin_indices, cut=cut,
                                                    sample_weight=sample_weight)
        result += weighted_deviation(bin_efficiencies, weights=bin_weights, power=power)
    return (result / len(cuts)) ** (1. / power)
def bin_based_cvm(y_pred, sample_weight, bin_indices):
    """Bin-weighted Cramer-von Mises similarity between each bin and the whole sample.

    The overall distribution of ``y_pred`` is prepared once with
    ``prepare_distribution``. Each bin with positive weight contributes
    ``bin_weight * _cvm_2samp_fast(...)``, comparing the overall distribution
    with the predictions falling into that bin.

    :param y_pred: predictions, indexed with a boolean per-bin mask
    :param sample_weight: per-event weights, same length as ``y_pred``
    :param bin_indices: bin index of every event, same length as ``y_pred``
    :return: sum of the weighted per-bin contributions
    """
    assert len(y_pred) == len(sample_weight) == len(bin_indices)
    bin_weights = compute_bin_weights(bin_indices=bin_indices, sample_weight=sample_weight)

    global_data, global_weight, global_F = prepare_distribution(y_pred, weights=sample_weight)

    total = 0.
    for bin_index, weight_of_bin in enumerate(bin_weights):
        # Bins with non-positive weight contribute nothing.
        if weight_of_bin > 0:
            in_bin = bin_indices == bin_index
            contribution = _cvm_2samp_fast(global_data, y_pred[in_bin],
                                           global_weight, sample_weight[in_bin], global_F)
            total += weight_of_bin * contribution
    return total
def bin_based_ks(y_pred, mask, sample_weight, bin_indices):
    """Kolmogorov-Smirnov flatness on bins.

    All inputs are first restricted to the events selected by ``mask``. The
    restricted distribution is prepared once with ``prepare_distribution``.
    Each bin with positive weight contributes
    ``bin_weight * _ks_2samp_fast(...)``, comparing the restricted
    distribution with the predictions falling into that bin.

    :param y_pred: predictions
    :param mask: selector applied to ``y_pred``, ``sample_weight`` and
        ``bin_indices``; must have the same length as them
    :param sample_weight: per-event weights
    :param bin_indices: bin index of every event
    :return: sum of the weighted per-bin contributions
    """
    assert len(y_pred) == len(sample_weight) == len(bin_indices) == len(mask)
    y_pred = y_pred[mask]
    sample_weight = sample_weight[mask]
    bin_indices = bin_indices[mask]

    bin_weights = compute_bin_weights(bin_indices=bin_indices, sample_weight=sample_weight)
    prepared_data, prepared_weight, prep_F = prepare_distribution(y_pred, weights=sample_weight)

    result = 0.
    for bin_index, bin_weight in enumerate(bin_weights):
        # Bins with non-positive weight contribute nothing.
        if bin_weight <= 0:
            continue
        # Build the membership mask once and reuse it for data and weights.
        bin_mask = bin_indices == bin_index
        local_distribution = y_pred[bin_mask]
        local_weights = sample_weight[bin_mask]
        result += bin_weight * _ks_2samp_fast(prepared_data, local_distribution,
                                              prepared_weight, local_weights, prep_F)
    return result
def bin_based_cvm(y_pred, sample_weight, bin_indices):
    """Bin-weighted Cramer-von Mises similarity between each bin and the whole sample.

    The overall distribution of ``y_pred`` is prepared once with
    ``prepare_distribution``. Each bin with positive weight contributes
    ``bin_weight * _cvm_2samp_fast(...)``, comparing the overall distribution
    with the predictions falling into that bin.

    :param y_pred: predictions, indexed with a boolean per-bin mask
    :param sample_weight: per-event weights, same length as ``y_pred``
    :param bin_indices: bin index of every event, same length as ``y_pred``
    :return: sum of the weighted per-bin contributions
    """
    assert len(y_pred) == len(sample_weight) == len(bin_indices)
    bin_weights = compute_bin_weights(bin_indices=bin_indices, sample_weight=sample_weight)

    global_data, global_weight, global_F = prepare_distribution(y_pred, weights=sample_weight)

    total = 0.
    for bin_index, weight_of_bin in enumerate(bin_weights):
        # Bins with non-positive weight contribute nothing.
        if weight_of_bin > 0:
            in_bin = bin_indices == bin_index
            contribution = _cvm_2samp_fast(global_data, y_pred[in_bin],
                                           global_weight, sample_weight[in_bin], global_F)
            total += weight_of_bin * contribution
    return total
def bin_based_ks(y_pred, mask, sample_weight, bin_indices):
    """Kolmogorov-Smirnov flatness on bins.

    All inputs are first restricted to the events selected by ``mask``. The
    restricted distribution is prepared once with ``prepare_distribution``.
    Each bin with positive weight contributes
    ``bin_weight * _ks_2samp_fast(...)``, comparing the restricted
    distribution with the predictions falling into that bin.

    :param y_pred: predictions
    :param mask: selector applied to ``y_pred``, ``sample_weight`` and
        ``bin_indices``; must have the same length as them
    :param sample_weight: per-event weights
    :param bin_indices: bin index of every event
    :return: sum of the weighted per-bin contributions
    """
    assert len(y_pred) == len(sample_weight) == len(bin_indices) == len(mask)
    y_pred = y_pred[mask]
    sample_weight = sample_weight[mask]
    bin_indices = bin_indices[mask]

    bin_weights = compute_bin_weights(bin_indices=bin_indices, sample_weight=sample_weight)
    prepared_data, prepared_weight, prep_F = prepare_distribution(y_pred, weights=sample_weight)

    result = 0.
    for bin_index, bin_weight in enumerate(bin_weights):
        # Bins with non-positive weight contribute nothing.
        if bin_weight <= 0:
            continue
        # Build the membership mask once and reuse it for data and weights.
        bin_mask = bin_indices == bin_index
        local_distribution = y_pred[bin_mask]
        local_weights = sample_weight[bin_mask]
        result += bin_weight * _ks_2samp_fast(prepared_data, local_distribution,
                                              prepared_weight, local_weights, prep_F)
    return result