def test_compare_sde_computations(n_samples=1000, n_bins=10):
    """
    Checks that the bin-based and the group-based SDE computations
    give matching results on the same generated binned dataset.
    """
    labels, predictions, sample_weights, bin_ids, group_ids = generate_binned_dataset(
        n_samples=n_samples, n_bins=n_bins)

    # Arguments passed identically to both implementations.
    shared_kwargs = dict(
        target_efficiencies=RandomState().uniform(size=3),
        sample_weight=sample_weights,
    )

    sde_from_bins = compute_sde_on_bins(
        predictions[:, 1], mask=(labels == 1), bin_indices=bin_ids, **shared_kwargs)
    sde_from_groups = compute_sde_on_groups(
        predictions[:, 1], mask=(labels == 1), groups_indices=group_ids, **shared_kwargs)

    assert numpy.allclose(sde_from_bins, sde_from_groups)
def test_cvm_sde_limit(size=2000):
    """
    Checks that in the limit (a dense grid of target efficiencies)
    the bin-based CvM value coincides with the MSE, i.e. the squared SDE on bins.
    """
    target_efficiencies = numpy.linspace(0, 1, 2000)

    # Random predictions, boolean labels, weights and bin assignments.
    predictions = random.uniform(size=size)
    labels = random.uniform(size=size) > 0.5
    weights = random.exponential(size=size)
    bins = random.randint(0, 10, size=size)

    # Shift predictions by a random multiple of the bin index.
    predictions += bins * random.uniform()

    selected = labels == 1

    cvm_value = bin_based_cvm(
        predictions[selected],
        sample_weight=weights[selected],
        bin_indices=bins[selected],
    )
    sde_value = compute_sde_on_bins(
        predictions,
        mask=selected,
        bin_indices=bins,
        target_efficiencies=target_efficiencies,
        sample_weight=weights,
    )

    assert numpy.allclose(cvm_value, sde_value ** 2, atol=1e-3, rtol=1e-2)