def test_ks2samp_fast(size=1000):
    """Check that the KS helpers agree with each other and with `ks_2samp`.

    A uniform sample of `size` points is drawn; the second sample is the
    subset of those points whose independent uniform draw exceeds 0.5.
    The statistic returned by `ks_2samp` is compared with `_ks_2samp_fast`
    (fed with the output of `prepare_distribution`) and with
    `ks_2samp_weighted` called with constant, non-unit weights.

    :param size: number of points in the first sample.
    """
    y1 = RandomState().uniform(size=size)
    y2 = y1[RandomState().uniform(size=size) > 0.5]
    a = ks_2samp(y1, y2)[0]
    # `prepare_distribution` is the spelling used by every other caller in this file.
    prep_data, prep_weights, prep_F = prepare_distribution(y1, numpy.ones(len(y1)))
    # The prepared CDF is passed positionally (fifth argument), the same way
    # groups_based_ks and bin_based_ks call the helper, so the call does not
    # depend on the keyword name of that parameter.
    b = _ks_2samp_fast(prep_data, y2, prep_weights, numpy.ones(len(y2)), prep_F)
    c = _ks_2samp_fast(prep_data, y2, prep_weights, numpy.ones(len(y2)), prep_F)
    # Constant weights of different scale for the two samples; the last
    # assertion expects the same statistic as with unit weights.
    d = ks_2samp_weighted(y1, y2, numpy.ones(len(y1)) / 3, numpy.ones(len(y2)) / 4)
    # Loose tolerance against the reference, tight (default) between own helpers.
    assert numpy.allclose(a, b, rtol=1e-2, atol=1e-3)
    assert numpy.allclose(b, c)
    assert numpy.allclose(b, d)
    print('ks2samp is ok')
def test_ks2samp_fast(size=1000):
    """Compare the KS statistic implementations on random uniform data.

    The first sample holds `size` uniform draws; the second one keeps the
    elements of the first whose independent uniform draw is above 0.5.
    Three values are checked against each other: the first element of the
    `ks_2samp` result, the output of `_ks_2samp_fast` on the prepared
    distribution (evaluated twice) and the output of `ks_2samp_weighted`
    with constant weights of 1/3 and 1/4.

    :param size: number of points in the first sample.
    """
    first_sample = RandomState().uniform(size=size)
    keep = RandomState().uniform(size=size) > 0.5
    second_sample = first_sample[keep]

    reference_ks = ks_2samp(first_sample, second_sample)[0]

    prepared = prepare_distribution(first_sample, numpy.ones(len(first_sample)))
    prep_data, prep_weights, prep_cdf = prepared

    # A new weights array is created for each call, as in the original code.
    fast_ks = _ks_2samp_fast(
        prep_data, second_sample, prep_weights,
        numpy.ones(len(second_sample)), cdf1=prep_cdf)
    fast_ks_repeat = _ks_2samp_fast(
        prep_data, second_sample, prep_weights,
        numpy.ones(len(second_sample)), cdf1=prep_cdf)

    weighted_ks = ks_2samp_weighted(
        first_sample, second_sample,
        numpy.ones(len(first_sample)) / 3,
        numpy.ones(len(second_sample)) / 4)

    assert numpy.allclose(reference_ks, fast_ks, rtol=1e-2, atol=1e-3)
    assert numpy.allclose(fast_ks, fast_ks_repeat)
    assert numpy.allclose(fast_ks, weighted_ks)
    print('ks2samp is ok')
def groups_based_ks(y_pred, mask, sample_weight, groups_indices):
    """Kolmogorov-Smirnov flatness on groups.

    The distribution of `y_pred[mask]` (weighted by `sample_weight[mask]`)
    is prepared once; every group is then compared with it through
    `_ks_2samp_fast` and the values are summed with the group weights
    returned by `compute_group_weights_by_indices`.

    :param y_pred: predictions, indexable by `mask` and by each group's indices.
    :param mask: selector of the reference events, same length as `y_pred`.
    :param sample_weight: weights, same length as `y_pred`.
    :param groups_indices: iterable with the indices of each group.
    :return: sum over groups of group weight times the KS value.
    """
    assert len(y_pred) == len(sample_weight) == len(mask)

    weights_of_groups = compute_group_weights_by_indices(groups_indices, sample_weight=sample_weight)
    ref_data, ref_weights, ref_cdf = prepare_distribution(y_pred[mask], weights=sample_weight[mask])

    total = 0.
    for weight, indices in zip(weights_of_groups, groups_indices):
        group_ks = _ks_2samp_fast(ref_data, y_pred[indices], ref_weights, sample_weight[indices], ref_cdf)
        total += weight * group_ks
    return total
def groups_based_ks(y_pred, mask, sample_weight, groups_indices):
    """Kolmogorov-Smirnov flatness on groups.

    Accumulates, over all groups, the group weight multiplied by the value
    `_ks_2samp_fast` returns for the group's predictions against the
    prepared distribution of the masked predictions.

    :param y_pred: predictions, indexable by `mask` and by each group's indices.
    :param mask: selector of the reference events, same length as `y_pred`.
    :param sample_weight: weights, same length as `y_pred`.
    :param groups_indices: iterable with the indices of each group.
    :return: the accumulated weighted value.
    """
    assert len(y_pred) == len(sample_weight) == len(mask)

    per_group_weight = compute_group_weights_by_indices(
        groups_indices, sample_weight=sample_weight)

    # Reference distribution: the masked predictions with their weights.
    prepared = prepare_distribution(y_pred[mask], weights=sample_weight[mask])
    global_data, global_weights, global_cdf = prepared

    flatness = 0.
    for share, members in zip(per_group_weight, groups_indices):
        member_pred = y_pred[members]
        member_weights = sample_weight[members]
        flatness += share * _ks_2samp_fast(
            global_data, member_pred, global_weights, member_weights, global_cdf)
    return flatness
def bin_based_ks(y_pred, mask, sample_weight, bin_indices):
    """Kolmogorov-Smirnov flatness on bins.

    All inputs are first restricted to `mask`.  The restricted predictions
    form the reference distribution; each bin with a positive weight (as
    returned by `compute_bin_weights`) is compared with it through
    `_ks_2samp_fast`, and the values are summed with the bin weights.

    :param y_pred: predictions, indexable by `mask`.
    :param mask: selector of the events to use, same length as `y_pred`.
    :param sample_weight: weights, same length as `y_pred`.
    :param bin_indices: bin number of each event, same length as `y_pred`.
    :return: sum over bins of bin weight times the KS value.
    """
    assert len(y_pred) == len(sample_weight) == len(bin_indices) == len(mask)

    masked_pred = y_pred[mask]
    masked_weight = sample_weight[mask]
    masked_bins = bin_indices[mask]

    weights_of_bins = compute_bin_weights(bin_indices=masked_bins, sample_weight=masked_weight)
    ref_data, ref_weights, ref_cdf = prepare_distribution(masked_pred, weights=masked_weight)

    total = 0.
    for bin_id, weight in enumerate(weights_of_bins):
        if weight <= 0:
            # Bins without positive weight do not contribute.
            continue
        in_bin = masked_bins == bin_id
        bin_ks = _ks_2samp_fast(ref_data, masked_pred[in_bin], ref_weights, masked_weight[in_bin], ref_cdf)
        total += weight * bin_ks
    return total
def bin_based_ks(y_pred, mask, sample_weight, bin_indices):
    """Kolmogorov-Smirnov flatness on bins.

    After applying `mask` to every input, accumulates over the bins the bin
    weight multiplied by the value `_ks_2samp_fast` returns for the bin's
    predictions against the prepared distribution of all masked predictions.
    Bins whose weight is `<= 0` are skipped.

    :param y_pred: predictions, indexable by `mask`.
    :param mask: selector of the events to use, same length as `y_pred`.
    :param sample_weight: weights, same length as `y_pred`.
    :param bin_indices: bin number of each event, same length as `y_pred`.
    :return: the accumulated weighted value.
    """
    assert len(y_pred) == len(sample_weight) == len(bin_indices) == len(mask)

    # Only the masked events take part in everything below.
    selected_pred = y_pred[mask]
    selected_weight = sample_weight[mask]
    selected_bins = bin_indices[mask]

    per_bin_weight = compute_bin_weights(
        bin_indices=selected_bins, sample_weight=selected_weight)
    prepared = prepare_distribution(selected_pred, weights=selected_weight)
    global_data, global_weights, global_cdf = prepared

    flatness = 0.
    for bin_number, share in enumerate(per_bin_weight):
        if share <= 0:
            continue
        members = selected_bins == bin_number
        member_pred = selected_pred[members]
        member_weights = selected_weight[members]
        flatness += share * _ks_2samp_fast(
            global_data, member_pred, global_weights, member_weights, global_cdf)
    return flatness