def test_fast_cvm(n_samples=1000):
    """Check that the fast CvM routine agrees with the plain one.

    A random weighted sample is generated, and a random subset of it
    (roughly half of the elements) is used as the second sample. The
    value returned by ``cvm_2samp`` on the raw inputs is compared with
    the value returned by ``_cvm_2samp_fast`` on the output of
    ``prepare_distribution``.

    :param n_samples: number of elements in the first sample.
    """
    random = RandomState()
    data1 = random.uniform(size=n_samples)
    weights1 = random.uniform(size=n_samples)

    # The second sample is a random subset of the first one.
    mask = random.uniform(size=n_samples) > 0.5
    data2 = data1[mask]
    weights2 = weights1[mask]

    expected = cvm_2samp(data1, data2, weights1, weights2)

    # Helper name spelled the same way as at every other call site in
    # this file; the CDF is passed positionally (fifth argument), as the
    # other callers do, so the call does not depend on the keyword name.
    prepared_data1, prepared_weights1, cdf1 = prepare_distribution(
        data1, weights1)
    actual = _cvm_2samp_fast(
        prepared_data1, data2, prepared_weights1, weights2, cdf1)

    assert numpy.allclose(expected, actual)
def test_fast_cvm(n_samples=1000):
    """Compare ``_cvm_2samp_fast`` against ``cvm_2samp`` on random data.

    The first sample consists of ``n_samples`` uniform values with
    uniform weights; the second sample is a random subset of the first.
    Both routines are expected to return close values.
    """
    random = RandomState()

    # Full sample, its weights and the subset selector (drawn in this order).
    data1 = random.uniform(size=n_samples)
    weights1 = random.uniform(size=n_samples)
    mask = random.uniform(size=n_samples) > 0.5

    # Sub-sample used as the second distribution.
    data2, weights2 = data1[mask], weights1[mask]

    reference_value = cvm_2samp(data1, data2, weights1, weights2)

    prepared = prepare_distribution(data1, weights1)
    prepared_data1, prepared_weights1, F1 = prepared
    fast_value = _cvm_2samp_fast(
        prepared_data1,
        data2,
        prepared_weights1,
        weights2,
        cdf1=F1,
    )

    assert numpy.allclose(reference_value, fast_value)
def group_based_cvm(y_pred, mask, sample_weight, groups_indices):
    """Weighted sum of per-group CvM values against a global distribution.

    The global distribution is built from ``y_pred[mask]`` with weights
    ``sample_weight[mask]``. Every entry of ``groups_indices`` selects
    one group from ``y_pred``; the group is compared with the global
    distribution via ``_cvm_2samp_fast`` and the result is scaled by the
    weight that ``compute_group_weights_by_indices`` assigned to it.

    :param y_pred: predictions, converted with ``column_or_1d``.
    :param mask: selector of the elements forming the global distribution.
    :param sample_weight: weights, passed through ``check_sample_weight``.
    :param groups_indices: iterable of index sets, one per group.
    :return: accumulated weighted value (``0.`` when there are no groups).
    """
    y_pred = column_or_1d(y_pred)
    sample_weight = check_sample_weight(y_pred, sample_weight=sample_weight)
    group_weights = compute_group_weights_by_indices(
        groups_indices, sample_weight=sample_weight)

    # Reference distribution shared by all the groups.
    reference = prepare_distribution(
        y_pred[mask], weights=sample_weight[mask])
    ref_data, ref_weights, ref_cdf = reference

    total = 0.
    for indices, weight in zip(groups_indices, group_weights):
        group_cvm = _cvm_2samp_fast(
            ref_data, y_pred[indices], ref_weights,
            sample_weight[indices], ref_cdf)
        total += weight * group_cvm
    return total
def group_based_cvm(y_pred, mask, sample_weight, groups_indices):
    """Accumulate group-weighted CvM values relative to the masked sample.

    ``y_pred[mask]`` (weighted by ``sample_weight[mask]``) serves as the
    global distribution. Each group given in ``groups_indices`` is
    compared with it using ``_cvm_2samp_fast``; contributions are scaled
    by the matching entry of ``compute_group_weights_by_indices``.

    :param y_pred: predictions, converted with ``column_or_1d``.
    :param mask: selector of the elements forming the global distribution.
    :param sample_weight: weights, passed through ``check_sample_weight``.
    :param groups_indices: iterable of index sets, one per group.
    :return: the weighted sum over groups, starting from ``0.``.
    """
    y_pred = column_or_1d(y_pred)
    sample_weight = check_sample_weight(y_pred, sample_weight=sample_weight)
    weights_of_groups = compute_group_weights_by_indices(
        groups_indices, sample_weight=sample_weight)

    accumulated = 0.

    # Prepared once and reused for every group comparison.
    all_data, all_weights, all_cdf = prepare_distribution(
        y_pred[mask], weights=sample_weight[mask])

    for members, share in zip(groups_indices, weights_of_groups):
        members_pred = y_pred[members]
        members_weight = sample_weight[members]
        accumulated += share * _cvm_2samp_fast(
            all_data, members_pred, all_weights, members_weight, all_cdf)

    return accumulated
def bin_based_cvm(y_pred, sample_weight, bin_indices):
    """Cramer-von Mises similarity computed over bins (currently slow).

    The distribution of all ``y_pred`` (weighted by ``sample_weight``)
    is compared with the distribution inside every bin via
    ``_cvm_2samp_fast``. Per-bin values are scaled by the weights from
    ``compute_bin_weights`` and summed; bins whose weight is ``<= 0``
    are skipped.

    :param y_pred: predictions, one per element.
    :param sample_weight: weights, same length as ``y_pred``.
    :param bin_indices: bin number of each element, same length as ``y_pred``.
    :return: accumulated weighted value, starting from ``0.``.
    """
    assert len(y_pred) == len(sample_weight) == len(bin_indices)
    bin_weights = compute_bin_weights(bin_indices=bin_indices,
                                      sample_weight=sample_weight)

    # Distribution of the whole sample, shared by all bins.
    overall_data, overall_weights, overall_cdf = prepare_distribution(
        y_pred, weights=sample_weight)

    total = 0.
    for bin_number, bin_weight in enumerate(bin_weights):
        if bin_weight <= 0:
            continue
        in_bin = bin_indices == bin_number
        bin_cvm = _cvm_2samp_fast(
            overall_data, y_pred[in_bin], overall_weights,
            sample_weight[in_bin], overall_cdf)
        total += bin_weight * bin_cvm
    return total
def bin_based_cvm(y_pred, sample_weight, bin_indices):
    """Bin-wise Cramer-von Mises similarity; not optimised for speed yet.

    For every bin with a positive weight (as returned by
    ``compute_bin_weights``) the predictions falling into the bin are
    compared with the full set of predictions using ``_cvm_2samp_fast``.
    The comparison results are summed with the bin weights as factors.

    :param y_pred: predictions, one per element.
    :param sample_weight: weights, same length as ``y_pred``.
    :param bin_indices: bin number of each element, same length as ``y_pred``.
    :return: the weighted sum over bins, starting from ``0.``.
    """
    assert len(y_pred) == len(sample_weight) == len(bin_indices)
    weights_of_bins = compute_bin_weights(
        bin_indices=bin_indices, sample_weight=sample_weight)

    accumulated = 0.

    # Prepared once: reference distribution built from all elements.
    prepared = prepare_distribution(y_pred, weights=sample_weight)
    full_data, full_weights, full_cdf = prepared

    for index, weight in enumerate(weights_of_bins):
        if weight <= 0:
            # Bins without positive weight do not contribute.
            continue
        selector = bin_indices == index
        selected_pred = y_pred[selector]
        selected_weights = sample_weight[selector]
        accumulated += weight * _cvm_2samp_fast(
            full_data, selected_pred, full_weights, selected_weights,
            full_cdf)

    return accumulated