def test_fast_cvm(n_samples=1000):
    """Check that ``_cvm_2samp_fast`` reproduces the value of ``cvm_2samp``.

    The first sample is ``n_samples`` uniform values with uniform weights.
    The second sample is the subset of the first (values and their weights)
    picked by a random boolean mask.

    :param n_samples: size of the first sample.
    """
    rng = RandomState()
    full_values = rng.uniform(size=n_samples)
    full_weights = rng.uniform(size=n_samples)
    # Each element is kept when its own uniform draw exceeds 0.5.
    selected = rng.uniform(size=n_samples) > 0.5

    subset_values = full_values[selected]
    subset_weights = full_weights[selected]

    # Reference value, computed from the raw samples.
    expected = cvm_2samp(full_values, subset_values, full_weights, subset_weights)

    # Fast path: the first sample is pre-processed once and its CDF is
    # handed over through the ``cdf1`` keyword.
    prepared_values, prepared_weights, prepared_cdf = prepare_distribution(
        full_values, full_weights
    )
    actual = _cvm_2samp_fast(
        prepared_values,
        subset_values,
        prepared_weights,
        subset_weights,
        cdf1=prepared_cdf,
    )

    assert numpy.allclose(expected, actual)
def test_cvm2samp(n_samples1=100, n_samples2=100):
    """Cross-check ``cvm_2samp`` against a value rebuilt from a ROC curve.

    Two independent weighted normal samples are generated. The statistic
    returned by ``cvm_2samp`` is compared with an alternative computation
    based on ``sklearn.metrics.roc_curve`` applied to the pooled data.

    :param n_samples1: size of the first sample.
    :param n_samples2: size of the second sample.
    """
    data1 = numpy.random.normal(size=n_samples1)
    weights1 = numpy.random.random(size=n_samples1)
    data2 = numpy.random.normal(size=n_samples2)
    weights2 = numpy.random.random(size=n_samples2)
    CVM = cvm_2samp(data1, data2, weights1=weights1, weights2=weights2)

    # Alternative way to check: pool both samples, label them 0 / 1 and
    # read the two weighted curves off the ROC curve.
    labels = [0] * len(data1) + [1] * len(data2)
    data = numpy.concatenate([data1, data2])
    weights = numpy.concatenate([weights1, weights2])

    # Imported locally so that scikit-learn is only needed by this test.
    from sklearn.metrics import roc_curve
    fpr, tpr, _ = roc_curve(labels, data, sample_weight=weights)

    # data1 carries label 0, so it corresponds to fpr; the step between
    # consecutive fpr values (with a leading 0 inserted) is used as the
    # weight of each term of the sum.
    fpr_steps = numpy.diff(numpy.insert(fpr, 0, [0]))
    CVM2 = numpy.sum(fpr_steps * (symmetrize(fpr) - symmetrize(tpr)) ** 2)

    print(CVM, CVM2)
    assert numpy.allclose(CVM, CVM2), 'different values of CVM'