Beispiel #1
0
def test_fast_cvm(n_samples=1000):
    """Check that the fast CvM routine agrees with the plain two-sample one.

    A weighted sample is drawn uniformly at random, and a second sample is
    formed by keeping the entries selected by a random boolean mask.  The
    statistic is then computed twice: directly via ``cvm_2samp``, and via
    ``_cvm_2samp_fast`` using the output of ``prepare_distribution`` for the
    first sample.  Both results must be numerically close.

    :param n_samples: number of entries drawn for the first sample.
    """
    rng = RandomState()
    full_data = rng.uniform(size=n_samples)
    full_weights = rng.uniform(size=n_samples)

    # The second sample is a random subset of the first one.
    keep = rng.uniform(size=n_samples) > 0.5
    subset_data, subset_weights = full_data[keep], full_weights[keep]

    reference = cvm_2samp(full_data, subset_data, full_weights, subset_weights)

    # Fast path: the first distribution is preprocessed once and its cdf reused.
    prepared = prepare_distribution(full_data, full_weights)
    prepared_data, prepared_weights, cdf = prepared
    fast = _cvm_2samp_fast(prepared_data, subset_data, prepared_weights,
                           subset_weights, cdf1=cdf)

    assert numpy.allclose(reference, fast)
def test_cvm2samp(n_samples1=100, n_samples2=100):
    """Cross-check ``cvm_2samp`` against a value derived from a ROC curve.

    Two weighted normal samples are generated and ``cvm_2samp`` is evaluated
    on them.  The same quantity is then rebuilt from the weighted ROC curve of
    the pooled data (first sample labelled 0, second labelled 1) and the two
    numbers are required to be close.

    :param n_samples1: size of the first sample.
    :param n_samples2: size of the second sample.
    """
    first = numpy.random.normal(size=n_samples1)
    first_weights = numpy.random.random(size=n_samples1)
    second = numpy.random.normal(size=n_samples2)
    second_weights = numpy.random.random(size=n_samples2)

    CVM = cvm_2samp(first, second, weights1=first_weights, weights2=second_weights)

    # Alternative computation: pool both samples and build a weighted ROC curve.
    pooled_labels = [0] * len(first) + [1] * len(second)
    pooled_data = numpy.concatenate([first, second])
    pooled_weights = numpy.concatenate([first_weights, second_weights])
    from sklearn.metrics import roc_curve
    fpr, tpr, _ = roc_curve(pooled_labels, pooled_data, sample_weight=pooled_weights)

    # The first sample carries label 0, so it is the one that drives fpr;
    # the increments of fpr (with a leading 0) serve as integration weights.
    fpr_with_origin = numpy.insert(fpr, 0, [0])
    fpr_steps = numpy.diff(fpr_with_origin)
    squared_gap = (symmetrize(fpr) - symmetrize(tpr)) ** 2
    CVM2 = numpy.sum(fpr_steps * squared_gap)

    print(CVM, CVM2)
    assert numpy.allclose(CVM, CVM2), 'different values of CVM'
Beispiel #3
0
def test_cvm2samp(n_samples1=100, n_samples2=100):
    """Verify ``cvm_2samp`` using an independent ROC-curve based computation.

    Draws two normally distributed samples with random weights, evaluates
    ``cvm_2samp`` on them, and compares the result with a sum assembled from
    the weighted ROC curve of the concatenated data, where the first sample
    is labelled 0 and the second is labelled 1.

    :param n_samples1: number of entries in the first sample.
    :param n_samples2: number of entries in the second sample.
    """
    sample_a = numpy.random.normal(size=n_samples1)
    weights_a = numpy.random.random(size=n_samples1)
    sample_b = numpy.random.normal(size=n_samples2)
    weights_b = numpy.random.random(size=n_samples2)

    CVM = cvm_2samp(sample_a, sample_b, weights1=weights_a, weights2=weights_b)

    # Second route to the same number, via the weighted ROC curve.
    labels = [0] * len(sample_a) + [1] * len(sample_b)
    data = numpy.concatenate([sample_a, sample_b])
    weights = numpy.concatenate([weights_a, weights_b])
    from sklearn.metrics import roc_curve
    fpr, tpr, _ = roc_curve(labels, data, sample_weight=weights)

    # Label 0 marks the first sample, hence fpr tracks it; consecutive
    # differences of fpr (starting from 0) weight each term of the sum.
    step_weights = numpy.diff(numpy.insert(fpr, 0, [0]))
    deviation = symmetrize(fpr) - symmetrize(tpr)
    CVM2 = numpy.sum(step_weights * deviation ** 2)

    print(CVM, CVM2)
    assert numpy.allclose(CVM, CVM2), 'different values of CVM'