Esempio n. 1
0
def test_swap_random(data, seed):
    """Verify that ``dcst.swap_random`` only exchanges values within pairs.

    ``data`` is unpacked into the two input sequences ``a`` and ``b``;
    ``seed`` is used to seed both NumPy's global RNG and the numba RNG.

    Three properties of the output are asserted:

    * both outputs have the same length as both inputs,
    * the pooled, sorted values are unchanged (NaNs compare equal),
    * at every position, each input value appears in the output pair
      at that same position.
    """
    a, b = data

    # The reference implementation is run under a seeded NumPy RNG; its
    # results are unpacked here but are not compared against anything below.
    np.random.seed(seed)
    a_ref, b_ref = original.swap_random(a, b)

    dcst_private._seed_numba(seed)
    a_out, b_out = dcst.swap_random(a, b)

    # Swapping must not change the number of entries on either side.
    assert len(a_out) == len(b_out) == len(a) == len(b)

    # Pooling both sides and sorting must give the same values before and
    # after the swap, i.e. every entry is present the same number of times.
    pooled_in = np.sort(np.concatenate((a, b)))
    pooled_out = np.sort(np.concatenate((a_out, b_out)))
    assert np.allclose(pooled_in, pooled_out, atol=atol, equal_nan=True)

    # Position by position, both input values must show up in the output pair.
    for a_in, b_in, a_res, b_res in zip(a, b, a_out, b_out):
        pair_in = np.array([a_in, b_in])
        pair_out = np.array([a_res, b_res])
        for value in pair_in:
            assert value in pair_out
Esempio n. 2
0
def test_pandas_conversion(seed):
    """Check that ``dcst`` functions accept pandas Series as input.

    The test expects that:

    * ``dcst.ecdf`` on a Series gives the sorted values and their cumulative
      fractions, and yields only three points for a four-row column whose
      last entry is NaN;
    * ``dcst.ks_stat`` on a Series containing NaNs matches the statistic
      returned by ``st.ks_2samp`` on the NaN-dropped column;
    * ``dcst.draw_bs_reps`` and ``dcst.draw_perm_reps`` give the same
      replicates (within ``atol``) for a Series as for its ``.values``
      array when the numba RNG is seeded identically before each call.

    ``seed`` seeds both NumPy's global RNG (used to generate the random
    columns) and the numba RNG (via ``dcst_private._seed_numba``).
    """
    df = pd.DataFrame({
        "a": [3, 2, 1, 4],
        "b": [8, 6, 7, 5],
        "c": [9.1, 10.1, 11.1, np.nan]
    })

    x, y = dcst.ecdf(df.loc[:, "a"])
    assert (x == np.array([1, 2, 3, 4])).all()
    assert (y == np.array([0.25, 0.5, 0.75, 1.0])).all()

    # Column "c" has a trailing NaN; only three ECDF points are expected.
    x, y = dcst.ecdf(df.loc[:, "c"])
    assert np.allclose(x, np.array([9.1, 10.1, 11.1]))
    assert np.allclose(y, np.array([1 / 3, 2 / 3, 1.0]))

    # Seed NumPy's global RNG so the randomly generated columns below are
    # determined by ``seed``; otherwise a failing run could not be reproduced
    # from the seed the test was called with.
    np.random.seed(seed)

    # Mostly-NaN column versus a fully populated one.
    df = pd.DataFrame({
        "a":
        np.concatenate((np.random.normal(0, 1, size=10), [np.nan] * 990)),
        "b":
        np.random.normal(0, 1, size=1000),
    })
    correct, _ = st.ks_2samp(df["a"].dropna(), df["b"])
    assert np.isclose(dcst.ks_stat(df["a"], df["b"]), correct)

    df = pd.DataFrame({
        "a":
        np.concatenate((np.random.normal(0, 1, size=80), [np.nan] * 20)),
        "b":
        np.random.normal(0, 1, size=100),
    })

    # For each comparison below the numba RNG is re-seeded before both the
    # array call and the Series call, so both calls start from the same
    # numba RNG seed.
    dcst_private._seed_numba(seed)
    correct = dcst.draw_bs_reps(df["a"].values, np.mean, size=100)
    dcst_private._seed_numba(seed)
    assert np.allclose(dcst.draw_bs_reps(df["a"], np.mean, size=100),
                       correct,
                       atol=atol)

    dcst_private._seed_numba(seed)
    correct = dcst.draw_bs_reps(df["b"].values, np.mean, size=100)
    dcst_private._seed_numba(seed)
    assert np.allclose(dcst.draw_bs_reps(df["b"], np.mean, size=100),
                       correct,
                       atol=atol)

    dcst_private._seed_numba(seed)
    correct = dcst.draw_perm_reps(df["a"].values,
                                  df["b"].values,
                                  dcst.diff_of_means,
                                  size=100)
    dcst_private._seed_numba(seed)
    assert np.allclose(
        dcst.draw_perm_reps(df["a"], df["b"], dcst.diff_of_means, size=100),
        correct,
        atol=atol,
    )