Exemple #1
0
def test_n_distinct_handles_unnamed():
    """``n_distinct`` agrees for inline and pre-bound column access.

    Each iris column is counted once through direct attribute access and
    once through a local variable holding the same column; both counts
    must match.
    """
    sepal_length = iris.Sepal_Length
    sepal_width = iris.Sepal_Width

    # Inline attribute access on the left, pre-bound variable on the right.
    assert n_distinct(iris.Sepal_Length) == n_distinct(sepal_length)
    assert n_distinct(iris.Sepal_Width) == n_distinct(sepal_width)
Exemple #2
0
def test_one_group_for_NA():
    """All NA values collapse into one distinct value / one group.

    The vector holds three NAs followed by the values 10..1 twice, so
    eleven distinct entries (ten numbers plus NA) are expected both from
    ``n_distinct`` and from the number of groups after ``group_by``.
    """
    values = c(NA, NA, NA, range(10, 0, -1), range(10, 0, -1))
    raw_weights = c(20, 30, 40, range(1, 11), range(1, 11))
    weights = numpy.array(raw_weights) * 10

    assert n_distinct(values, na_rm=False) == 11

    # Group on the NA-containing column, then count rows per group.
    grouped = tibble(x=values, w=weights) >> group_by(f.x)
    per_group = grouped >> summarise(n=n())
    assert nrow(per_group) == 11
Exemple #3
0
def test_n_distinct_handles_in_na_rm():
    """``na_rm`` is honoured by ``n_distinct`` inside ``summarise``.

    The column holds 1..4 plus one NA, so the count is 4 when NAs are
    removed and 5 when they are kept. The flag is supplied as a literal,
    through a local variable, and as a boolean expression.
    """
    d = tibble(x=c([1, 2, 3, 4], NA))
    yes = True
    no = False

    # (value passed as na_rm, expected list pulled from the summary)
    cases = [
        (True, [4]),
        (False, [5]),
        (yes, [4]),
        (no, [5]),
        # Deliberately a computed expression rather than a bare literal.
        (True or True, [4]),
    ]

    for flag, expected in cases:
        summary = d >> summarise(n=n_distinct(f.x, na_rm=flag))
        counted = summary >> pull(to="list")
        assert counted == expected
Exemple #4
0
def test_n_distinct_works_with_str_col():
    """``n_distinct`` accepts a column selected by name via ``f[col]``.

    A small wrapper receives the column name as a string and summarises
    with NAs removed; the column ``[1, 1, 3, NA]`` must yield 2.
    """

    def wrapper(data, col):
        # Look the column up by its string name instead of f.<attr>.
        distinct = n_distinct(f[col], na_rm=True)
        return summarise(data, result=distinct)

    frame = tibble(x=[1, 1, 3, NA])
    summarised = wrapper(frame, "x")

    assert summarised.equals(tibble(result=2))
Exemple #5
0
def test_n_distinct_recyles_len1_vec():
    """``n_distinct`` on scalars, lists, and summarised vs. input columns.

    Inside ``summarise`` the freshly created ``y`` is a single value per
    group (count 1), while the input column ``x`` keeps its per-group
    number of distinct values.

    NOTE: the multi-argument cases -- ``n_distinct(1, [1, 2, 3, 4])``,
    ``n_distinct(f.y, f.x)`` and ``n_distinct(f.x, f.y)`` (n1/n2) -- are
    intentionally not exercised here: summarise fails to mix input and
    summarised data in one expression.
    """
    # Plain (non-piped) inputs.
    assert n_distinct(4) == 1
    assert n_distinct(NA, na_rm=True) == 0
    assert n_distinct([1, 2, 3, 4]) == 4

    # Ungrouped: a single summary row.
    ungrouped = tibble(x=[1, 2, 3, 4])
    summary = ungrouped >> summarise(
        y=sum(f.x),
        n3=n_distinct(f.y),
        n4=n_distinct(identity(f.y)),
        n5=n_distinct(f.x),
    )
    for column, expected in (("n3", [1]), ("n4", [1]), ("n5", [4])):
        assert getattr(summary, column).tolist() == expected

    # Grouped: one summary row per value of g.
    grouped = (
        tibble(g=c(1, 1, 1, 1, 2, 2), x=c(1, 2, 3, 1, 1, 2))
        >> group_by(f.g)
    )
    summary = grouped >> summarise(
        y=sum(f.x),
        n3=n_distinct(f.y),
        n4=n_distinct(identity(f.y)),
        n5=n_distinct(f.x),
    )
    for column, expected in (("n3", [1, 1]), ("n4", [1, 1]), ("n5", [3, 2])):
        assert getattr(summary, column).tolist() == expected
Exemple #6
0
def test_n_distinct_treats_na_correctly():
    """Repeated NAs in a float vector count as one distinct value.

    Appears to mirror the R test "n_distinct treats NA correctly in the
    REALSXP case (#384)".
    """
    floats_with_na = c(1.0, NA, NA)
    distinct_count = n_distinct(floats_with_na, na_rm=False)
    assert distinct_count == 2
Exemple #7
0
def test_n_distinct_respects_data():
    """``n_distinct`` works on a column passed directly as ``df.x``.

    The column is taken from the frame itself rather than through the
    ``f`` proxy; a one-row frame must summarise to ``n == 1``.
    """
    frame = tibble(x=42)
    summarised = frame >> summarise(n=n_distinct(frame.x))

    assert summarised.equals(tibble(n=1))