Example #1
0
def test_summarize_removes_1_grouping(backend):
    """Check that each summarize() drops one level of grouping.

    After grouping by ``a`` and ``b``, one summarize is expected to leave
    ``a`` as the only grouping variable, and a second one to leave none.
    """
    data = data_frame(a=1, b=2, c=3)
    df = backend.load_df(data)

    q1 = df >> group_by(_.a, _.b) >> summarize(n=n(_))
    # The trailing comma matters: ("a") is just the string "a", whereas
    # ("a",) is a one-element tuple.
    # NOTE(review): assumes group_by is stored as a tuple of names -- confirm.
    assert q1.group_by == ("a",)

    q2 = q1 >> summarize(n=n(_))
    assert not len(q2.group_by)
Example #2
0
                                      max_size=10))

# Vector-function call expressions built on the `_.x` column placeholder.
# Commented-out names are functions that are not exercised by this list.
# NOTE(review): presumably consumed by parametrized tests elsewhere in this
# file -- confirm against the users of this constant.
OMNIBUS_VECTOR_FUNCS = [
    # Not listed: cumall, cumany, cummean
    # Not listed: desc
    v.dense_rank(_.x, na_option="keep"),
    # Not listed: v.percent_rank(_.x)
    v.min_rank(_.x, na_option="keep"),
    v.cume_dist(_.x, na_option="keep"),
    v.row_number(_.x),
    # Not listed: ntile
    v.between(_.x, 2, 5, default=False),
    v.coalesce(_.x, 2),
    v.lead(_.x),
    v.lag(_.x),
    v.n(_.x),
    v.na_if(_.x, 2),
    # Not listed: near
    v.nth(_.x, 2),
    v.first(_.x),
    # Unlike the other entries, last() is given an explicit order_by.
    v.last(_.x,
           order_by=_.x),  # TODO: in SQL getting FROM LAST requires order by
]

# Calls to v.n: once on the `_.x` column placeholder, once on the bare `_`.
# NOTE(review): the name suggests these are the aggregating cases -- confirm
# against the tests that consume this list.
VECTOR_AGG_FUNCS = [
    v.n(_.x),
    v.n(_),
]

VECTOR_FILTER_FUNCS = [
    v.dense_rank(_.x, na_option="keep") < 2,
Example #3
0
def test_summarize_unnamed_args(df):
    """A positional summarize() argument is expected under the column 'n(_)'."""
    query = summarize(n(_))
    expected = pd.DataFrame({'n(_)': 4})
    assert_equal_query(df, query, expected)
Example #4
0
def test_summarize_removes_order_vars(backend, df):
    """The table produced by summarize() should have an empty order_by."""
    summarized = df >> summarize(n=n(_))
    remaining_order = summarized.order_by

    assert len(remaining_order) == 0
Example #5
0
def test_summarize_keeps_group_vars(backend, gdf):
    """Summarizing a grouped table yields the columns "g" then "n"."""
    summarized = gdf >> summarize(n=n(_))
    column_names = list(summarized.last_op.c.keys())
    assert column_names == ["g", "n"]
Example #6
0
def df(backend):
    """Load DATA through the given backend and return the result."""
    loaded = backend.load_df(DATA)
    return loaded


@pytest.fixture(scope="module")
def df_float(backend):
    """Load DATA through the backend with its ``x`` column cast to float."""
    def _x_as_float(frame):
        # Called by assign() with the frame being modified.
        return frame.x.astype(float)

    float_data = DATA.assign(x=_x_as_float)
    return backend.load_df(float_data)


@pytest.fixture(scope="module")
def gdf(df):
    """Return the ``df`` fixture grouped by its ``g`` column."""
    grouped = df >> group_by(_.g)
    return grouped


@pytest.mark.parametrize(
    "query, output",
    [
        pytest.param(summarize(y=n(_)), data_frame(y=4)),
        pytest.param(summarize(y=_.x.min()), data_frame(y=1)),
    ],
)
def test_summarize_ungrouped(df, query, output):
    """Each ungrouped summarize query should produce its paired output frame."""
    assert_equal_query(df, query, output)


@pytest.mark.skip("TODO: should return 1 row (#63)")
def test_ungrouped_summarize_literal(df):
    """Summarizing an ungrouped table with a literal should give ``y == 1``.

    Currently skipped; see the skip reason above (#63).

    Only ``df`` is requested: the body uses no other argument, and no
    parametrize decorator on this test supplies any, so extra parameters
    would be looked up as fixtures once the skip is lifted.
    """
    assert_equal_query(df, summarize(y=1), data_frame(y=1))


@backend_notimpl("sqlite")
def test_summarize_after_mutate_cuml_win(backend, df_float):
    assert_equal_query(df_float,
                       mutate(y=_.x.cumsum()) >> summarize(z=_.y.max()),
Example #7
0
def test_group_by_performs_mutate(df):
    """group_by() with a keyword expression should group on the computed ``z``."""
    query = group_by(z=_.x + _.y) >> summarize(n=n(_))
    expected = data_frame(z=10, n=3)
    assert_equal_query(df, query, expected)
Example #8
0
def test_vector_n(x):
    """v.n applied to the ``x`` fixture should equal 3."""
    count = v.n(x)
    assert count == 3