Beispiel #1
0
def test_count_no_groups(df):
    """count() with no columns and no grouping gives a single row with n == 4."""
    # No grouping columns: the whole frame collapses to one total.
    query = count()
    expected = pd.DataFrame({'n': [4]})
    assert_equal_query(df, query, expected)
Beispiel #2
0
    #     >> select(_.raw_rank, _.dense_rank, _.total_goals, _.player)
)

# -

# Bare expression (result is not assigned): keep the rows of top8 whose
# yr_start is before 1979.
top8 >> filter(_.yr_start < 1979)

# +
# get them from game_goals

# Inner-join game_goals to top8 on the "player" column.
top8_games = game_goals >> inner_join(_, top8, "player")

# Tally rows per (player, year), then build a running total of those tallies
# within each player.
games_per_year = (
    top8_games
    #    >> filter(_.player == "Alex Ovechkin")
    # `year` is the calendar year taken from `date`, cast to a string.
    # NOTE(review): newer pandas releases may reject the "datetime64[D]" unit
    # in astype -- confirm against the pandas version this notebook targets.
    >> count(_.player, year=_.date.astype("datetime64[D]").dt.year.astype(str))
    # The tally column is read back as `n`; cumsum runs inside each player
    # group, and ungroup() drops the grouping afterwards.
    >> group_by(_.player) >> mutate(cuml_games=_.n.cumsum(), ) >> ungroup())

# Line chart of cuml_games against year, colored by player. The x field is
# declared with the ':T' type shorthand.
alt.Chart(games_per_year).mark_line().encode(x='year:T',
                                             y='cuml_games',
                                             color="player")
# -

# +
from pandas.tseries.offsets import MonthBegin
from siuba.experimental.pd_groups import fast_summarize

top8_goals = (
    top8_games >> mutate(
        date=_.date.astype("datetime64[D]"),
        age_years=top8_games.age.str.split('-').str.get(0).astype(int)) >>
Beispiel #3
0
def test_count_on_grouped_df(df2):
    """count(_.h) piped after group_by(_.g) yields one tally row per g/h pair."""
    query = group_by(_.g) >> count(_.h)
    # The expected frame carries the grouping column g alongside h and n.
    expected = pd.DataFrame({'g': ['a', 'b'], 'h': ['c', 'd'], 'n': [2, 2]})
    assert_equal_query(df2, query, expected)
Beispiel #4
0
def test_count_no_groups_wt(backend, df):
    """With wt=_.x and no grouping columns, n is the sum of the x values."""
    # `backend` is not referenced in the body; it stays in the signature
    # (presumably consumed by fixtures/decorators elsewhere -- confirm).
    query = count(wt=_.x)
    weighted_total = sum([1, 2, 3, 4])
    expected = pd.DataFrame({'n': [weighted_total]})
    assert_equal_query(df, query, expected)
Beispiel #5
0
def test_count_wt(backend, df):
    """count(_.g, wt=_.x) gives, for each g, the sum of x over that group's rows."""
    # `backend` is not referenced in the body; it stays in the signature
    # (presumably consumed by fixtures/decorators elsewhere -- confirm).
    query = count(_.g, wt=_.x)
    # Written as explicit sums so each group's contributing x values are visible.
    per_group_totals = [1 + 2, 3 + 4]
    expected = pd.DataFrame({'g': ['a', 'b'], 'n': per_group_totals})
    assert_equal_query(df, query, expected)
Beispiel #6
0
def test_count_with_kwarg_expression(df):
    """A keyword expression passed to count() becomes a column named by the keyword."""
    # _.x - _.x is 0 on every row, so all four rows land in a single y == 0 bucket.
    query = count(y=_.x - _.x)
    expected = pd.DataFrame({"y": [0], "n": [4]})
    assert_equal_query(df, query, expected)
Beispiel #7
0
from .helpers import assert_equal_query, data_frame, backend_notimpl, backend_sql

# Shared input for the count tests: four rows, two for each value of g.
DATA = data_frame(x = [1,2,3,4], g = ['a', 'a', 'b', 'b'])
# Same x and g columns as DATA, plus a second label column h.
DATA2 = data_frame(x = [1,2,3,4], g = ['a', 'a', 'b', 'b'], h = ['c', 'c', 'd', 'd'])

@pytest.fixture(scope="module")
def df(backend):
    """Module-scoped fixture returning DATA as loaded by ``backend.load_df``."""
    loaded = backend.load_df(DATA)
    return loaded

@pytest.fixture(scope="module")
def df2(backend):
    """Module-scoped fixture returning DATA2 as loaded by ``backend.load_df``."""
    loaded = backend.load_df(DATA2)
    return loaded


@pytest.mark.parametrize(
    "query, output",
    [
        # Column given as a lazy expression or by name: one row per value of g.
        (count(_.g), data_frame(g=['a', 'b'], n=[2, 2])),
        (count("g"), data_frame(g=['a', 'b'], n=[2, 2])),
        # Counting by x and g together: the expected frame is DATA with n == 1.
        (count("x", "g"), DATA.assign(n=1)),
        (count(_.x, "g"), DATA.assign(n=1)),
    ],
)
def test_basic_count(df, query, output):
    """Each parametrized count query, applied to df, must match its expected frame."""
    assert_equal_query(df, query, output)


@pytest.mark.skip("TODO: sql fix unnamed expression labels in count (#69)")
def test_count_with_expression(df):
    """An unnamed expression passed to count() is expected to be labelled "x - x"."""
    # Currently skipped; see the skip reason on the decorator.
    query = count(_.x - _.x)
    expected = pd.DataFrame({"x - x": [0], "n": [4]})
    assert_equal_query(df, query, expected)