def test_recycling():
    """Check how summarise() combines across() results of unequal length.

    With ``x=1, y=2`` the per-column results are expected to end up as two
    rows of 42 in both columns; with ``x=2, y=3`` the call must raise
    ``ValueError``.
    """
    compatible = tibble(x=1, y=2)
    result = compatible >> summarise(
        across(everything(), lambda column: rep(42, column))
    )
    expected = tibble(x=rep(42, 2), y=rep(42, 2))
    assert result.equals(expected)

    # Presumably lengths 2 and 3 cannot be recycled against each other.
    incompatible = tibble(x=2, y=3)
    with pytest.raises(ValueError):
        incompatible >> summarise(
            across(everything(), lambda column: rep(42, column))
        )
def test_to_functions():
    """Check that across() passes extra keyword arguments on to its functions.

    ``na_rm=True`` is supplied both with a single function and with a
    mapping of named functions.
    """
    df = tibble(x=c(1, NA))  # -> float

    # Single function: the result keeps the original column name.
    single_result = df >> summarise(across(everything(), mean, na_rm=True))
    single_expected = tibble(x=1.0)
    assert_frame_equal(single_result, single_expected)

    # Mapping of functions: one "<column>_<key>" column per entry.
    named_result = df >> summarise(
        across(everything(), {"mean": mean, "median": median}, na_rm=True)
    )
    named_expected = tibble(x_mean=1.0, x_median=1.0)
    assert_frame_equal(named_result, named_expected)
def test_errors():
    """Check the exceptions filter() raises for invalid conditions.

    Covers conditions of the wrong type, conditions of the wrong size
    (bare lists and single-column tibbles), and named arguments. Each
    failing expression sits in its own ``pytest.raises`` block so that one
    failure cannot mask another.
    """
    # --- wrong type -------------------------------------------------------
    with pytest.raises(ValueError):
        iris >> group_by(f.Species) >> filter(range(1, 10))
    with pytest.raises(ValueError):
        iris >> filter(range(1, 10))

    # --- wrong size -------------------------------------------------------
    with pytest.raises(ValueError):
        iris >> group_by(f.Species) >> filter([True, False])
    with pytest.raises(ValueError):
        iris >> rowwise(f.Species) >> filter([True, False])
    with pytest.raises(ValueError):
        iris >> filter([True, False])

    # --- wrong size in column ---------------------------------------------
    with pytest.raises(ValueError):
        iris >> group_by(f.Species) >> filter(tibble([True, False]))
    with pytest.raises(ValueError):
        iris >> rowwise() >> filter(tibble([True, False]))
    with pytest.raises(ValueError):
        iris >> filter(tibble([True, False]))
    with pytest.raises(ValueError):
        tibble(x=1) >> filter([True, False])

    # --- named inputs -----------------------------------------------------
    with pytest.raises(TypeError):
        mtcars >> filter(x=1)
    with pytest.raises(TypeError):
        mtcars >> filter(f.y > 2, z=3)
    with pytest.raises(TypeError):
        mtcars >> filter(True, x=1)

    # across() in filter() does not warn yet
    tibble(x=1, y=2) >> filter(
        across(everything(), lambda column: column > 0)
    )
def test_result_locations_aligned_with_column_names():
    """Check that each across() result lands under its "<column>_<key>" name.

    Two functions (``cls`` and ``type``) are applied to an integer column
    and a string column, and the four results are compared against a
    tibble built with explicit column names.
    """
    source = tibble(x=[1, 2], y=["a", "b"])
    expected = tibble(
        x_cls=numpy.int64,
        x_type=True,
        y_cls=object,
        y_type=False,
    )

    actual = source >> summarise(
        across(
            everything(),
            {"cls": lambda column: column.dtype, "type": is_numeric},
        )
    )

    assert_frame_equal(actual, expected)
def test_summarise_cols_inside_func():
    """Check that column references work inside a registered function.

    A locally registered ``data_frame`` helper receives ``f.<col>``
    expressions inside summarise(); the result must equal dividing every
    column by ``df.y`` through across().
    """
    df = tibble(x=2, y=4, z=8)

    @register_func(None, context=None)
    def data_frame(**kwargs):
        return tibble(**kwargs)

    via_function = df >> summarise(
        data_frame(x=f.x / f.y, y=f.y / f.y, z=f.z / f.y)
    )
    via_across = df >> summarise(
        across(everything(), lambda column: column / df.y)
    )

    assert via_function.equals(via_across)
def test_mutate_cols_inside_func():
    """Check that column references work inside a registered function.

    Mirrors the summarise() variant of this test, but goes through
    mutate(): the ``data_frame`` helper result must equal dividing every
    column by ``df.y`` through across().
    """
    df = tibble(x=2, y=4, z=8)

    @register_func(None, context=None)
    def data_frame(**kwargs):
        return tibble(**kwargs)

    via_function = df >> mutate(
        data_frame(x=f.x / f.y, y=f.y / f.y, z=f.z / f.y)
    )

    # df.y does not work on grouped data
    via_across = df >> mutate(
        across(everything(), lambda column: column / df.y)
    )

    assert via_function.equals(via_across)
def test_not_selecting_grouping_var():
    """Check that across(everything()) leaves out the grouping column.

    After grouping by ``g``, the frame stored under ``x`` is expected to
    contain only the ``x`` column.
    """
    grouped_source = tibble(g=1, x=1)

    summary = grouped_source >> group_by(f.g) >> summarise(
        x=across(everything())
    )

    only_x = tibble(x=1)
    assert_frame_equal(summary["x"], only_x)