def test_errors(caplog):
    """Exercise the messages summarise() logs and the errors it raises."""
    frame = tibble(x=1, y=2)

    # Two grouping columns, nothing summarised: the log mentions only 'x'.
    result = frame >> group_by(f.x, f.y) >> summarise()
    assert "`summarise()` has grouped output by ['x']" in caplog.text
    assert result.equals(frame)
    caplog.clear()

    # Two values per group: the log mentions both grouping columns.
    result = (
        tibble(x=1, y=2)
        >> group_by(f.x, f.y)
        >> summarise(z=[2, 2])
    )
    assert "`summarise()` has grouped output by ['x', 'y']" in caplog.text
    expected = tibble(x=[1, 1], y=[2, 2], z=[2, 2])
    assert result.equals(expected)
    caplog.clear()

    # Rowwise input with explicit columns.
    result = frame >> rowwise(f.x, f.y) >> summarise()
    assert "`summarise()` has grouped output by ['x', 'y']" in caplog.text
    assert result.equals(frame)
    caplog.clear()

    # Rowwise input without columns.
    result = frame >> rowwise() >> summarise()
    assert "`summarise()` has ungrouped output" in caplog.text
    # Keep the verb call outside the assert statement, as the original did.
    shape = dim(result)
    assert shape == (1, 0)
    caplog.clear()

    # unsupported type (but python objects are supported by pandas)
    # not testing for types further
    # tibble(x=1, y=c(1, 2, 2), z=runif(3)) >> summarise(a=object())

    # incompatible size
    with pytest.raises(ValueError):
        tibble(z=1) >> summarise(x=[1, 2, 3], y=[1, 2])

    with pytest.raises(ValueError):
        tibble(z=[1, 2]) >> group_by(f.z) >> summarise(x=[1, 2, 3], y=[1, 2])

    with pytest.raises(ValueError):
        tibble(z=c(1, 3)) >> group_by(f.z) >> summarise(
            x=seq_len(f.z),
            y=[1, 2],
        )

    # Missing variable
    with pytest.raises(KeyError):
        summarise(mtcars, a=mean(f.not_there))

    with pytest.raises(KeyError):
        summarise(group_by(mtcars, f.cyl), a=mean(f.not_there))

    # Duplicate column names
    # NOTE(review): the local must stay named `x`; the column names presumably
    # come from the argument expressions passed to tibble() -- confirm.
    x = 1
    duplicated = tibble(x, x, _name_repair="minimal")
    with pytest.raises(NameNonUniqueError):
        duplicated >> summarise(f.x)
def test_rowwise_preserved_by_major_verbs():
    """Each major verb keeps the rowwise class and group vars; summarise does not."""

    def _expect(result, expected_type, expected_vars):
        # Shared assertion pair applied to the outcome of every verb.
        assert isinstance(result, expected_type)
        assert group_vars(result) == expected_vars

    source = rowwise(tibble(x=range(1, 6), y=range(5, 0, -1)), f.x)

    arranged = arrange(source, f.y)
    _expect(arranged, TibbleRowwise, ["x"])

    filtered = filter(source, f.x < 3)
    _expect(filtered, TibbleRowwise, ["x"])

    mutated = mutate(source, x=f.x + 1)
    _expect(mutated, TibbleRowwise, ["x"])

    # Renaming the rowwise column is reflected in the group vars.
    renamed = rename(source, X=f.x)
    _expect(renamed, TibbleRowwise, ["X"])

    selected = select(source, "x")
    _expect(selected, TibbleRowwise, ["x"])

    sliced = slice(source, c(0, 0))
    _expect(sliced, TibbleRowwise, ["x"])

    # Except for summarise
    summarised = summarise(source, z=mean(f.x, f.y))
    _expect(summarised, TibbleGrouped, ["x"])
def test_can_be_before_group_by():
    """A select() between group_by() and summarise() yields id, year, var1."""
    frame = tibble(
        id=c(1, 1, 2, 2, 2, 3, 3, 4, 4, 5),
        year=c(2013, 2013, 2012, 2013, 2013, 2013, 2012, 2012, 2013, 2013),
        var1=rnorm(10),
    )

    # Same pipeline as a single chain, written one verb per step.
    grouped = frame >> group_by(f.id, f.year)
    picked = grouped >> select(f.id, f.year, f.var1)
    aggregated = picked >> summarise(var1=mean(f.var1))

    assert_iterable_equal(names(aggregated), ["id", "year", "var1"])
def test_cache_key():
    """Two across() calls differing only in the function match plain mean/max."""
    grouped = (
        tibble(g=rep([1, 2], each=2), a=range(1, 5))
        >> group_by(f.g)
    )

    # Both across() calls select the same columns; only the function differs.
    actual = grouped >> mutate(
        tibble(
            x=across(where(is_numeric), mean).a,
            y=across(where(is_numeric), max).a,
        )
    )
    expected = grouped >> mutate(x=mean(f.a), y=max(f.a))

    assert_frame_equal(actual, expected)
def avg_weights_and_filter(owfiles):
    """Combine weight files, average the weight per bin and filter by score.

    Every file in ``owfiles`` is read as a tab-separated table with a header
    row, and the rows are stacked. The stacked rows are grouped by
    (chrom1, start1, end1); each group gets its names joined with ":" and its
    mean weight as ``score``. Rows with ``score >= cutoff`` are kept and
    written, tab-separated without header or index, to
    ``_avg_weights_filtered.bed`` next to ``outfile``.

    ``outfile``, ``cutoff`` and ``_log`` are taken from the enclosing scope.

    Args:
        owfiles: Iterable of paths to the weight files.

    Returns:
        A tuple of the output file path and the number of columns written.
    """
    _log("- Averaging bin weights")
    ofile = outfile.parent / "_avg_weights_filtered.bed"

    # Start from None and append one file at a time, as the original does.
    stacked = None
    for path in owfiles:
        chunk = pandas.read_csv(path, sep="\t", header=0)
        stacked = stacked >> bind_rows(chunk)

    per_bin = stacked >> group_by(f.chrom1, f.start1, f.end1)
    averaged = per_bin >> summarise(
        chrom=f.chrom1,
        start=f.start1,
        end=f.end1,
        name=paste(f.name, collapse=":"),
        score=mean(f.weight),
        strand="+",
    )
    passing = averaged >> filter_(f.score >= cutoff)
    flat = passing >> ungroup()
    # Drop the original grouping columns.
    bed = flat >> select(~f.chrom1, ~f.start1, ~f.end1)

    bed.to_csv(ofile, sep="\t", index=False, header=False)
    return ofile, len(bed.columns)
def test_dup_keyword_args():
    """A later summary may use `_b`; the expected result holds only g and b."""
    grouped = tibble(g=[1, 1], a=[1.0, 2.0]) >> group_by(f.g)

    result = grouped >> summarise(
        _b=mean(f.a),
        b=f._b * 2,
    )

    expected = tibble(g=1, b=3.0)
    assert_tibble_equal(result, expected)
def test_freshly_create_vars():
    """A summary column can be used by a later summary in the same call."""
    frame = tibble(x=range(1, 11))

    result = summarise(frame, y=mean(f.x), z=f.y + 1)

    y_values = result.y.to_list()
    z_values = result.z.to_list()
    assert y_values == [5.5]
    assert z_values == [6.5]
def test_0_groups():
    """mutate() on a grouped frame without rows still adds the new columns."""
    empty = tibble(x=1).loc[[], :]
    grouped = empty >> group_by(f.x)

    res = grouped >> mutate(
        y=mean(f.x),
        z=+mean(f.x),
        n=n(),
    )

    column_names = res.columns.tolist()
    assert column_names == ["x", "y", "z", "n"]

    # Keep the piped verb call outside the assert statement, as the original did.
    row_count = res >> nrow()
    assert row_count == 0
def test_with_groups__groups_eq_null_ungroups():
    """with_groups(NULL, ...) on a grouped frame yields the overall mean."""
    frame = tibble(x=[1.0, 2.0])
    grouped = group_by(frame, f.x)

    result = grouped >> with_groups(NULL, mutate, y=mean(f.x))

    # 1.5 is the mean of both rows taken together.
    y_values = result.y.tolist()
    assert y_values == [1.5, 1.5]