예제 #1
0
def test_errors(caplog):
    """Check summarise() grouping messages and its failure modes.

    The first part inspects the log text captured by ``caplog`` after
    summarising grouped and rowwise frames.  The second part checks the
    exceptions raised for incompatible sizes, missing columns and
    duplicated column names.
    """
    df = tibble(x=1, y=2)

    # Two grouping columns, nothing summarised.
    grouped = df >> group_by(f.x, f.y)
    result = grouped >> summarise()
    assert "`summarise()` has grouped output by ['x']" in caplog.text
    assert result.equals(df)
    caplog.clear()

    # A summary with two values per group.
    regrouped = tibble(x=1, y=2) >> group_by(f.x, f.y)
    result = regrouped >> summarise(z=[2, 2])
    assert "`summarise()` has grouped output by ['x', 'y']" in caplog.text
    expected = tibble(x=[1, 1], y=[2, 2], z=[2, 2])
    assert result.equals(expected)
    caplog.clear()

    # Rowwise frame with explicit group columns.
    by_row = df >> rowwise(f.x, f.y)
    result = by_row >> summarise()
    assert "`summarise()` has grouped output by ['x', 'y']" in caplog.text
    assert result.equals(df)
    caplog.clear()

    # Rowwise frame without group columns.
    by_row = df >> rowwise()
    result = by_row >> summarise()
    assert "`summarise()` has ungrouped output" in caplog.text
    # dim() is evaluated outside of the assert statement on purpose.
    shape = dim(result)
    assert shape == (1, 0)
    caplog.clear()

    # Unsupported types are not tested any further, because arbitrary
    # python objects are supported by pandas, e.g.:
    # tibble(x=1, y=c(1, 2, 2), z=runif(3)) >> summarise(a=object())

    # Summaries with incompatible sizes.
    with pytest.raises(ValueError):
        tibble(z=1) >> summarise(x=[1, 2, 3], y=[1, 2])
    with pytest.raises(ValueError):
        tibble(z=[1, 2]) >> group_by(f.z) >> summarise(x=[1, 2, 3], y=[1, 2])
    with pytest.raises(ValueError):
        tibble(z=c(1, 3)) >> group_by(f.z) >> summarise(
            x=seq_len(f.z), y=[1, 2]
        )

    # Reference to a column that does not exist.
    with pytest.raises(KeyError):
        summarise(mtcars, a=mean(f.not_there))

    with pytest.raises(KeyError):
        summarise(group_by(mtcars, f.cyl), a=mean(f.not_there))

    # Duplicated column names.
    # NOTE: the variable name `x` is passed positionally twice on purpose.
    x = 1
    duplicated = tibble(x, x, _name_repair="minimal")
    with pytest.raises(NameNonUniqueError):
        duplicated >> summarise(f.x)
예제 #2
0
def test_rowwise_preserved_by_major_verbs():
    """Check the result class and group variables after each major verb.

    ``arrange``, ``filter``, ``mutate``, ``rename``, ``select`` and
    ``slice`` are expected to return a ``TibbleRowwise``; ``summarise``
    is expected to return a ``TibbleGrouped`` instead.
    """

    def check(result, expected_cls, expected_vars):
        # Shared pair of assertions applied to every verb's output.
        assert isinstance(result, expected_cls)
        assert group_vars(result) == expected_vars

    base = tibble(x=range(1, 6), y=range(5, 0, -1))
    rf = rowwise(base, f.x)

    arranged = arrange(rf, f.y)
    check(arranged, TibbleRowwise, ["x"])

    filtered = filter(rf, f.x < 3)
    check(filtered, TibbleRowwise, ["x"])

    mutated = mutate(rf, x=f.x + 1)
    check(mutated, TibbleRowwise, ["x"])

    # Renaming the group column is reflected in the group variables.
    renamed = rename(rf, X=f.x)
    check(renamed, TibbleRowwise, ["X"])

    selected = select(rf, "x")
    check(selected, TibbleRowwise, ["x"])

    sliced = slice(rf, c(0, 0))
    check(sliced, TibbleRowwise, ["x"])

    # Except for summarise
    summarised = summarise(rf, z=mean(f.x, f.y))
    check(summarised, TibbleGrouped, ["x"])
예제 #3
0
파일: test_select.py 프로젝트: pwwang/datar
def test_can_be_before_group_by():
    """Check the column names of a group_by >> select >> summarise chain.

    The frame is grouped by ``id`` and ``year``, the three columns are
    selected, and ``var1`` is averaged; the result must expose the
    columns ``id``, ``year`` and ``var1`` in that order.
    """
    ids = c(1, 1, 2, 2, 2, 3, 3, 4, 4, 5)
    years = c(2013, 2013, 2012, 2013, 2013, 2013, 2012, 2012, 2013, 2013)
    df = tibble(id=ids, year=years, var1=rnorm(10))

    grouped = df >> group_by(f.id, f.year)
    picked = grouped >> select(f.id, f.year, f.var1)
    dfagg = picked >> summarise(var1=mean(f.var1))

    column_names = names(dfagg)
    assert_iterable_equal(column_names, ["id", "year", "var1"])
예제 #4
0
def test_cache_key():
    """Compare two across() calls that differ only in the function applied.

    Column ``a`` taken from ``across(..., mean)`` and from
    ``across(..., max)`` must match ``mean(f.a)`` and ``max(f.a)``
    computed directly on the same grouped frame.
    """
    base = tibble(g=rep([1, 2], each=2), a=range(1, 5))
    df = base >> group_by(f.g)

    # The across() expressions stay nested inside mutate() as written.
    actual = df >> mutate(
        tibble(
            x=across(where(is_numeric), mean).a,
            y=across(where(is_numeric), max).a,
        )
    )
    expected = df >> mutate(x=mean(f.a), y=max(f.a))

    assert_frame_equal(actual, expected)
예제 #5
0
def avg_weights_and_filter(owfiles):
    """Combine weight files, average the weights per bin and filter them.

    Each file in ``owfiles`` is read as a tab-separated table with a
    header row and appended to one frame.  Rows are grouped by
    ``chrom1``/``start1``/``end1``; per group the names are pasted
    together with ``":"`` and the weights are averaged into ``score``.
    Only rows with ``score >= cutoff`` are kept, and the three grouping
    columns are dropped before writing.

    NOTE(review): ``_log``, ``outfile`` and ``cutoff`` are not defined in
    this function; presumably they come from the enclosing module or
    scope -- confirm against the caller.

    Args:
        owfiles: Iterable of paths to the weight files to combine.

    Returns:
        A tuple of the output path (``_avg_weights_filtered.bed`` next to
        ``outfile``) and the number of columns written.
    """
    _log("- Averaging bin weights")
    ofile = outfile.parent / "_avg_weights_filtered.bed"

    # Accumulate all files into one frame, starting from None.
    combined = None
    for owfile in owfiles:
        weights = pandas.read_csv(owfile, sep="\t", header=0)
        combined = combined >> bind_rows(weights)

    per_bin = combined >> group_by(f.chrom1, f.start1, f.end1)
    averaged = per_bin >> summarise(
        chrom=f.chrom1,
        start=f.start1,
        end=f.end1,
        name=paste(f.name, collapse=":"),
        score=mean(f.weight),
        strand="+",
    )
    kept = averaged >> filter_(f.score >= cutoff)
    # Drop the grouping columns; chrom/start/end carry the same values.
    bed = kept >> ungroup() >> select(~f.chrom1, ~f.start1, ~f.end1)

    bed.to_csv(ofile, sep="\t", index=False, header=False)
    return ofile, len(bed.columns)
예제 #6
0
def test_dup_keyword_args():
    """Check a summary that reuses an underscore-prefixed intermediate.

    ``_b`` holds the group mean of ``a`` and ``b`` doubles it; the
    expected frame contains only ``g`` and ``b``.
    """
    base = tibble(g=[1, 1], a=[1.0, 2.0])
    df = base >> group_by(f.g)

    actual = df >> summarise(_b=mean(f.a), b=f._b * 2)
    expected = tibble(g=1, b=3.0)

    assert_tibble_equal(actual, expected)
예제 #7
0
def test_freshly_create_vars():
    """Check that a summary can refer to a column created just before it.

    ``y`` is the mean of ``x`` (1..10) and ``z`` is defined as ``y + 1``
    within the same ``summarise`` call.
    """
    numbers = tibble(x=range(1, 11))
    summary = summarise(numbers, y=mean(f.x), z=f.y + 1)

    y_values = summary.y.to_list()
    z_values = summary.z.to_list()
    assert y_values == [5.5]
    assert z_values == [6.5]
예제 #8
0
def test_0_groups():
    """Check mutate() on a grouped frame that has no rows.

    The result must still expose the columns ``x``, ``y``, ``z`` and
    ``n`` and contain zero rows.
    """
    # Select no rows so the grouped frame ends up without any group.
    empty = tibble(x=1).loc[[], :]
    df = empty >> group_by(f.x)

    res = df >> mutate(y=mean(f.x), z=+mean(f.x), n=n())
    assert res.columns.tolist() == ["x", "y", "z", "n"]

    # nrow() is evaluated outside of the assert statement on purpose.
    row_count = res >> nrow()
    assert row_count == 0
예제 #9
0
def test_with_groups__groups_eq_null_ungroups():
    """Check that with_groups(NULL, ...) ungroups before applying the verb.

    The frame is grouped by ``x``; with ``NULL`` groups the mean of
    ``x`` is expected to be the overall mean (1.5) on every row.
    """
    base = tibble(x=[1.0, 2.0])
    gf = group_by(base, f.x)

    out = gf >> with_groups(NULL, mutate, y=mean(f.x))

    y_values = out.y.tolist()
    assert y_values == [1.5, 1.5]