Exemple #1
0
def test_ignores_null_empty():
    """Binding NULL or empty frames must leave the non-empty frame intact.

    Frames without columns still contribute their rows; the missing cells
    are verified by filling them with a sentinel value.
    """
    base = tibble(a=1)

    # NULL contributes nothing.
    bound = base >> bind_rows(NULL)
    assert bound.equals(base)

    # A completely empty frame contributes nothing either.
    empty = tibble()
    bound = base >> bind_rows(empty)
    assert bound.equals(base)

    # Columns but zero rows.
    zero_rows = base.iloc[[], :]
    bound = base >> bind_rows(zero_rows)
    assert bound.equals(base)

    # One row but zero columns: the row is kept and `a` is missing in it.
    zero_cols = base.iloc[:, []]
    bound = base >> bind_rows(zero_cols)
    # The piped result is bound to a name before asserting, as in the rest
    # of this test.
    n_rows = bound >> nrow()
    assert n_rows == 2

    filled = bound.fillna(1234)
    cell = filled >> get(1, f.a)
    assert cell == 1234

    # Same check with the column-less frame on the left-hand side.
    bound = zero_cols >> bind_rows(base)
    n_rows = bound >> nrow()
    assert n_rows == 2

    filled = bound.fillna(888)
    cell = filled >> get(0, f.a)
    assert cell == 888
Exemple #2
0
def test_list_as_first_argument():
    """bind_rows accepts a list of frames as its first argument."""
    frame = tibble(a=1, b=2)

    # A single-element list round-trips the frame.
    single = bind_rows([frame])
    assert single.equals(frame)

    # Two copies of the frame are stacked row-wise.
    expected = tibble(a=[1, 1], b=[2, 2])
    doubled = bind_rows([frame, frame])
    assert doubled.equals(expected)
Exemple #3
0
def test_rowwise_vector():
    """Dicts passed to bind_rows are treated as single rows."""
    base = tibble(a="foo", b="bar")
    stacked = base >> bind_rows({"a": "A", "b": "B"})
    expected = tibble(a=["foo", "A"], b=["bar", "B"])
    assert stacked.equals(expected)

    # Keyword names become the values of the `_id` column.
    with_id = bind_rows(
        None,
        a={"a": 1, "b": 2},
        b={"a": 3, "b": 4},
        _id="id",
    )
    expected = tibble(id=["a", "b"], a=[1, 3], b=[2, 4])
    assert with_id.equals(expected)
Exemple #4
0
def test_create_id_col():
    """`_id` adds a column identifying the source frame of each row."""
    numbers = tibble(x=range(1, 11))
    first_three = numbers >> head(3)
    last_two = numbers >> tail(2)

    # Frames supplied as a list are identified by their position.
    by_position = bind_rows([first_three, last_two], _id="col")
    assert by_position.col.tolist() == [0, 0, 0, 1, 1]

    # Frames supplied as keyword arguments are identified by their name.
    by_name = bind_rows(None, one=first_three, two=last_two, _id="col")
    assert by_name.col.tolist() == ["one", "one", "one", "two", "two"]
Exemple #5
0
def test_bind_empty_dfs():
    """Binding nothing yields a 0x0 frame; empty frames do not disturb data."""
    no_rows = bind_rows(None)
    assert dim(no_rows) == (0, 0)

    no_cols = bind_cols(None)
    assert dim(no_cols) == (0, 0)

    # A factor column keeps its values when an empty frame is appended.
    with_factor = tibble(x=factor([1, 2, 3]))
    empty = tibble()
    bound = with_factor >> bind_rows(empty)
    assert bound.x.tolist() == [1, 2, 3]
Exemple #6
0
def test_errors():
    """Check which invalid inputs make bind_rows raise and which do not."""
    left = tibble(x=[1, 2, 3])
    right = tibble(x=[4, 5, 6])
    # A non-string `_id` is rejected.
    with pytest.raises(ValueError):
        left >> bind_rows(right, _id=5)

    # Mixing a factor column with a numeric one is allowed.
    left = tibble(a=factor("a"))
    right = tibble(a=1)
    left >> bind_rows(right)  # no error, all converted to object

    # A plain list of scalars cannot be bound.
    with pytest.raises(ValueError):
        [1, 2] >> bind_rows()
Exemple #7
0
def test_bind_na_cols():
    """An NA-only column binds with a factor column in either order."""
    factor_df = tibble(x=factor(["foo", "bar"]))
    na_df = tibble(x=NA)

    # NA appended after the factor values ends up in the last row.
    bound = factor_df >> bind_rows(na_df)
    cell = bound >> get(2, f.x)
    missing = is_na(cell)
    assert_iterable_equal(missing, [True])

    # NA placed first ends up in the first row.
    bound = na_df >> bind_rows(factor_df)
    cell = bound >> get(0, f.x)
    missing = is_na(cell)
    assert_iterable_equal(missing, [True])

    # The combined column is still categorical.
    categorical = is_categorical(bound.x)
    assert categorical
Exemple #8
0
def test_complex():
    """Complex-valued columns are stacked with their values preserved."""
    upper = tibble(r=[1 + 1j, 2 - 1j])
    lower = tibble(r=[1 - 1j, 2 + 1j])
    stacked = upper >> bind_rows(lower)

    n_rows = stacked >> nrow()
    assert n_rows == 4

    expected_values = [*upper.r.tolist(), *lower.r.tolist()]
    assert stacked.r.tolist() == expected_values
Exemple #9
0
def test_bind_factors():
    """Factor columns are combined by merging their categories."""
    left = tibble(a=factor("a"))
    right = tibble(a=factor("b"))

    merged = left >> bind_rows(right)
    assert merged.a.cat.categories.tolist() == ["a", "b"]

    # A factor holding only NA adds no category; the value stays missing.
    left = tibble(a=factor("a"))
    right = tibble(a=factor(NA))

    merged = left >> bind_rows(right)
    assert merged.a.cat.categories.tolist() == ["a"]
    as_objects = merged.a.astype(object).fillna("NA")
    assert as_objects.tolist() == ["a", "NA"]

    # Piping None with a list of frames gives the same result.
    from_list = None >> bind_rows([left, right])
    assert_frame_equal(from_list, merged)
Exemple #10
0
def test_factor_to_chars():
    """Binding a factor column with a string column drops the factor type."""
    # No warning is emitted here, so only the resulting type is checked.
    factor_df = tibble(a=factor("a"))
    string_df = tibble(a="b")

    bound = factor_df >> bind_rows(factor_df, string_df)
    still_factor = is_factor(bound.a)
    assert not still_factor
Exemple #11
0
def test_int_to_float():
    """A float/int column mix becomes float; an all-int column stays int."""
    mixed = tibble(a=1.0, b=2)
    ints = tibble(a=1, b=2)
    bound = mixed >> bind_rows(ints)

    a_is_float = is_float(bound.a)
    assert a_is_float

    b_is_int = is_int(bound.b)
    assert b_is_int
Exemple #12
0
def test_hierachical_data():
    """Row dicts bind into a frame, from a piped list or from a piped dict."""
    records = [{"x": 1, "y": "a"}, {"x": 2, "y": "b"}]

    # A list of row dicts piped into bind_rows.
    bound = records >> bind_rows()
    n_rows = nrow(bound)
    assert n_rows == 2
    x_is_int = is_int(bound.x)
    assert x_is_int
    y_is_chr = is_character(bound.y)
    assert y_is_chr

    # A single row dict piped in, with another one passed as an argument.
    bound = {"x": 1, "y": "a"} >> bind_rows({"x": 2, "y": "b"})
    n_rows = nrow(bound)
    assert n_rows == 2
    x_is_int = is_int(bound.x)
    assert x_is_int
    y_is_chr = is_character(bound.y)
    assert y_is_chr
Exemple #13
0
def test_group_split_bind_rows_round_trip():
    """Splitting by group and binding the pieces restores the original rows.

    With the default options only the observed `setosa` level yields a chunk;
    with `_drop=False` every level of the categorical column yields one.
    """
    # Work on a private copy: casting the column on the shared `iris`
    # dataset in place would leak the dtype change into every other test
    # that uses it.
    data = iris.copy()
    data["Species"] = data["Species"].astype("category")
    setosa = data >> filter(f.Species == "setosa")

    chunks = setosa >> group_split.list(f.Species)
    assert len(chunks) == 1
    assert bind_rows(chunks).equals(setosa)

    # Unobserved levels are kept as (empty) chunks when nothing is dropped.
    chunks = setosa >> group_split.list(f.Species, _drop=False)
    assert len(chunks) == 3
    assert_frame_equal(chunks[0], setosa)
Exemple #14
0
def avg_weights_and_filter(owfiles):
    """Combine per-file bin weights, average them per bin and filter by score.

    Every path in `owfiles` is read as a tab-separated table with a header
    row and appended to one combined table. Rows are grouped by
    (chrom1, start1, end1); for each group the names are joined with ":"
    and the weights are averaged into `score`. Only rows whose score is at
    least `cutoff` are kept. The result is written, tab-separated and
    without index or header, to "_avg_weights_filtered.bed" next to
    `outfile`.

    `outfile`, `cutoff` and `_log` are read from the enclosing scope.

    Args:
        owfiles: Iterable of paths to the weight tables.

    Returns:
        A tuple of the output path and the number of columns written.
    """
    _log("- Averaging bin weights")
    filtered_path = outfile.parent / "_avg_weights_filtered.bed"

    # Start from None so the first table can be bound like any other.
    merged = None
    for weight_file in owfiles:
        table = pandas.read_csv(weight_file, sep="\t", header=0)
        merged = merged >> bind_rows(table)

    averaged = merged >> group_by(f.chrom1, f.start1, f.end1) >> summarise(
        chrom=f.chrom1,
        start=f.start1,
        end=f.end1,
        name=paste(f.name, collapse=":"),
        score=mean(f.weight),
        strand="+",
    )

    # NOTE(review): the negated selectors presumably drop the grouping key
    # columns, leaving only the summarised ones - confirm.
    merged = averaged >> filter_(f.score >= cutoff) >> ungroup() >> select(
        ~f.chrom1,
        ~f.start1,
        ~f.end1,
    )

    merged.to_csv(filtered_path, sep="\t", index=False, header=False)
    n_columns = len(merged.columns)
    return filtered_path, n_columns
Exemple #15
0
def test_empty_dict():
    """Binding an empty dict produces a frame with no rows and no columns."""
    bound = bind_rows({})
    shape = bound >> dim()
    assert shape == (0, 0)
Exemple #16
0
def test_reorder_cols():
    """The column order of the first frame wins when orders differ."""
    ordered = tibble(a=1, b=2, c=3, d=4, e=5, f=6)
    # Same columns, shuffled into a random order.
    shuffled = ordered[sample(ordered.columns)]
    bound = ordered >> bind_rows(shuffled)
    assert bound.columns.tolist() == ["a", "b", "c", "d", "e", "f"]
Exemple #17
0
def test_bind_rows_grouped():
    """A grouped frame can be bound with a row dict."""
    grouped = tibble(x=[1, 2, 3]) >> group_by(f.x)
    bound = bind_rows(grouped, dict(x=4))
    # The values are read through `.obj` on the returned column.
    assert_iterable_equal(bound.x.obj, [1, 2, 3, 4])
Exemple #18
0
def test_cat_ordered():
    """Binding an ordered factor with itself keeps it ordered."""
    ordered_df = tibble(x=factor([1, 2, 3], ordered=True))
    bound = bind_rows(ordered_df, ordered_df)
    assert bound.x.cat.ordered
Exemple #19
0
def test_non_existing_col():
    """A column missing from one frame is NA-filled only for that frame."""
    # The rows coming from the frame that has `y` must keep their values,
    # i.e. the whole column must not turn into NAs.
    without_y = tibble(x=letters)
    with_y = tibble(x=letters[:10], y=letters[:10])
    bound = without_y >> bind_rows(with_y)
    assert not bound.y.isna().all()
Exemple #20
0
def test_wrong_first_argument():
    """Piping an unsupported type into bind_rows raises NotImplementedError."""
    unsupported = 1
    with pytest.raises(NotImplementedError):
        unsupported >> bind_rows()