Python bind_rows 예제들, datar.dplyr.bind_rows Python 예제들

예제 #1

0

파일 보기

파일: test_bind.py 프로젝트: pwwang/datar

def test_ignores_null_empty():
    """Check bind_rows with NULL, empty, zero-row and zero-column inputs."""
    base = tibble(a=1)

    # Binding NULL gives back a frame equal to the original.
    bound = base >> bind_rows(NULL)
    assert bound.equals(base)

    # Same for a tibble created with no arguments.
    empty = tibble()
    bound = base >> bind_rows(empty)
    assert bound.equals(base)

    # Same for a slice that keeps the columns but selects no rows.
    zero_rows = base.iloc[[], :]
    bound = base >> bind_rows(zero_rows)
    assert bound.equals(base)

    # A slice that keeps the rows but selects no columns: the result is
    # expected to have two rows.
    # NOTE(review): piped calls are bound to a name before asserting, as in
    # the original; presumably they must stay outside `assert` - confirm
    # before inlining.
    zero_cols = base.iloc[:, []]
    bound = base >> bind_rows(zero_cols)
    n_rows = bound >> nrow()
    assert n_rows == 2

    # After fillna, row 1 of column `a` holds the fill value.
    filled = bound.fillna(1234) >> get(1, f.a)
    assert filled == 1234

    # Reverse order: the zero-column frame comes first.
    bound = zero_cols >> bind_rows(base)
    n_rows = bound >> nrow()
    assert n_rows == 2

    # After fillna, row 0 of column `a` holds the fill value.
    filled = bound.fillna(888) >> get(0, f.a)
    assert filled == 888

예제 #2

0

파일 보기

파일: test_bind.py 프로젝트: pwwang/datar

def test_list_as_first_argument():
    """bind_rows accepts a list of frames as its first argument."""
    row = tibble(a=1, b=2)

    # A single-element list gives back a frame equal to that element.
    single = bind_rows([row])
    assert single.equals(row)

    # Two copies of the same frame are stacked into two rows.
    doubled = bind_rows([row, row])
    stacked = tibble(a=[1, 1], b=[2, 2])
    assert doubled.equals(stacked)

예제 #3

0

파일 보기

파일: test_bind.py 프로젝트: pwwang/datar

def test_rowwise_vector():
    """Dicts can be bound as rows, either piped in or passed as keywords."""
    # A dict appended to a one-row tibble becomes the second row.
    combined = tibble(a="foo", b="bar") >> bind_rows({"a": "A", "b": "B"})
    wanted = tibble(a=["foo", "A"], b=["bar", "B"])
    assert combined.equals(wanted)

    # Keyword-named dicts with `_id="id"`: the keyword names are expected
    # in the `id` column.
    with_id = bind_rows(
        None,
        a={"a": 1, "b": 2},
        b={"a": 3, "b": 4},
        _id="id",
    )
    wanted = tibble(id=["a", "b"], a=[1, 3], b=[2, 4])
    assert with_id.equals(wanted)

예제 #4

0

파일 보기

파일: test_bind.py 프로젝트: pwwang/datar

def test_create_id_col():
    """The `_id` argument adds a column identifying each input frame."""
    source = tibble(x=range(1, 11))
    first_three = source >> head(3)
    last_two = source >> tail(2)

    # Frames passed as a list are identified by their position.
    by_position = bind_rows([first_three, last_two], _id="col")
    assert by_position.col.tolist() == [0, 0, 0, 1, 1]

    # Frames passed as keywords are identified by the keyword name.
    by_name = bind_rows(None, one=first_three, two=last_two, _id="col")
    assert by_name.col.tolist() == ["one"] * 3 + ["two"] * 2

예제 #5

0

파일 보기

파일: test_bind.py 프로젝트: pwwang/datar

def test_bind_empty_dfs():
    """Binding nothing gives a (0, 0) frame; an empty tibble adds no rows."""
    # bind_rows(None) is expected to have dim (0, 0).
    no_rows = bind_rows(None)
    assert dim(no_rows) == (0, 0)

    # bind_cols(None) likewise.
    no_cols = bind_cols(None)
    assert dim(no_cols) == (0, 0)

    # Appending an empty tibble to a factor column keeps the values.
    factors = tibble(x=factor([1, 2, 3]))
    nothing = tibble()
    bound = factors >> bind_rows(nothing)
    assert bound.x.tolist() == [1, 2, 3]

예제 #6

0

파일 보기

파일: test_bind.py 프로젝트: pwwang/datar

def test_errors():
    """Check which invalid bind_rows inputs raise ValueError."""
    left = tibble(x=[1, 2, 3])
    right = tibble(x=[4, 5, 6])
    # `_id=5` is expected to be rejected.
    with pytest.raises(ValueError):
        left >> bind_rows(right, _id=5)

    # Factor column bound with an integer column:
    # no error, all converted to object
    as_factor = tibble(a=factor("a"))
    as_int = tibble(a=1)
    as_factor >> bind_rows(as_int)

    # Piping a plain list of numbers into bind_rows is expected to fail.
    with pytest.raises(ValueError):
        [1, 2] >> bind_rows()

예제 #7

0

파일 보기

파일: test_bind.py 프로젝트: pwwang/datar

def test_bind_na_cols():
    """An NA column bound with a factor column keeps the NA, in either order."""
    factors = tibble(x=factor(["foo", "bar"]))
    missing = tibble(x=NA)

    # Factor frame first: index 2 is the appended row and should be NA.
    bound = factors >> bind_rows(missing)
    cell = bound >> get(2, f.x)
    na_flags = is_na(cell)
    assert_iterable_equal(na_flags, [True])

    # NA frame first: index 0 should be NA.
    bound = missing >> bind_rows(factors)
    cell = bound >> get(0, f.x)
    na_flags = is_na(cell)
    assert_iterable_equal(na_flags, [True])

    # The column of the NA-first result is expected to be categorical.
    categorical = is_categorical(bound.x)
    assert categorical

예제 #8

0

파일 보기

파일: test_bind.py 프로젝트: pwwang/datar

def test_complex():
    """Complex-valued columns are concatenated row by row, in order."""
    top = tibble(r=[1 + 1j, 2 - 1j])
    bottom = tibble(r=[1 - 1j, 2 + 1j])
    stacked = top >> bind_rows(bottom)

    # Two rows from each input frame.
    n_rows = stacked >> nrow()
    assert n_rows == 4

    # Values appear in input order: first frame, then second.
    assert stacked.r.tolist() == top.r.tolist() + bottom.r.tolist()

예제 #9

0

파일 보기

파일: test_bind.py 프로젝트: pwwang/datar

def test_bind_factors():
    """Check the categories that result from binding factor columns."""
    fct_a = tibble(a=factor("a"))
    fct_b = tibble(a=factor("b"))

    # Two different single-level factors: both levels are kept.
    bound = fct_a >> bind_rows(fct_b)
    assert bound.a.cat.categories.tolist() == ["a", "b"]

    fct_a = tibble(a=factor("a"))
    fct_na = tibble(a=factor(NA))

    # A factor of NA adds no category but its row remains as a missing value.
    bound = fct_a >> bind_rows(fct_na)
    assert bound.a.cat.categories.tolist() == ["a"]
    assert bound.a.astype(object).fillna("NA").tolist() == ["a", "NA"]

    # Piping None into bind_rows with a list gives the same frame.
    from_list = None >> bind_rows([fct_a, fct_na])
    assert_frame_equal(from_list, bound)

예제 #10

0

파일 보기

파일: test_bind.py 프로젝트: pwwang/datar

def test_factor_to_chars():
    """Binding a factor column with a string column does not yield a factor."""
    # we don't have warnings
    fct = tibble(a=factor("a"))
    chars = tibble(a="b")

    # The factor frame is bound with itself and then the string frame.
    bound = fct >> bind_rows(fct, chars)
    still_factor = is_factor(bound.a)
    assert not still_factor

예제 #11

0

파일 보기

파일: test_bind.py 프로젝트: pwwang/datar

def test_int_to_float():
    """A float column bound with an int column is float; int with int stays int."""
    mixed = tibble(a=1.0, b=2)
    ints = tibble(a=1, b=2)
    bound = mixed >> bind_rows(ints)

    # Column `a` mixes 1.0 and 1, so it is expected to be float.
    a_is_float = is_float(bound.a)
    assert a_is_float

    # Column `b` is an integer in both frames, so it is expected to stay int.
    b_is_int = is_int(bound.b)
    assert b_is_int

예제 #12

0

파일 보기

파일: test_bind.py 프로젝트: pwwang/datar

def test_hierachical_data():
    """A list of dicts, or a dict piped with another dict, binds into rows."""
    # A list of dicts piped into bind_rows: one row per dict.
    records = [dict(x=1, y="a"), dict(x=2, y="b")]
    bound = records >> bind_rows()
    n_rows = nrow(bound)
    assert n_rows == 2
    # Column types follow the dict values: int for x, character for y.
    flag = is_int(bound.x)
    assert flag
    flag = is_character(bound.y)
    assert flag

    # A dict piped into bind_rows with a second dict gives the same shape
    # and column types.
    bound = dict(x=1, y="a") >> bind_rows(dict(x=2, y="b"))
    n_rows = nrow(bound)
    assert n_rows == 2
    flag = is_int(bound.x)
    assert flag
    flag = is_character(bound.y)
    assert flag

예제 #13

0

파일 보기

def test_group_split_bind_rows_round_trip():
    """Splitting by group and re-binding with bind_rows restores the frame."""
    # Converts the Species column of the shared `iris` frame in place.
    iris["Species"] = iris["Species"].astype("category")
    only_setosa = iris >> filter(f.Species == "setosa")

    # Default split: a single chunk that binds back to the input.
    pieces = only_setosa >> group_split.list(f.Species)
    assert len(pieces) == 1
    assert bind_rows(pieces).equals(only_setosa)

    # With `_drop=False`, three chunks are expected and the first one
    # equals the filtered frame.
    pieces = only_setosa >> group_split.list(f.Species, _drop=False)
    assert len(pieces) == 3
    assert_frame_equal(pieces[0], only_setosa)

예제 #14

0

파일 보기

파일: BedConsensus.py 프로젝트: pwwang/biopipen

def avg_weights_and_filter(owfiles):
    """Merge weight tables, average per region, filter, and write the result.

    Each path in ``owfiles`` is read as a tab-separated table with a header
    row. The tables are stacked, grouped by ``chrom1``/``start1``/``end1``
    and summarised, then rows with ``score`` below ``cutoff`` are dropped
    and the grouping columns removed. The result is written tab-separated,
    without header or index, to ``_avg_weights_filtered.bed`` next to
    ``outfile``.

    ``outfile`` and ``cutoff`` are not defined in this function; they are
    read from an enclosing scope that is not visible here.

    Args:
        owfiles: Iterable of paths to the weight tables to combine.

    Returns:
        A tuple ``(ofile, ncols)``: the path written and the number of
        columns in the written table.
    """
    _log("- Averaging bin weights")
    ofile = outfile.parent / "_avg_weights_filtered.bed"

    # Stack all tables; the accumulator starts as None and is piped into
    # bind_rows on every iteration.
    # NOTE(review): if `owfiles` is empty the accumulator stays None when it
    # reaches group_by - confirm callers always pass at least one file.
    stacked = None
    for path in owfiles:
        table = pandas.read_csv(path, sep="\t", header=0)
        stacked = stacked >> bind_rows(table)

    # One group per (chrom1, start1, end1) combination.
    by_region = stacked >> group_by(f.chrom1, f.start1, f.end1)

    # Per group: copy the coordinates, join the names with ":", average the
    # weights, and use a constant "+" strand.
    summarised = by_region >> summarise(
        chrom=f.chrom1,
        start=f.start1,
        end=f.end1,
        name=paste(f.name, collapse=":"),
        score=mean(f.weight),
        strand="+",
    )

    # Keep only rows whose averaged score reaches the cutoff.
    passing = summarised >> filter_(f.score >= cutoff)

    # Drop the grouping and the original coordinate columns.
    final = passing >> ungroup() >> select(
        ~f.chrom1, ~f.start1, ~f.end1,
    )

    final.to_csv(ofile, sep="\t", index=False, header=False)
    return ofile, len(final.columns)

예제 #15

0

파일 보기

파일: test_bind.py 프로젝트: pwwang/datar

def test_empty_dict():
    """bind_rows on an empty dict gives a frame whose dim is (0, 0)."""
    bound = bind_rows({})
    # The piped call is bound to a name before asserting, as in the original.
    shape = bound >> dim()
    assert shape == (0, 0)

예제 #16

0

파일 보기

파일: test_bind.py 프로젝트: pwwang/datar

def test_reorder_cols():
    """The result keeps the first frame's column order."""
    ordered = tibble(a=1, b=2, c=3, d=4, e=5, f=6)
    # Same frame with its columns re-selected via `sample`.
    shuffled = ordered[sample(ordered.columns)]
    bound = ordered >> bind_rows(shuffled)
    assert bound.columns.tolist() == list("abcdef")

예제 #17

0

파일 보기

파일: test_bind.py 프로젝트: pwwang/datar

def test_bind_rows_grouped():
    """A grouped frame can be bound with a dict row."""
    grouped = tibble(x=[1, 2, 3]) >> group_by(f.x)
    bound = bind_rows(grouped, {"x": 4})
    # Values are read through `.obj` on the result's `x` column.
    assert_iterable_equal(bound.x.obj, [1, 2, 3, 4])

예제 #18

0

파일 보기

파일: test_bind.py 프로젝트: pwwang/datar

def test_cat_ordered():
    """Binding an ordered factor column with itself keeps it ordered."""
    ordered_fct = tibble(x=factor([1, 2, 3], ordered=True))
    doubled = bind_rows(ordered_fct, ordered_fct)
    assert doubled.x.cat.ordered

예제 #19

0

파일 보기

파일: test_bind.py 프로젝트: pwwang/datar

def test_non_existing_col():
    """A column present in only one input is not turned entirely into NA."""
    # fill with NA, but not convert whole column to NAs
    x_only = tibble(x=letters)
    x_and_y = tibble(x=letters[:10], y=letters[:10])
    bound = x_only >> bind_rows(x_and_y)
    # `y` exists only in the second frame, so its values must survive.
    assert not bound.y.isna().all()

예제 #20

0

파일 보기

파일: test_bind.py 프로젝트: pwwang/datar

def test_wrong_first_argument():
    """Piping a bare integer into bind_rows raises NotImplementedError."""
    expected_error = NotImplementedError
    with pytest.raises(expected_error):
        1 >> bind_rows()