def test_ignores_null_empty():
    """Check bind_rows() against NULL, empty, row-less and column-less input."""
    base = tibble(a=1)

    bound = base >> bind_rows(NULL)
    assert bound.equals(base)

    empty = tibble()
    bound = base >> bind_rows(empty)
    assert bound.equals(base)

    # no rows
    zero_rows = base.iloc[[], :]
    bound = base >> bind_rows(zero_rows)
    assert bound.equals(base)

    # no cols: the column-less frame still contributes a row, whose `a`
    # is missing (so fillna() replaces it)
    zero_cols = base.iloc[:, []]
    bound = base >> bind_rows(zero_cols)
    n_rows = bound >> nrow()
    assert n_rows == 2
    filled = bound.fillna(1234) >> get(1, f.a)
    assert filled == 1234

    # same thing with the column-less frame bound first
    bound = zero_cols >> bind_rows(base)
    n_rows = bound >> nrow()
    assert n_rows == 2
    filled = bound.fillna(888) >> get(0, f.a)
    assert filled == 888
def test_list_as_first_argument():
    """bind_rows() accepts a list of frames as its first argument."""
    single = tibble(a=1, b=2)

    # A one-element list gives back an equal frame.
    result = bind_rows([single])
    assert result.equals(single)

    # Two copies are stacked row-wise.
    result = bind_rows([single, single])
    doubled = tibble(a=[1, 1], b=[2, 2])
    assert result.equals(doubled)
def test_rowwise_vector():
    """Dicts passed to bind_rows() are treated as single rows."""
    bound = tibble(a="foo", b="bar") >> bind_rows(dict(a="A", b="B"))
    wanted = tibble(a=["foo", "A"], b=["bar", "B"])
    assert bound.equals(wanted)

    # Keyword-named dicts: the keyword names fill the `_id` column.
    with_id = bind_rows(
        None,
        a=dict(a=1, b=2),
        b=dict(a=3, b=4),
        _id="id",
    )
    wanted = tibble(id=["a", "b"], a=[1, 3], b=[2, 4])
    assert with_id.equals(wanted)
def test_create_id_col():
    """The `_id` column holds list positions or keyword names."""
    full = tibble(x=range(1, 11))
    top = full >> head(3)
    bottom = full >> tail(2)

    # Frames given as a list are identified by their position.
    bound = bind_rows([top, bottom], _id="col")
    assert bound.col.tolist() == [0, 0, 0, 1, 1]

    # Frames given as keywords are identified by the keyword name.
    bound = bind_rows(None, one=top, two=bottom, _id="col")
    assert bound.col.tolist() == ["one", "one", "one", "two", "two"]
def test_bind_empty_dfs():
    """Binding nothing yields a 0x0 frame; an empty frame adds nothing."""
    nothing = bind_rows(None)
    assert dim(nothing) == (0, 0)

    nothing = bind_cols(None)
    assert dim(nothing) == (0, 0)

    with_factor = tibble(x=factor([1, 2, 3]))
    empty = tibble()
    bound = with_factor >> bind_rows(empty)
    assert bound.x.tolist() == [1, 2, 3]
def test_errors():
    """bind_rows() rejects a non-string `_id` and a list of plain scalars."""
    left = tibble(x=[1, 2, 3])
    right = tibble(x=[4, 5, 6])
    with pytest.raises(ValueError):
        left >> bind_rows(right, _id=5)

    left = tibble(a=factor("a"))
    right = tibble(a=1)
    left >> bind_rows(right)
    # no error, all converted to object

    with pytest.raises(ValueError):
        [1, 2] >> bind_rows()
def test_bind_na_cols():
    """An NA column binds with a factor column and stays NA / categorical."""
    factor_df = tibble(x=factor(["foo", "bar"]))
    na_df = tibble(x=NA)

    # NA frame appended last: the third row is missing.
    bound = factor_df >> bind_rows(na_df)
    cell = bound >> get(2, f.x)
    missing = is_na(cell)
    assert_iterable_equal(missing, [True])

    # NA frame bound first: the first row is missing ...
    bound = na_df >> bind_rows(factor_df)
    cell = bound >> get(0, f.x)
    missing = is_na(cell)
    assert_iterable_equal(missing, [True])

    # ... and the column is still categorical.
    categorical = is_categorical(bound.x)
    assert categorical
def test_complex():
    """Complex-valued columns are stacked in order."""
    first = tibble(r=[1 + 1j, 2 - 1j])
    second = tibble(r=[1 - 1j, 2 + 1j])

    stacked = first >> bind_rows(second)
    n_rows = stacked >> nrow()
    assert n_rows == 4

    wanted = first.r.tolist() + second.r.tolist()
    assert stacked.r.tolist() == wanted
def test_bind_factors():
    """Factor columns keep the union of their categories when bound."""
    left = tibble(a=factor("a"))
    right = tibble(a=factor("b"))
    bound = left >> bind_rows(right)
    assert bound.a.cat.categories.tolist() == ["a", "b"]

    # A factor holding only NA adds no category.
    left = tibble(a=factor("a"))
    right = tibble(a=factor(NA))
    bound = left >> bind_rows(right)
    assert bound.a.cat.categories.tolist() == ["a"]
    assert bound.a.astype(object).fillna("NA").tolist() == ["a", "NA"]

    # Piping None into a list of frames gives the same result.
    from_list = None >> bind_rows([left, right])
    assert_frame_equal(from_list, bound)
def test_factor_to_chars():
    """Binding a factor column with a string column drops the factor type."""
    # we don't have warnings
    factor_df = tibble(a=factor("a"))
    string_df = tibble(a="b")

    bound = factor_df >> bind_rows(factor_df, string_df)
    still_factor = is_factor(bound.a)
    assert not still_factor
def test_int_to_float():
    """Int bound with float becomes float; int bound with int stays int."""
    mixed = tibble(a=1.0, b=2)
    ints = tibble(a=1, b=2)

    bound = mixed >> bind_rows(ints)

    a_is_float = is_float(bound.a)
    assert a_is_float

    b_is_int = is_int(bound.b)
    assert b_is_int
def test_hierachical_data():
    """A list of dicts, or a dict piped with a dict, binds row by row."""
    records = [dict(x=1, y="a"), dict(x=2, y="b")]

    # list of dicts piped into bind_rows()
    bound = records >> bind_rows()
    n_rows = nrow(bound)
    assert n_rows == 2
    check = is_int(bound.x)
    assert check
    check = is_character(bound.y)
    assert check

    # one dict piped in, the other passed as an argument
    bound = dict(x=1, y="a") >> bind_rows(dict(x=2, y="b"))
    n_rows = nrow(bound)
    assert n_rows == 2
    check = is_int(bound.x)
    assert check
    check = is_character(bound.y)
    assert check
def test_group_split_bind_rows_round_trip():
    """Splitting by group and binding the chunks back restores the frame.

    The test needs ``Species`` to be categorical. The conversion is done on
    a private copy, so the shared ``iris`` dataset is not mutated for other
    tests that use it.
    """
    # Copy first: assigning into ``iris`` itself would leak the dtype change.
    data = iris.copy()
    data["Species"] = data["Species"].astype("category")
    setosa = data >> filter(f.Species == "setosa")

    # Only one species is present in the filtered rows.
    chunks = setosa >> group_split.list(f.Species)
    assert len(chunks) == 1
    assert bind_rows(chunks).equals(setosa)

    # With _drop=False there are three chunks; the first equals `setosa`.
    chunks = setosa >> group_split.list(f.Species, _drop=False)
    assert len(chunks) == 3
    assert_frame_equal(chunks[0], setosa)
def avg_weights_and_filter(owfiles):
    """Average bin weights across files and write the filtered result.

    Every file in ``owfiles`` is read as a tab-separated table with a header
    row. The tables are stacked, grouped by ``chrom1``/``start1``/``end1``,
    and each group is summarised into one record: ``name`` joins the group's
    names with ``":"``, ``score`` is the mean of ``weight`` and ``strand``
    is ``"+"``. Records whose ``score`` is below ``cutoff`` are dropped, and
    the result is written tab-separated without header or index.

    ``outfile`` and ``cutoff`` are not parameters; they are taken from the
    surrounding scope.

    Args:
        owfiles: Iterable of paths to the tab-separated weight files.

    Returns:
        A tuple ``(ofile, ncols)``: the path of the written file (placed in
        ``outfile.parent``) and the number of columns it contains.
    """
    _log("- Averaging bin weights")
    ofile = outfile.parent / "_avg_weights_filtered.bed"

    # Read every file first and bind once. Re-binding the accumulated frame
    # inside the loop copies all earlier rows again on each iteration.
    frames = [
        pandas.read_csv(owfile, sep="\t", header=0) for owfile in owfiles
    ]
    df = bind_rows(frames)

    df = (
        df
        >> group_by(f.chrom1, f.start1, f.end1)
        >> summarise(
            chrom=f.chrom1,
            start=f.start1,
            end=f.end1,
            name=paste(f.name, collapse=":"),
            score=mean(f.weight),
            strand="+",
        )
        >> filter_(f.score >= cutoff)
        >> ungroup()
        # The grouping keys were copied to chrom/start/end above.
        >> select(
            ~f.chrom1,
            ~f.start1,
            ~f.end1,
        )
    )

    df.to_csv(ofile, sep="\t", index=False, header=False)
    return ofile, len(df.columns)
def test_empty_dict():
    """An empty dict binds to a frame with no rows and no columns."""
    bound = bind_rows({})
    shape = bound >> dim()
    assert shape == (0, 0)
def test_reorder_cols():
    """The bound frame follows the column order of the first frame."""
    ordered = tibble(a=1, b=2, c=3, d=4, e=5, f=6)
    # Same columns in the order returned by sample().
    shuffled = ordered[sample(ordered.columns)]

    bound = ordered >> bind_rows(shuffled)
    assert bound.columns.tolist() == ["a", "b", "c", "d", "e", "f"]
def test_bind_rows_grouped():
    """A grouped frame can be bound with a dict row."""
    grouped = tibble(x=[1, 2, 3]) >> group_by(f.x)
    bound = bind_rows(grouped, {"x": 4})
    # The values are read through `.obj` of the resulting `x` column.
    assert_iterable_equal(bound.x.obj, [1, 2, 3, 4])
def test_cat_ordered():
    """An ordered factor stays ordered after binding a frame to itself."""
    ordered_df = tibble(x=factor([1, 2, 3], ordered=True))
    bound = bind_rows(ordered_df, ordered_df)
    assert bound.x.cat.ordered
def test_non_existing_col():
    """A column missing from one frame is not turned entirely into NA."""
    # fill with NA, but not convert whole column to NAs
    x_only = tibble(x=letters)
    x_and_y = tibble(x=letters[:10], y=letters[:10])

    bound = x_only >> bind_rows(x_and_y)
    assert not bound.y.isna().all()
def test_wrong_first_argument():
    """Piping a plain integer into bind_rows() raises NotImplementedError."""
    with pytest.raises(NotImplementedError):
        1 >> bind_rows()