def test_merge(self):
    """Check that merging two blocks keeps each block's rows at its own locations.

    Two 2-row blocks are placed at the positions of ("e", "b") and
    ("a", "d") within a five-item reference index; the merged block must
    report locations 0..3 and hold each input's rows at its own locations.
    """
    avals = tm.randn(2, 10)
    bvals = tm.randn(2, 10)

    ref_cols = Index(["e", "a", "b", "d", "f"])
    a_locs = ref_cols.get_indexer(["e", "b"])
    b_locs = ref_cols.get_indexer(["a", "d"])

    ablock = make_block(avals, a_locs)
    bblock = make_block(bvals, b_locs)
    merged = ablock.merge(bblock)

    expected_locs = np.array([0, 1, 2, 3], dtype=np.int64)
    tm.assert_numpy_array_equal(merged.mgr_locs.as_array, expected_locs)

    # Rows 0 and 2 come from the first block, rows 1 and 3 from the second.
    expected_a = np.array(avals)
    tm.assert_numpy_array_equal(merged.values[[0, 2]], expected_a)
    expected_b = np.array(bvals)
    tm.assert_numpy_array_equal(merged.values[[1, 3]], expected_b)
def test_set_change_dtype(self, mgr):
    """Check that re-setting an item with new values changes its dtype.

    Covers bool -> object on the original manager, object on a
    consolidated copy, and int -> float for a freshly added item.

    Parameters
    ----------
    mgr : fixture
        Block manager under test; it must support ``set``, ``get`` and
        ``consolidate``.
    """
    mgr.set("baz", np.zeros(N, dtype=bool))

    # Overwrite the boolean item with strings: dtype must become object.
    mgr.set("baz", np.repeat("foo", N))
    assert mgr.get("baz").dtype == np.object_

    # Same overwrite against the consolidated manager.
    mgr2 = mgr.consolidate()
    mgr2.set("baz", np.repeat("foo", N))
    assert mgr2.get("baz").dtype == np.object_

    mgr2.set("quux", tm.randn(N).astype(int))
    assert mgr2.get("quux").dtype == np.int_

    mgr2.set("quux", tm.randn(N))
    # np.float_ was removed in NumPy 2.0; np.float64 is the same type on
    # every NumPy version.
    assert mgr2.get("quux").dtype == np.float64
def test_consolidate_ordering_issues(self, mgr):
    """Check block counts and placement after consolidating a manager.

    Five float items are set on the manager. The consolidated manager is
    expected to have four blocks, and its numeric subset a single block
    whose locations are exactly ``0 .. len(items) - 1``.
    """
    for item in ("f", "d", "b", "g", "h"):
        mgr.set(item, tm.randn(N))

    # we have datetime/tz blocks in mgr
    consolidated = mgr.consolidate()
    assert consolidated.nblocks == 4

    numeric = mgr.consolidate().get_numeric_data()
    assert numeric.nblocks == 1

    placement = numeric.blocks[0].mgr_locs
    assert isinstance(placement, BlockPlacement)

    expected_locs = np.arange(len(numeric.items), dtype=np.int64)
    tm.assert_numpy_array_equal(numeric.blocks[0].mgr_locs.as_array, expected_locs)
def test_set_change_dtype(self, mgr):
    """Check that replacing an item's values via ``iset`` changes its dtype.

    Covers bool -> object on the original manager, object on a
    consolidated copy, and int -> float for a freshly inserted item.

    Parameters
    ----------
    mgr : fixture
        Block manager under test; it must support ``insert``, ``iset``,
        ``iget`` and ``consolidate`` and expose ``items``.
    """
    mgr.insert(len(mgr.items), "baz", np.zeros(N, dtype=bool))

    # Overwrite the boolean item with strings: dtype must become object.
    mgr.iset(mgr.items.get_loc("baz"), np.repeat("foo", N))
    idx = mgr.items.get_loc("baz")
    assert mgr.iget(idx).dtype == np.object_

    # Same overwrite against the consolidated manager.
    mgr2 = mgr.consolidate()
    mgr2.iset(mgr2.items.get_loc("baz"), np.repeat("foo", N))
    idx = mgr2.items.get_loc("baz")
    assert mgr2.iget(idx).dtype == np.object_

    mgr2.insert(len(mgr2.items), "quux", tm.randn(N).astype(int))
    idx = mgr2.items.get_loc("quux")
    assert mgr2.iget(idx).dtype == np.int_

    mgr2.iset(mgr2.items.get_loc("quux"), tm.randn(N))
    # np.float_ was removed in NumPy 2.0; np.float64 is the same type on
    # every NumPy version. ``idx`` still refers to "quux" here.
    assert mgr2.iget(idx).dtype == np.float64
def test_combine_first(self):
    """Exercise ``Series.combine_first`` on float, mixed-type and empty inputs."""
    values = tm.makeIntIndex(20).values.astype(float)
    series = Series(values, index=tm.makeIntIndex(20))

    series_copy = series * 2
    # np.NaN was removed in NumPy 2.0; np.nan is the supported spelling.
    series_copy[::2] = np.nan

    # nothing used from the input
    combined = series.combine_first(series_copy)
    tm.assert_series_equal(combined, series)

    # Holes filled from input
    combined = series_copy.combine_first(series)
    assert np.isfinite(combined).all()
    tm.assert_series_equal(combined[::2], series[::2])
    tm.assert_series_equal(combined[1::2], series_copy[1::2])

    # mixed types
    index = tm.makeStringIndex(20)
    floats = Series(tm.randn(20), index=index)
    strings = Series(tm.makeStringIndex(10), index=index[::2])

    combined = strings.combine_first(floats)
    tm.assert_series_equal(strings, combined.loc[index[::2]])
    tm.assert_series_equal(floats[1::2].astype(object), combined.loc[index[1::2]])

    # corner case: combining with an empty object Series; the expected
    # result has the caller's values on an object-dtype index.
    s = Series([1.0, 2, 3], index=[0, 1, 2])
    empty = Series([], index=[], dtype=object)
    result = s.combine_first(empty)
    s.index = s.index.astype("O")
    tm.assert_series_equal(s, result)
def test_repr(self, datetime_series, string_series, object_series):
    """Smoke-test ``str``/``repr`` of Series and pin a few exact outputs.

    Most calls only need to complete without raising. The explicit
    assertions cover a name of ``0``, control characters in values, name
    and index, and the repr of empty Series.

    Parameters
    ----------
    datetime_series, string_series, object_series : fixture
        Series fixtures supplied by the test suite. ``string_series`` is
        mutated here (values 5:7 and its name).
    """
    str(datetime_series)
    str(string_series)
    str(string_series.astype(int))
    str(object_series)

    str(Series(tm.randn(1000), index=np.arange(1000)))
    str(Series(tm.randn(1000), index=np.arange(1000, 0, step=-1)))

    # empty
    str(Series(dtype=object))

    # with NaNs
    # np.NaN was removed in NumPy 2.0; np.nan is the supported spelling.
    string_series[5:7] = np.nan
    str(string_series)

    # with Nones
    ots = datetime_series.astype("O")
    ots[::2] = None
    repr(ots)

    # various names
    for name in [
        "",
        1,
        1.2,
        "foo",
        "\u03B1\u03B2\u03B3",
        "loooooooooooooooooooooooooooooooooooooooooooooooooooong",
        ("foo", "bar", "baz"),
        (1, 2),
        ("foo", 1, 2.3),
        ("\u03B1", "\u03B2", "\u03B3"),
        ("\u03B1", "bar"),
    ]:
        string_series.name = name
        repr(string_series)

    biggie = Series(tm.randn(1000), index=np.arange(1000), name=("foo", "bar", "baz"))
    repr(biggie)

    # 0 as name
    ser = Series(np.random.randn(100), name=0)
    rep_str = repr(ser)
    assert "Name: 0" in rep_str

    # tidy repr
    ser = Series(np.random.randn(1001), name=0)
    rep_str = repr(ser)
    assert "Name: 0" in rep_str

    # Control characters in values, name and index must not appear raw.
    ser = Series(["a\n\r\tb"], name="a\n\r\td", index=["a\n\r\tf"])
    assert "\t" not in repr(ser)
    assert "\r" not in repr(ser)
    assert "a\n" not in repr(ser)

    # with empty series (#4651)
    s = Series([], dtype=np.int64, name="foo")
    assert repr(s) == "Series([], Name: foo, dtype: int64)"

    s = Series([], dtype=np.int64, name=None)
    assert repr(s) == "Series([], dtype: int64)"