def test_subclass_unstack_multi(self): # GH 15564 df = tm.SubclassedDataFrame( [[10, 11, 12, 13], [20, 21, 22, 23], [30, 31, 32, 33], [40, 41, 42, 43]], index=MultiIndex.from_tuples(list(zip(list("AABB"), list("cdcd"))), names=["aaa", "ccc"]), columns=MultiIndex.from_tuples(list(zip(list("WWXX"), list("yzyz"))), names=["www", "yyy"]), ) exp = tm.SubclassedDataFrame( [[10, 20, 11, 21, 12, 22, 13, 23], [30, 40, 31, 41, 32, 42, 33, 43]], index=Index(["A", "B"], name="aaa"), columns=MultiIndex.from_tuples( list(zip(list("WWWWXXXX"), list("yyzzyyzz"), list("cdcdcdcd"))), names=["www", "yyy", "ccc"], ), ) res = df.unstack() tm.assert_frame_equal(res, exp) res = df.unstack("ccc") tm.assert_frame_equal(res, exp) exp = tm.SubclassedDataFrame( [[10, 30, 11, 31, 12, 32, 13, 33], [20, 40, 21, 41, 22, 42, 23, 43]], index=Index(["c", "d"], name="ccc"), columns=MultiIndex.from_tuples( list(zip(list("WWWWXXXX"), list("yyzzyyzz"), list("ABABABAB"))), names=["www", "yyy", "aaa"], ), ) res = df.unstack("aaa") tm.assert_frame_equal(res, exp)
def test_start_stop_fixed(setup_path):
    """Select ``start``/``stop`` row windows from objects stored with ``store.put``.

    GH 8287 ("fixed"). Each selection is compared with the same positional
    ``iloc`` slice of the in-memory object.
    """
    with ensure_clean_store(setup_path) as store:
        frame = DataFrame(
            {"A": np.random.rand(20), "B": np.random.rand(20)},
            index=date_range("20130101", periods=20),
        )
        store.put("df", frame)

        # Two windows inside the 20 stored rows, then one that is out of range.
        for lo, hi in [(0, 5), (5, 10), (30, 40)]:
            result = store.select("df", start=lo, stop=hi)
            expected = frame.iloc[lo:hi, :]
            tm.assert_frame_equal(result, expected)

        # series
        ser = frame.A
        store.put("s", ser)
        for lo, hi in [(0, 5), (5, 10)]:
            result = store.select("s", start=lo, stop=hi)
            expected = ser.iloc[lo:hi]
            tm.assert_series_equal(result, expected)

        # sparse; not implemented
        # NOTE(review): the frame below is built but never stored or checked.
        df = tm.makeDataFrame()
        df.iloc[3:5, 1:3] = np.nan
        df.iloc[8:10, -2] = np.nan
def test_sort_index_multilevel_repr_8017(self, gen, extra): np.random.seed(0) data = np.random.randn(3, 4) columns = MultiIndex.from_tuples([("red", i) for i in gen]) df = DataFrame(data, index=list("def"), columns=columns) df2 = pd.concat( [ df, DataFrame( "world", index=list("def"), columns=MultiIndex.from_tuples([("red", extra)]), ), ], axis=1, ) # check that the repr is good # make sure that we have a correct sparsified repr # e.g. only 1 header of read assert str(df2).splitlines()[0].split() == ["red"] # GH 8017 # sorting fails after columns added # construct single-dtype then sort result = df.copy().sort_index(axis=1) expected = df.iloc[:, [0, 2, 1, 3]] tm.assert_frame_equal(result, expected) result = df2.sort_index(axis=1) expected = df2.iloc[:, [0, 2, 1, 4, 3]] tm.assert_frame_equal(result, expected) # setitem then sort result = df.copy() result[("red", extra)] = "world" result = result.sort_index(axis=1) tm.assert_frame_equal(result, expected)
def test_excel_table(self, read_ext, df_ref):
    """Read sheets of the ``test1`` workbook and compare them with ``df_ref``.

    Marked as an expected failure when the configured engine is ``pyxlsb``.
    """
    if pd.read_excel.keywords["engine"] == "pyxlsb":
        pytest.xfail("Sheets containing datetimes not supported by pyxlsb")

    workbook = "test1" + read_ext
    df1 = pd.read_excel(workbook, sheet_name="Sheet1", index_col=0)
    df2 = pd.read_excel(workbook, sheet_name="Sheet2", skiprows=[1], index_col=0)
    # TODO add index to file
    for frame in (df1, df2):
        tm.assert_frame_equal(frame, df_ref, check_names=False)

    # Dropping one footer row must equal the first sheet without its last row.
    df3 = pd.read_excel(workbook, sheet_name="Sheet1", index_col=0, skipfooter=1)
    tm.assert_frame_equal(df3, df1.iloc[:-1])
def test_same_name_scoping(setup_path):
    """Run ``datetime`` queries against a table store before and after a local import.

    The function-level ``from datetime import datetime`` is deliberately placed
    between the queries; do not move it.
    """
    with ensure_clean_store(setup_path) as store:
        df = DataFrame(np.random.randn(20, 2), index=date_range("20130101", periods=20))
        store.put("df", df, format="table")
        expected = df[df.index > Timestamp("20130105")]

        result = store.select("df", "index>datetime.datetime(2013,1,5)")
        tm.assert_frame_equal(result, expected)

        from datetime import datetime  # noqa

        # The first spelling is technically an error once the class is
        # imported, but allow it.
        for query in (
            "index>datetime.datetime(2013,1,5)",
            "index>datetime(2013,1,5)",
        ):
            result = store.select("df", query)
            tm.assert_frame_equal(result, expected)
def test_get_dummies_basic(self, sparse, dtype):
    """Encode ``"abc"`` given as a list, a Series and an indexed Series.

    The expected dtype is taken from ``self.effective_dtype(dtype)``; when
    ``sparse`` is truthy the expected columns are converted to ``SparseArray``.
    """
    letters = list("abc")
    plain_series = Series(letters)
    indexed_series = Series(letters, list("ABC"))

    expected = DataFrame(
        {"a": [1, 0, 0], "b": [0, 1, 0], "c": [0, 0, 1]},
        dtype=self.effective_dtype(dtype),
    )
    if sparse:
        expected = expected.apply(SparseArray, fill_value=0.0)

    # A list and a default-indexed Series share the same expectation.
    for data in (letters, plain_series):
        result = get_dummies(data, sparse=sparse, dtype=dtype)
        tm.assert_frame_equal(result, expected)

    # The custom index is carried over to the result.
    expected.index = list("ABC")
    result = get_dummies(indexed_series, sparse=sparse, dtype=dtype)
    tm.assert_frame_equal(result, expected)
def test_make_cf_table():
    """Compare ``make_cf_table`` output for 2, 3 and 6 classes with fixed matrices."""
    class_counts = (2, 3, 6)
    results = {
        n_classes: make_cf_table(varied_ratings, training, classes=n_classes)
        for n_classes in class_counts
    }

    books = [10, 20, 30]
    users = [100, 200, 300]
    cells = {
        2: [[0, 0, 0], [1, 1, 0], [0, 0, 0]],
        3: [[0, 1, 1], [2, 2, 0], [0, 0, 0]],
        6: [[0, 1, 3], [5, 4, 0], [0, 0, 0]],
    }
    # Expected tables are float frames indexed by book with one column per user.
    expected = {
        n_classes: pd.DataFrame(cells[n_classes], index=books, columns=users).astype(
            float
        )
        for n_classes in class_counts
    }

    for n_classes in class_counts:
        assert_frame_equal(results[n_classes], expected[n_classes])
def test_get_dummies_basic_drop_first(self, sparse):
    """Basic ``drop_first=True`` case of ``get_dummies``.

    GH12402 added the ``drop_first`` parameter to avoid collinearity.
    """
    letters = list("abc")
    plain_series = Series(letters)
    indexed_series = Series(letters, list("ABC"))

    # The first category ("a") has no column in the expected frame.
    expected = DataFrame({"b": [0, 1, 0], "c": [0, 0, 1]}, dtype=np.uint8)
    if sparse:
        expected = expected.apply(SparseArray, fill_value=0)

    for data in (letters, plain_series):
        result = get_dummies(data, drop_first=True, sparse=sparse)
        tm.assert_frame_equal(result, expected)

    expected.index = list("ABC")
    result = get_dummies(indexed_series, drop_first=True, sparse=sparse)
    tm.assert_frame_equal(result, expected)
def test_loc_index(self): # gh-17131 # a boolean index should index like a boolean numpy array df = DataFrame( np.random.random(size=(5, 10)), index=["alpha_0", "alpha_1", "alpha_2", "beta_0", "beta_1"], ) mask = df.index.map(lambda x: "alpha" in x) expected = df.loc[np.array(mask)] result = df.loc[mask] tm.assert_frame_equal(result, expected) result = df.loc[mask.values] tm.assert_frame_equal(result, expected) result = df.loc[pd.array(mask, dtype="boolean")] tm.assert_frame_equal(result, expected)
def test_loc_non_unique(self):
    """Label slicing with ``loc`` on a non-unique index (GH3659).

    https://groups.google.com/forum/?fromgroups#!topic/pydata/zTm2No0crYs
    """
    labels = [0, 1, 0, 1, 2, 3]

    # With the labels in non-monotonic order these slices are expected to raise.
    df = DataFrame(
        {"A": [1, 2, 3, 4, 5, 6], "B": [3, 4, 5, 6, 7, 8]}, index=labels
    )
    failing = [
        (slice(1, None), "'Cannot get left slice bound for non-unique label: 1'"),
        (slice(0, None), "'Cannot get left slice bound for non-unique label: 0'"),
        (slice(1, 2), "'Cannot get left slice bound for non-unique label: 1'"),
    ]
    for key, msg in failing:
        with pytest.raises(KeyError, match=msg):
            df.loc[key]

    # monotonic are ok
    df = DataFrame(
        {"A": [1, 2, 3, 4, 5, 6], "B": [3, 4, 5, 6, 7, 8]}, index=labels
    ).sort_index(axis=0)

    expected = DataFrame({"A": [2, 4, 5, 6], "B": [4, 6, 7, 8]}, index=[1, 1, 2, 3])
    tm.assert_frame_equal(df.loc[1:], expected)

    tm.assert_frame_equal(df.loc[0:], df)

    expected = DataFrame({"A": [2, 4, 5], "B": [4, 6, 7]}, index=[1, 1, 2])
    tm.assert_frame_equal(df.loc[1:2], expected)
def test_empty_field_eof(self):
    """Parse inputs that end in empty fields or short rows.

    The first part drives ``TextReader`` directly; the second part (GH5664)
    repeats three ``read_csv`` calls with the C engine 100 times.
    """
    raw = "a,b,c\n1,2,3\n4,,"
    parsed = TextReader(StringIO(raw), delimiter=",").read()
    expected = {
        0: np.array([1, 4], dtype=np.int64),
        1: np.array(["2", ""], dtype=object),
        2: np.array(["3", ""], dtype=object),
    }
    assert_array_dicts_equal(parsed, expected)

    # GH5664
    a = DataFrame([["b"], [np.nan]], columns=["a"], index=["a", "c"])
    b = DataFrame([[1, 1, 1, 0], [1, 1, 1, 0]], columns=list("abcd"), index=[1, 1])
    c = DataFrame(
        [
            [1, 2, 3, 4],
            [6, np.nan, np.nan, np.nan],
            [8, 9, 10, 11],
            [13, 14, np.nan, np.nan],
        ],
        columns=list("abcd"),
        index=[0, 5, 7, 12],
    )

    def check(text, expected_frame, **kwargs):
        # Parse a fresh buffer with the C engine and compare with the expectation.
        frame = read_csv(StringIO(text), engine="c", **kwargs)
        tm.assert_frame_equal(frame, expected_frame)

    for _ in range(100):
        check("a,b\nc\n", a, skiprows=0, names=["a"])
        check("1,1,1,1,0\n" * 2 + "\n" * 2, b, names=list("abcd"))
        check("0,1,2,3,4\n5,6\n7,8,9,10,11\n12,13,14", c, names=list("abcd"))
def test_iloc_empty_list_indexer_is_ok(self):
    """An empty-list ``iloc`` indexer must give the same frame as an empty slice."""
    df = tm.makeCustomDataframe(5, 2)
    strict = {"check_index_type": True, "check_column_type": True}

    # vertical empty: no columns selected
    tm.assert_frame_equal(df.iloc[:, []], df.iloc[:, :0], **strict)
    # horizontal empty: no rows selected, with an explicit column slice
    tm.assert_frame_equal(df.iloc[[], :], df.iloc[:0, :], **strict)
    # horizontal empty: no rows selected, row indexer only
    tm.assert_frame_equal(df.iloc[[]], df.iloc[:0, :], **strict)
def test_agg_apply_corner(ts, tsframe):
    """Sum, agg and apply over groupings whose keys are all NA (nothing to group)."""
    # Series: multiplying by NaN turns every grouping key into NaN.
    series_groups = ts.groupby(ts * np.nan)
    assert ts.dtype == np.float64

    # groupby float64 values results in Float64Index
    empty_float_index = Index([], dtype=np.float64)
    exp = Series([], dtype=np.float64, index=empty_float_index)
    tm.assert_series_equal(series_groups.sum(), exp)
    tm.assert_series_equal(series_groups.agg(np.sum), exp)
    # The apply result is compared without checking the index type.
    tm.assert_series_equal(series_groups.apply(np.sum), exp, check_index_type=False)

    # DataFrame
    frame_groups = tsframe.groupby(tsframe["A"] * np.nan)
    exp_df = DataFrame(
        columns=tsframe.columns,
        dtype=float,
        index=Index([], name="A", dtype=np.float64),
    )
    reductions = (
        lambda groups: groups.sum(),
        lambda groups: groups.agg(np.sum),
        lambda groups: groups.apply(np.sum),
    )
    for reduce in reductions:
        tm.assert_frame_equal(reduce(frame_groups), exp_df)
def test_partial_set(self, multiindex_year_month_day_dataframe_random_data):
    """Assign through partial ``.loc`` keys and compare with in-place value writes.

    GH #397. ``df`` is modified with ``.loc`` setitem while ``exp`` is modified
    by writing into ``.values`` of the same selection; the statement order is
    significant.
    """
    source = multiindex_year_month_day_dataframe_random_data
    df = source.copy()
    exp = source.copy()

    # Two-part key on the whole frame.
    df.loc[2000, 4] = 0
    exp.loc[2000, 4].values[:] = 0
    tm.assert_frame_equal(df, exp)

    # Same key, applied through column "A".
    df["A"].loc[2000, 4] = 1
    exp["A"].loc[2000, 4].values[:] = 1
    tm.assert_frame_equal(df, exp)

    # One-part key.
    df.loc[2000] = 5
    exp.loc[2000].values[:] = 5
    tm.assert_frame_equal(df, exp)

    # this works...for now
    df["A"].iloc[14] = 5
    assert df["A"].iloc[14] == 5
def test_dtype_and_names_error(c_parser_only):
    """Pass both ``dtype`` and ``names`` to the C parser (see gh-8833).

    gh-8833 was an error-reporting issue when both arguments were given.

    The input must hold one whitespace-separated record per line: every
    expected frame has three rows and two columns, so the line breaks inside
    the ``data`` literals are significant.

    Parameters
    ----------
    c_parser_only : fixture
        Parser object exposing ``read_csv``.
    """
    parser = c_parser_only
    data = """
1.0 1
2.0 2
3.0 3
"""

    # base cases
    result = parser.read_csv(StringIO(data), sep=r"\s+", header=None)
    expected = DataFrame([[1.0, 1], [2.0, 2], [3.0, 3]])
    tm.assert_frame_equal(result, expected)

    result = parser.read_csv(StringIO(data), sep=r"\s+", header=None, names=["a", "b"])
    expected = DataFrame([[1.0, 1], [2.0, 2], [3.0, 3]], columns=["a", "b"])
    tm.assert_frame_equal(result, expected)

    # fallback casting: column "a" holds whole-number floats and is read as int32
    result = parser.read_csv(
        StringIO(data), sep=r"\s+", header=None, names=["a", "b"], dtype={"a": np.int32}
    )
    expected = DataFrame([[1, 1], [2, 2], [3, 3]], columns=["a", "b"])
    expected["a"] = expected["a"].astype(np.int32)
    tm.assert_frame_equal(result, expected)

    data = """
1.0 1
nan 2
3.0 3
"""

    # fallback casting, but not castable: the "nan" entry must raise
    with pytest.raises(ValueError, match="cannot safely convert"):
        parser.read_csv(
            StringIO(data),
            sep=r"\s+",
            header=None,
            names=["a", "b"],
            dtype={"a": np.int32},
        )
def test_describe_bool_frame(self):
    """Check ``describe()`` on frames that contain boolean columns (GH#13891)."""
    categorical_rows = ["count", "unique", "top", "freq"]
    numeric_rows = ["count", "mean", "std", "min", "25%", "50%", "75%", "max"]

    # Boolean columns only.
    df = pd.DataFrame(
        {
            "bool_data_1": [False, False, True, True],
            "bool_data_2": [False, True, True, True],
        }
    )
    expected = DataFrame(
        {"bool_data_1": [4, 2, True, 2], "bool_data_2": [4, 2, True, 3]},
        index=categorical_rows,
    )
    tm.assert_frame_equal(df.describe(), expected)

    # Boolean next to integer: the expected summary contains only "int_data".
    df = pd.DataFrame(
        {
            "bool_data": [False, False, True, True, False],
            "int_data": [0, 1, 2, 3, 4],
        }
    )
    expected = DataFrame(
        {"int_data": [5, 2, df.int_data.std(), 0, 1, 2, 3, 4]},
        index=numeric_rows,
    )
    tm.assert_frame_equal(df.describe(), expected)

    # Boolean next to string: both columns appear in the expected summary.
    df = pd.DataFrame(
        {"bool_data": [False, False, True, True], "str_data": ["a", "b", "c", "a"]}
    )
    expected = DataFrame(
        {"bool_data": [4, 2, True, 2], "str_data": [4, 3, "a", 2]},
        index=categorical_rows,
    )
    tm.assert_frame_equal(df.describe(), expected)
def test_interp_rowwise(self): df = DataFrame({ 0: [1, 2, np.nan, 4], 1: [2, 3, 4, np.nan], 2: [np.nan, 4, 5, 6], 3: [4, np.nan, 6, 7], 4: [1, 2, 3, 4], }) result = df.interpolate(axis=1) expected = df.copy() expected.loc[3, 1] = 5 expected.loc[0, 2] = 3 expected.loc[1, 3] = 3 expected[4] = expected[4].astype(np.float64) tm.assert_frame_equal(result, expected) result = df.interpolate(axis=1, method="values") tm.assert_frame_equal(result, expected) result = df.interpolate(axis=0) expected = df.interpolate() tm.assert_frame_equal(result, expected)
def test_iloc_getitem_slice(self): df = DataFrame( [ {"A": 1, "B": 2, "C": 3}, {"A": 100, "B": 200, "C": 300}, {"A": 1000, "B": 2000, "C": 3000}, ] ) expected = DataFrame([{"A": 1, "B": 2, "C": 3}, {"A": 100, "B": 200, "C": 300}]) result = df.iloc[:2] tm.assert_frame_equal(result, expected) expected = DataFrame([{"A": 100, "B": 200}], index=[1]) result = df.iloc[1:2, 0:2] tm.assert_frame_equal(result, expected) expected = DataFrame( [{"A": 1, "C": 3}, {"A": 100, "C": 300}, {"A": 1000, "C": 3000}] ) result = df.iloc[:, lambda df: [0, 2]] tm.assert_frame_equal(result, expected)
def test_crosstab_ndarray(self, box): # GH 44076 a = box(np.random.randint(0, 5, size=100)) b = box(np.random.randint(0, 3, size=100)) c = box(np.random.randint(0, 10, size=100)) df = DataFrame({"a": a, "b": b, "c": c}) result = crosstab(a, [b, c], rownames=["a"], colnames=("b", "c")) expected = crosstab(df["a"], [df["b"], df["c"]]) tm.assert_frame_equal(result, expected) result = crosstab([b, c], a, colnames=["a"], rownames=("b", "c")) expected = crosstab([df["b"], df["c"]], df["a"]) tm.assert_frame_equal(result, expected) # assign arbitrary names result = crosstab(a, c) expected = crosstab(df["a"], df["c"]) expected.index.names = ["row_0"] expected.columns.names = ["col_0"] tm.assert_frame_equal(result, expected)
def test_subset(self, date_range_frame):
    """Check ``DataFrame.asof`` with different ``subset`` arguments."""
    N = 10
    df = date_range_frame.iloc[:N].copy()
    # Blank out part of column "A" so that the subset choice matters.
    df.loc[df.index[4:8], "A"] = np.nan
    dates = date_range("1/1/1990", periods=N * 3, freq="25s")

    # A subset of "A", or of "A" and "B", should match the call without subset.
    for columns in ("A", ["A", "B"]):
        result = df.asof(dates, subset=columns)
        expected = df.asof(dates)
        tm.assert_frame_equal(result, expected)

    # A subset of "B" is compared with a forward-filled resample instead.
    result = df.asof(dates, subset="B")
    expected = df.resample("25s", closed="right").ffill().reindex(dates)
    expected.iloc[20:] = 9
    tm.assert_frame_equal(result, expected)
def test_int_types(self, np_type, path):
    """Round-trip integer data through Excel and check the dtype that comes back.

    Integer values should be read back as int rather than float (Excel's own
    number format), unless ``convert_float=False`` is passed.

    Parameters
    ----------
    np_type : dtype
        Integer dtype used to build the frame that is written.
    path : str
        Location of the workbook that is written and re-read.
    """
    df = DataFrame(np.random.randint(-10, 10, size=(10, 2)), dtype=np_type)
    df.to_excel(path, sheet_name="test1")
    int_frame = df.astype(np.int64)

    # Use the ExcelFile as a context manager so its handle is always closed,
    # even when the read fails.
    with ExcelFile(path) as reader:
        recons = pd.read_excel(reader, sheet_name="test1", index_col=0)
    tm.assert_frame_equal(int_frame, recons)

    # Reading straight from the path must give the same frame.
    recons2 = pd.read_excel(path, sheet_name="test1", index_col=0)
    tm.assert_frame_equal(int_frame, recons2)

    # Test with convert_float=False comes back as float.
    float_frame = df.astype(float)
    recons = pd.read_excel(
        path, sheet_name="test1", convert_float=False, index_col=0
    )
    tm.assert_frame_equal(
        recons, float_frame, check_index_type=False, check_column_type=False
    )
def test_frame_getitem_setitem_multislice(self): levels = [["t1", "t2"], ["a", "b", "c"]] codes = [[0, 0, 0, 1, 1], [0, 1, 2, 0, 1]] midx = MultiIndex(codes=codes, levels=levels, names=[None, "id"]) df = DataFrame({"value": [1, 2, 3, 7, 8]}, index=midx) result = df.loc[:, "value"] tm.assert_series_equal(df["value"], result) result = df.loc[df.index[1:3], "value"] tm.assert_series_equal(df["value"][1:3], result) result = df.loc[:, :] tm.assert_frame_equal(df, result) result = df df.loc[:, "value"] = 10 result["value"] = 10 tm.assert_frame_equal(df, result) df.loc[:, :] = 10 tm.assert_frame_equal(df, result)
def test_read_excel_parse_dates(self, ext):
    """Read date strings back from Excel with and without ``parse_dates``.

    see gh-11544, gh-12051
    """
    df = DataFrame(
        {"col": [1, 2, 3], "date_strings": pd.date_range("2012-01-01", periods=3)}
    )
    # The written frame stores the dates as "%m/%d/%Y" strings.
    as_strings = df.copy()
    as_strings["date_strings"] = as_strings["date_strings"].dt.strftime("%m/%d/%Y")

    def date_parser(value):
        # Explicit parser for the string format written above.
        return datetime.strptime(value, "%m/%d/%Y")

    with tm.ensure_clean(ext) as pth:
        as_strings.to_excel(pth)

        # Without parse_dates the strings come back unchanged.
        res = pd.read_excel(pth, index_col=0)
        tm.assert_frame_equal(as_strings, res)

        # With parse_dates the column matches the original datetimes.
        res = pd.read_excel(pth, parse_dates=["date_strings"], index_col=0)
        tm.assert_frame_equal(df, res)

        # Same result when an explicit date_parser is supplied.
        res = pd.read_excel(
            pth, parse_dates=["date_strings"], date_parser=date_parser, index_col=0
        )
        tm.assert_frame_equal(df, res)
def test_sort_values_multicolumn(self): A = np.arange(5).repeat(20) B = np.tile(np.arange(5), 20) random.shuffle(A) random.shuffle(B) frame = DataFrame({"A": A, "B": B, "C": np.random.randn(100)}) result = frame.sort_values(by=["A", "B"]) indexer = np.lexsort((frame["B"], frame["A"])) expected = frame.take(indexer) tm.assert_frame_equal(result, expected) result = frame.sort_values(by=["A", "B"], ascending=False) indexer = np.lexsort((frame["B"].rank(ascending=False), frame["A"].rank(ascending=False))) expected = frame.take(indexer) tm.assert_frame_equal(result, expected) result = frame.sort_values(by=["B", "A"]) indexer = np.lexsort((frame["A"], frame["B"])) expected = frame.take(indexer) tm.assert_frame_equal(result, expected)
def test_concat_series_partial_columns_names(self): # GH10698 foo = Series([1, 2], name="foo") bar = Series([1, 2]) baz = Series([4, 5]) result = concat([foo, bar, baz], axis=1) expected = DataFrame( {"foo": [1, 2], 0: [1, 2], 1: [4, 5]}, columns=["foo", 0, 1] ) tm.assert_frame_equal(result, expected) result = concat([foo, bar, baz], axis=1, keys=["red", "blue", "yellow"]) expected = DataFrame( {"red": [1, 2], "blue": [1, 2], "yellow": [4, 5]}, columns=["red", "blue", "yellow"], ) tm.assert_frame_equal(result, expected) result = concat([foo, bar, baz], axis=1, ignore_index=True) expected = DataFrame({0: [1, 2], 1: [1, 2], 2: [4, 5]}) tm.assert_frame_equal(result, expected)
def test_concat_multiple_tzs(self): # GH#12467 # combining datetime tz-aware and naive DataFrames ts1 = Timestamp("2015-01-01", tz=None) ts2 = Timestamp("2015-01-01", tz="UTC") ts3 = Timestamp("2015-01-01", tz="EST") df1 = DataFrame(dict(time=[ts1])) df2 = DataFrame(dict(time=[ts2])) df3 = DataFrame(dict(time=[ts3])) results = pd.concat([df1, df2]).reset_index(drop=True) expected = DataFrame(dict(time=[ts1, ts2]), dtype=object) tm.assert_frame_equal(results, expected) results = pd.concat([df1, df3]).reset_index(drop=True) expected = DataFrame(dict(time=[ts1, ts3]), dtype=object) tm.assert_frame_equal(results, expected) results = pd.concat([df2, df3]).reset_index(drop=True) expected = DataFrame(dict(time=[ts2, ts3])) tm.assert_frame_equal(results, expected)
def test1_index(self):
    """Read DEMO_G.xpt (all numeric file) with ``index="SEQN"``.

    The reference data comes from the CSV file that sits next to
    ``self.file01`` (same name, ``.csv`` suffix), indexed by ``SEQN`` and
    passed through ``numeric_as_float``.
    """
    # Compare to this
    data_csv = pd.read_csv(self.file01.replace(".xpt", ".csv"))
    data_csv = data_csv.set_index("SEQN")
    numeric_as_float(data_csv)

    # Read full file
    data = read_sas(self.file01, index="SEQN", format="xport")
    tm.assert_frame_equal(data, data_csv, check_index_type=False)

    # Test incremental read with `read` method.
    reader = read_sas(self.file01, index="SEQN", format="xport", iterator=True)
    try:
        data = reader.read(10)
    finally:
        # Close the reader even when the read fails, so the handle is not leaked.
        reader.close()
    tm.assert_frame_equal(data, data_csv.iloc[0:10, :], check_index_type=False)

    # Test incremental read with `get_chunk` method.
    reader = read_sas(self.file01, index="SEQN", format="xport", chunksize=10)
    try:
        data = reader.get_chunk()
    finally:
        reader.close()
    tm.assert_frame_equal(data, data_csv.iloc[0:10, :], check_index_type=False)
def test_get_dummies_basic_drop_first_NA(self, sparse):
    """Test NA handling together with ``drop_first``."""

    def as_expected(frame):
        # Convert the expected columns to SparseArray when sparse output is requested.
        if sparse:
            return frame.apply(SparseArray, fill_value=0)
        return frame

    s_NA = ["a", "b", np.nan]

    # Without dummy_na only the "b" column is expected.
    res = get_dummies(s_NA, drop_first=True, sparse=sparse)
    exp = as_expected(DataFrame({"b": [0, 1, 0]}, dtype=np.uint8))
    tm.assert_frame_equal(res, exp)

    # With dummy_na an additional NaN column is expected after "b".
    res_na = get_dummies(s_NA, dummy_na=True, drop_first=True, sparse=sparse)
    exp_na = DataFrame({"b": [0, 1, 0], np.nan: [0, 0, 1]}, dtype=np.uint8).reindex(
        ["b", np.nan], axis=1
    )
    exp_na = as_expected(exp_na)
    tm.assert_frame_equal(res_na, exp_na)

    # Input consisting of a single NaN: a one-row frame without columns is expected.
    res_just_na = get_dummies(
        [np.nan], dummy_na=True, drop_first=True, sparse=sparse
    )
    exp_just_na = DataFrame(index=np.arange(1))
    tm.assert_frame_equal(res_just_na, exp_just_na)
def test_apply_mutating(using_array_manager):
    """Row-wise ``apply`` with a function that mutates the row it receives.

    GH#35462: case where the applied function pins a new BlockManager to a row.
    """
    df = DataFrame({"a": range(100), "b": range(100, 200)})
    df_orig = df.copy()

    def func(row):
        # Remember the manager, mutate the row, then require that it was replaced.
        manager_before = row._mgr
        row.loc["a"] += 1
        assert row._mgr is not manager_before
        return row

    expected = df.copy()
    expected["a"] += 1

    result = df.apply(func, axis=1)
    tm.assert_frame_equal(result, expected)

    if using_array_manager:
        # INFO(ArrayManager) the row is not a view, so ``df`` is not mutated in place.
        tm.assert_frame_equal(df, df_orig)
    else:
        # With BlockManager the row is a view and ``df`` is mutated in place.
        tm.assert_frame_equal(df, result)
def test_shift_dt64values_int_fill_deprecated(self):
    """Shift datetime64 values with the integer ``fill_value=0`` (GH#31971).

    The Series case, the single-column frame case and the consolidated
    ``axis=1`` case must emit a ``FutureWarning``.
    """
    ser = Series([pd.Timestamp("2020-01-01"), pd.Timestamp("2020-01-02")])

    # Series: the filled slot is expected as Timestamp(0).
    with tm.assert_produces_warning(FutureWarning):
        result = ser.shift(1, fill_value=0)
    expected = Series([pd.Timestamp(0), ser[0]])
    tm.assert_series_equal(result, expected)

    # Single-column frame.
    df = ser.to_frame()
    with tm.assert_produces_warning(FutureWarning):
        result = df.shift(1, fill_value=0)
    expected = expected.to_frame()
    tm.assert_frame_equal(result, expected)

    # axis = 1, consolidated frame
    df2 = DataFrame({"A": ser, "B": ser})
    df2._consolidate_inplace()
    with tm.assert_produces_warning(FutureWarning):
        result = df2.shift(1, axis=1, fill_value=0)
    expected = DataFrame(
        {"A": [pd.Timestamp(0), pd.Timestamp(0)], "B": df2["A"]}
    )
    tm.assert_frame_equal(result, expected)

    # same thing but not consolidated
    # This isn't great that we get different behavior, but
    # that will go away when the deprecation is enforced
    df3 = DataFrame({"A": ser})
    df3["B"] = ser
    assert len(df3._mgr.arrays) == 2
    # No warning check here; the filled column is expected as plain integers.
    result = df3.shift(1, axis=1, fill_value=0)
    expected = DataFrame({"A": [0, 0], "B": df2["A"]})
    tm.assert_frame_equal(result, expected)