def test_read(self, protocol, get_random_path): with tm.ensure_clean(get_random_path) as path: df = tm.makeDataFrame() df.to_pickle(path, protocol=protocol) df2 = pd.read_pickle(path) tm.assert_frame_equal(df, df2)
def test_path_localpath(self): df = tm.makeDataFrame().reset_index() result = tm.round_trip_localpath(df.to_feather, pd.read_feather) tm.assert_frame_equal(df, result)
def test_pickle_path_localpath(): df = tm.makeDataFrame() result = tm.round_trip_localpath(df.to_pickle, pd.read_pickle) tm.assert_frame_equal(df, result)
def test_write_explicit_bad(self, compression, get_random_path): with pytest.raises(ValueError, match="Unrecognized compression type"): with tm.ensure_clean(get_random_path) as path: df = tm.makeDataFrame() df.to_pickle(path, compression=compression)
def test_unknown_engine(self): with tm.ensure_clean() as path: df = tm.makeDataFrame() df.to_csv(path) with pytest.raises(ValueError, match="Unknown engine"): pd.read_csv(path, engine="pyt")
def test_passthrough_keywords(self): df = tm.makeDataFrame().reset_index() self.check_round_trip(df, write_kwargs=dict(version=1))
def test_multiple_open_close(setup_path): # gh-4409: open & close multiple times with ensure_clean_path(setup_path) as path: df = tm.makeDataFrame() df.to_hdf(path, "df", mode="w", format="table") # single store = HDFStore(path) assert "CLOSED" not in store.info() assert store.is_open store.close() assert "CLOSED" in store.info() assert not store.is_open with ensure_clean_path(setup_path) as path: if pytables._table_file_open_policy_is_strict: # multiples store1 = HDFStore(path) msg = ( r"The file [\S]* is already opened\. Please close it before " r"reopening in write mode\." ) with pytest.raises(ValueError, match=msg): HDFStore(path) store1.close() else: # multiples store1 = HDFStore(path) store2 = HDFStore(path) assert "CLOSED" not in store1.info() assert "CLOSED" not in store2.info() assert store1.is_open assert store2.is_open store1.close() assert "CLOSED" in store1.info() assert not store1.is_open assert "CLOSED" not in store2.info() assert store2.is_open store2.close() assert "CLOSED" in store1.info() assert "CLOSED" in store2.info() assert not store1.is_open assert not store2.is_open # nested close store = HDFStore(path, mode="w") store.append("df", df) store2 = HDFStore(path) store2.append("df2", df) store2.close() assert "CLOSED" in store2.info() assert not store2.is_open store.close() assert "CLOSED" in store.info() assert not store.is_open # double closing store = HDFStore(path, mode="w") store.append("df", df) store2 = HDFStore(path) store.close() assert "CLOSED" in store.info() assert not store.is_open store2.close() assert "CLOSED" in store2.info() assert not store2.is_open # ops on a closed store with ensure_clean_path(setup_path) as path: df = tm.makeDataFrame() df.to_hdf(path, "df", mode="w", format="table") store = HDFStore(path) store.close() msg = r"[\S]* file is not open!" with pytest.raises(ClosedFileError, match=msg): store.keys() with pytest.raises(ClosedFileError, match=msg): "df" in store with pytest.raises(ClosedFileError, match=msg): len(store) with pytest.raises(ClosedFileError, match=msg): store["df"] with pytest.raises(ClosedFileError, match=msg): store.select("df") with pytest.raises(ClosedFileError, match=msg): store.get("df") with pytest.raises(ClosedFileError, match=msg): store.append("df2", df) with pytest.raises(ClosedFileError, match=msg): store.put("df3", df) with pytest.raises(ClosedFileError, match=msg): store.get_storer("df2") with pytest.raises(ClosedFileError, match=msg): store.remove("df2") with pytest.raises(ClosedFileError, match=msg): store.select("df") msg = "'HDFStore' object has no attribute 'df'" with pytest.raises(AttributeError, match=msg): store.df
def test_path_local_path(all_parsers): parser = all_parsers df = tm.makeDataFrame() result = tm.round_trip_localpath(df.to_csv, lambda p: parser.read_csv(p, index_col=0)) tm.assert_frame_equal(df, result)
def test_select_dtypes(setup_path): with ensure_clean_store(setup_path) as store: # with a Timestamp data column (GH #2637) df = DataFrame( { "ts": bdate_range("2012-01-01", periods=300), "A": np.random.randn(300), } ) _maybe_remove(store, "df") store.append("df", df, data_columns=["ts", "A"]) result = store.select("df", "ts>=Timestamp('2012-02-01')") expected = df[df.ts >= Timestamp("2012-02-01")] tm.assert_frame_equal(expected, result) # bool columns (GH #2849) df = DataFrame(np.random.randn(5, 2), columns=["A", "B"]) df["object"] = "foo" df.loc[4:5, "object"] = "bar" df["boolv"] = df["A"] > 0 _maybe_remove(store, "df") store.append("df", df, data_columns=True) expected = df[df.boolv == True].reindex(columns=["A", "boolv"]) # noqa for v in [True, "true", 1]: result = store.select("df", f"boolv == {v}", columns=["A", "boolv"]) tm.assert_frame_equal(expected, result) expected = df[df.boolv == False].reindex(columns=["A", "boolv"]) # noqa for v in [False, "false", 0]: result = store.select("df", f"boolv == {v}", columns=["A", "boolv"]) tm.assert_frame_equal(expected, result) # integer index df = DataFrame({"A": np.random.rand(20), "B": np.random.rand(20)}) _maybe_remove(store, "df_int") store.append("df_int", df) result = store.select("df_int", "index<10 and columns=['A']") expected = df.reindex(index=list(df.index)[0:10], columns=["A"]) tm.assert_frame_equal(expected, result) # float index df = DataFrame( { "A": np.random.rand(20), "B": np.random.rand(20), "index": np.arange(20, dtype="f8"), } ) _maybe_remove(store, "df_float") store.append("df_float", df) result = store.select("df_float", "index<10.0 and columns=['A']") expected = df.reindex(index=list(df.index)[0:10], columns=["A"]) tm.assert_frame_equal(expected, result) with ensure_clean_store(setup_path) as store: # floats w/o NaN df = DataFrame({"cols": range(11), "values": range(11)}, dtype="float64") df["cols"] = (df["cols"] + 10).apply(str) store.append("df1", df, data_columns=True) result = store.select("df1", where="values>2.0") expected = df[df["values"] > 2.0] tm.assert_frame_equal(expected, result) # floats with NaN df.iloc[0] = np.nan expected = df[df["values"] > 2.0] store.append("df2", df, data_columns=True, index=False) result = store.select("df2", where="values>2.0") tm.assert_frame_equal(expected, result) # https://github.com/PyTables/PyTables/issues/282 # bug in selection when 0th row has a np.nan and an index # store.append('df3',df,data_columns=True) # result = store.select( # 'df3', where='values>2.0') # tm.assert_frame_equal(expected, result) # not in first position float with NaN ok too df = DataFrame({"cols": range(11), "values": range(11)}, dtype="float64") df["cols"] = (df["cols"] + 10).apply(str) df.iloc[1] = np.nan expected = df[df["values"] > 2.0] store.append("df4", df, data_columns=True) result = store.select("df4", where="values>2.0") tm.assert_frame_equal(expected, result) # test selection with comparison against numpy scalar # GH 11283 with ensure_clean_store(setup_path) as store: df = tm.makeDataFrame() expected = df[df["A"] > 0] store.append("df", df, data_columns=True) np_zero = np.float64(0) # noqa result = store.select("df", where=["A>np_zero"]) tm.assert_frame_equal(expected, result)
def test_append_raise(setup_path): with ensure_clean_store(setup_path) as store: # test append with invalid input to get good error messages # list in column df = tm.makeDataFrame() df["invalid"] = [["a"]] * len(df) assert df.dtypes["invalid"] == np.object_ msg = re.escape( """Cannot serialize the column [invalid] because its data contents are not [string] but [mixed] object dtype""" ) with pytest.raises(TypeError, match=msg): store.append("df", df) # multiple invalid columns df["invalid2"] = [["a"]] * len(df) df["invalid3"] = [["a"]] * len(df) with pytest.raises(TypeError, match=msg): store.append("df", df) # datetime with embedded nans as object df = tm.makeDataFrame() s = Series(datetime.datetime(2001, 1, 2), index=df.index) s = s.astype(object) s[0:5] = np.nan df["invalid"] = s assert df.dtypes["invalid"] == np.object_ msg = "too many timezones in this block, create separate data columns" with pytest.raises(TypeError, match=msg): store.append("df", df) # directly ndarray msg = "value must be None, Series, or DataFrame" with pytest.raises(TypeError, match=msg): store.append("df", np.arange(10)) # series directly msg = re.escape( "cannot properly create the storer for: " "[group->df,value-><class 'pandas.core.series.Series'>]" ) with pytest.raises(TypeError, match=msg): store.append("df", Series(np.arange(10))) # appending an incompatible table df = tm.makeDataFrame() store.append("df", df) df["foo"] = "foo" msg = re.escape( "invalid combination of [non_index_axes] on appending data " "[(1, ['A', 'B', 'C', 'D', 'foo'])] vs current table " "[(1, ['A', 'B', 'C', 'D'])]" ) with pytest.raises(ValueError, match=msg): store.append("df", df) # incompatible type (GH 41897) _maybe_remove(store, "df") df["foo"] = Timestamp("20130101") store.append("df", df) df["foo"] = "bar" msg = re.escape( "invalid combination of [values_axes] on appending data " "[name->values_block_1,cname->values_block_1," "dtype->bytes24,kind->string,shape->(1, 30)] " "vs current table " "[name->values_block_1,cname->values_block_1," "dtype->datetime64,kind->datetime64,shape->None]" ) with pytest.raises(ValueError, match=msg): store.append("df", df)
def test_pickle_path_localpath(setup_path): df = tm.makeDataFrame() result = tm.round_trip_pathlib(lambda p: df.to_hdf(p, "df"), lambda p: read_hdf(p, "df")) tm.assert_frame_equal(df, result)