read_html(url, "google", flavor=flavor)


@td.skip_if_no("bs4")
@td.skip_if_no("lxml")
@td.skip_if_no("html5lib")
def test_same_ordering(datapath):
    """Parse one file with the lxml and bs4 flavors and compare the results.

    The bs4 flavor is backed by html5lib, so the test is skipped unless
    bs4, lxml and html5lib are all importable.
    """
    filename = datapath("io", "data", "valid_markup.html")
    dfs_lxml = read_html(filename, index_col=0, flavor=["lxml"])
    dfs_bs4 = read_html(filename, index_col=0, flavor=["bs4"])
    assert_framelist_equal(dfs_lxml, dfs_bs4)


@pytest.mark.parametrize(
    "flavor",
    [
        # Each flavor is skipped on the packages *it* needs; the bs4 case
        # previously keyed its skip on lxml, which it does not use.
        pytest.param("bs4", marks=[td.skip_if_no("bs4"), td.skip_if_no("html5lib")]),
        pytest.param("lxml", marks=td.skip_if_no("lxml")),
    ],
    scope="class",
)
class TestReadHtml:
    """``read_html`` tests, run once per parser flavor."""

    @pytest.fixture(autouse=True)
    def set_files(self, datapath):
        """Store the paths of the spam and banklist HTML files on the instance."""
        self.spam_data = datapath("io", "data", "spam.html")
        self.spam_data_kwargs = {}
        self.spam_data_kwargs["encoding"] = "UTF-8"
        self.banklist_data = datapath("io", "data", "banklist.html")

    @pytest.fixture(autouse=True, scope="function")
    def set_defaults(self, flavor, request):
        """Bind ``self.read_html`` to ``read_html`` with the parametrized flavor."""
        self.read_html = partial(read_html, flavor=flavor)
Fixture to set engine for use in each test case. Rather than requiring `engine=...` to be provided explicitly as an argument in each test, this fixture sets a global option to dictate which engine should be used to write Excel files. After executing the test it rolls back said change to the global option. """ option_name = f"io.excel.{ext.strip('.')}.writer" with option_context(option_name, engine): yield @pytest.mark.parametrize( "ext", [ pytest.param(".xlsx", marks=[td.skip_if_no("openpyxl"), td.skip_if_no("xlrd")]), pytest.param(".xlsm", marks=[td.skip_if_no("openpyxl"), td.skip_if_no("xlrd")]), pytest.param(".xls", marks=[td.skip_if_no("xlwt"), td.skip_if_no("xlrd")]), pytest.param( ".xlsx", marks=[td.skip_if_no("xlsxwriter"), td.skip_if_no("xlrd")] ), pytest.param(".ods", marks=td.skip_if_no("odf")), ], ) class TestRoundTrip: @pytest.mark.parametrize( "header,expected", [(None, DataFrame([np.nan] * 4)), (0, DataFrame({"Unnamed: 0": [np.nan] * 3}))], ) def test_read_one_empty_col_no_header(self, ext, header, expected): # xref gh-12292
class TestExcelWriterEngineTests:
    """Engine dispatch, registration and argument validation of ``ExcelWriter``."""

    @pytest.mark.parametrize(
        "klass,ext",
        [
            pytest.param(_XlsxWriter, ".xlsx", marks=td.skip_if_no("xlsxwriter")),
            pytest.param(_OpenpyxlWriter, ".xlsx", marks=td.skip_if_no("openpyxl")),
            pytest.param(_XlwtWriter, ".xls", marks=td.skip_if_no("xlwt")),
        ],
    )
    def test_ExcelWriter_dispatch(self, klass, ext):
        """The writer created for a path is an instance of the expected class."""
        with tm.ensure_clean(ext) as path, ExcelWriter(path) as writer:
            # xlsxwriter has preference over openpyxl if both installed
            xlsxwriter_wins = ext == ".xlsx" and td.safe_import("xlsxwriter")
            expected_cls = _XlsxWriter if xlsxwriter_wins else klass
            assert isinstance(writer, expected_cls)

    def test_ExcelWriter_dispatch_raises(self):
        """A path without a recognised extension raises "No engine"."""
        with pytest.raises(ValueError, match="No engine"):
            ExcelWriter("nothing")

    def test_register_writer(self):
        """A registered custom writer is used via the option and via ``engine=``."""

        class DummyClass(ExcelWriter):
            # Class-level flags recording which hooks were invoked.
            called_save = False
            called_write_cells = False
            called_sheets = False
            _supported_extensions = ("xlsx", "xls")
            _engine = "dummy"

            def book(self):
                pass

            def _save(self):
                type(self).called_save = True

            def _write_cells(self, *args, **kwargs):
                type(self).called_write_cells = True

            @property
            def sheets(self):
                type(self).called_sheets = True

            @classmethod
            def assert_called_and_reset(cls):
                assert cls.called_save
                assert cls.called_write_cells
                assert not cls.called_sheets
                cls.called_save = False
                cls.called_write_cells = False

        register_writer(DummyClass)

        # Selected through the global option for the .xlsx extension.
        with option_context("io.excel.xlsx.writer", "dummy"):
            with tm.ensure_clean("something.xlsx") as xlsx_path:
                with ExcelWriter(xlsx_path) as writer:
                    assert isinstance(writer, DummyClass)
                df = tm.makeCustomDataframe(1, 1)
                df.to_excel(xlsx_path)
                DummyClass.assert_called_and_reset()

        # Selected explicitly through the ``engine`` argument.
        with tm.ensure_clean("something.xls") as xls_path:
            df.to_excel(xls_path, engine="dummy")
            DummyClass.assert_called_and_reset()

    @pytest.mark.parametrize(
        "ext",
        [
            pytest.param(".xlsx", marks=td.skip_if_no("xlsxwriter")),
            pytest.param(".xlsx", marks=td.skip_if_no("openpyxl")),
            pytest.param(".ods", marks=td.skip_if_no("odf")),
        ],
    )
    def test_engine_kwargs_and_kwargs_raises(self, ext):
        """Passing both ``engine_kwargs`` and extra keyword arguments is rejected."""
        # GH 40430
        expected_message = re.escape("Cannot use both engine_kwargs and **kwargs")
        with pytest.raises(ValueError, match=expected_message):
            with ExcelWriter("", engine_kwargs={"a": 1}, b=2):
                pass
import pytest

import pandas.util._test_decorators as td

from pandas.core.dtypes.dtypes import PeriodDtype

import pandas as pd
import pandas._testing as tm
from pandas.core.arrays import (
    PeriodArray,
    period_array,
)

# Skip marker shared by the pyarrow-dependent tests in this module.
pyarrow_skip = td.skip_if_no("pyarrow", min_version="0.15.1.dev")


@pyarrow_skip
def test_arrow_extension_type():
    """``ArrowPeriodType`` exposes ``freq`` and compares/hashes by frequency."""
    from pandas.core.arrays._arrow_utils import ArrowPeriodType

    p1 = ArrowPeriodType("D")
    p2 = ArrowPeriodType("D")
    p3 = ArrowPeriodType("M")

    assert p1.freq == "D"
    assert p1 == p2
    # ``not ==`` (rather than ``!=``) so that ``__eq__`` is the method exercised.
    assert not p1 == p3
    assert hash(p1) == hash(p2)
    assert not hash(p1) == hash(p3)
class TestUpdate:
    """Checks for ``Series.update`` across dtypes and kinds of ``other``."""

    def test_update(self):
        """NaN entries in ``other`` leave the corresponding values untouched."""
        target = Series([1.5, np.nan, 3.0, 4.0, np.nan])
        patch = Series([np.nan, 3.5, np.nan, 5.0])
        target.update(patch)

        tm.assert_series_equal(target, Series([1.5, 3.5, 3.0, 5.0, np.nan]))

        # GH 3217
        frame = DataFrame([{"a": 1}, {"a": 3, "b": 2}])
        frame["c"] = np.nan
        frame["c"].update(Series(["foo"], index=[0]))

        expected_frame = DataFrame(
            [[1, np.nan, "foo"], [3, 2.0, np.nan]], columns=["a", "b", "c"]
        )
        tm.assert_frame_equal(frame, expected_frame)

    @pytest.mark.parametrize(
        "other, dtype, expected",
        [
            # other is int
            ([61, 63], "int32", Series([10, 61, 12], dtype="int32")),
            ([61, 63], "int64", Series([10, 61, 12])),
            ([61, 63], float, Series([10.0, 61.0, 12.0])),
            ([61, 63], object, Series([10, 61, 12], dtype=object)),
            # other is float, but can be cast to int
            ([61.0, 63.0], "int32", Series([10, 61, 12], dtype="int32")),
            ([61.0, 63.0], "int64", Series([10, 61, 12])),
            ([61.0, 63.0], float, Series([10.0, 61.0, 12.0])),
            ([61.0, 63.0], object, Series([10, 61.0, 12], dtype=object)),
            # others is float, cannot be cast to int
            ([61.1, 63.1], "int32", Series([10.0, 61.1, 12.0])),
            ([61.1, 63.1], "int64", Series([10.0, 61.1, 12.0])),
            ([61.1, 63.1], float, Series([10.0, 61.1, 12.0])),
            ([61.1, 63.1], object, Series([10, 61.1, 12], dtype=object)),
            # other is object, cannot be cast
            ([(61,), (63,)], "int32", Series([10, (61,), 12])),
            ([(61,), (63,)], "int64", Series([10, (61,), 12])),
            ([(61,), (63,)], float, Series([10.0, (61,), 12.0])),
            ([(61,), (63,)], object, Series([10, (61,), 12])),
        ],
    )
    def test_update_dtypes(self, other, dtype, expected):
        """Result dtype after updating with values at index labels 1 and 3."""
        ser = Series([10, 11, 12], dtype=dtype)
        ser.update(Series(other, index=[1, 3]))

        tm.assert_series_equal(ser, expected)

    @pytest.mark.parametrize(
        "series, other, expected",
        [
            # update by key
            (
                Series({"a": 1, "b": 2, "c": 3, "d": 4}),
                {"b": 5, "c": np.nan},
                Series({"a": 1, "b": 5, "c": 3, "d": 4}),
            ),
            # update by position
            (Series([1, 2, 3, 4]), [np.nan, 5, 1], Series([1, 5, 1, 4])),
        ],
    )
    def test_update_from_non_series(self, series, other, expected):
        """``other`` may be a dict or a list rather than a Series."""
        # GH 33215
        series.update(other)
        tm.assert_series_equal(series, expected)

    @pytest.mark.parametrize(
        "data, other, expected, dtype",
        [
            (["a", None], [None, "b"], ["a", "b"], "string[python]"),
            pytest.param(
                ["a", None],
                [None, "b"],
                ["a", "b"],
                "string[pyarrow]",
                marks=td.skip_if_no("pyarrow", min_version="1.0.0"),
            ),
            ([1, None], [None, 2], [1, 2], "Int64"),
            ([True, None], [None, False], [True, False], "boolean"),
            (
                ["a", None],
                [None, "b"],
                ["a", "b"],
                CategoricalDtype(categories=["a", "b"]),
            ),
            (
                [Timestamp(year=2020, month=1, day=1, tz="Europe/London"), NaT],
                [NaT, Timestamp(year=2020, month=1, day=1, tz="Europe/London")],
                [Timestamp(year=2020, month=1, day=1, tz="Europe/London")] * 2,
                "datetime64[ns, Europe/London]",
            ),
        ],
    )
    def test_update_extension_array_series(self, data, other, expected, dtype):
        """Updating fills the missing slot for each extension dtype listed."""
        updated = Series(data, dtype=dtype)
        updated.update(Series(other, dtype=dtype))

        tm.assert_series_equal(updated, Series(expected, dtype=dtype))

    def test_update_with_categorical_type(self):
        """Two categorical Series sharing a dtype update by index label."""
        # GH 25744
        cat_dtype = CategoricalDtype(["a", "b", "c", "d"])
        left = Series(["a", "b", "c"], index=[1, 2, 3], dtype=cat_dtype)
        right = Series(["b", "a"], index=[1, 2], dtype=cat_dtype)
        left.update(right)

        tm.assert_series_equal(
            left, Series(["b", "a", "c"], index=[1, 2, 3], dtype=cat_dtype)
        )
result = func(arr).array expected = func(data).array tm.assert_equal(result, expected) # Let's check the Indexes while we're here idx_cls = {"M8[ns]": DatetimeIndex, "m8[ns]": TimedeltaIndex}[dtype] result = idx_cls(arr) expected = idx_cls(data) tm.assert_index_equal(result, expected) @pytest.fixture( params=[ "memoryview", "array", pytest.param("dask", marks=td.skip_if_no("dask.array")), pytest.param("xarray", marks=td.skip_if_no("xarray")), ] ) def array_likes(request): # GH#24539 recognize e.g xarray, dask, ... arr = np.array([1, 2, 3], dtype=np.int64) name = request.param if name == "memoryview": data = memoryview(arr) elif name == "array": # stdlib array import array data = array.array("i", arr)
action="ignore", message="time.clock has been deprecated", category=DeprecationWarning, ) yield read_ext_params = [".xls", ".xlsx", ".xlsm", ".xlsb", ".ods"] engine_params = [ # Add any engines to test here # When defusedxml is installed it triggers deprecation warnings for # xlrd and openpyxl, so catch those here pytest.param( "xlrd", marks=[ td.skip_if_no("xlrd"), pytest.mark.filterwarnings("ignore:.*(tree\\.iter|html argument)"), ], ), pytest.param( "openpyxl", marks=[ td.skip_if_no("openpyxl"), pytest.mark.filterwarnings("ignore:.*html argument"), ], ), pytest.param( None, marks=[ td.skip_if_no("xlrd"), pytest.mark.filterwarnings("ignore:.*(tree\\.iter|html argument)"),
@pytest.fixture(params=["left", "right", "both", "neither"]) def other_closed(request): """ Secondary closed fixture to allow parametrizing over all pairs of closed. """ return request.param @pytest.fixture(params=[ None, "gzip", "bz2", "zip", "xz", pytest.param("zstd", marks=td.skip_if_no("zstandard")), ]) def compression(request): """ Fixture for trying common compression types in compression tests. """ return request.param @pytest.fixture(params=[ "gzip", "bz2", "zip", "xz", pytest.param("zstd", marks=td.skip_if_no("zstandard")), ])
if doc is not None: doc = doc.replace( '<?xml version="1.0" encoding="utf-8"?', "<?xml version='1.0' encoding='utf-8'?", ) return doc @pytest.fixture(params=["rb", "r"]) def mode(request): return request.param @pytest.fixture( params=[pytest.param("lxml", marks=td.skip_if_no("lxml")), "etree"]) def parser(request): return request.param # FILE OUTPUT def test_file_output_str_read(datapath, parser): filename = datapath("io", "data", "xml", "books.xml") df_file = read_xml(filename, parser=parser) with tm.ensure_clean("test.xml") as path: df_file.to_xml(path, parser=parser) with open(path, "rb") as f: output = f.read().decode("utf-8").strip()
def string_dtype(request): """ Parametrized fixture for string dtypes. * str * 'str' * 'U' """ return request.param @pytest.fixture( params=[ "string[python]", pytest.param( "string[pyarrow]", marks=td.skip_if_no("pyarrow", min_version="1.0.0") ), ] ) def nullable_string_dtype(request): """ Parametrized fixture for string dtypes. * 'string[python]' * 'string[pyarrow]' """ return request.param @pytest.fixture( params=[
res = arr_na.max(skipna=False) assert np.isnan(res) res = arr_na.min(skipna=True) assert res == MIN assert type(res) == type(MIN) res = arr_na.max(skipna=True) assert res == MAX assert type(res) == type(MAX) # ---------------------------------------------------------------------------- # Arrow interaction pyarrow_skip = td.skip_if_no("pyarrow") @pyarrow_skip def test_arrow_extension_type(): import pyarrow as pa from pandas.core.arrays.arrow._arrow_utils import ArrowIntervalType p1 = ArrowIntervalType(pa.int64(), "left") p2 = ArrowIntervalType(pa.int64(), "left") p3 = ArrowIntervalType(pa.int64(), "right") assert p1.inclusive == "left" assert p1 == p2 assert not p1 == p3
with pytest.raises(ValueError, match=msg):
    read_html(url, "google", flavor=flavor)


@td.skip_if_no('bs4')
@td.skip_if_no('lxml')
@td.skip_if_no('html5lib')
def test_same_ordering(datapath):
    """Parse one file with the lxml and bs4 flavors and compare the results.

    The bs4 flavor is backed by html5lib, so the test is skipped unless
    bs4, lxml and html5lib are all importable.
    """
    filename = datapath('io', 'data', 'valid_markup.html')
    dfs_lxml = read_html(filename, index_col=0, flavor=['lxml'])
    dfs_bs4 = read_html(filename, index_col=0, flavor=['bs4'])
    assert_framelist_equal(dfs_lxml, dfs_bs4)


@pytest.mark.parametrize("flavor", [
    # Each flavor is skipped on the packages *it* needs; the bs4 case
    # previously keyed its skip on lxml, which it does not use.
    pytest.param('bs4', marks=[td.skip_if_no('bs4'),
                               td.skip_if_no('html5lib')]),
    pytest.param('lxml', marks=td.skip_if_no('lxml'))], scope="class")
class TestReadHtml:
    """``read_html`` tests, run once per parser flavor."""

    @pytest.fixture(autouse=True)
    def set_files(self, datapath):
        """Store the paths of the spam and banklist HTML files on the instance."""
        self.spam_data = datapath('io', 'data', 'spam.html')
        self.spam_data_kwargs = {}
        self.spam_data_kwargs['encoding'] = 'UTF-8'
        self.banklist_data = datapath("io", "data", "banklist.html")

    @pytest.fixture(autouse=True, scope="function")
    def set_defaults(self, flavor, request):
        """Bind ``self.read_html`` to ``read_html`` with the parametrized flavor."""
        self.read_html = partial(read_html, flavor=flavor)
        yield
import numpy as np
import pytest

import pandas.util._test_decorators as td

from pandas.core.dtypes.common import is_dtype_equal

import pandas as pd
import pandas._testing as tm
from pandas.core.arrays.string_arrow import (
    ArrowStringArray,
    ArrowStringDtype,
)

# Marker that skips a test or parameter when pyarrow (min_version 1.0.0) is missing.
skip_if_no_pyarrow = td.skip_if_no("pyarrow", min_version="1.0.0")


@pytest.fixture(
    params=["string", pytest.param("arrow_string", marks=skip_if_no_pyarrow)])
def dtype(request):
    """Dtype name under test: ``"string"`` or ``"arrow_string"``."""
    return request.param


@pytest.fixture
def dtype_object(dtype):
    """Dtype class matching the ``dtype`` fixture's name."""
    return pd.StringDtype if dtype == "string" else ArrowStringDtype
timeout = 2 while cli.list_buckets()["Buckets"] and timeout > 0: time.sleep(0.1) timeout -= 0.1 _compression_formats_params = [ (".no_compress", None), ("", None), (".gz", "gzip"), (".GZ", "gzip"), (".bz2", "bz2"), (".BZ2", "bz2"), (".zip", "zip"), (".ZIP", "zip"), (".xz", "xz"), (".XZ", "xz"), pytest.param((".zst", "zstd"), marks=td.skip_if_no("zstandard")), pytest.param((".ZST", "zstd"), marks=td.skip_if_no("zstandard")), ] @pytest.fixture(params=_compression_formats_params[1:]) def compression_format(request): return request.param @pytest.fixture(params=_compression_formats_params) def compression_ext(request): return request.param[0]
@pytest.fixture(params=tm.STRING_DTYPES) def string_dtype(request): """ Parametrized fixture for string dtypes. * str * 'str' * 'U' """ return request.param @pytest.fixture(params=[ "string[python]", pytest.param("string[pyarrow]", marks=td.skip_if_no("pyarrow", min_version="1.0.0")), ]) def nullable_string_dtype(request): """ Parametrized fixture for string dtypes. * 'string[python]' * 'string[pyarrow]' """ return request.param @pytest.fixture(params=[ "python", pytest.param("pyarrow", marks=td.skip_if_no("pyarrow", min_version="1.0.0")),
@pytest.fixture(params=[True, False])
def adjust(request):
    """
    adjust keyword argument for ewm

    Parametrized over ``True`` and ``False``.
    """
    return request.param


@pytest.fixture(params=[True, False])
def ignore_na(request):
    """
    ignore_na keyword argument for ewm

    Parametrized over ``True`` and ``False``.
    """
    return request.param


@pytest.fixture(params=[
    pytest.param("numba", marks=td.skip_if_no("numba", "0.46.0")),  # type: ignore[list-item]
    "cython",
])
def engine(request):
    """
    engine keyword argument for rolling.apply

    Parametrized over ``"numba"`` and ``"cython"``; the numba case carries a
    ``td.skip_if_no("numba", "0.46.0")`` mark (presumably a minimum version
    requirement -- confirm against ``skip_if_no``).
    """
    return request.param


@pytest.fixture(params=[
    pytest.param(("numba", True), marks=td.skip_if_no("numba", "0.46.0")),
    ("cython", True),
    ("cython", False),
])
def engine_and_raw(request):
    """
    engine and raw keyword arguments for rolling.apply

    Each parameter is an ``(engine, raw)`` tuple; numba is only paired with
    ``raw=True`` and carries the same skip mark as the ``engine`` fixture.
    """
    return request.param
@pytest.fixture(params=[True, False])
def nogil(request):
    """
    nogil keyword argument for numba.jit

    Parametrized over ``True`` and ``False``.
    """
    return request.param


@pytest.fixture(params=[True, False])
def nopython(request):
    """
    nopython keyword argument for numba.jit

    Parametrized over ``True`` and ``False``.
    """
    return request.param


@pytest.fixture(params=[
    pytest.param("numba", marks=td.skip_if_no("numba", "0.46.0")), "cython"
])
def engine(request):
    """
    engine keyword argument for rolling.apply

    Parametrized over ``"numba"`` and ``"cython"``; the numba case carries a
    ``td.skip_if_no("numba", "0.46.0")`` mark (presumably a minimum version
    requirement -- confirm against ``skip_if_no``).
    """
    return request.param


@pytest.fixture(params=[
    pytest.param(("numba", True), marks=td.skip_if_no("numba", "0.46.0")),
    ("cython", True),
    ("cython", False),
])
def engine_and_raw(request):
    """
    engine and raw keyword arguments for rolling.apply

    Each parameter is an ``(engine, raw)`` tuple; numba is only paired with
    ``raw=True`` and carries the same skip mark as the ``engine`` fixture.
    """
    return request.param
import pytest

import pandas.util._test_decorators as td

from pandas.core.dtypes.dtypes import PeriodDtype

import pandas as pd
import pandas._testing as tm
from pandas.core.arrays import (
    PeriodArray,
    period_array,
)

# Skip marker shared by the pyarrow-dependent tests in this module.
pyarrow_skip = td.skip_if_no("pyarrow", min_version="0.17.0")


@pyarrow_skip
def test_arrow_extension_type():
    """``ArrowPeriodType`` exposes ``freq`` and compares/hashes by frequency."""
    from pandas.core.arrays._arrow_utils import ArrowPeriodType

    daily = ArrowPeriodType("D")
    daily_twin = ArrowPeriodType("D")
    monthly = ArrowPeriodType("M")

    assert daily.freq == "D"

    # Equality: same frequency compares equal, different frequency does not.
    # ``not ==`` is kept so that ``__eq__`` is the method exercised.
    assert daily == daily_twin
    assert not daily == monthly

    # Hashing follows equality.
    daily_hash = hash(daily)
    assert daily_hash == hash(daily_twin)
    assert daily_hash != hash(monthly)
@td.skip_if_no("bs4") @td.skip_if_no("lxml") @td.skip_if_no("html5lib") def test_same_ordering(datapath): filename = datapath("io", "data", "html", "valid_markup.html") dfs_lxml = read_html(filename, index_col=0, flavor=["lxml"]) dfs_bs4 = read_html(filename, index_col=0, flavor=["bs4"]) assert_framelist_equal(dfs_lxml, dfs_bs4) @pytest.mark.parametrize( "flavor", [ pytest.param("bs4", marks=[td.skip_if_no("bs4"), td.skip_if_no("html5lib")]), pytest.param("lxml", marks=td.skip_if_no("lxml")), ], scope="class", ) class TestReadHtml: @pytest.fixture(autouse=True) def set_files(self, datapath): self.spam_data = datapath("io", "data", "html", "spam.html") self.spam_data_kwargs = {} self.spam_data_kwargs["encoding"] = "UTF-8" self.banklist_data = datapath("io", "data", "html", "banklist.html") @pytest.fixture(autouse=True, scope="function") def set_defaults(self, flavor, request):
"(2020-01-31, 2020-07-31]", "(2020-07-31, 2021-01-31]", "(2021-01-31, 2021-07-31]", "(2021-07-31, 2022-01-31]", ], ] ), ) tm.assert_frame_equal(result, expected) @td.skip_if_no("xlrd") @pytest.mark.parametrize( "engine,ext", [ pytest.param("openpyxl", ".xlsx", marks=td.skip_if_no("openpyxl")), pytest.param("openpyxl", ".xlsm", marks=td.skip_if_no("openpyxl")), pytest.param("xlwt", ".xls", marks=td.skip_if_no("xlwt")), pytest.param("xlsxwriter", ".xlsx", marks=td.skip_if_no("xlsxwriter")), ], ) @pytest.mark.usefixtures("set_engine") class TestExcelWriter: def test_excel_sheet_size(self, path): # GH 26080 breaking_row_count = 2 ** 20 + 1 breaking_col_count = 2 ** 14 + 1 # purposely using two arrays to prevent memory issues while testing row_arr = np.zeros(shape=(breaking_row_count, 1)) col_arr = np.zeros(shape=(1, breaking_col_count))
class TestAstype:
    """Checks for ``Series.astype`` across source and target dtypes."""

    @pytest.mark.parametrize("dtype", np.typecodes["All"])
    def test_astype_empty_constructor_equality(self, dtype):
        """An empty Series cast to ``dtype`` equals one constructed with it."""
        # see GH#15524

        if dtype not in (
            "S",
            "V",  # poor support (if any) currently
            "M",
            "m",  # Generic timestamps raise a ValueError. Already tested.
        ):
            init_empty = Series([], dtype=dtype)
            with tm.assert_produces_warning(FutureWarning):
                as_type_empty = Series([]).astype(dtype)
            tm.assert_series_equal(init_empty, as_type_empty)

    @pytest.mark.parametrize("dtype", [str, np.str_])
    @pytest.mark.parametrize(
        "series",
        [
            Series([string.digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)]),
            Series([string.digits * 10, tm.rands(63), tm.rands(64), np.nan, 1.0]),
        ],
    )
    def test_astype_str_map(self, dtype, series):
        """Casting to a string dtype matches mapping ``str`` over the values."""
        # see GH#4405
        result = series.astype(dtype)
        expected = series.map(str)
        tm.assert_series_equal(result, expected)

    def test_astype_float_to_period(self):
        """A float NaN cast to ``period[D]`` becomes ``NaT``."""
        result = Series([np.nan]).astype("period[D]")
        expected = Series([NaT], dtype="period[D]")
        tm.assert_series_equal(result, expected)

    def test_astype_no_pandas_dtype(self):
        # https://github.com/pandas-dev/pandas/pull/24866
        ser = Series([1, 2], dtype="int64")
        # Don't have PandasDtype in the public API, so we use `.array.dtype`,
        # which is a PandasDtype.
        result = ser.astype(ser.array.dtype)
        tm.assert_series_equal(result, ser)

    @pytest.mark.parametrize("dtype", [np.datetime64, np.timedelta64])
    def test_astype_generic_timestamp_no_frequency(self, dtype, request):
        """Unit-less datetime64/timedelta64 targets raise ``ValueError``."""
        # see GH#15524, GH#15987
        data = [1]
        ser = Series(data)

        if np.dtype(dtype).name not in ["timedelta64", "datetime64"]:
            mark = pytest.mark.xfail(reason="GH#33890 Is assigned ns unit")
            request.node.add_marker(mark)

        msg = (
            fr"The '{dtype.__name__}' dtype has no unit\. "
            fr"Please pass in '{dtype.__name__}\[ns\]' instead."
        )
        with pytest.raises(ValueError, match=msg):
            ser.astype(dtype)

    def test_astype_dt64_to_str(self):
        # GH#10442 : testing astype(str) is correct for Series/DatetimeIndex
        dti = date_range("2012-01-01", periods=3)
        result = Series(dti).astype(str)
        expected = Series(["2012-01-01", "2012-01-02", "2012-01-03"], dtype=object)
        tm.assert_series_equal(result, expected)

    def test_astype_dt64tz_to_str(self):
        # GH#10442 : testing astype(str) is correct for Series/DatetimeIndex
        dti_tz = date_range("2012-01-01", periods=3, tz="US/Eastern")
        result = Series(dti_tz).astype(str)
        expected = Series(
            [
                "2012-01-01 00:00:00-05:00",
                "2012-01-02 00:00:00-05:00",
                "2012-01-03 00:00:00-05:00",
            ],
            dtype=object,
        )
        tm.assert_series_equal(result, expected)

    def test_astype_datetime(self):
        """datetime64 Series cast to ``"O"`` end up with object dtype."""
        s = Series(iNaT, dtype="M8[ns]", index=range(5))

        s = s.astype("O")
        assert s.dtype == np.object_

        s = Series([datetime(2001, 1, 2, 0, 0)])

        s = s.astype("O")
        assert s.dtype == np.object_

        s = Series([datetime(2001, 1, 2, 0, 0) for i in range(3)])

        s[1] = np.nan
        assert s.dtype == "M8[ns]"

        s = s.astype("O")
        assert s.dtype == np.object_

    def test_astype_datetime64tz(self):
        """Round trips between tz-aware datetime64, object and other zones."""
        s = Series(date_range("20130101", periods=3, tz="US/Eastern"))

        # astype
        result = s.astype(object)
        expected = Series(s.astype(object), dtype=object)
        tm.assert_series_equal(result, expected)

        result = Series(s.values).dt.tz_localize("UTC").dt.tz_convert(s.dt.tz)
        tm.assert_series_equal(result, s)

        # astype - object, preserves on construction
        result = Series(s.astype(object))
        expected = s.astype(object)
        tm.assert_series_equal(result, expected)

        # astype - datetime64[ns, tz]
        with tm.assert_produces_warning(FutureWarning):
            # dt64->dt64tz astype deprecated
            result = Series(s.values).astype("datetime64[ns, US/Eastern]")
        tm.assert_series_equal(result, s)

        with tm.assert_produces_warning(FutureWarning):
            # dt64->dt64tz astype deprecated
            result = Series(s.values).astype(s.dtype)
        tm.assert_series_equal(result, s)

        result = s.astype("datetime64[ns, CET]")
        expected = Series(date_range("20130101 06:00:00", periods=3, tz="CET"))
        tm.assert_series_equal(result, expected)

    def test_astype_str_cast_dt64(self):
        # see GH#9757
        ts = Series([Timestamp("2010-01-04 00:00:00")])
        s = ts.astype(str)

        expected = Series(["2010-01-04"])
        tm.assert_series_equal(s, expected)

        ts = Series([Timestamp("2010-01-04 00:00:00", tz="US/Eastern")])
        s = ts.astype(str)

        expected = Series(["2010-01-04 00:00:00-05:00"])
        tm.assert_series_equal(s, expected)

    def test_astype_str_cast_td64(self):
        # see GH#9757

        td = Series([Timedelta(1, unit="d")])
        ser = td.astype(str)

        expected = Series(["1 days"])
        tm.assert_series_equal(ser, expected)

    def test_dt64_series_astype_object(self):
        """Elements of a datetime64 Series cast to object are ``datetime``s."""
        dt64ser = Series(date_range("20130101", periods=3))
        result = dt64ser.astype(object)
        assert isinstance(result.iloc[0], datetime)
        assert result.dtype == np.object_

    def test_td64_series_astype_object(self):
        """Elements of a timedelta64 Series cast to object are ``timedelta``s."""
        tdser = Series(["59 Days", "59 Days", "NaT"], dtype="timedelta64[ns]")
        result = tdser.astype(object)
        assert isinstance(result.iloc[0], timedelta)
        assert result.dtype == np.object_

    @pytest.mark.parametrize(
        "data, dtype",
        [
            (["x", "y", "z"], "string[python]"),
            pytest.param(
                ["x", "y", "z"],
                "string[pyarrow]",
                marks=td.skip_if_no("pyarrow", min_version="1.0.0"),
            ),
            (["x", "y", "z"], "category"),
            (3 * [Timestamp("2020-01-01", tz="UTC")], None),
            (3 * [Interval(0, 1)], None),
        ],
    )
    @pytest.mark.parametrize("errors", ["raise", "ignore"])
    def test_astype_ignores_errors_for_extension_dtypes(self, data, dtype, errors):
        """``errors="ignore"`` returns the input; ``"raise"`` propagates the error."""
        # https://github.com/pandas-dev/pandas/issues/35471
        ser = Series(data, dtype=dtype)
        if errors == "ignore":
            expected = ser
            result = ser.astype(float, errors="ignore")
            tm.assert_series_equal(result, expected)
        else:
            msg = "(Cannot cast)|(could not convert)"
            with pytest.raises((ValueError, TypeError), match=msg):
                ser.astype(float, errors=errors)

    @pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64])
    def test_astype_from_float_to_str(self, dtype):
        # https://github.com/pandas-dev/pandas/issues/36451
        s = Series([0.1], dtype=dtype)
        result = s.astype(str)
        expected = Series(["0.1"])
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "value, string_value",
        [
            (None, "None"),
            (np.nan, "nan"),
            (NA, "<NA>"),
        ],
    )
    def test_astype_to_str_preserves_na(self, value, string_value):
        # https://github.com/pandas-dev/pandas/issues/36904
        s = Series(["a", "b", value], dtype=object)
        result = s.astype(str)
        expected = Series(["a", "b", string_value], dtype=object)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("dtype", ["float32", "float64", "int64", "int32"])
    def test_astype(self, dtype):
        """The cast result has the requested dtype and keeps the Series name."""
        s = Series(np.random.randn(5), name="foo")
        as_typed = s.astype(dtype)

        assert as_typed.dtype == dtype
        assert as_typed.name == s.name

    @pytest.mark.parametrize("value", [np.nan, np.inf])
    @pytest.mark.parametrize("dtype", [np.int32, np.int64])
    def test_astype_cast_nan_inf_int(self, dtype, value):
        # gh-14265: check NaN and inf raise error when converting to int
        msg = "Cannot convert non-finite values \\(NA or inf\\) to integer"
        s = Series([value])

        with pytest.raises(ValueError, match=msg):
            s.astype(dtype)

    @pytest.mark.parametrize("dtype", [int, np.int8, np.int64])
    def test_astype_cast_object_int_fail(self, dtype):
        """Non-numeric strings cannot be cast to an integer dtype."""
        arr = Series(["car", "house", "tree", "1"])
        msg = r"invalid literal for int\(\) with base 10: 'car'"
        with pytest.raises(ValueError, match=msg):
            arr.astype(dtype)

    def test_astype_cast_object_int(self):
        """Numeric strings held as object cast cleanly to ``int``."""
        arr = Series(["1", "2", "3", "4"], dtype=object)
        result = arr.astype(int)

        tm.assert_series_equal(result, Series(np.arange(1, 5)))

    def test_astype_unicode(self):
        """``astype("unicode")`` matches mapping ``str`` over each Series."""
        # see GH#7758
        digits = string.digits
        test_series = [
            Series([digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)]),
            Series(["データーサイエンス、お前はもう死んでいる"]),
        ]

        # The bytes case is only added when the interpreter's default
        # encoding is utf-8.
        if sys.getdefaultencoding() == "utf-8":
            test_series.append(Series(["野菜食べないとやばい".encode()]))

        for s in test_series:
            res = s.astype("unicode")
            expec = s.map(str)
            tm.assert_series_equal(res, expec)

    def test_astype_bytes(self):
        # GH#39474
        result = Series(["foo", "bar", "baz"]).astype(bytes)
        assert result.dtypes == np.dtype("S3")

    def test_astype_nan_to_bool(self):
        # GH#43018
        ser = Series(np.nan, dtype="object")
        result = ser.astype("bool")
        expected = Series(True, dtype="bool")
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "dtype",
        tm.ALL_INT_EA_DTYPES + tm.FLOAT_EA_DTYPES,
    )
    def test_astype_ea_to_datetimetzdtype(self, dtype):
        """Masked integer/float values cast to a tz-aware datetime dtype."""
        # GH37553
        result = Series([4, 0, 9], dtype=dtype).astype(DatetimeTZDtype(tz="US/Pacific"))
        expected = Series(
            {
                0: Timestamp("1969-12-31 16:00:00.000000004-08:00", tz="US/Pacific"),
                1: Timestamp("1969-12-31 16:00:00.000000000-08:00", tz="US/Pacific"),
                2: Timestamp("1969-12-31 16:00:00.000000009-08:00", tz="US/Pacific"),
            }
        )

        # Float extension dtypes are expected to give different timestamps.
        if dtype in tm.FLOAT_EA_DTYPES:
            expected = Series(
                {
                    0: Timestamp(
                        "1970-01-01 00:00:00.000000004-08:00", tz="US/Pacific"
                    ),
                    1: Timestamp(
                        "1970-01-01 00:00:00.000000000-08:00", tz="US/Pacific"
                    ),
                    2: Timestamp(
                        "1970-01-01 00:00:00.000000009-08:00", tz="US/Pacific"
                    ),
                }
            )

        tm.assert_series_equal(result, expected)

    def test_astype_retain_Attrs(self, any_numpy_dtype):
        """``attrs`` set on the source Series are present on the cast result."""
        # GH#44414
        ser = Series([0, 1, 2, 3])
        ser.attrs["Location"] = "Michigan"

        result = ser.astype(any_numpy_dtype).attrs
        expected = ser.attrs

        tm.assert_dict_equal(expected, result)
return request.param @pytest.fixture(params=[True, False]) def ignore_na(request): """ignore_na keyword argument for ewm""" return request.param @pytest.fixture(params=[True, False]) def numeric_only(request): """numeric_only keyword argument""" return request.param @pytest.fixture(params=[pytest.param("numba", marks=td.skip_if_no("numba")), "cython"]) def engine(request): """engine keyword argument for rolling.apply""" return request.param @pytest.fixture( params=[ pytest.param(("numba", True), marks=td.skip_if_no("numba")), ("cython", True), ("cython", False), ] ) def engine_and_raw(request): """engine and raw keyword arguments for rolling.apply""" return request.param
class TestAstype:
    """Checks for ``DataFrame.astype``."""

    def test_astype_float(self, float_frame):
        """Casting the frame matches casting its underlying values."""

        def check_against_values_cast(target):
            casted = float_frame.astype(target)
            expected = DataFrame(
                float_frame.values.astype(target),
                index=float_frame.index,
                columns=float_frame.columns,
            )
            tm.assert_frame_equal(casted, expected)

        for target in (int, np.int32):
            check_against_values_cast(target)

        # A column of numeric strings is cast along with the floats.
        float_frame["foo"] = "5"
        check_against_values_cast(int)

    def test_astype_mixed_float(self, mixed_float_frame):
        """Columns "A" and "B" of the mixed-float frame cast to narrower floats."""
        # mixed casting
        for target in ("float32", "float16"):
            casted = mixed_float_frame.reindex(columns=["A", "B"]).astype(target)
            _check_cast(casted, target)

    def test_astype_mixed_type(self, mixed_type_frame):
        """Numeric columns of a mixed-type frame cast to several targets."""
        # mixed casting
        mn = mixed_type_frame._get_numeric_data().copy()
        mn["little_float"] = np.array(12345.0, dtype="float16")
        mn["big_float"] = np.array(123456789101112.0, dtype="float64")

        for target in ("float64", "int64"):
            _check_cast(mn.astype(target), target)

        little_only = mn.reindex(columns=["little_float"]).astype("float16")
        _check_cast(little_only, "float16")

        for target in ("float32", "int32"):
            _check_cast(mn.astype(target), target)

        # to object
        _check_cast(mn.astype("O"), "object")

    def test_astype_with_exclude_string(self, float_frame):
        """With ``errors="ignore"`` a non-castable string column is left as is."""
        for target in (int, np.int32):
            df = float_frame.copy()
            expected = float_frame.astype(target)
            df["string"] = "foo"

            casted = df.astype(target, errors="ignore")

            expected["string"] = "foo"
            tm.assert_frame_equal(casted, expected)
def test_astype_with_view_float(self, float_frame): # this is the only real reason to do it this way tf = np.round(float_frame).astype(np.int32) casted = tf.astype(np.float32, copy=False) # TODO(wesm): verification? tf = float_frame.astype(np.float64) casted = tf.astype(np.int64, copy=False) # noqa def test_astype_with_view_mixed_float(self, mixed_float_frame): tf = mixed_float_frame.reindex(columns=["A", "B", "C"]) casted = tf.astype(np.int64) casted = tf.astype(np.float32) # noqa @pytest.mark.parametrize("dtype", [np.int32, np.int64]) @pytest.mark.parametrize("val", [np.nan, np.inf]) def test_astype_cast_nan_inf_int(self, val, dtype): # see GH#14265 # # Check NaN and inf --> raise error when converting to int. msg = "Cannot convert non-finite values \\(NA or inf\\) to integer" df = DataFrame([val]) with pytest.raises(ValueError, match=msg): df.astype(dtype) def test_astype_str(self): # see GH#9757 a = Series(date_range("2010-01-04", periods=5)) b = Series(date_range("3/6/2012 00:00", periods=5, tz="US/Eastern")) c = Series([Timedelta(x, unit="d") for x in range(5)]) d = Series(range(5)) e = Series([0.0, 0.2, 0.4, 0.6, 0.8]) df = DataFrame({"a": a, "b": b, "c": c, "d": d, "e": e}) # Datetime-like result = df.astype(str) expected = DataFrame( { "a": list(map(str, map(lambda x: Timestamp(x)._date_repr, a._values))), "b": list(map(str, map(Timestamp, b._values))), "c": list(map(lambda x: Timedelta(x)._repr_base(), c._values)), "d": list(map(str, d._values)), "e": list(map(str, e._values)), } ) tm.assert_frame_equal(result, expected) def test_astype_str_float(self): # see GH#11302 result = DataFrame([np.NaN]).astype(str) expected = DataFrame(["nan"]) tm.assert_frame_equal(result, expected) result = DataFrame([1.12345678901234567890]).astype(str) val = "1.1234567890123457" expected = DataFrame([val]) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("dtype_class", [dict, Series]) def test_astype_dict_like(self, dtype_class): # GH7271 & GH16717 a = 
Series(date_range("2010-01-04", periods=5)) b = Series(range(5)) c = Series([0.0, 0.2, 0.4, 0.6, 0.8]) d = Series(["1.0", "2", "3.14", "4", "5.4"]) df = DataFrame({"a": a, "b": b, "c": c, "d": d}) original = df.copy(deep=True) # change type of a subset of columns dt1 = dtype_class({"b": "str", "d": "float32"}) result = df.astype(dt1) expected = DataFrame( { "a": a, "b": Series(["0", "1", "2", "3", "4"]), "c": c, "d": Series([1.0, 2.0, 3.14, 4.0, 5.4], dtype="float32"), } ) tm.assert_frame_equal(result, expected) tm.assert_frame_equal(df, original) dt2 = dtype_class({"b": np.float32, "c": "float32", "d": np.float64}) result = df.astype(dt2) expected = DataFrame( { "a": a, "b": Series([0.0, 1.0, 2.0, 3.0, 4.0], dtype="float32"), "c": Series([0.0, 0.2, 0.4, 0.6, 0.8], dtype="float32"), "d": Series([1.0, 2.0, 3.14, 4.0, 5.4], dtype="float64"), } ) tm.assert_frame_equal(result, expected) tm.assert_frame_equal(df, original) # change all columns dt3 = dtype_class({"a": str, "b": str, "c": str, "d": str}) tm.assert_frame_equal(df.astype(dt3), df.astype(str)) tm.assert_frame_equal(df, original) # error should be raised when using something other than column labels # in the keys of the dtype dict dt4 = dtype_class({"b": str, 2: str}) dt5 = dtype_class({"e": str}) msg_frame = ( "Only a column name can be used for the key in a dtype mappings argument. " "'{}' not found in columns." 
) with pytest.raises(KeyError, match=msg_frame.format(2)): df.astype(dt4) with pytest.raises(KeyError, match=msg_frame.format("e")): df.astype(dt5) tm.assert_frame_equal(df, original) # if the dtypes provided are the same as the original dtypes, the # resulting DataFrame should be the same as the original DataFrame dt6 = dtype_class({col: df[col].dtype for col in df.columns}) equiv = df.astype(dt6) tm.assert_frame_equal(df, equiv) tm.assert_frame_equal(df, original) # GH#16717 # if dtypes provided is empty, the resulting DataFrame # should be the same as the original DataFrame dt7 = dtype_class({}) if dtype_class is dict else dtype_class({}, dtype=object) equiv = df.astype(dt7) tm.assert_frame_equal(df, equiv) tm.assert_frame_equal(df, original) def test_astype_duplicate_col(self): a1 = Series([1, 2, 3, 4, 5], name="a") b = Series([0.1, 0.2, 0.4, 0.6, 0.8], name="b") a2 = Series([0, 1, 2, 3, 4], name="a") df = concat([a1, b, a2], axis=1) result = df.astype(str) a1_str = Series(["1", "2", "3", "4", "5"], dtype="str", name="a") b_str = Series(["0.1", "0.2", "0.4", "0.6", "0.8"], dtype=str, name="b") a2_str = Series(["0", "1", "2", "3", "4"], dtype="str", name="a") expected = concat([a1_str, b_str, a2_str], axis=1) tm.assert_frame_equal(result, expected) result = df.astype({"a": "str"}) expected = concat([a1_str, b, a2_str], axis=1) tm.assert_frame_equal(result, expected) def test_astype_duplicate_col_series_arg(self): # GH#44417 vals = np.random.randn(3, 4) df = DataFrame(vals, columns=["A", "B", "C", "A"]) dtypes = df.dtypes dtypes.iloc[0] = str dtypes.iloc[2] = "Float64" result = df.astype(dtypes) expected = DataFrame( { 0: vals[:, 0].astype(str), 1: vals[:, 1], 2: pd.array(vals[:, 2], dtype="Float64"), 3: vals[:, 3], } ) expected.columns = df.columns tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( "dtype", [ "category", CategoricalDtype(), CategoricalDtype(ordered=True), CategoricalDtype(ordered=False), 
CategoricalDtype(categories=list("abcdef")), CategoricalDtype(categories=list("edba"), ordered=False), CategoricalDtype(categories=list("edcb"), ordered=True), ], ids=repr, ) def test_astype_categorical(self, dtype): # GH#18099 d = {"A": list("abbc"), "B": list("bccd"), "C": list("cdde")} df = DataFrame(d) result = df.astype(dtype) expected = DataFrame({k: Categorical(d[k], dtype=dtype) for k in d}) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("cls", [CategoricalDtype, DatetimeTZDtype, IntervalDtype]) def test_astype_categoricaldtype_class_raises(self, cls): df = DataFrame({"A": ["a", "a", "b", "c"]}) xpr = f"Expected an instance of {cls.__name__}" with pytest.raises(TypeError, match=xpr): df.astype({"A": cls}) with pytest.raises(TypeError, match=xpr): df["A"].astype(cls) @pytest.mark.parametrize("dtype", ["Int64", "Int32", "Int16"]) def test_astype_extension_dtypes(self, dtype): # GH#22578 df = DataFrame([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], columns=["a", "b"]) expected1 = DataFrame( { "a": pd.array([1, 3, 5], dtype=dtype), "b": pd.array([2, 4, 6], dtype=dtype), } ) tm.assert_frame_equal(df.astype(dtype), expected1) tm.assert_frame_equal(df.astype("int64").astype(dtype), expected1) tm.assert_frame_equal(df.astype(dtype).astype("float64"), df) df = DataFrame([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], columns=["a", "b"]) df["b"] = df["b"].astype(dtype) expected2 = DataFrame( {"a": [1.0, 3.0, 5.0], "b": pd.array([2, 4, 6], dtype=dtype)} ) tm.assert_frame_equal(df, expected2) tm.assert_frame_equal(df.astype(dtype), expected1) tm.assert_frame_equal(df.astype("int64").astype(dtype), expected1) @pytest.mark.parametrize("dtype", ["Int64", "Int32", "Int16"]) def test_astype_extension_dtypes_1d(self, dtype): # GH#22578 df = DataFrame({"a": [1.0, 2.0, 3.0]}) expected1 = DataFrame({"a": pd.array([1, 2, 3], dtype=dtype)}) tm.assert_frame_equal(df.astype(dtype), expected1) tm.assert_frame_equal(df.astype("int64").astype(dtype), expected1) df = DataFrame({"a": 
[1.0, 2.0, 3.0]}) df["a"] = df["a"].astype(dtype) expected2 = DataFrame({"a": pd.array([1, 2, 3], dtype=dtype)}) tm.assert_frame_equal(df, expected2) tm.assert_frame_equal(df.astype(dtype), expected1) tm.assert_frame_equal(df.astype("int64").astype(dtype), expected1) @pytest.mark.parametrize("dtype", ["category", "Int64"]) def test_astype_extension_dtypes_duplicate_col(self, dtype): # GH#24704 a1 = Series([0, np.nan, 4], name="a") a2 = Series([np.nan, 3, 5], name="a") df = concat([a1, a2], axis=1) result = df.astype(dtype) expected = concat([a1.astype(dtype), a2.astype(dtype)], axis=1) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( "dtype", [{100: "float64", 200: "uint64"}, "category", "float64"] ) def test_astype_column_metadata(self, dtype): # GH#19920 columns = UInt64Index([100, 200, 300], name="foo") df = DataFrame(np.arange(15).reshape(5, 3), columns=columns) df = df.astype(dtype) tm.assert_index_equal(df.columns, columns) @pytest.mark.parametrize("dtype", ["M8", "m8"]) @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s", "h", "m", "D"]) def test_astype_from_datetimelike_to_object(self, dtype, unit): # tests astype to object dtype # GH#19223 / GH#12425 dtype = f"{dtype}[{unit}]" arr = np.array([[1, 2, 3]], dtype=dtype) df = DataFrame(arr) result = df.astype(object) assert (result.dtypes == object).all() if dtype.startswith("M8"): assert result.iloc[0, 0] == Timestamp(1, unit=unit) else: assert result.iloc[0, 0] == Timedelta(1, unit=unit) @pytest.mark.parametrize("arr_dtype", [np.int64, np.float64]) @pytest.mark.parametrize("dtype", ["M8", "m8"]) @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s", "h", "m", "D"]) def test_astype_to_datetimelike_unit(self, arr_dtype, dtype, unit): # tests all units from numeric origination # GH#19223 / GH#12425 dtype = f"{dtype}[{unit}]" arr = np.array([[1, 2, 3]], dtype=arr_dtype) df = DataFrame(arr) result = df.astype(dtype) expected = DataFrame(arr.astype(dtype)) tm.assert_frame_equal(result, 
expected) @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s", "h", "m", "D"]) def test_astype_to_datetime_unit(self, unit): # tests all units from datetime origination # GH#19223 dtype = f"M8[{unit}]" arr = np.array([[1, 2, 3]], dtype=dtype) df = DataFrame(arr) result = df.astype(dtype) expected = DataFrame(arr.astype(dtype)) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("unit", ["ns"]) def test_astype_to_timedelta_unit_ns(self, unit): # preserver the timedelta conversion # GH#19223 dtype = f"m8[{unit}]" arr = np.array([[1, 2, 3]], dtype=dtype) df = DataFrame(arr) result = df.astype(dtype) expected = DataFrame(arr.astype(dtype)) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("unit", ["us", "ms", "s", "h", "m", "D"]) def test_astype_to_timedelta_unit(self, unit): # coerce to float # GH#19223 dtype = f"m8[{unit}]" arr = np.array([[1, 2, 3]], dtype=dtype) df = DataFrame(arr) result = df.astype(dtype) expected = DataFrame(df.values.astype(dtype).astype(float)) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s", "h", "m", "D"]) def test_astype_to_incorrect_datetimelike(self, unit): # trying to astype a m to a M, or vice-versa # GH#19224 dtype = f"M8[{unit}]" other = f"m8[{unit}]" df = DataFrame(np.array([[1, 2, 3]], dtype=dtype)) msg = "|".join( [ # BlockManager path rf"Cannot cast DatetimeArray to dtype timedelta64\[{unit}\]", # ArrayManager path "cannot astype a datetimelike from " rf"\[datetime64\[ns\]\] to \[timedelta64\[{unit}\]\]", ] ) with pytest.raises(TypeError, match=msg): df.astype(other) msg = "|".join( [ # BlockManager path rf"Cannot cast TimedeltaArray to dtype datetime64\[{unit}\]", # ArrayManager path "cannot astype a timedelta from " rf"\[timedelta64\[ns\]\] to \[datetime64\[{unit}\]\]", ] ) df = DataFrame(np.array([[1, 2, 3]], dtype=other)) with pytest.raises(TypeError, match=msg): df.astype(dtype) def test_astype_arg_for_errors(self): # GH#14878 df = DataFrame([1, 
2, 3]) msg = ( "Expected value of kwarg 'errors' to be one of " "['raise', 'ignore']. Supplied value is 'True'" ) with pytest.raises(ValueError, match=re.escape(msg)): df.astype(np.float64, errors=True) df.astype(np.int8, errors="ignore") def test_astype_arg_for_errors_dictlist(self): # GH#25905 df = DataFrame( [ {"a": "1", "b": "16.5%", "c": "test"}, {"a": "2.2", "b": "15.3", "c": "another_test"}, ] ) expected = DataFrame( [ {"a": 1.0, "b": "16.5%", "c": "test"}, {"a": 2.2, "b": "15.3", "c": "another_test"}, ] ) type_dict = {"a": "float64", "b": "float64", "c": "object"} result = df.astype(dtype=type_dict, errors="ignore") tm.assert_frame_equal(result, expected) def test_astype_dt64tz(self, timezone_frame): # astype expected = np.array( [ [ Timestamp("2013-01-01 00:00:00"), Timestamp("2013-01-02 00:00:00"), Timestamp("2013-01-03 00:00:00"), ], [ Timestamp("2013-01-01 00:00:00-0500", tz="US/Eastern"), NaT, Timestamp("2013-01-03 00:00:00-0500", tz="US/Eastern"), ], [ Timestamp("2013-01-01 00:00:00+0100", tz="CET"), NaT, Timestamp("2013-01-03 00:00:00+0100", tz="CET"), ], ], dtype=object, ).T expected = DataFrame( expected, index=timezone_frame.index, columns=timezone_frame.columns, dtype=object, ) result = timezone_frame.astype(object) tm.assert_frame_equal(result, expected) with tm.assert_produces_warning(FutureWarning): # dt64tz->dt64 deprecated result = timezone_frame.astype("datetime64[ns]") expected = DataFrame( { "A": date_range("20130101", periods=3), "B": ( date_range("20130101", periods=3, tz="US/Eastern") .tz_convert("UTC") .tz_localize(None) ), "C": ( date_range("20130101", periods=3, tz="CET") .tz_convert("UTC") .tz_localize(None) ), } ) expected.iloc[1, 1] = NaT expected.iloc[1, 2] = NaT tm.assert_frame_equal(result, expected) def test_astype_dt64tz_to_str(self, timezone_frame): # str formatting result = timezone_frame.astype(str) expected = DataFrame( [ [ "2013-01-01", "2013-01-01 00:00:00-05:00", "2013-01-01 00:00:00+01:00", ], ["2013-01-02", "NaT", 
"NaT"], [ "2013-01-03", "2013-01-03 00:00:00-05:00", "2013-01-03 00:00:00+01:00", ], ], columns=timezone_frame.columns, ) tm.assert_frame_equal(result, expected) with option_context("display.max_columns", 20): result = str(timezone_frame) assert ( "0 2013-01-01 2013-01-01 00:00:00-05:00 2013-01-01 00:00:00+01:00" ) in result assert ( "1 2013-01-02 NaT NaT" ) in result assert ( "2 2013-01-03 2013-01-03 00:00:00-05:00 2013-01-03 00:00:00+01:00" ) in result def test_astype_empty_dtype_dict(self): # issue mentioned further down in the following issue's thread # https://github.com/pandas-dev/pandas/issues/33113 df = DataFrame() result = df.astype({}) tm.assert_frame_equal(result, df) assert result is not df @pytest.mark.parametrize( "data, dtype", [ (["x", "y", "z"], "string[python]"), pytest.param( ["x", "y", "z"], "string[pyarrow]", marks=td.skip_if_no("pyarrow", min_version="1.0.0"), ), (["x", "y", "z"], "category"), (3 * [Timestamp("2020-01-01", tz="UTC")], None), (3 * [Interval(0, 1)], None), ], ) @pytest.mark.parametrize("errors", ["raise", "ignore"]) def test_astype_ignores_errors_for_extension_dtypes(self, data, dtype, errors): # https://github.com/pandas-dev/pandas/issues/35471 df = DataFrame(Series(data, dtype=dtype)) if errors == "ignore": expected = df result = df.astype(float, errors=errors) tm.assert_frame_equal(result, expected) else: msg = "(Cannot cast)|(could not convert)" with pytest.raises((ValueError, TypeError), match=msg): df.astype(float, errors=errors) def test_astype_tz_conversion(self): # GH 35973 val = {"tz": date_range("2020-08-30", freq="d", periods=2, tz="Europe/London")} df = DataFrame(val) result = df.astype({"tz": "datetime64[ns, Europe/Berlin]"}) expected = df expected["tz"] = expected["tz"].dt.tz_convert("Europe/Berlin") tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("tz", ["UTC", "Europe/Berlin"]) def test_astype_tz_object_conversion(self, tz): # GH 35973 val = {"tz": date_range("2020-08-30", freq="d", periods=2, 
tz="Europe/London")} expected = DataFrame(val) # convert expected to object dtype from other tz str (independently tested) result = expected.astype({"tz": f"datetime64[ns, {tz}]"}) result = result.astype({"tz": "object"}) # do real test: object dtype to a specified tz, different from construction tz. result = result.astype({"tz": "datetime64[ns, Europe/London]"}) tm.assert_frame_equal(result, expected) def test_astype_dt64_to_string(self, frame_or_series, tz_naive_fixture): # GH#41409 tz = tz_naive_fixture dti = date_range("2016-01-01", periods=3, tz=tz) dta = dti._data dta[0] = NaT obj = frame_or_series(dta) result = obj.astype("string") # Check that Series/DataFrame.astype matches DatetimeArray.astype expected = frame_or_series(dta.astype("string")) tm.assert_equal(result, expected) item = result.iloc[0] if frame_or_series is DataFrame: item = item.iloc[0] assert item is pd.NA # For non-NA values, we should match what we get for non-EA str alt = obj.astype(str) assert np.all(alt.iloc[1:] == result.iloc[1:]) def test_astype_td64_to_string(self, frame_or_series): # GH#41409 tdi = pd.timedelta_range("1 Day", periods=3) obj = frame_or_series(tdi) expected = frame_or_series(["1 days", "2 days", "3 days"], dtype="string") result = obj.astype("string") tm.assert_equal(result, expected) def test_astype_bytes(self): # GH#39474 result = DataFrame(["foo", "bar", "baz"]).astype(bytes) assert result.dtypes[0] == np.dtype("S3") @pytest.mark.parametrize( "index_slice", [ np.s_[:2, :2], np.s_[:1, :2], np.s_[:2, :1], np.s_[::2, ::2], np.s_[::1, ::2], np.s_[::2, ::1], ], ) def test_astype_noncontiguous(self, index_slice): # GH#42396 data = np.arange(16).reshape(4, 4) df = DataFrame(data) result = df.iloc[index_slice].astype("int16") expected = df.iloc[index_slice] tm.assert_frame_equal(result, expected, check_dtype=False) def test_astype_retain_attrs(self, any_numpy_dtype): # GH#44414 df = DataFrame({"a": [0, 1, 2], "b": [3, 4, 5]}) df.attrs["Location"] = "Michigan" result = 
df.astype({"a": any_numpy_dtype}).attrs expected = df.attrs tm.assert_dict_equal(expected, result)
class BaseCastingTests(BaseExtensionTests):
    """Casting to and from ExtensionDtypes"""

    def test_astype_object_series(self, all_data):
        """Series.astype(object) is backed by an object-dtype ndarray."""
        ser = pd.Series(all_data, name="A")
        result = ser.astype(object)
        assert result.dtype == np.dtype(object)
        # only managers that expose ``blocks`` can be checked for ObjectBlock
        if hasattr(result._mgr, "blocks"):
            assert isinstance(result._mgr.blocks[0], ObjectBlock)
        assert isinstance(result._mgr.array, np.ndarray)
        assert result._mgr.array.dtype == np.dtype(object)

    def test_astype_object_frame(self, all_data):
        """DataFrame.astype(object) is backed by an object-dtype ndarray."""
        df = pd.DataFrame({"A": all_data})

        result = df.astype(object)
        # only managers that expose ``blocks`` can be checked for ObjectBlock
        if hasattr(result._mgr, "blocks"):
            # use ``_mgr`` like the rest of this class, not the ``_data`` alias
            blk = result._mgr.blocks[0]
            assert isinstance(blk, ObjectBlock), type(blk)
        assert isinstance(result._mgr.arrays[0], np.ndarray)
        assert result._mgr.arrays[0].dtype == np.dtype(object)

        # FIXME: these currently fail; don't leave commented-out
        # check that we can compare the dtypes
        # cmp = result.dtypes.equals(df.dtypes)
        # assert not cmp.any()

    def test_tolist(self, data):
        """Series.tolist() equals list() of the underlying array."""
        result = pd.Series(data).tolist()
        expected = list(data)
        assert result == expected

    def test_astype_str(self, data):
        """astype(str) matches calling str() on each of the first five items."""
        result = pd.Series(data[:5]).astype(str)
        expected = pd.Series([str(x) for x in data[:5]], dtype=str)
        self.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "nullable_string_dtype",
        [
            "string[python]",
            pytest.param(
                "string[pyarrow]", marks=td.skip_if_no("pyarrow", min_version="1.0.0")
            ),
        ],
    )
    def test_astype_string(self, data, nullable_string_dtype):
        """astype to a nullable string dtype matches str() on each item."""
        # GH-33465
        result = pd.Series(data[:5]).astype(nullable_string_dtype)
        expected = pd.Series([str(x) for x in data[:5]], dtype=nullable_string_dtype)
        self.assert_series_equal(result, expected)

    def test_to_numpy(self, data):
        """to_numpy() on the array and on a Series both equal np.asarray."""
        expected = np.asarray(data)

        result = data.to_numpy()
        self.assert_equal(result, expected)

        result = pd.Series(data).to_numpy()
        self.assert_equal(result, expected)

    def test_astype_empty_dataframe(self, dtype):
        """astype on an empty DataFrame returns an equal empty DataFrame."""
        # https://github.com/pandas-dev/pandas/issues/33113
        df = pd.DataFrame()
        result = df.astype(dtype)
        self.assert_frame_equal(result, df)

    @pytest.mark.parametrize("copy", [True, False])
    def test_astype_own_type(self, data, copy):
        """astype to the array's own dtype returns ``data`` itself iff copy=False."""
        # ensure that astype returns the original object for equal dtype and copy=False
        # https://github.com/pandas-dev/pandas/issues/28488
        result = data.astype(data.dtype, copy=copy)
        assert (result is data) is (not copy)
        self.assert_extension_array_equal(result, data)