def test_construction_from_string(self):
    """Check building a ``PeriodDtype`` from its string form.

    ``'period[D]'`` must give a dtype equal to ``self.dtype`` both through
    the constructor and through ``construct_from_string``.  Strings that
    are not a valid period spec must make ``construct_from_string`` raise
    ``TypeError``.
    """
    via_constructor = PeriodDtype('period[D]')
    assert is_dtype_equal(self.dtype, via_constructor)

    via_parser = PeriodDtype.construct_from_string('period[D]')
    assert is_dtype_equal(self.dtype, via_parser)

    # Each of these must be rejected, checked in the original order.
    rejected = (
        'foo',
        'period[foo]',
        'foo[D]',
        'datetime64[ns]',
        'datetime64[ns, US/Eastern]',
    )
    for bad_spec in rejected:
        with pytest.raises(TypeError):
            PeriodDtype.construct_from_string(bad_spec)
class TestIntervalDtype(Base):
    """Tests for constructing, comparing, naming and caching ``IntervalDtype``."""

    def create(self):
        """Return ``IntervalDtype('int64')``.

        NOTE(review): presumably ``Base`` exposes this value as ``self.dtype``
        -- confirm against the ``Base`` definition, which is not shown here.
        """
        return IntervalDtype('int64')

    def test_hash_vs_equality(self):
        """Dtypes built from the same spec are equal, identical and hash alike."""
        # make sure that we satisfy is semantics
        dtype = self.dtype
        dtype2 = IntervalDtype('int64')
        dtype3 = IntervalDtype(dtype2)
        assert dtype == dtype2
        assert dtype2 == dtype
        assert dtype3 == dtype
        assert dtype is dtype2
        assert dtype2 is dtype3
        assert dtype3 is dtype
        assert hash(dtype) == hash(dtype2)
        assert hash(dtype) == hash(dtype3)

        # same checks for the generic (subtype-less) dtype
        dtype1 = IntervalDtype('interval')
        dtype2 = IntervalDtype(dtype1)
        dtype3 = IntervalDtype('interval')
        assert dtype2 == dtype1
        assert dtype2 == dtype2
        assert dtype2 == dtype3
        assert dtype2 is dtype1
        assert dtype2 is dtype2
        assert dtype2 is dtype3
        assert hash(dtype2) == hash(dtype1)
        assert hash(dtype2) == hash(dtype2)
        assert hash(dtype2) == hash(dtype3)

    @pytest.mark.parametrize('subtype', [
        'interval[int64]', 'Interval[int64]', 'int64', np.dtype('int64')])
    def test_construction(self, subtype):
        """Every int64 spelling yields a dtype whose subtype is ``np.dtype('int64')``."""
        i = IntervalDtype(subtype)
        assert i.subtype == np.dtype('int64')
        assert is_interval_dtype(i)

    @pytest.mark.parametrize('subtype', [None, 'interval', 'Interval'])
    def test_construction_generic(self, subtype):
        """``None``/'interval'/'Interval' yield a dtype with ``subtype is None``."""
        # generic
        i = IntervalDtype(subtype)
        assert i.subtype is None
        assert is_interval_dtype(i)

    @pytest.mark.parametrize('subtype', [
        CategoricalDtype(list('abc'), False),
        CategoricalDtype(list('wxyz'), True),
        object, str, '<U10', 'interval[category]', 'interval[object]'])
    def test_construction_not_supported(self, subtype):
        """Categorical, object and string subtypes raise ``TypeError``."""
        # GH 19016
        msg = ('category, object, and string subtypes are not supported '
               'for IntervalDtype')
        with tm.assert_raises_regex(TypeError, msg):
            IntervalDtype(subtype)

    def test_construction_errors(self):
        """An unparseable subtype string raises ``ValueError``."""
        msg = 'could not construct IntervalDtype'
        with tm.assert_raises_regex(ValueError, msg):
            IntervalDtype('xx')

    def test_construction_from_string(self):
        """'interval[int64]' equals ``self.dtype`` via both construction paths."""
        result = IntervalDtype('interval[int64]')
        assert is_dtype_equal(self.dtype, result)
        result = IntervalDtype.construct_from_string('interval[int64]')
        assert is_dtype_equal(self.dtype, result)

    @pytest.mark.parametrize('string', [
        'foo', 'interval[foo]', 'foo[int64]', 0, 3.14, ('a', 'b'), None])
    def test_construction_from_string_errors(self, string):
        """Bad strings raise ``ValueError``; non-string inputs raise ``TypeError``."""
        if isinstance(string, string_types):
            error, msg = ValueError, 'could not construct IntervalDtype'
        else:
            error, msg = TypeError, 'a string needs to be passed, got type'

        with tm.assert_raises_regex(error, msg):
            IntervalDtype.construct_from_string(string)

    def test_subclass(self):
        """The types of two equal-spec dtype instances are subclasses of each other."""
        a = IntervalDtype('interval[int64]')
        b = IntervalDtype('interval[int64]')

        assert issubclass(type(a), type(a))
        assert issubclass(type(a), type(b))

    def test_is_dtype(self):
        """``is_dtype`` accepts interval dtypes/strings and rejects everything else."""
        assert IntervalDtype.is_dtype(self.dtype)
        assert IntervalDtype.is_dtype('interval')
        assert IntervalDtype.is_dtype(IntervalDtype('float64'))
        assert IntervalDtype.is_dtype(IntervalDtype('int64'))
        assert IntervalDtype.is_dtype(IntervalDtype(np.int64))

        assert not IntervalDtype.is_dtype('D')
        assert not IntervalDtype.is_dtype('3D')
        assert not IntervalDtype.is_dtype('U')
        assert not IntervalDtype.is_dtype('S')
        assert not IntervalDtype.is_dtype('foo')
        assert not IntervalDtype.is_dtype(np.object_)
        assert not IntervalDtype.is_dtype(np.int64)
        assert not IntervalDtype.is_dtype(np.float64)

    def test_coerce_to_dtype(self):
        """``_coerce_to_dtype`` turns 'interval[int64]' into the equal ``IntervalDtype``."""
        assert (_coerce_to_dtype('interval[int64]') ==
                IntervalDtype('interval[int64]'))

    def test_equality(self):
        """``is_dtype_equal`` matches same-subtype dtypes and rejects others."""
        assert is_dtype_equal(self.dtype, 'interval[int64]')
        assert is_dtype_equal(self.dtype, IntervalDtype('int64'))
        assert is_dtype_equal(IntervalDtype('int64'), IntervalDtype('int64'))

        assert not is_dtype_equal(self.dtype, 'int64')
        assert not is_dtype_equal(IntervalDtype('int64'),
                                  IntervalDtype('float64'))

        # invalid subtype comparisons do not raise when directly compared
        dtype1 = IntervalDtype('float64')
        dtype2 = IntervalDtype('datetime64[ns, US/Eastern]')
        assert dtype1 != dtype2
        assert dtype2 != dtype1

    @pytest.mark.parametrize('subtype', [
        None, 'interval', 'Interval', 'int64', 'uint64', 'float64',
        'complex128', 'datetime64', 'timedelta64', PeriodDtype('Q')])
    def test_equality_generic(self, subtype):
        """Any ``IntervalDtype`` compares equal to the generic 'interval' dtype."""
        # GH 18980
        dtype = IntervalDtype(subtype)
        assert is_dtype_equal(dtype, 'interval')
        assert is_dtype_equal(dtype, IntervalDtype())

    @pytest.mark.parametrize('subtype', [
        'int64', 'uint64', 'float64', 'complex128', 'datetime64',
        'timedelta64', PeriodDtype('Q')])
    def test_name_repr(self, subtype):
        """``str`` shows 'interval[<subtype>]' while ``name`` stays 'interval'."""
        # GH 18980
        dtype = IntervalDtype(subtype)
        expected = 'interval[{subtype}]'.format(subtype=subtype)
        assert str(dtype) == expected
        assert dtype.name == 'interval'

    @pytest.mark.parametrize('subtype', [None, 'interval', 'Interval'])
    def test_name_repr_generic(self, subtype):
        """For the generic dtype both ``str`` and ``name`` are 'interval'."""
        # GH 18980
        dtype = IntervalDtype(subtype)
        assert str(dtype) == 'interval'
        assert dtype.name == 'interval'

    def test_basic(self):
        """``is_interval_dtype`` is true for the dtype and an ``IntervalIndex``."""
        assert is_interval_dtype(self.dtype)

        ii = IntervalIndex.from_breaks(range(3))

        assert is_interval_dtype(ii.dtype)
        assert is_interval_dtype(ii)

        s = Series(ii, name='A')

        # dtypes
        # series results in object dtype currently,
        assert not is_interval_dtype(s.dtype)
        assert not is_interval_dtype(s)

    def test_basic_dtype(self):
        """``is_interval_dtype`` on strings, ``IntervalIndex`` objects and numpy types."""
        assert is_interval_dtype('interval[int64]')
        assert is_interval_dtype(IntervalIndex.from_tuples([(0, 1)]))
        assert is_interval_dtype(IntervalIndex.from_breaks(np.arange(4)))
        assert is_interval_dtype(IntervalIndex.from_breaks(
            date_range('20130101', periods=3)))
        assert not is_interval_dtype('U')
        assert not is_interval_dtype('S')
        assert not is_interval_dtype('foo')
        assert not is_interval_dtype(np.object_)
        assert not is_interval_dtype(np.int64)
        assert not is_interval_dtype(np.float64)

    def test_caching(self):
        """Construction fills ``_cache``; unpickling after a reset leaves it empty."""
        # start from an empty cache so the counts below are exact
        IntervalDtype.reset_cache()
        dtype = IntervalDtype("int64")
        assert len(IntervalDtype._cache) == 1

        IntervalDtype("interval")
        assert len(IntervalDtype._cache) == 2

        IntervalDtype.reset_cache()
        tm.round_trip_pickle(dtype)
        assert len(IntervalDtype._cache) == 0
def test_is_dtype(self): self.assertTrue(PeriodDtype.is_dtype(self.dtype)) self.assertTrue(PeriodDtype.is_dtype('period[D]')) self.assertTrue(PeriodDtype.is_dtype('period[3D]')) self.assertTrue(PeriodDtype.is_dtype(PeriodDtype('3D'))) self.assertTrue(PeriodDtype.is_dtype('period[U]')) self.assertTrue(PeriodDtype.is_dtype('period[S]')) self.assertTrue(PeriodDtype.is_dtype(PeriodDtype('U'))) self.assertTrue(PeriodDtype.is_dtype(PeriodDtype('S'))) self.assertFalse(PeriodDtype.is_dtype('D')) self.assertFalse(PeriodDtype.is_dtype('3D')) self.assertFalse(PeriodDtype.is_dtype('U')) self.assertFalse(PeriodDtype.is_dtype('S')) self.assertFalse(PeriodDtype.is_dtype('foo')) self.assertFalse(PeriodDtype.is_dtype(np.object_)) self.assertFalse(PeriodDtype.is_dtype(np.int64)) self.assertFalse(PeriodDtype.is_dtype(np.float64))
def test_construction_from_string(self):
    """Check the valid and invalid inputs of ``construct_from_string``.

    ``'period[D]'`` must produce a dtype equal to ``self.dtype`` through
    the constructor and through ``PeriodDtype.construct_from_string``.
    Non-period strings must raise ``TypeError``.
    """
    good_spec = 'period[D]'

    from_init = PeriodDtype(good_spec)
    assert is_dtype_equal(self.dtype, from_init)

    from_string = PeriodDtype.construct_from_string(good_spec)
    assert is_dtype_equal(self.dtype, from_string)

    # None of these describes a period dtype.
    invalid_specs = [
        'foo',
        'period[foo]',
        'foo[D]',
        'datetime64[ns]',
        'datetime64[ns, US/Eastern]',
    ]
    for spec in invalid_specs:
        with pytest.raises(TypeError):
            PeriodDtype.construct_from_string(spec)
def test_empty(self):
    """``str`` of a ``PeriodDtype`` built with no frequency raises ``AttributeError``."""
    freqless = PeriodDtype()
    with pytest.raises(AttributeError):
        str(freqless)
def create(self):
    """Return a ``PeriodDtype`` with daily (``'D'``) frequency."""
    daily = PeriodDtype('D')
    return daily
def test_identity(self):
    """Equivalent period specs give dtypes that are equal and the same object."""
    # (left spec, right spec) pairs that are expected to resolve to one dtype.
    equivalent_specs = [
        ('period[D]', 'period[D]'),
        ('period[3D]', 'period[3D]'),
        ('period[1S1U]', 'period[1000001U]'),
    ]
    for left, right in equivalent_specs:
        assert PeriodDtype(left) == PeriodDtype(right)
        assert PeriodDtype(left) is PeriodDtype(right)
def test_period_dtype(self, dtype):
    """``pandas_dtype`` gives the same ``PeriodDtype`` the constructor would.

    The result must be identical to ``PeriodDtype(dtype)``, equal to it,
    and equal to the original ``dtype`` spec.
    """
    expected = PeriodDtype(dtype)
    assert com.pandas_dtype(dtype) is expected
    assert com.pandas_dtype(dtype) == expected
    assert com.pandas_dtype(dtype) == dtype
left = np.array([np.array([50, 50, 50]), np.array([40, 40, 40])], dtype=object) right = np.array([50, 40]) assert not array_equivalent(left, right, strict_nan=True) @pytest.mark.parametrize( "dtype, na_value", [ # Datetime-like (np.dtype("M8[ns]"), NaT), (np.dtype("m8[ns]"), NaT), (DatetimeTZDtype.construct_from_string("datetime64[ns, US/Eastern]"), NaT), (PeriodDtype("M"), NaT), # Integer ("u1", 0), ("u2", 0), ("u4", 0), ("u8", 0), ("i1", 0), ("i2", 0), ("i4", 0), ("i8", 0), # Bool ("bool", False), # Float ("f2", np.nan), ("f4", np.nan), ("f8", np.nan),
def test_period_dtype_compare_to_string():
    """Comparing a ``PeriodDtype`` with its string form returns real booleans."""
    # https://github.com/pandas-dev/pandas/issues/37265
    monthly = PeriodDtype(freq="M")
    same_as_string = monthly == "period[M]"
    differs_from_string = monthly != "period[M]"
    # ``is`` checks pin the result to the bool singletons, not just truthiness.
    assert same_as_string is True
    assert differs_from_string is False
"Period[D]", "Period[3M]", "Period[U]", ], ) def test_period_dtype(self, dtype): assert com.pandas_dtype(dtype) is PeriodDtype(dtype) assert com.pandas_dtype(dtype) == PeriodDtype(dtype) assert com.pandas_dtype(dtype) == dtype dtypes = { "datetime_tz": com.pandas_dtype("datetime64[ns, US/Eastern]"), "datetime": com.pandas_dtype("datetime64[ns]"), "timedelta": com.pandas_dtype("timedelta64[ns]"), "period": PeriodDtype("D"), "integer": np.dtype(np.int64), "float": np.dtype(np.float64), "object": np.dtype(object), "category": com.pandas_dtype("category"), } @pytest.mark.parametrize("name1,dtype1", list(dtypes.items()), ids=lambda x: str(x)) @pytest.mark.parametrize("name2,dtype2", list(dtypes.items()), ids=lambda x: str(x)) def test_dtype_equal(name1, dtype1, name2, dtype2):
def test_constructor_cast_object(self): s = Series(period_range("1/1/2000", periods=10), dtype=PeriodDtype("D")) exp = Series(period_range("1/1/2000", periods=10)) tm.assert_series_equal(s, exp)
def test_registered():
    """``PeriodDtype`` is registered and ``'Period[D]'`` resolves to it."""
    known_dtypes = registry.dtypes
    assert PeriodDtype in known_dtypes

    looked_up = registry.find("Period[D]")
    assert looked_up == PeriodDtype("D")
class TestCategoricalDtypeParametrized:
    """Tests for ``CategoricalDtype`` built with explicit categories/ordered."""

    @pytest.mark.parametrize('categories', [
        list('abcd'),
        np.arange(1000),
        ['a', 'b', 10, 2, 1.3, True],
        [True, False],
        pd.date_range('2017', periods=4)
    ])
    def test_basic(self, categories, ordered_fixture):
        """Categories and the ``ordered`` flag round-trip through the constructor."""
        c1 = CategoricalDtype(categories, ordered=ordered_fixture)
        tm.assert_index_equal(c1.categories, pd.Index(categories))
        assert c1.ordered is ordered_fixture

    def test_order_matters(self):
        """Dtypes that differ only in ``ordered`` are distinct objects."""
        categories = ['a', 'b']
        c1 = CategoricalDtype(categories, ordered=True)
        c2 = CategoricalDtype(categories, ordered=False)
        c3 = CategoricalDtype(categories, ordered=None)
        assert c1 is not c2
        assert c1 is not c3

    @pytest.mark.parametrize('ordered', [False, None])
    def test_unordered_same(self, ordered):
        """When not ordered, category order does not affect the hash."""
        c1 = CategoricalDtype(['a', 'b'], ordered=ordered)
        c2 = CategoricalDtype(['b', 'a'], ordered=ordered)
        assert hash(c1) == hash(c2)

    def test_categories(self):
        """Categories become an ``Index``; ``ordered`` is ``None`` when omitted."""
        result = CategoricalDtype(['a', 'b', 'c'])
        tm.assert_index_equal(result.categories, pd.Index(['a', 'b', 'c']))
        assert result.ordered is None

    def test_equal_but_different(self, ordered_fixture):
        """Integer and float categories with equal values give unequal dtypes."""
        # NOTE(review): ``ordered_fixture`` is requested but not used in the body.
        c1 = CategoricalDtype([1, 2, 3])
        c2 = CategoricalDtype([1., 2., 3.])
        assert c1 is not c2
        assert c1 != c2

    @pytest.mark.parametrize('v1, v2', [
        ([1, 2, 3], [1, 2, 3]),
        ([1, 2, 3], [3, 2, 1]),
    ])
    def test_order_hashes_different(self, v1, v2):
        """Dtypes with different ``ordered`` values are distinct objects."""
        # NOTE(review): despite the name, only identity is asserted, not hashes.
        c1 = CategoricalDtype(v1, ordered=False)
        c2 = CategoricalDtype(v2, ordered=True)
        c3 = CategoricalDtype(v1, ordered=None)
        assert c1 is not c2
        assert c1 is not c3

    def test_nan_invalid(self):
        """NaN among the categories raises ``ValueError``."""
        with pytest.raises(ValueError):
            CategoricalDtype([1, 2, np.nan])

    def test_non_unique_invalid(self):
        """Duplicate categories raise ``ValueError``."""
        with pytest.raises(ValueError):
            CategoricalDtype([1, 2, 1])

    def test_same_categories_different_order(self):
        """Ordered dtypes with reordered categories are distinct objects."""
        c1 = CategoricalDtype(['a', 'b'], ordered=True)
        c2 = CategoricalDtype(['b', 'a'], ordered=True)
        assert c1 is not c2

    @pytest.mark.parametrize('ordered1', [True, False, None])
    @pytest.mark.parametrize('ordered2', [True, False, None])
    def test_categorical_equality(self, ordered1, ordered2):
        """Equality across every combination of the two ``ordered`` flags."""
        # same categories, same order
        # any combination of None/False are equal
        # True/True is the only combination with True that are equal
        c1 = CategoricalDtype(list('abc'), ordered1)
        c2 = CategoricalDtype(list('abc'), ordered2)
        result = c1 == c2
        expected = bool(ordered1) is bool(ordered2)
        assert result is expected

        # same categories, different order
        # any combination of None/False are equal (order doesn't matter)
        # any combination with True are not equal (different order of cats)
        c1 = CategoricalDtype(list('abc'), ordered1)
        c2 = CategoricalDtype(list('cab'), ordered2)
        result = c1 == c2
        expected = (bool(ordered1) is False) and (bool(ordered2) is False)
        assert result is expected

        # different categories
        c2 = CategoricalDtype([1, 2, 3], ordered2)
        assert c1 != c2

        # none categories
        c1 = CategoricalDtype(list('abc'), ordered1)
        c2 = CategoricalDtype(None, ordered2)
        c3 = CategoricalDtype(None, ordered1)
        assert c1 == c2
        assert c2 == c1
        assert c2 == c3

    @pytest.mark.parametrize('categories', [list('abc'), None])
    @pytest.mark.parametrize('other', ['category', 'not a category'])
    def test_categorical_equality_strings(self, categories, ordered_fixture,
                                          other):
        """A dtype equals the string 'category' and no other string."""
        c1 = CategoricalDtype(categories, ordered_fixture)
        result = c1 == other
        expected = other == 'category'
        assert result is expected

    def test_invalid_raises(self):
        """A bad ``ordered`` value or non-list-like categories raise ``TypeError``."""
        with pytest.raises(TypeError, match='ordered'):
            CategoricalDtype(['a', 'b'], ordered='foo')

        with pytest.raises(TypeError, match="'categories' must be list-like"):
            CategoricalDtype('category')

    def test_mixed(self):
        """Numeric categories hash differently from their string look-alikes."""
        a = CategoricalDtype(['a', 'b', 1, 2])
        b = CategoricalDtype(['a', 'b', '1', '2'])
        assert hash(a) != hash(b)

    def test_from_categorical_dtype_identity(self):
        """With no overrides, ``_from_categorical_dtype`` returns its argument."""
        c1 = Categorical([1, 2], categories=[1, 2, 3], ordered=True)
        # Identity test for no changes
        c2 = CategoricalDtype._from_categorical_dtype(c1)
        assert c2 is c1

    def test_from_categorical_dtype_categories(self):
        """Overriding categories keeps the original ``ordered`` flag."""
        c1 = Categorical([1, 2], categories=[1, 2, 3], ordered=True)
        # override categories
        result = CategoricalDtype._from_categorical_dtype(c1, categories=[2, 3])
        assert result == CategoricalDtype([2, 3], ordered=True)

    def test_from_categorical_dtype_ordered(self):
        """Overriding ``ordered`` keeps the original categories."""
        c1 = Categorical([1, 2], categories=[1, 2, 3], ordered=True)
        # override ordered
        result = CategoricalDtype._from_categorical_dtype(c1, ordered=False)
        assert result == CategoricalDtype([1, 2, 3], ordered=False)

    def test_from_categorical_dtype_both(self):
        """Categories and ``ordered`` can be overridden together."""
        c1 = Categorical([1, 2], categories=[1, 2, 3], ordered=True)
        # override both categories and ordered
        result = CategoricalDtype._from_categorical_dtype(c1, categories=[1, 2],
                                                          ordered=False)
        assert result == CategoricalDtype([1, 2], ordered=False)

    def test_str_vs_repr(self, ordered_fixture):
        """``str`` is 'category'; ``repr`` spells out categories and ordered."""
        c1 = CategoricalDtype(['a', 'b'], ordered=ordered_fixture)
        assert str(c1) == 'category'
        # Py2 will have unicode prefixes
        pat = r"CategoricalDtype\(categories=\[.*\], ordered={ordered}\)"
        assert re.match(pat.format(ordered=ordered_fixture), repr(c1))

    def test_categorical_categories(self):
        """A ``Categorical`` or ``CategoricalIndex`` is accepted as categories."""
        # GH17884
        c1 = CategoricalDtype(Categorical(['a', 'b']))
        tm.assert_index_equal(c1.categories, pd.Index(['a', 'b']))
        c1 = CategoricalDtype(CategoricalIndex(['a', 'b']))
        tm.assert_index_equal(c1.categories, pd.Index(['a', 'b']))

    @pytest.mark.parametrize(
        'new_categories', [list('abc'), list('cba'), list('wxyz'), None])
    @pytest.mark.parametrize('new_ordered', [True, False, None])
    def test_update_dtype(self, ordered_fixture, new_categories, new_ordered):
        """``update_dtype`` takes new values and falls back to the old for ``None``."""
        dtype = CategoricalDtype(list('abc'), ordered_fixture)
        new_dtype = CategoricalDtype(new_categories, new_ordered)

        # a ``None`` attribute on the new dtype means "keep the existing value"
        expected_categories = new_dtype.categories
        if expected_categories is None:
            expected_categories = dtype.categories

        expected_ordered = new_dtype.ordered
        if expected_ordered is None:
            expected_ordered = dtype.ordered

        result = dtype.update_dtype(new_dtype)
        tm.assert_index_equal(result.categories, expected_categories)
        assert result.ordered is expected_ordered

    def test_update_dtype_string(self, ordered_fixture):
        """Updating with the string 'category' changes nothing."""
        dtype = CategoricalDtype(list('abc'), ordered_fixture)
        expected_categories = dtype.categories
        expected_ordered = dtype.ordered
        result = dtype.update_dtype('category')
        tm.assert_index_equal(result.categories, expected_categories)
        assert result.ordered is expected_ordered

    @pytest.mark.parametrize(
        'bad_dtype', ['foo', object, np.int64, PeriodDtype('Q')])
    def test_update_dtype_errors(self, bad_dtype):
        """Updating with anything that is not a categorical dtype raises ``ValueError``."""
        dtype = CategoricalDtype(list('abc'), False)
        msg = 'a CategoricalDtype must be passed to perform an update, '
        with pytest.raises(ValueError, match=msg):
            dtype.update_dtype(bad_dtype)
@pytest.mark.parametrize('dtype', [ CategoricalDtype, IntervalDtype, DatetimeTZDtype, PeriodDtype, ]) def test_registry(dtype): assert dtype in registry.dtypes @pytest.mark.parametrize('dtype, expected', [ ('int64', None), ('interval', IntervalDtype()), ('interval[int64]', IntervalDtype()), ('interval[datetime64[ns]]', IntervalDtype('datetime64[ns]')), ('period[D]', PeriodDtype('D')), ('category', CategoricalDtype()), ('datetime64[ns, US/Eastern]', DatetimeTZDtype('ns', 'US/Eastern')), ]) def test_registry_find(dtype, expected): assert registry.find(dtype) == expected @pytest.mark.parametrize('dtype, expected', [(str, False), (int, False), (bool, True), (np.bool, True), (np.array(['a', 'b']), False), (pd.Series([1, 2]), False), (np.array([True, False]), True), (pd.Series([True, False]), True), (pd.SparseArray([True, False]), True), (SparseDtype(bool), True)])
class TestDataFrameSetItem:
    """Tests for column assignment via ``DataFrame.__setitem__`` (``df[key] = value``)."""

    @pytest.mark.parametrize("dtype", ["int32", "int64", "float32", "float64"])
    def test_setitem_dtype(self, dtype, float_frame):
        """A column assigned from a typed array keeps that array's dtype."""
        arr = np.random.randn(len(float_frame))

        float_frame[dtype] = np.array(arr, dtype=dtype)
        assert float_frame[dtype].dtype.name == dtype

    def test_setitem_list_not_dataframe(self, float_frame):
        """A 2-D array can be assigned to a list of two columns."""
        data = np.random.randn(len(float_frame), 2)
        float_frame[["A", "B"]] = data
        tm.assert_almost_equal(float_frame[["A", "B"]].values, data)

    def test_setitem_error_msmgs(self):
        """Error messages for a duplicate-label Series and a mismatched-index frame."""
        # GH 7432
        df = DataFrame(
            {"bar": [1, 2, 3], "baz": ["d", "e", "f"]},
            index=Index(["a", "b", "c"], name="foo"),
        )
        ser = Series(
            ["g", "h", "i", "j"],
            index=Index(["a", "b", "c", "a"], name="foo"),
            name="fiz",
        )
        msg = "cannot reindex on an axis with duplicate labels"
        with pytest.raises(ValueError, match=msg):
            df["newcol"] = ser

        # GH 4107, more descriptive error message
        df = DataFrame(np.random.randint(0, 2, (4, 4)), columns=["a", "b", "c", "d"])

        msg = "incompatible index of inserted column with frame index"
        with pytest.raises(TypeError, match=msg):
            df["gr"] = df.groupby(["b", "c"]).count()

    def test_setitem_benchmark(self):
        """Inserting the same column K times equals a frame of K repeated columns."""
        # from the vb_suite/frame_methods/frame_insert_columns
        N = 10
        K = 5
        df = DataFrame(index=range(N))
        new_col = np.random.randn(N)
        for i in range(K):
            df[i] = new_col
        expected = DataFrame(np.repeat(new_col, K).reshape(N, K), index=range(N))
        tm.assert_frame_equal(df, expected)

    def test_setitem_different_dtype(self):
        """Adding or replacing columns with other dtypes updates ``df.dtypes``."""
        df = DataFrame(
            np.random.randn(5, 3), index=np.arange(5), columns=["c", "b", "a"]
        )
        df.insert(0, "foo", df["a"])
        df.insert(2, "bar", df["c"])

        # diff dtype

        # new item
        df["x"] = df["a"].astype("float32")
        result = df.dtypes
        expected = Series(
            [np.dtype("float64")] * 5 + [np.dtype("float32")],
            index=["foo", "c", "bar", "b", "a", "x"],
        )
        tm.assert_series_equal(result, expected)

        # replacing current (in different block)
        df["a"] = df["a"].astype("float32")
        result = df.dtypes
        expected = Series(
            [np.dtype("float64")] * 4 + [np.dtype("float32")] * 2,
            index=["foo", "c", "bar", "b", "a", "x"],
        )
        tm.assert_series_equal(result, expected)

        df["y"] = df["a"].astype("int32")
        result = df.dtypes
        expected = Series(
            [np.dtype("float64")] * 4 + [np.dtype("float32")] * 2 + [np.dtype("int32")],
            index=["foo", "c", "bar", "b", "a", "x", "y"],
        )
        tm.assert_series_equal(result, expected)

    def test_setitem_empty_columns(self):
        """A column on a column-less frame can be set and then overwritten."""
        # GH 13522
        df = DataFrame(index=["A", "B", "C"])
        df["X"] = df.index
        df["X"] = ["x", "y", "z"]
        exp = DataFrame(data={"X": ["x", "y", "z"]}, index=["A", "B", "C"])
        tm.assert_frame_equal(df, exp)

    def test_setitem_dt64_index_empty_columns(self):
        """A ``date_range`` assigned to a column-less frame gives an ``M8[ns]`` column."""
        rng = date_range("1/1/2000 00:00:00", "1/1/2000 1:59:50", freq="10s")
        df = DataFrame(index=np.arange(len(rng)))

        df["A"] = rng
        assert df["A"].dtype == np.dtype("M8[ns]")

    def test_setitem_timestamp_empty_columns(self):
        """A tz-aware scalar ``Timestamp`` is broadcast to every row."""
        # GH#19843
        df = DataFrame(index=range(3))
        df["now"] = Timestamp("20130101", tz="UTC")

        expected = DataFrame(
            [[Timestamp("20130101", tz="UTC")]] * 3, index=[0, 1, 2], columns=["now"]
        )
        tm.assert_frame_equal(df, expected)

    def test_setitem_wrong_length_categorical_dtype_raises(self):
        """A ``Categorical`` of the wrong length raises ``ValueError``."""
        # GH#29523
        cat = Categorical.from_codes([0, 1, 1, 0, 1, 2], ["a", "b", "c"])
        df = DataFrame(range(10), columns=["bar"])

        msg = (
            rf"Length of values \({len(cat)}\) "
            rf"does not match length of index \({len(df)}\)"
        )
        with pytest.raises(ValueError, match=msg):
            df["foo"] = cat

    def test_setitem_with_sparse_value(self):
        """A ``SparseArray`` assigned as a column reads back as an equal Series."""
        # GH#8131
        df = DataFrame({"c_1": ["a", "b", "c"], "n_1": [1.0, 2.0, 3.0]})
        sp_array = SparseArray([0, 0, 1])
        df["new_column"] = sp_array

        expected = Series(sp_array, name="new_column")
        tm.assert_series_equal(df["new_column"], expected)

    def test_setitem_with_unaligned_sparse_value(self):
        """A sparse Series with a reversed index is aligned to the frame index."""
        df = DataFrame({"c_1": ["a", "b", "c"], "n_1": [1.0, 2.0, 3.0]})
        sp_series = Series(SparseArray([0, 0, 1]), index=[2, 1, 0])

        df["new_column"] = sp_series
        expected = Series(SparseArray([1, 0, 0]), name="new_column")
        tm.assert_series_equal(df["new_column"], expected)

    def test_setitem_dict_preserves_dtypes(self):
        """Appending dict rows via ``.loc`` keeps each column's dtype."""
        # https://github.com/pandas-dev/pandas/issues/34573
        expected = DataFrame(
            {
                "a": Series([0, 1, 2], dtype="int64"),
                "b": Series([1, 2, 3], dtype=float),
                "c": Series([1, 2, 3], dtype=float),
            }
        )
        df = DataFrame(
            {
                "a": Series([], dtype="int64"),
                "b": Series([], dtype=float),
                "c": Series([], dtype=float),
            }
        )
        for idx, b in enumerate([1, 2, 3]):
            # df.shape[0] is the next unused row label, so each pass appends a row
            df.loc[df.shape[0]] = {"a": int(idx), "b": float(b), "c": float(b)}
        tm.assert_frame_equal(df, expected)

    @pytest.mark.parametrize(
        "obj,dtype",
        [
            (Period("2020-01"), PeriodDtype("M")),
            (Interval(left=0, right=5), IntervalDtype("int64", "right")),
            (
                Timestamp("2011-01-01", tz="US/Eastern"),
                DatetimeTZDtype(tz="US/Eastern"),
            ),
        ],
    )
    def test_setitem_extension_types(self, obj, dtype):
        """A scalar extension value is broadcast into a column of its dtype."""
        # GH: 34832
        expected = DataFrame({"idx": [1, 2, 3], "obj": Series([obj] * 3, dtype=dtype)})

        df = DataFrame({"idx": [1, 2, 3]})
        df["obj"] = obj

        tm.assert_frame_equal(df, expected)

    @pytest.mark.parametrize(
        "ea_name",
        [
            dtype.name
            for dtype in ea_registry.dtypes
            # property would require instantiation
            if not isinstance(dtype.name, property)
        ]
        # mypy doesn't allow adding lists of different types
        # https://github.com/python/mypy/issues/5492
        + ["datetime64[ns, UTC]", "period[D]"],  # type: ignore[list-item]
    )
    def test_setitem_with_ea_name(self, ea_name):
        """An extension-dtype name can be used as an ordinary column label."""
        # GH 38386
        result = DataFrame([0])
        result[ea_name] = [1]
        expected = DataFrame({0: [0], ea_name: [1]})
        tm.assert_frame_equal(result, expected)

    def test_setitem_dt64_ndarray_with_NaT_and_diff_time_units(self):
        """datetime64 arrays containing NaT can be assigned in ns and s units."""
        # GH#7492
        data_ns = np.array([1, "nat"], dtype="datetime64[ns]")
        result = Series(data_ns).to_frame()
        result["new"] = data_ns
        expected = DataFrame({0: [1, None], "new": [1, None]}, dtype="datetime64[ns]")
        tm.assert_frame_equal(result, expected)

        # OutOfBoundsDatetime error shouldn't occur
        data_s = np.array([1, "nat"], dtype="datetime64[s]")
        result["new"] = data_s
        expected = DataFrame({0: [1, None], "new": [1e9, None]}, dtype="datetime64[ns]")
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("unit", ["h", "m", "s", "ms", "D", "M", "Y"])
    def test_frame_setitem_datetime64_col_other_units(self, unit):
        """Non-nanosecond datetime64 values in a new column become ``M8[ns]``."""
        # Check that non-nano dt64 values get cast to dt64 on setitem
        #  into a not-yet-existing column
        n = 100

        dtype = np.dtype(f"M8[{unit}]")
        vals = np.arange(n, dtype=np.int64).view(dtype)
        ex_vals = vals.astype("datetime64[ns]")

        df = DataFrame({"ints": np.arange(n)}, index=np.arange(n))
        df[unit] = vals

        assert df[unit].dtype == np.dtype("M8[ns]")
        assert (df[unit].values == ex_vals).all()

    @pytest.mark.parametrize("unit", ["h", "m", "s", "ms", "D", "M", "Y"])
    def test_frame_setitem_existing_datetime64_col_other_units(self, unit):
        """Non-nanosecond values written over an existing dt64 column match the ns cast."""
        # Check that non-nano dt64 values get cast to dt64 on setitem
        #  into an already-existing dt64 column
        n = 100

        dtype = np.dtype(f"M8[{unit}]")
        vals = np.arange(n, dtype=np.int64).view(dtype)
        ex_vals = vals.astype("datetime64[ns]")

        df = DataFrame({"ints": np.arange(n)}, index=np.arange(n))
        df["dates"] = np.arange(n, dtype=np.int64).view("M8[ns]")

        # We overwrite existing dt64 column with new, non-nano dt64 vals
        df["dates"] = vals
        assert (df["dates"].values == ex_vals).all()

    def test_setitem_dt64tz(self, timezone_frame):
        """Setting tz-aware columns keeps the values and does not share array bases."""
        df = timezone_frame
        idx = df["B"].rename("foo")

        # setitem
        df["C"] = idx
        tm.assert_series_equal(df["C"], Series(idx, name="C"))

        df["D"] = "foo"
        df["D"] = idx
        tm.assert_series_equal(df["D"], Series(idx, name="D"))
        del df["D"]

        # assert that A & C are not sharing the same base (e.g. they
        # are copies)
        v1 = df._mgr.arrays[1]
        v2 = df._mgr.arrays[2]
        tm.assert_extension_array_equal(v1, v2)
        v1base = v1._data.base
        v2base = v2._data.base
        assert v1base is None or (id(v1base) != id(v2base))

        # with nan
        df2 = df.copy()
        df2.iloc[1, 1] = NaT
        df2.iloc[1, 2] = NaT
        result = df2["B"]
        tm.assert_series_equal(notna(result), Series([True, False, True], name="B"))
        tm.assert_series_equal(df2.dtypes, df.dtypes)

    def test_setitem_periodindex(self):
        """A ``PeriodIndex`` survives being a column and a reset/set index round trip."""
        rng = period_range("1/1/2000", periods=5, name="index")
        df = DataFrame(np.random.randn(5, 3), index=rng)

        df["Index"] = rng
        rs = Index(df["Index"])
        tm.assert_index_equal(rs, rng, check_names=False)
        assert rs.name == "Index"
        assert rng.name == "index"

        rs = df.reset_index().set_index("index")
        assert isinstance(rs.index, PeriodIndex)
        tm.assert_index_equal(rs.index, rng)

    def test_setitem_complete_column_with_array(self):
        """A 2-D array assigned to two new columns keeps the array's dtype."""
        # GH#37954
        df = DataFrame({"a": ["one", "two", "three"], "b": [1, 2, 3]})
        arr = np.array([[1, 1], [3, 1], [5, 1]])
        df[["c", "d"]] = arr
        expected = DataFrame(
            {
                "a": ["one", "two", "three"],
                "b": [1, 2, 3],
                "c": [1, 3, 5],
                "d": [1, 1, 1],
            }
        )
        expected["c"] = expected["c"].astype(arr.dtype)
        expected["d"] = expected["d"].astype(arr.dtype)
        assert expected["c"].dtype == arr.dtype
        assert expected["d"].dtype == arr.dtype
        tm.assert_frame_equal(df, expected)

    @pytest.mark.parametrize("dtype", ["f8", "i8", "u8"])
    def test_setitem_bool_with_numeric_index(self, dtype):
        """Adding a ``False`` label to numeric columns gives an object Index."""
        # GH#36319
        cols = Index([1, 2, 3], dtype=dtype)
        df = DataFrame(np.random.randn(3, 3), columns=cols)

        df[False] = ["a", "b", "c"]

        expected_cols = Index([1, 2, 3, False], dtype=object)
        if dtype == "f8":
            expected_cols = Index([1.0, 2.0, 3.0, False], dtype=object)

        tm.assert_index_equal(df.columns, expected_cols)

    @pytest.mark.parametrize("indexer", ["B", ["B"]])
    def test_setitem_frame_length_0_str_key(self, indexer):
        """Assigning a frame into an empty frame adds its rows; the other column is NaN."""
        # GH#38831
        df = DataFrame(columns=["A", "B"])
        other = DataFrame({"B": [1, 2]})
        df[indexer] = other
        expected = DataFrame({"A": [np.nan] * 2, "B": [1, 2]})
        expected["A"] = expected["A"].astype("object")
        tm.assert_frame_equal(df, expected)

    def test_setitem_frame_duplicate_columns(self, using_array_manager):
        """Tuple assignment with duplicate column labels fills every duplicate."""
        # GH#15695
        cols = ["A", "B", "C"] * 2
        df = DataFrame(index=range(3), columns=cols)
        df.loc[0, "A"] = (0, 3)
        df.loc[:, "B"] = (1, 4)
        df["C"] = (2, 5)
        expected = DataFrame(
            [
                [0, 1, 2, 3, 4, 5],
                [np.nan, 1, 2, np.nan, 4, 5],
                [np.nan, 1, 2, np.nan, 4, 5],
            ],
            dtype="object",
        )

        if using_array_manager:
            # setitem replaces column so changes dtype

            expected.columns = cols
            expected["C"] = expected["C"].astype("int64")
            # TODO(ArrayManager) .loc still overwrites
            expected["B"] = expected["B"].astype("int64")
        else:
            # set these with unique columns to be extra-unambiguous
            expected[2] = expected[2].astype(np.int64)
            expected[5] = expected[5].astype(np.int64)
            expected.columns = cols

        tm.assert_frame_equal(df, expected)

    def test_setitem_frame_duplicate_columns_size_mismatch(self):
        """A value whose width does not match the key raises ``ValueError``."""
        # GH#39510
        cols = ["A", "B", "C"] * 2
        df = DataFrame(index=range(3), columns=cols)
        with pytest.raises(ValueError, match="Columns must be same length as key"):
            df[["A"]] = (0, 3, 5)

        df2 = df.iloc[:, :3]  # unique columns
        with pytest.raises(ValueError, match="Columns must be same length as key"):
            df2[["A"]] = (0, 3, 5)

    @pytest.mark.parametrize("cols", [["a", "b", "c"], ["a", "a", "a"]])
    def test_setitem_df_wrong_column_number(self, cols):
        """Assigning a two-column frame to the key "a" raises ``ValueError``."""
        # GH#38604
        df = DataFrame([[1, 2, 3]], columns=cols)
        rhs = DataFrame([[10, 11]], columns=["d", "e"])
        msg = "Columns must be same length as key"
        with pytest.raises(ValueError, match=msg):
            df["a"] = rhs

    def test_setitem_listlike_indexer_duplicate_columns(self):
        """List keys that hit duplicate labels accept a frame of matching width."""
        # GH#38604
        df = DataFrame([[1, 2, 3]], columns=["a", "b", "b"])
        rhs = DataFrame([[10, 11, 12]], columns=["a", "b", "b"])
        df[["a", "b"]] = rhs
        expected = DataFrame([[10, 11, 12]], columns=["a", "b", "b"])
        tm.assert_frame_equal(df, expected)

        df[["c", "b"]] = rhs
        expected = DataFrame([[10, 11, 12, 10]], columns=["a", "b", "b", "c"])
        tm.assert_frame_equal(df, expected)

    def test_setitem_listlike_indexer_duplicate_columns_not_equal_length(self):
        """A frame narrower than the duplicate-expanded key raises ``ValueError``."""
        # GH#39403
        df = DataFrame([[1, 2, 3]], columns=["a", "b", "b"])
        rhs = DataFrame([[10, 11]], columns=["a", "b"])
        msg = "Columns must be same length as key"
        with pytest.raises(ValueError, match=msg):
            df[["a", "b"]] = rhs

    def test_setitem_intervals(self):
        """Resulting dtypes when interval-category data is set in several forms."""
        df = DataFrame({"A": range(10)})
        ser = cut(df["A"], 5)
        assert isinstance(ser.cat.categories, IntervalIndex)

        # B & D end up as Categoricals
        # the remainder are converted to in-line objects
        # containing an IntervalIndex.values
        df["B"] = ser
        df["C"] = np.array(ser)
        df["D"] = ser.values
        df["E"] = np.array(ser.values)
        df["F"] = ser.astype(object)

        assert is_categorical_dtype(df["B"].dtype)
        assert is_interval_dtype(df["B"].cat.categories)
        assert is_categorical_dtype(df["D"].dtype)
        assert is_interval_dtype(df["D"].cat.categories)

        # These go through the Series constructor and so get inferred back
        #  to IntervalDtype
        assert is_interval_dtype(df["C"])
        assert is_interval_dtype(df["E"])

        # But the Series constructor doesn't do inference on Series objects,
        #  so setting df["F"] doesn't get cast back to IntervalDtype
        assert is_object_dtype(df["F"])

        # they compare equal as Index
        # when converted to numpy objects
        c = lambda x: Index(np.array(x))
        tm.assert_index_equal(c(df.B), c(df.B))
        tm.assert_index_equal(c(df.B), c(df.C), check_names=False)
        tm.assert_index_equal(c(df.B), c(df.D), check_names=False)
        tm.assert_index_equal(c(df.C), c(df.D), check_names=False)

        # B & D are the same Series
        tm.assert_series_equal(df["B"], df["B"])
        tm.assert_series_equal(df["B"], df["D"], check_names=False)

        # C & E are the same Series
        tm.assert_series_equal(df["C"], df["C"])
        tm.assert_series_equal(df["C"], df["E"], check_names=False)

    def test_setitem_categorical(self):
        """Attribute assignment of a reordered categorical keeps the new category order."""
        # GH#35369
        df = DataFrame({"h": Series(list("mn")).astype("category")})
        df.h = df.h.cat.reorder_categories(["n", "m"])
        expected = DataFrame(
            {"h": Categorical(["m", "n"]).reorder_categories(["n", "m"])}
        )
        tm.assert_frame_equal(df, expected)

    def test_setitem_with_empty_listlike(self):
        """Setting an empty list on an empty frame leaves the index unchanged."""
        # GH#17101
        index = Index([], name="idx")
        result = DataFrame(columns=["A"], index=index)
        result["A"] = []
        expected = DataFrame(columns=["A"], index=index)
        tm.assert_index_equal(result.index, expected.index)

    @pytest.mark.parametrize(
        "cols, values, expected",
        [
            (["C", "D", "D", "a"], [1, 2, 3, 4], 4),  # with duplicates
            (["D", "C", "D", "a"], [1, 2, 3, 4], 4),  # mixed order
            (["C", "B", "B", "a"], [1, 2, 3, 4], 4),  # other duplicate cols
            (["C", "B", "a"], [1, 2, 3], 3),  # no duplicates
            (["B", "C", "a"], [3, 2, 1], 1),  # alphabetical order
            (["C", "a", "B"], [3, 2, 1], 2),  # in the middle
        ],
    )
    def test_setitem_same_column(self, cols, values, expected):
        """Assigning a column to itself leaves its value unchanged."""
        # GH#23239
        df = DataFrame([values], columns=cols)
        df["a"] = df["a"]
        result = df["a"].values[0]
        assert result == expected

    def test_setitem_multi_index(self):
        """Sub-frame assignment under MultiIndex columns aligns rows and columns."""
        # GH#7655, test that assigning to a sub-frame of a frame
        # with multi-index columns aligns both rows and columns
        it = ["jim", "joe", "jolie"], ["first", "last"], ["left", "center", "right"]

        cols = MultiIndex.from_product(it)
        index = date_range("20141006", periods=20)
        vals = np.random.randint(1, 1000, (len(index), len(cols)))
        df = DataFrame(vals, columns=cols, index=index)

        # copies, so the shuffles below do not touch the frame's own labels
        i, j = df.index.values.copy(), it[-1][:]

        np.random.shuffle(i)
        df["jim"] = df["jolie"].loc[i, ::-1]
        tm.assert_frame_equal(df["jim"], df["jolie"])

        np.random.shuffle(j)
        df[("joe", "first")] = df[("jolie", "last")].loc[i, j]
        tm.assert_frame_equal(df[("joe", "first")], df[("jolie", "last")])

        np.random.shuffle(j)
        df[("joe", "last")] = df[("jolie", "first")].loc[i, j]
        tm.assert_frame_equal(df[("joe", "last")], df[("jolie", "first")])

    @pytest.mark.parametrize(
        "columns,box,expected",
        [
            (
                ["A", "B", "C", "D"],
                7,
                DataFrame(
                    [[7, 7, 7, 7], [7, 7, 7, 7], [7, 7, 7, 7]],
                    columns=["A", "B", "C", "D"],
                ),
            ),
            (
                ["C", "D"],
                [7, 8],
                DataFrame(
                    [[1, 2, 7, 8], [3, 4, 7, 8], [5, 6, 7, 8]],
                    columns=["A", "B", "C", "D"],
                ),
            ),
            (
                ["A", "B", "C"],
                np.array([7, 8, 9], dtype=np.int64),
                DataFrame([[7, 8, 9], [7, 8, 9], [7, 8, 9]], columns=["A", "B", "C"]),
            ),
            (
                ["B", "C", "D"],
                [[7, 8, 9], [10, 11, 12], [13, 14, 15]],
                DataFrame(
                    [[1, 7, 8, 9], [3, 10, 11, 12], [5, 13, 14, 15]],
                    columns=["A", "B", "C", "D"],
                ),
            ),
            (
                ["C", "A", "D"],
                np.array([[7, 8, 9], [10, 11, 12], [13, 14, 15]], dtype=np.int64),
                DataFrame(
                    [[8, 2, 7, 9], [11, 4, 10, 12], [14, 6, 13, 15]],
                    columns=["A", "B", "C", "D"],
                ),
            ),
            (
                ["A", "C"],
                DataFrame([[7, 8], [9, 10], [11, 12]], columns=["A", "C"]),
                DataFrame(
                    [[7, 2, 8], [9, 4, 10], [11, 6, 12]], columns=["A", "B", "C"]
                ),
            ),
        ],
    )
    def test_setitem_list_missing_columns(self, columns, box, expected):
        """A list key may name missing columns; they are created on assignment."""
        # GH#29334
        df = DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"])
        df[columns] = box
        tm.assert_frame_equal(df, expected)

    def test_setitem_list_of_tuples(self, float_frame):
        """A list of tuples is stored as one column of tuple objects."""
        tuples = list(zip(float_frame["A"], float_frame["B"]))
        float_frame["tuples"] = tuples

        result = float_frame["tuples"]
        expected = Series(tuples, index=float_frame.index, name="tuples")
        tm.assert_series_equal(result, expected)

    def test_setitem_iloc_generator(self):
        """A generator can be used as the row indexer for ``.iloc`` assignment."""
        # GH#39614
        df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
        indexer = (x for x in [1, 2])
        df.iloc[indexer] = 1
        expected = DataFrame({"a": [1, 1, 1], "b": [4, 1, 1]})
        tm.assert_frame_equal(df, expected)

    def test_setitem_iloc_two_dimensional_generator(self):
        """A generator row indexer also works together with a column position."""
        df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
        indexer = (x for x in [1, 2])
        df.iloc[indexer, 1] = 1
        expected = DataFrame({"a": [1, 2, 3], "b": [4, 1, 1]})
        tm.assert_frame_equal(df, expected)

    def test_setitem_dtypes_bytes_type_to_object(self):
        """Bytes-dtype ("S64") columns are stored as object; integer dtypes are kept."""
        # GH 20734
        index = Series(name="id", dtype="S24")
        df = DataFrame(index=index)
        df["a"] = Series(name="a", index=index, dtype=np.uint32)
        df["b"] = Series(name="b", index=index, dtype="S64")
        df["c"] = Series(name="c", index=index, dtype="S64")
        df["d"] = Series(name="d", index=index, dtype=np.uint8)
        result = df.dtypes
        expected = Series([np.uint32, object, object, np.uint8], index=list("abcd"))
        tm.assert_series_equal(result, expected)

    def test_boolean_mask_nullable_int64(self):
        """An all-False mask in ``.loc`` assignment leaves values and dtypes unchanged."""
        # GH 28928
        result = DataFrame({"a": [3, 4], "b": [5, 6]}).astype(
            {"a": "int64", "b": "Int64"}
        )
        mask = Series(False, index=result.index)
        result.loc[mask, "a"] = result["a"]
        result.loc[mask, "b"] = result["b"]
        expected = DataFrame({"a": [3, 4], "b": [5, 6]}).astype(
            {"a": "int64", "b": "Int64"}
        )
        tm.assert_frame_equal(result, expected)
class TestCategoricalDtypeParametrized:
    """Parametrized checks of CategoricalDtype construction, equality and updates."""

    @pytest.mark.parametrize(
        "categories",
        [
            list("abcd"),
            np.arange(1000),
            ["a", "b", 10, 2, 1.3, True],
            [True, False],
            pd.date_range("2017", periods=4),
        ],
    )
    def test_basic(self, categories, ordered):
        """The constructor keeps the categories and the ``ordered`` flag it is given."""
        dtype = CategoricalDtype(categories, ordered=ordered)
        expected_index = pd.Index(categories)
        tm.assert_index_equal(dtype.categories, expected_index)
        assert dtype.ordered is ordered

    def test_order_matters(self):
        """Dtypes built with different ``ordered`` values are distinct objects."""
        labels = ["a", "b"]
        as_ordered = CategoricalDtype(labels, ordered=True)
        as_unordered = CategoricalDtype(labels, ordered=False)
        as_unspecified = CategoricalDtype(labels, ordered=None)
        assert as_ordered is not as_unordered
        assert as_ordered is not as_unspecified

    @pytest.mark.parametrize("ordered", [False, None])
    def test_unordered_same(self, ordered):
        """Without ordering, the hash does not depend on category order."""
        forward = CategoricalDtype(["a", "b"], ordered=ordered)
        backward = CategoricalDtype(["b", "a"], ordered=ordered)
        assert hash(forward) == hash(backward)

    def test_categories(self):
        """Categories are exposed as an Index and ``ordered`` defaults to False."""
        dtype = CategoricalDtype(["a", "b", "c"])
        tm.assert_index_equal(dtype.categories, pd.Index(["a", "b", "c"]))
        assert dtype.ordered is False

    def test_equal_but_different(self, ordered):
        """Integer and float categories with equal values are not the same dtype."""
        from_ints = CategoricalDtype([1, 2, 3])
        from_floats = CategoricalDtype([1.0, 2.0, 3.0])
        assert from_ints is not from_floats
        assert from_ints != from_floats

    @pytest.mark.parametrize("v1, v2", [([1, 2, 3], [1, 2, 3]), ([1, 2, 3], [3, 2, 1])])
    def test_order_hashes_different(self, v1, v2):
        """Dtypes differing in ``ordered`` are distinct objects."""
        unordered = CategoricalDtype(v1, ordered=False)
        ordered_dtype = CategoricalDtype(v2, ordered=True)
        unspecified = CategoricalDtype(v1, ordered=None)
        assert unordered is not ordered_dtype
        assert unordered is not unspecified

    def test_nan_invalid(self):
        """A NaN category is rejected with ValueError."""
        with pytest.raises(ValueError, match="Categorical categories cannot be null"):
            CategoricalDtype([1, 2, np.nan])

    def test_non_unique_invalid(self):
        """Duplicate categories are rejected with ValueError."""
        with pytest.raises(ValueError, match="Categorical categories must be unique"):
            CategoricalDtype([1, 2, 1])

    def test_same_categories_different_order(self):
        """Ordered dtypes with permuted categories are distinct objects."""
        forward = CategoricalDtype(["a", "b"], ordered=True)
        backward = CategoricalDtype(["b", "a"], ordered=True)
        assert forward is not backward

    @pytest.mark.parametrize("ordered1", [True, False, None])
    @pytest.mark.parametrize("ordered2", [True, False, None])
    def test_categorical_equality(self, ordered1, ordered2):
        """Exercise ``==`` across every pairing of ``ordered`` values."""
        left_is_ordered = bool(ordered1)
        right_is_ordered = bool(ordered2)

        # same categories, same order
        # any combination of None/False are equal
        # True/True is the only combination with True that are equal
        left = CategoricalDtype(list("abc"), ordered1)
        right = CategoricalDtype(list("abc"), ordered2)
        outcome = left == right
        assert outcome is (left_is_ordered == right_is_ordered)

        # same categories, different order
        # any combination of None/False are equal (order doesn't matter)
        # any combination with True are not equal (different order of cats)
        left = CategoricalDtype(list("abc"), ordered1)
        right = CategoricalDtype(list("cab"), ordered2)
        outcome = left == right
        assert outcome is (not left_is_ordered and not right_is_ordered)

        # different categories
        right = CategoricalDtype([1, 2, 3], ordered2)
        assert left != right

        # none categories
        left = CategoricalDtype(list("abc"), ordered1)
        right = CategoricalDtype(None, ordered2)
        third = CategoricalDtype(None, ordered1)
        assert left == right
        assert right == left
        assert right == third

    @pytest.mark.parametrize("categories", [list("abc"), None])
    @pytest.mark.parametrize("other", ["category", "not a category"])
    def test_categorical_equality_strings(self, categories, ordered, other):
        """Comparing against a string is True only for the string "category"."""
        dtype = CategoricalDtype(categories, ordered)
        outcome = dtype == other
        should_match = other == "category"
        assert outcome is should_match

    def test_invalid_raises(self):
        """Bad ``ordered`` or non-list-like categories raise TypeError."""
        with pytest.raises(TypeError, match="ordered"):
            CategoricalDtype(["a", "b"], ordered="foo")

        with pytest.raises(TypeError, match="'categories' must be list-like"):
            CategoricalDtype("category")

    def test_mixed(self):
        """Mixed str/int categories hash differently from their all-str twin."""
        mixed = CategoricalDtype(["a", "b", 1, 2])
        all_text = CategoricalDtype(["a", "b", "1", "2"])
        assert hash(mixed) != hash(all_text)

    def test_from_categorical_dtype_identity(self):
        """With no overrides, ``_from_categorical_dtype`` returns its argument."""
        source = Categorical([1, 2], categories=[1, 2, 3], ordered=True)
        # Identity test for no changes
        rebuilt = CategoricalDtype._from_categorical_dtype(source)
        assert rebuilt is source

    def test_from_categorical_dtype_categories(self):
        """Overriding only the categories keeps the original orderedness."""
        source = Categorical([1, 2], categories=[1, 2, 3], ordered=True)
        # override categories
        rebuilt = CategoricalDtype._from_categorical_dtype(source, categories=[2, 3])
        assert rebuilt == CategoricalDtype([2, 3], ordered=True)

    def test_from_categorical_dtype_ordered(self):
        """Overriding only ``ordered`` keeps the original categories."""
        source = Categorical([1, 2], categories=[1, 2, 3], ordered=True)
        # override ordered
        rebuilt = CategoricalDtype._from_categorical_dtype(source, ordered=False)
        assert rebuilt == CategoricalDtype([1, 2, 3], ordered=False)

    def test_from_categorical_dtype_both(self):
        """Categories and ``ordered`` can be overridden together."""
        source = Categorical([1, 2], categories=[1, 2, 3], ordered=True)
        # override both categories and ordered
        rebuilt = CategoricalDtype._from_categorical_dtype(
            source, categories=[1, 2], ordered=False
        )
        assert rebuilt == CategoricalDtype([1, 2], ordered=False)

    def test_str_vs_repr(self, ordered):
        """``str`` is the bare name; ``repr`` spells out categories and ordered."""
        dtype = CategoricalDtype(["a", "b"], ordered=ordered)
        assert str(dtype) == "category"
        # Py2 will have unicode prefixes
        template = r"CategoricalDtype\(categories=\[.*\], ordered={ordered}\)"
        pattern = template.format(ordered=ordered)
        assert re.match(pattern, repr(dtype))

    def test_categorical_categories(self):
        """Categories may themselves be given as a Categorical or CategoricalIndex."""
        # GH17884
        expected_index = pd.Index(["a", "b"])
        from_categorical = CategoricalDtype(Categorical(["a", "b"]))
        tm.assert_index_equal(from_categorical.categories, expected_index)

        from_cat_index = CategoricalDtype(CategoricalIndex(["a", "b"]))
        tm.assert_index_equal(from_cat_index.categories, pd.Index(["a", "b"]))

    @pytest.mark.parametrize(
        "new_categories", [list("abc"), list("cba"), list("wxyz"), None]
    )
    @pytest.mark.parametrize("new_ordered", [True, False, None])
    def test_update_dtype(self, ordered, new_categories, new_ordered):
        """``update_dtype`` takes new values where given, else keeps the old ones."""
        original_categories = list("abc")
        dtype = CategoricalDtype(original_categories, ordered)
        replacement = CategoricalDtype(new_categories, new_ordered)

        updated = dtype.update_dtype(replacement)

        expected_categories = pd.Index(new_categories or original_categories)
        if new_ordered is not None:
            expected_ordered = new_ordered
        else:
            expected_ordered = dtype.ordered

        tm.assert_index_equal(updated.categories, expected_categories)
        assert updated.ordered is expected_ordered

    def test_update_dtype_string(self, ordered):
        """Updating with the string "category" leaves the dtype's values unchanged."""
        dtype = CategoricalDtype(list("abc"), ordered)
        kept_categories = dtype.categories
        kept_ordered = dtype.ordered

        updated = dtype.update_dtype("category")

        tm.assert_index_equal(updated.categories, kept_categories)
        assert updated.ordered is kept_ordered

    @pytest.mark.parametrize(
        "bad_dtype", ["foo", object, np.int64, PeriodDtype("Q")]
    )
    def test_update_dtype_errors(self, bad_dtype):
        """Updating with anything but a categorical dtype raises ValueError."""
        dtype = CategoricalDtype(list("abc"), False)
        expected_message = "a CategoricalDtype must be passed to perform an update, "
        with pytest.raises(ValueError, match=expected_message):
            dtype.update_dtype(bad_dtype)
@pytest.mark.parametrize( "dtype", [CategoricalDtype, IntervalDtype, DatetimeTZDtype, PeriodDtype]) def test_registry(dtype): assert dtype in registry.dtypes @pytest.mark.parametrize( "dtype, expected", [ ("int64", None), ("interval", IntervalDtype()), ("interval[int64]", IntervalDtype()), ("interval[datetime64[ns]]", IntervalDtype("datetime64[ns]")), ("period[D]", PeriodDtype("D")), ("category", CategoricalDtype()), ("datetime64[ns, US/Eastern]", DatetimeTZDtype("ns", "US/Eastern")), ], ) def test_registry_find(dtype, expected): assert registry.find(dtype) == expected @pytest.mark.parametrize( "dtype, expected", [ (str, False), (int, False), (bool, True), (np.bool, True),
def test_subclass(self): a = PeriodDtype('period[D]') b = PeriodDtype('period[3D]') assert issubclass(type(a), type(a)) assert issubclass(type(a), type(b))
def dtype(self):
    """
    Class level fixture of dtype for TestPeriodDtype: the daily PeriodDtype
    """
    daily = PeriodDtype("D")
    return daily
def test_coerce_to_dtype(self):
    """``_coerce_to_dtype`` maps a period string to the equal PeriodDtype."""
    for alias in ('period[D]', 'period[3M]'):
        coerced = _coerce_to_dtype(alias)
        assert coerced == PeriodDtype(alias)
def test_identity(self): assert PeriodDtype("period[D]") == PeriodDtype("period[D]") assert PeriodDtype("period[D]") is PeriodDtype("period[D]") assert PeriodDtype("period[3D]") == PeriodDtype("period[3D]") assert PeriodDtype("period[3D]") is PeriodDtype("period[3D]") assert PeriodDtype("period[1S1U]") == PeriodDtype("period[1000001U]") assert PeriodDtype("period[1S1U]") is PeriodDtype("period[1000001U]")
def test_is_dtype(self): assert PeriodDtype.is_dtype(self.dtype) assert PeriodDtype.is_dtype('period[D]') assert PeriodDtype.is_dtype('period[3D]') assert PeriodDtype.is_dtype(PeriodDtype('3D')) assert PeriodDtype.is_dtype('period[U]') assert PeriodDtype.is_dtype('period[S]') assert PeriodDtype.is_dtype(PeriodDtype('U')) assert PeriodDtype.is_dtype(PeriodDtype('S')) assert not PeriodDtype.is_dtype('D') assert not PeriodDtype.is_dtype('3D') assert not PeriodDtype.is_dtype('U') assert not PeriodDtype.is_dtype('S') assert not PeriodDtype.is_dtype('foo') assert not PeriodDtype.is_dtype(np.object_) assert not PeriodDtype.is_dtype(np.int64) assert not PeriodDtype.is_dtype(np.float64)
def test_construct_dtype_from_string_invalid_raises(self, string):
    """``construct_from_string`` raises TypeError naming the rejected string."""
    # escape the message: the interpolated string may contain regex metacharacters
    pattern = re.escape(f"Cannot construct a 'PeriodDtype' from '{string}'")
    with pytest.raises(TypeError, match=pattern):
        PeriodDtype.construct_from_string(string)
def test_not_string(self): # though PeriodDtype has object kind, it cannot be string assert not is_string_dtype(PeriodDtype('D'))
def test_is_dtype(self, dtype): assert PeriodDtype.is_dtype(dtype) assert PeriodDtype.is_dtype("period[D]") assert PeriodDtype.is_dtype("period[3D]") assert PeriodDtype.is_dtype(PeriodDtype("3D")) assert PeriodDtype.is_dtype("period[U]") assert PeriodDtype.is_dtype("period[S]") assert PeriodDtype.is_dtype(PeriodDtype("U")) assert PeriodDtype.is_dtype(PeriodDtype("S")) assert not PeriodDtype.is_dtype("D") assert not PeriodDtype.is_dtype("3D") assert not PeriodDtype.is_dtype("U") assert not PeriodDtype.is_dtype("S") assert not PeriodDtype.is_dtype("foo") assert not PeriodDtype.is_dtype(np.object_) assert not PeriodDtype.is_dtype(np.int64) assert not PeriodDtype.is_dtype(np.float64)
class TestCategoricalDtype(Base):
    """Checks for the default (category-less) CategoricalDtype."""

    def create(self):
        """Return the dtype instance under test."""
        return CategoricalDtype()

    def test_pickle(self):
        """A pickle round trip gives back an equal dtype after the cache is reset."""
        # make sure our cache is NOT pickled

        # clear the cache
        type(self.dtype).reset_cache()
        assert not len(self.dtype._cache)

        # force back to the cache
        result = tm.round_trip_pickle(self.dtype)
        assert result == self.dtype

    def test_hash_vs_equality(self):
        """Two default dtypes are equal both ways and hash alike."""
        dtype = self.dtype
        dtype2 = CategoricalDtype()
        assert dtype == dtype2
        assert dtype2 == dtype
        assert hash(dtype) == hash(dtype2)

    def test_equality(self):
        """The dtype equals 'category' and another default dtype, but not 'foo'."""
        assert is_dtype_equal(self.dtype, 'category')
        assert is_dtype_equal(self.dtype, CategoricalDtype())
        assert not is_dtype_equal(self.dtype, 'foo')

    def test_construction_from_string(self):
        """'category' constructs the dtype; any other string raises TypeError."""
        result = CategoricalDtype.construct_from_string('category')
        assert is_dtype_equal(self.dtype, result)
        # context-manager form, matching the other raise checks in this file
        with pytest.raises(TypeError):
            CategoricalDtype.construct_from_string('foo')

    def test_constructor_invalid(self):
        """Passing the string 'category' as categories raises TypeError."""
        msg = "CategoricalIndex.* must be called"
        with pytest.raises(TypeError, match=msg):
            CategoricalDtype("category")

    def test_is_dtype(self):
        """``is_dtype`` accepts categorical dtypes/strings and nothing else."""
        assert CategoricalDtype.is_dtype(self.dtype)
        assert CategoricalDtype.is_dtype('category')
        assert CategoricalDtype.is_dtype(CategoricalDtype())
        assert not CategoricalDtype.is_dtype('foo')
        assert not CategoricalDtype.is_dtype(np.float64)

    def test_basic(self):
        """The is_categorical* helpers recognise categorical dtypes and Series."""
        assert is_categorical_dtype(self.dtype)

        factor = Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'])

        s = Series(factor, name='A')

        # dtypes
        assert is_categorical_dtype(s.dtype)
        assert is_categorical_dtype(s)
        assert not is_categorical_dtype(np.dtype('float64'))

        assert is_categorical(s.dtype)
        assert is_categorical(s)
        assert not is_categorical(np.dtype('float64'))
        assert not is_categorical(1.0)

    def test_tuple_categories(self):
        """Tuples are usable as category values."""
        categories = [(1, 'a'), (2, 'b'), (3, 'c')]
        result = CategoricalDtype(categories)
        assert all(result.categories == categories)

    @pytest.mark.parametrize('dtype', [
        CategoricalDtype(list('abc'), False),
        CategoricalDtype(list('abc'), True)])
    @pytest.mark.parametrize('new_dtype', [
        'category',
        CategoricalDtype(None, False),
        CategoricalDtype(None, True),
        CategoricalDtype(list('abc'), False),
        CategoricalDtype(list('abc'), True),
        CategoricalDtype(list('cba'), False),
        CategoricalDtype(list('cba'), True),
        CategoricalDtype(list('wxyz'), False),
        CategoricalDtype(list('wxyz'), True)])
    def test_update_dtype(self, dtype, new_dtype):
        """``_update_dtype`` takes the new values, keeping old categories if None."""
        # plain ``str`` replaces the Py2 ``string_types`` shim; this file is Py3-only
        if isinstance(new_dtype, str) and new_dtype == 'category':
            expected_categories = dtype.categories
            expected_ordered = dtype.ordered
        else:
            expected_categories = new_dtype.categories
            if expected_categories is None:
                expected_categories = dtype.categories
            expected_ordered = new_dtype.ordered

        result = dtype._update_dtype(new_dtype)
        tm.assert_index_equal(result.categories, expected_categories)
        assert result.ordered is expected_ordered

    @pytest.mark.parametrize('bad_dtype', [
        'foo', object, np.int64, PeriodDtype('Q')])
    def test_update_dtype_errors(self, bad_dtype):
        """Updating with anything but a categorical dtype raises ValueError."""
        dtype = CategoricalDtype(list('abc'), False)
        msg = 'a CategoricalDtype must be passed to perform an update, '
        with pytest.raises(ValueError, match=msg):
            dtype._update_dtype(bad_dtype)
def test_empty(self):
    """``str()`` of a PeriodDtype built without a frequency raises AttributeError."""
    blank = PeriodDtype()
    with pytest.raises(AttributeError, match="object has no attribute 'freqstr'"):
        str(blank)
def dtype(self):
    """Build the PeriodDtype described by this object's ``freq`` string."""
    spec = self.freq
    return PeriodDtype.construct_from_string(spec)
class TestIntervalDtype(Base):
    """Construction, equality, naming and caching checks for IntervalDtype."""

    @pytest.fixture
    def dtype(self):
        """
        Class level fixture of dtype for TestIntervalDtype
        """
        return IntervalDtype("int64")

    def test_hash_vs_equality(self, dtype):
        """Equal IntervalDtypes are the same object and share a hash."""
        # make sure that we satisfy is semantics
        from_string = IntervalDtype("int64")
        from_dtype = IntervalDtype(from_string)

        assert dtype == from_string
        assert from_string == dtype
        assert from_dtype == dtype
        assert dtype is from_string
        assert from_string is from_dtype
        assert from_dtype is dtype
        assert hash(dtype) == hash(from_string)
        assert hash(dtype) == hash(from_dtype)

        # the same must hold for the generic (subtype-less) dtype
        generic_a = IntervalDtype("interval")
        generic_b = IntervalDtype(generic_a)
        generic_c = IntervalDtype("interval")

        assert generic_b == generic_a
        assert generic_b == generic_b
        assert generic_b == generic_c
        assert generic_b is generic_a
        assert generic_b is generic_b
        assert generic_b is generic_c
        assert hash(generic_b) == hash(generic_a)
        assert hash(generic_b) == hash(generic_b)
        assert hash(generic_b) == hash(generic_c)

    @pytest.mark.parametrize(
        "subtype", ["interval[int64]", "Interval[int64]", "int64", np.dtype("int64")]
    )
    def test_construction(self, subtype):
        """Every spelling of an int64 interval produces an int64 subtype."""
        built = IntervalDtype(subtype)
        assert built.subtype == np.dtype("int64")
        assert is_interval_dtype(built)

    @pytest.mark.parametrize("subtype", [None, "interval", "Interval"])
    def test_construction_generic(self, subtype):
        """Generic spellings produce a dtype whose subtype is None."""
        # generic
        built = IntervalDtype(subtype)
        assert built.subtype is None
        assert is_interval_dtype(built)

    @pytest.mark.parametrize(
        "subtype",
        [
            CategoricalDtype(list("abc"), False),
            CategoricalDtype(list("wxyz"), True),
            object,
            str,
            "<U10",
            "interval[category]",
            "interval[object]",
        ],
    )
    def test_construction_not_supported(self, subtype):
        """Category, object and string subtypes are rejected with TypeError."""
        # GH 19016
        expected_message = (
            "category, object, and string subtypes are not supported "
            "for IntervalDtype"
        )
        with pytest.raises(TypeError, match=expected_message):
            IntervalDtype(subtype)

    @pytest.mark.parametrize("subtype", ["xx", "IntervalA", "Interval[foo]"])
    def test_construction_errors(self, subtype):
        """Unparseable subtype strings are rejected with TypeError."""
        with pytest.raises(TypeError, match="could not construct IntervalDtype"):
            IntervalDtype(subtype)

    def test_construction_from_string(self, dtype):
        """The constructor and ``construct_from_string`` agree with the fixture."""
        via_constructor = IntervalDtype("interval[int64]")
        assert is_dtype_equal(dtype, via_constructor)

        via_classmethod = IntervalDtype.construct_from_string("interval[int64]")
        assert is_dtype_equal(dtype, via_classmethod)

    @pytest.mark.parametrize("string", [0, 3.14, ("a", "b"), None])
    def test_construction_from_string_errors(self, string):
        """Non-string input to ``construct_from_string`` raises TypeError."""
        # these are invalid entirely
        pattern = re.escape(
            f"'construct_from_string' expects a string, got {type(string)}"
        )
        with pytest.raises(TypeError, match=pattern):
            IntervalDtype.construct_from_string(string)

    @pytest.mark.parametrize("string", ["foo", "foo[int64]", "IntervalA"])
    def test_construction_from_string_error_subtype(self, string):
        """Badly formatted strings raise TypeError listing the valid formats."""
        # this is an invalid subtype
        expected_message = (
            "Incorrectly formatted string passed to constructor. "
            r"Valid formats include Interval or Interval\[dtype\] "
            "where dtype is numeric, datetime, or timedelta"
        )
        with pytest.raises(TypeError, match=expected_message):
            IntervalDtype.construct_from_string(string)

    def test_subclass(self):
        """The classes of two IntervalDtype instances pass issubclass."""
        first_cls = type(IntervalDtype("interval[int64]"))
        second_cls = type(IntervalDtype("interval[int64]"))

        assert issubclass(first_cls, first_cls)
        assert issubclass(first_cls, second_cls)

    def test_is_dtype(self, dtype):
        """``is_dtype`` accepts interval dtypes/strings and nothing else."""
        accepted = (
            dtype,
            "interval",
            IntervalDtype("float64"),
            IntervalDtype("int64"),
            IntervalDtype(np.int64),
        )
        for candidate in accepted:
            assert IntervalDtype.is_dtype(candidate)

        rejected = (
            "D",
            "3D",
            "U",
            "S",
            "foo",
            "IntervalA",
            np.object_,
            np.int64,
            np.float64,
        )
        for candidate in rejected:
            assert not IntervalDtype.is_dtype(candidate)

    def test_equality(self, dtype):
        """``is_dtype_equal`` and ``!=`` distinguish differing subtypes."""
        int_interval = IntervalDtype("int64")
        assert is_dtype_equal(dtype, "interval[int64]")
        assert is_dtype_equal(dtype, int_interval)
        assert is_dtype_equal(IntervalDtype("int64"), IntervalDtype("int64"))

        assert not is_dtype_equal(dtype, "int64")
        assert not is_dtype_equal(IntervalDtype("int64"), IntervalDtype("float64"))

        # invalid subtype comparisons do not raise when directly compared
        float_interval = IntervalDtype("float64")
        tz_interval = IntervalDtype("datetime64[ns, US/Eastern]")
        assert float_interval != tz_interval
        assert tz_interval != float_interval

    @pytest.mark.parametrize(
        "subtype",
        [
            None,
            "interval",
            "Interval",
            "int64",
            "uint64",
            "float64",
            "complex128",
            "datetime64",
            "timedelta64",
            PeriodDtype("Q"),
        ],
    )
    def test_equality_generic(self, subtype):
        """Any IntervalDtype compares equal to the generic interval dtype."""
        # GH 18980
        built = IntervalDtype(subtype)
        assert is_dtype_equal(built, "interval")
        assert is_dtype_equal(built, IntervalDtype())

    @pytest.mark.parametrize(
        "subtype",
        [
            "int64",
            "uint64",
            "float64",
            "complex128",
            "datetime64",
            "timedelta64",
            PeriodDtype("Q"),
        ],
    )
    def test_name_repr(self, subtype):
        """``str`` embeds the subtype while ``name`` stays "interval"."""
        # GH 18980
        built = IntervalDtype(subtype)
        assert str(built) == f"interval[{subtype}]"
        assert built.name == "interval"

    @pytest.mark.parametrize("subtype", [None, "interval", "Interval"])
    def test_name_repr_generic(self, subtype):
        """The generic dtype's ``str`` and ``name`` are both "interval"."""
        # GH 18980
        built = IntervalDtype(subtype)
        assert str(built) == "interval"
        assert built.name == "interval"

    def test_basic(self, dtype):
        """``is_interval_dtype`` recognises the dtype, an IntervalIndex and a Series."""
        assert is_interval_dtype(dtype)

        intervals = IntervalIndex.from_breaks(range(3))

        assert is_interval_dtype(intervals.dtype)
        assert is_interval_dtype(intervals)

        series = Series(intervals, name="A")

        assert is_interval_dtype(series.dtype)
        assert is_interval_dtype(series)

    def test_basic_dtype(self):
        """``is_interval_dtype`` accepts interval inputs and rejects other types."""
        assert is_interval_dtype("interval[int64]")
        assert is_interval_dtype(IntervalIndex.from_tuples([(0, 1)]))
        assert is_interval_dtype(IntervalIndex.from_breaks(np.arange(4)))
        assert is_interval_dtype(
            IntervalIndex.from_breaks(date_range("20130101", periods=3))
        )

        for other in ("U", "S", "foo", np.object_, np.int64, np.float64):
            assert not is_interval_dtype(other)

    def test_caching(self):
        """The cache grows per distinct dtype and is not refilled by unpickling."""
        IntervalDtype.reset_cache()
        int_interval = IntervalDtype("int64")
        assert len(IntervalDtype._cache) == 1

        IntervalDtype("interval")
        assert len(IntervalDtype._cache) == 2

        IntervalDtype.reset_cache()
        tm.round_trip_pickle(int_interval)
        assert len(IntervalDtype._cache) == 0

    def test_not_string(self):
        """An IntervalDtype must not be classified as a string dtype."""
        # GH30568: though IntervalDtype has object kind, it cannot be string
        generic = IntervalDtype()
        assert not is_string_dtype(generic)
def test_is_dtype(self): assert PeriodDtype.is_dtype(self.dtype) assert PeriodDtype.is_dtype('period[D]') assert PeriodDtype.is_dtype('period[3D]') assert PeriodDtype.is_dtype(PeriodDtype('3D')) assert PeriodDtype.is_dtype('period[U]') assert PeriodDtype.is_dtype('period[S]') assert PeriodDtype.is_dtype(PeriodDtype('U')) assert PeriodDtype.is_dtype(PeriodDtype('S')) assert not PeriodDtype.is_dtype('D') assert not PeriodDtype.is_dtype('3D') assert not PeriodDtype.is_dtype('U') assert not PeriodDtype.is_dtype('S') assert not PeriodDtype.is_dtype('foo') assert not PeriodDtype.is_dtype(np.object_) assert not PeriodDtype.is_dtype(np.int64) assert not PeriodDtype.is_dtype(np.float64)
def period_array(
    data: Union[Sequence[Optional[Period]], AnyArrayLike],
    freq: Optional[Union[str, Tick]] = None,
    copy: bool = False,
) -> PeriodArray:
    """
    Construct a new PeriodArray from a sequence of Period scalars.

    Parameters
    ----------
    data : Sequence of Period objects
        A sequence of Period objects. These are required to all have
        the same ``freq``. Missing values can be indicated by ``None``
        or ``pandas.NaT``.
    freq : str, Tick, or Offset
        The frequency of every element of the array. This can be specified
        to avoid inferring the `freq` from `data`.
    copy : bool, default False
        Whether to ensure a copy of the data is made.

    Returns
    -------
    PeriodArray

    Raises
    ------
    TypeError
        If `data` is a non-empty floating point array.

    See Also
    --------
    PeriodArray
    pandas.PeriodIndex

    Examples
    --------
    >>> period_array([pd.Period('2017', freq='A'),
    ...               pd.Period('2018', freq='A')])
    <PeriodArray>
    ['2017', '2018']
    Length: 2, dtype: period[A-DEC]

    >>> period_array([pd.Period('2017', freq='A'),
    ...               pd.Period('2018', freq='A'),
    ...               pd.NaT])
    <PeriodArray>
    ['2017', '2018', 'NaT']
    Length: 3, dtype: period[A-DEC]

    Integers that look like years are handled

    >>> period_array([2000, 2001, 2002], freq='D')
    <PeriodArray>
    ['2000-01-01', '2001-01-01', '2002-01-01']
    Length: 3, dtype: period[D]

    Datetime-like strings may also be passed

    >>> period_array(['2000-Q1', '2000-Q2', '2000-Q3', '2000-Q4'], freq='Q')
    <PeriodArray>
    ['2000Q1', '2000Q2', '2000Q3', '2000Q4']
    Length: 4, dtype: period[Q-DEC]
    """
    data_dtype = getattr(data, "dtype", None)

    if is_datetime64_dtype(data_dtype):
        return PeriodArray._from_datetime64(data, freq)
    if is_period_dtype(data_dtype):
        # Forward ``copy``: previously it was dropped here, so copy=True still
        # returned an array backed by the caller's data.
        return PeriodArray(data, freq, copy=copy)

    # other iterable of some kind
    if not isinstance(data, (np.ndarray, list, tuple, ABCSeries)):
        data = list(data)

    data = np.asarray(data)

    dtype: Optional[PeriodDtype] = PeriodDtype(freq) if freq else None

    # Floats are rejected only when there is data; an empty float array
    # falls through to the generic path below.
    if is_float_dtype(data) and len(data) > 0:
        raise TypeError("PeriodIndex does not allow floating point in construction")

    data = ensure_object(data)

    return PeriodArray._from_sequence(data, dtype=dtype, copy=copy)