def test_numpy_transpose(self):
    """
    ``np.transpose`` returns an equal object for every entry in ``self.objs``.

    Supplying ``axes`` must raise ``ValueError`` matching ``self.errmsg``.
    """
    for item in self.objs:
        transposed = np.transpose(item)
        tm.assert_equal(transposed, item)

        # An explicit ``axes`` argument is expected to be rejected.
        with pytest.raises(ValueError, match=self.errmsg):
            np.transpose(item, axes=1)
def assert_invalid_comparison(left, right, box):
    """
    Assert that comparison operations with mismatched types behave correctly.

    Equality comparisons (in both operand orders) must be all-False,
    inequality comparisons all-True, and every ordering comparison
    (``<``, ``<=``, ``>``, ``>=``, in both operand orders) must raise
    ``TypeError`` with one of the recognised messages.

    Parameters
    ----------
    left : np.ndarray, ExtensionArray, Index, or Series
    right : object
    box : {pd.DataFrame, pd.Series, pd.Index, pd.array, tm.to_array}
    """
    # Not for tznaive-tzaware comparison

    # Note: not quite the same as how we do this for tm.box_expected
    if box in [Index, array]:
        expected_box = np.array
    else:
        expected_box = box

    def _unwrap(value):
        # Eventually we'd like this to be tighter, but for now we'll
        # just exclude PandasArray[bool]
        return value._ndarray if isinstance(value, PandasArray) else value

    result = _unwrap(left == right)
    expected = expected_box(np.zeros(result.shape, dtype=np.bool_))
    tm.assert_equal(result, expected)
    tm.assert_equal(_unwrap(right == left), expected)

    tm.assert_equal(_unwrap(left != right), ~expected)
    tm.assert_equal(_unwrap(right != left), ~expected)

    numpy_dev_msg = (
        # GH#36706 npdev 1.20.0 2020-09-28
        r"The DTypes <class 'numpy.dtype\[datetime64\]'> and "
        r"<class 'numpy.dtype\[int64\]'> do not have a common DType. "
        "For example they cannot be stored in a single array unless the "
        "dtype is `object`."
    )
    msg = "|".join(
        [
            "Invalid comparison between",
            "Cannot compare type",
            "not supported between",
            "invalid type promotion",
            numpy_dev_msg,
        ]
    )

    ordering_ops = (
        lambda a, b: a < b,
        lambda a, b: a <= b,
        lambda a, b: a > b,
        lambda a, b: a >= b,
    )
    # All four orderings with ``left`` first, then all four with ``right`` first.
    for first, second in ((left, right), (right, left)):
        for compare in ordering_ops:
            with pytest.raises(TypeError, match=msg):
                compare(first, second)
def test_nat_arithmetic_td64_vector(op_name, box):
    """Applying the named op to a timedelta64 vector and NaT yields all-NaT."""
    # see gh-19124
    td64_dtype = "timedelta64[ns]"
    vec = box(["1 day", "2 day"], dtype=td64_dtype)
    all_nat = box([NaT, NaT], dtype=td64_dtype)

    operation = _ops[op_name]
    tm.assert_equal(operation(vec, NaT), all_nat)
def test_apply_as_index_constant_lambda(as_index, expected): # GH 13217 df = DataFrame({"a": [1, 1, 2, 2], "b": [1, 1, 2, 2], "c": [1, 1, 1, 1]}) result = df.groupby(["a", "b"], as_index=as_index).apply(lambda x: 1) tm.assert_equal(result, expected)
def test_constructor_unwraps_index(self, index): if isinstance(index, pd.MultiIndex): raise pytest.skip("MultiIndex has no ._data") a = index b = type(a)(a) tm.assert_equal(a._data, b._data)
def test_rank_object_first(self, frame_or_series, na_option, ascending, expected): obj = frame_or_series(["foo", "foo", None, "foo"]) result = obj.rank(method="first", na_option=na_option, ascending=ascending) expected = frame_or_series(expected) tm.assert_equal(result, expected)
def test_numpy_zero_dim_ndarray(other):
    """Adding ``np.array(other)`` gives the same result as adding ``other`` itself."""
    arr = integer_array([1, None, 2])

    wrapped = np.array(other)
    result = arr + wrapped

    expected = arr + other
    tm.assert_equal(result, expected)
def test_transpose(index_or_series_obj): obj = index_or_series_obj tm.assert_equal(obj.transpose(), obj)
def test_ufunc_coercions(self, holder):
    """
    ufuncs and float arithmetic on an integer ``holder`` produce float64 results.

    Each operation must return an ``f8`` object of the same box (Series or
    Index) whose values match the corresponding ``Float64Index`` named "x".
    """
    idx = holder([1, 2, 3, 4, 5], name="x")
    box = pd.Series if holder is pd.Series else pd.Index

    # (operation, expected values); operations stay lazy so that they are
    # evaluated one at a time, in order, right before they are checked.
    cases = [
        (lambda: np.sqrt(idx), np.sqrt(np.array([1, 2, 3, 4, 5]))),
        (lambda: np.divide(idx, 2.0), [0.5, 1.0, 1.5, 2.0, 2.5]),
        # _evaluate_numeric_binop
        (lambda: idx + 2.0, [3.0, 4.0, 5.0, 6.0, 7.0]),
        (lambda: idx - 2.0, [-1.0, 0.0, 1.0, 2.0, 3.0]),
        (lambda: idx * 1.0, [1.0, 2.0, 3.0, 4.0, 5.0]),
        (lambda: idx / 2.0, [0.5, 1.0, 1.5, 2.0, 2.5]),
    ]

    for compute, expected_values in cases:
        result = compute()
        assert result.dtype == "f8" and isinstance(result, box)

        exp = pd.Float64Index(expected_values, name="x")
        exp = tm.box_expected(exp, box)
        tm.assert_equal(result, exp)
def test_where_inplace_casting(data): # GH 22051 df = DataFrame({"a": data}) df_copy = df.where(pd.notnull(df), None).copy() df.where(pd.notnull(df), None, inplace=True) tm.assert_equal(df, df_copy)
def test_logical_operators_nans(self, left, right, op, expected, frame_or_series): # GH#13896 result = op(frame_or_series(left), frame_or_series(right)) expected = frame_or_series(expected) tm.assert_equal(result, expected)
def test_array_inference(data, expected): result = pd.array(data) tm.assert_equal(result, expected)
def test_array(data, dtype, expected): result = pd.array(data, dtype=dtype) tm.assert_equal(result, expected)
def test_numpy_transpose(self, index_or_series_obj):
    """
    ``np.transpose`` returns an object equal to its input.

    Supplying ``axes`` must raise ``ValueError`` matching ``self.errmsg``.
    """
    original = index_or_series_obj

    transposed = np.transpose(original)
    tm.assert_equal(transposed, original)

    # An explicit ``axes`` argument is expected to be rejected.
    with pytest.raises(ValueError, match=self.errmsg):
        np.transpose(original, axes=1)
def test_sample_none_weights(self, obj): # Check None are also replaced by zeros. weights_with_None = [None] * 10 weights_with_None[5] = 0.5 tm.assert_equal(obj.sample(n=1, axis=0, weights=weights_with_None), obj.iloc[5:6])
def test_mul_int_identity(self, op, numeric_idx, box_with_array): idx = numeric_idx idx = tm.box_expected(idx, box_with_array) result = op(idx, 1) tm.assert_equal(result, idx)
def test_sample(self, test, obj): # Fixes issue: 2419 # Check behavior of random_state argument # Check for stability when receives seed or random state -- run 10 # times. seed = np.random.randint(0, 100) tm.assert_equal(obj.sample(n=4, random_state=seed), obj.sample(n=4, random_state=seed)) tm.assert_equal( obj.sample(frac=0.7, random_state=seed), obj.sample(frac=0.7, random_state=seed), ) tm.assert_equal( obj.sample(n=4, random_state=np.random.RandomState(test)), obj.sample(n=4, random_state=np.random.RandomState(test)), ) tm.assert_equal( obj.sample(frac=0.7, random_state=np.random.RandomState(test)), obj.sample(frac=0.7, random_state=np.random.RandomState(test)), ) tm.assert_equal( obj.sample(frac=2, replace=True, random_state=np.random.RandomState(test)), obj.sample(frac=2, replace=True, random_state=np.random.RandomState(test)), ) os1, os2 = [], [] for _ in range(2): np.random.seed(test) os1.append(obj.sample(n=4)) os2.append(obj.sample(frac=0.7)) tm.assert_equal(*os1) tm.assert_equal(*os2)
def test_dt_accessor_api_for_categorical(self):
    """
    ``.dt`` on a categorical Series gives the same results as on the plain Series.

    Covers datetime, period and timedelta data: every public ``.dt`` method
    (called without arguments, or with the arguments listed in
    ``special_func_defs``) and every attribute in the array type's
    ``_datetimelike_ops`` is compared between the two. A categorical of
    integers must not expose ``.dt`` or ``.str``.
    """
    # https://github.com/pandas-dev/pandas/issues/10661
    dt_plain = Series(date_range("1/1/2015", periods=5, tz="MET"))
    dt_cat = dt_plain.astype("category")

    per_plain = Series(period_range("1/1/2015", freq="D", periods=5))
    per_cat = per_plain.astype("category")

    td_plain = Series(timedelta_range("1 days", "10 days"))
    td_cat = td_plain.astype("category")

    # only testing field (like .day)
    # and bool (is_month_start)
    test_data = [
        ("Datetime", DatetimeArray._datetimelike_ops, dt_plain, dt_cat),
        ("Period", PeriodArray._datetimelike_ops, per_plain, per_cat),
        ("Timedelta", TimedeltaArray._datetimelike_ops, td_plain, td_cat),
    ]

    assert isinstance(dt_cat.dt, Properties)

    # Methods that need arguments: (name, positional args, keyword args).
    # NOTE: "tz_localize" is deliberately absent here; see the ignore list.
    special_func_defs = [
        ("strftime", ("%Y-%m-%d",), {}),
        ("tz_convert", ("EST",), {}),
        ("round", ("D",), {}),
        ("floor", ("D",), {}),
        ("ceil", ("D",), {}),
        ("asfreq", ("D",), {}),
    ]
    special_names = [entry[0] for entry in special_func_defs]

    # the series is already localized
    ignored_names = ["tz_localize", "components"]

    for _label, attr_names, plain, categorical in test_data:
        # Argument-less methods: public names that are neither attributes,
        # special-cased, nor ignored.
        func_defs = []
        for candidate in dir(plain.dt):
            if candidate.startswith("_"):
                continue
            if (
                candidate in attr_names
                or candidate in special_names
                or candidate in ignored_names
            ):
                continue
            func_defs.append((candidate, (), {}))

        # Special-cased methods, when this accessor actually has them.
        for special in special_func_defs:
            if special[0] in dir(plain.dt):
                func_defs.append(special)

        for func, args, kwargs in func_defs:
            with warnings.catch_warnings():
                if func == "to_period":
                    # dropping TZ
                    warnings.simplefilter("ignore", UserWarning)
                res = getattr(categorical.dt, func)(*args, **kwargs)
                exp = getattr(plain.dt, func)(*args, **kwargs)

            tm.assert_equal(res, exp)

        for attr in attr_names:
            if attr in ["week", "weekofyear"]:
                # GH#33595 Deprecate week and weekofyear
                continue
            res = getattr(categorical.dt, attr)
            exp = getattr(plain.dt, attr)

            if isinstance(res, DataFrame):
                tm.assert_frame_equal(res, exp)
            elif isinstance(res, Series):
                tm.assert_series_equal(res, exp)
            else:
                tm.assert_almost_equal(res, exp)

    invalid = Series([1, 2, 3]).astype("category")
    msg = "Can only use .dt accessor with datetimelike"

    with pytest.raises(AttributeError, match=msg):
        invalid.dt
    assert not hasattr(invalid, "str")
def test_s3_protocols(s3_resource, tips_file, protocol, s3so):
    """Reading tips.csv from the pandas-test bucket via ``protocol`` equals reading ``tips_file``."""
    bucket_url = "%s://pandas-test/tips.csv" % protocol

    from_bucket = read_csv(bucket_url, storage_options=s3so)
    from_tips_file = read_csv(tips_file)

    tm.assert_equal(from_bucket, from_tips_file)
def test_replace_extension_other(self, frame_or_series): # https://github.com/pandas-dev/pandas/issues/34530 obj = frame_or_series(pd.array([1, 2, 3], dtype="Int64")) result = obj.replace("", "") # no exception # should not have changed dtype tm.assert_equal(obj, result)
def test_reindex_empty_series_tz_dtype():
    """Reindexing an empty tz-aware Series fills with NaT and keeps the dtype."""
    # GH 20869
    utc_dtype = "datetime64[ns, UTC]"

    empty = Series(dtype=utc_dtype)
    result = empty.reindex([0, 1])

    expected = Series([NaT] * 2, dtype=utc_dtype)
    tm.assert_equal(result, expected)
def test_expanding_count_with_min_periods_exceeding_series_length( frame_or_series): # GH 25857 result = frame_or_series(range(5)).expanding(min_periods=6).count() expected = frame_or_series([np.nan, np.nan, np.nan, np.nan, np.nan]) tm.assert_equal(result, expected)
def test_rolling_count_with_min_periods(constructor): # GH 26996 result = constructor(range(5)).rolling(3, min_periods=3).count() expected = constructor([np.nan, np.nan, 3.0, 3.0, 3.0]) tm.assert_equal(result, expected)
def test_arith_zero_dim_ndarray(other): arr = pd.array([1, None, 2], dtype="Float64") result = arr + np.array(other) expected = arr + other tm.assert_equal(result, expected)
def test_timedelta_fillna(self, frame_or_series):
    """
    ``fillna``/``ffill``/``bfill`` on timedelta data produced by ``Series.diff``.

    Checks several timedelta-like fill values, that an integer fill value
    raises ``TypeError``, and forward/backward filling after blanking
    position 2.
    """
    # GH#3371
    stamps = [
        Timestamp("20130101"),
        Timestamp("20130101"),
        Timestamp("20130102"),
        Timestamp("20130103 9:01:01"),
    ]
    td = Series(stamps).diff()
    obj = frame_or_series(td)

    def boxed_expected(first, **series_kwargs):
        # Only the leading slot differs between the plain-fillna cases.
        values = [
            first,
            timedelta(0),
            timedelta(1),
            timedelta(days=1, seconds=9 * 3600 + 60 + 1),
        ]
        return frame_or_series(Series(values, **series_kwargs))

    # reg fillna
    tm.assert_equal(obj.fillna(Timedelta(seconds=0)), boxed_expected(timedelta(0)))

    # interpreted as seconds, no longer supported
    msg = "value should be a 'Timedelta', 'NaT', or array of those. Got 'int'"
    with pytest.raises(TypeError, match=msg):
        obj.fillna(1)

    tm.assert_equal(
        obj.fillna(Timedelta(seconds=1)),
        boxed_expected(timedelta(seconds=1)),
    )
    tm.assert_equal(
        obj.fillna(timedelta(days=1, seconds=1)),
        boxed_expected(timedelta(days=1, seconds=1)),
    )
    tm.assert_equal(
        obj.fillna(np.timedelta64(10**9)),
        boxed_expected(timedelta(seconds=1)),
    )
    tm.assert_equal(obj.fillna(NaT), boxed_expected(NaT, dtype="m8[ns]"))

    # ffill
    td[2] = np.nan
    obj = frame_or_series(td)
    forward = obj.ffill()
    forward_expected = td.fillna(Timedelta(seconds=0))
    forward_expected[0] = np.nan
    forward_expected = frame_or_series(forward_expected)
    tm.assert_equal(forward, forward_expected)

    # bfill
    td[2] = np.nan
    obj = frame_or_series(td)
    backward = obj.bfill()
    backward_expected = td.fillna(Timedelta(seconds=0))
    backward_expected[2] = timedelta(days=1, seconds=9 * 3600 + 60 + 1)
    backward_expected = frame_or_series(backward_expected)
    tm.assert_equal(backward, backward_expected)
def test_timedelta_fillna(self, frame_or_series):
    """
    ``fillna``/``ffill``/``bfill`` on timedelta data produced by ``Series.diff``.

    Checks several timedelta-like fill values, that an integer fill value
    raises ``TypeError``, and forward/backward filling after blanking
    position 2.
    """
    # GH#3371
    stamps = [
        Timestamp("20130101"),
        Timestamp("20130101"),
        Timestamp("20130102"),
        Timestamp("20130103 9:01:01"),
    ]
    td = Series(stamps).diff()
    obj = frame_or_series(td)

    def boxed_expected(first, **series_kwargs):
        # Only the leading slot differs between the plain-fillna cases.
        values = [
            first,
            timedelta(0),
            timedelta(1),
            timedelta(days=1, seconds=9 * 3600 + 60 + 1),
        ]
        return frame_or_series(Series(values, **series_kwargs))

    # reg fillna
    tm.assert_equal(obj.fillna(Timedelta(seconds=0)), boxed_expected(timedelta(0)))

    # interpreted as seconds, deprecated
    with pytest.raises(TypeError, match="Passing integers to fillna"):
        obj.fillna(1)

    tm.assert_equal(
        obj.fillna(Timedelta(seconds=1)),
        boxed_expected(timedelta(seconds=1)),
    )
    tm.assert_equal(
        obj.fillna(timedelta(days=1, seconds=1)),
        boxed_expected(timedelta(days=1, seconds=1)),
    )
    tm.assert_equal(
        obj.fillna(np.timedelta64(10 ** 9)),
        boxed_expected(timedelta(seconds=1)),
    )
    tm.assert_equal(obj.fillna(NaT), boxed_expected(NaT, dtype="m8[ns]"))

    # ffill
    td[2] = np.nan
    obj = frame_or_series(td)
    forward = obj.ffill()
    forward_expected = td.fillna(Timedelta(seconds=0))
    forward_expected[0] = np.nan
    forward_expected = frame_or_series(forward_expected)
    tm.assert_equal(forward, forward_expected)

    # bfill
    td[2] = np.nan
    obj = frame_or_series(td)
    backward = obj.bfill()
    backward_expected = td.fillna(Timedelta(seconds=0))
    backward_expected[2] = timedelta(days=1, seconds=9 * 3600 + 60 + 1)
    backward_expected = frame_or_series(backward_expected)
    tm.assert_equal(backward, backward_expected)
def test_rolling_count_with_min_periods(frame_or_series): # GH 26996 result = frame_or_series(range(5)).rolling(3, min_periods=3).count() expected = frame_or_series([np.nan, np.nan, 3.0, 3.0, 3.0]) tm.assert_equal(result, expected)
def test_between_time(self, close_open_fixture, frame_or_series):
    """
    ``between_time`` honours the include-start / include-end flags.

    Exercised on a 5-minute index for a same-day window (00:00-01:00) and
    for a window that wraps past midnight (22:00-09:00). String and
    ``datetime.time`` bounds must give equal results.
    """

    def make_ts():
        # Fresh random data on a 5-minute grid from 2000-01-01 to 2000-01-05.
        rng = date_range("1/1/2000", "1/5/2000", freq="5min")
        data = DataFrame(np.random.randn(len(rng), 2), index=rng)
        if frame_or_series is not DataFrame:
            data = data[0]
        return data

    ts = make_ts()
    stime = time(0, 0)
    etime = time(1, 0)
    inc_start, inc_end = close_open_fixture

    filtered = ts.between_time(stime, etime, inc_start, inc_end)
    exp_len = 13 * 4 + 1 - (0 if inc_start else 5) - (0 if inc_end else 4)
    assert len(filtered) == exp_len

    for stamp in filtered.index:
        t = stamp.time()
        if inc_start:
            assert t >= stime
        else:
            assert t > stime

        if inc_end:
            assert t <= etime
        else:
            assert t < etime

    from_strings = ts.between_time("00:00", "01:00")
    from_times = ts.between_time(stime, etime)
    tm.assert_equal(from_strings, from_times)

    # across midnight
    ts = make_ts()
    stime = time(22, 0)
    etime = time(9, 0)

    filtered = ts.between_time(stime, etime, inc_start, inc_end)
    exp_len = (12 * 11 + 1) * 4 + 1 - (0 if inc_start else 4) - (0 if inc_end else 4)
    assert len(filtered) == exp_len

    for stamp in filtered.index:
        t = stamp.time()
        if inc_start:
            assert (t >= stime) or (t <= etime)
        else:
            assert (t > stime) or (t <= etime)

        if inc_end:
            assert (t <= etime) or (t >= stime)
        else:
            assert (t < etime) or (t >= stime)
def test_truncate(self, datetime_frame, frame_or_series):
    """
    ``truncate`` with neither, both, or one bound, plus empty and invalid cases.

    Bounds are tried both as labels present in the every-third-row object
    and as neighbouring labels taken from the full ``datetime_frame`` index.
    """
    ts = tm.get_obj(datetime_frame[::3], frame_or_series)

    full_index = datetime_frame.index
    start, end = full_index[3], full_index[6]
    start_missing = full_index[2]
    end_missing = full_index[7]

    # neither specified
    tm.assert_equal(ts.truncate(), ts)

    # both specified
    expected = ts[1:3]
    for lower, upper in ((start, end), (start_missing, end_missing)):
        tm.assert_equal(ts.truncate(lower, upper), expected)

    # start specified
    expected = ts[1:]
    for lower in (start, start_missing):
        tm.assert_equal(ts.truncate(before=lower), expected)

    # end specified
    expected = ts[:3]
    for upper in (end, end_missing):
        tm.assert_equal(ts.truncate(after=upper), expected)

    # corner case, empty series/frame returned
    before_everything = ts.truncate(after=ts.index[0] - ts.index.freq)
    assert len(before_everything) == 0

    after_everything = ts.truncate(before=ts.index[-1] + ts.index.freq)
    assert len(after_everything) == 0

    msg = "Truncate: 2000-01-06 00:00:00 must be after 2000-02-04 00:00:00"
    with pytest.raises(ValueError, match=msg):
        ts.truncate(
            before=ts.index[-1] - ts.index.freq,
            after=ts.index[0] + ts.index.freq,
        )
def test_transpose(self): for obj in self.objs: tm.assert_equal(obj.transpose(), obj)