def test_categorical_delegations(self):
    """Exercise the ``Series.cat`` accessor on invalid and categorical data.

    Non-category Series are expected to raise ``AttributeError`` when
    ``.cat`` is touched; for categorical Series the accessor's properties
    and methods are compared against hand-written expectations.
    """
    # invalid accessor
    msg = r"Can only use \.cat accessor with a 'category' dtype"
    with pytest.raises(AttributeError, match=msg):
        Series([1, 2, 3]).cat
    with pytest.raises(AttributeError, match=msg):
        Series([1, 2, 3]).cat()
    for data in (["a", "b", "c"], np.arange(5.0), [Timestamp("20130101")]):
        with pytest.raises(AttributeError, match=msg):
            Series(data).cat

    # Series should delegate calls to '.categories', '.codes', '.ordered'
    # and the methods '.set_categories()' 'drop_unused_categories()' to the
    # categorical
    ser = Series(Categorical(["a", "b", "c", "a"], ordered=True))
    tm.assert_index_equal(ser.cat.categories, Index(["a", "b", "c"]))
    ser.cat.categories = [1, 2, 3]
    tm.assert_index_equal(ser.cat.categories, Index([1, 2, 3]))
    tm.assert_series_equal(ser.cat.codes, Series([0, 1, 2, 0], dtype="int8"))

    assert ser.cat.ordered
    ser = ser.cat.as_unordered()
    assert not ser.cat.ordered
    return_value = ser.cat.as_ordered(inplace=True)
    assert return_value is None
    assert ser.cat.ordered

    # reorder
    ser = Series(Categorical(["a", "b", "c", "a"], ordered=True))
    ser = ser.cat.set_categories(["c", "b", "a"])
    reordered_values = np.array(["a", "b", "c", "a"], dtype=np.object_)
    tm.assert_index_equal(ser.cat.categories, Index(["c", "b", "a"]))
    tm.assert_numpy_array_equal(ser.values.__array__(), reordered_values)
    tm.assert_numpy_array_equal(ser.__array__(), reordered_values)

    # remove unused categories
    ser = Series(Categorical(["a", "b", "b", "a"], categories=["a", "b", "c"]))
    ser = ser.cat.remove_unused_categories()
    pruned_values = np.array(["a", "b", "b", "a"], dtype=np.object_)
    tm.assert_index_equal(ser.cat.categories, Index(["a", "b"]))
    tm.assert_numpy_array_equal(ser.values.__array__(), pruned_values)
    tm.assert_numpy_array_equal(ser.__array__(), pruned_values)

    # This method is likely to be confused, so test that it raises an error
    # on wrong inputs:
    msg = "'Series' object has no attribute 'set_categories'"
    with pytest.raises(AttributeError, match=msg):
        ser.set_categories([4, 3, 2, 1])
    # right: ser.cat.set_categories([4,3,2,1])

    # GH#18862 (let Series.cat.rename_categories take callables)
    ser = Series(Categorical(["a", "b", "c", "a"], ordered=True))
    renamed = ser.cat.rename_categories(lambda label: label.upper())
    upper_cat = Categorical(
        ["A", "B", "C", "A"], categories=["A", "B", "C"], ordered=True
    )
    tm.assert_series_equal(renamed, Series(upper_cat))
def test_list_numeric(data, arr_kwargs): result = to_numeric(data) expected = np.array(data, **arr_kwargs) tm.assert_numpy_array_equal(result, expected)
def test_ignore_downcast_cannot_convert_float(data, expected, downcast): # Cannot cast to an integer (signed or unsigned) # because we have a float number. res = to_numeric(data, downcast=downcast) tm.assert_numpy_array_equal(res, expected)
def test_searchsorted_sorter(self, any_real_dtype): arr = pd.array([3, 1, 2], dtype=any_real_dtype) result = arr.searchsorted([0, 3], sorter=np.argsort(arr)) expected = np.array([0, 2], dtype=np.intp) tm.assert_numpy_array_equal(result, expected)
def test_constructor_imaginary(self): values = [1, 2, 3 + 1j] c1 = Categorical(values) tm.assert_index_equal(c1.categories, Index(values)) tm.assert_numpy_array_equal(np.array(c1), np.array(values))
def assert_array_dicts_equal(left, right):
    """Assert each value in ``left`` equals ``right``'s value under the same key.

    Both values are converted with ``np.asarray`` before comparison. Only
    the keys of ``left`` are visited.
    """
    for key in left:
        left_arr = np.asarray(left[key])
        right_arr = np.asarray(right[key])
        tm.assert_numpy_array_equal(left_arr, right_arr)
def test_unique_na_fill(arr, fill_value): a = SparseArray(arr, fill_value=fill_value).unique() b = pd.Series(arr).unique() assert isinstance(a, SparseArray) a = np.asarray(a) tm.assert_numpy_array_equal(a, b)
def test_hash_array(series): arr = series.values tm.assert_numpy_array_equal(hash_array(arr), hash_array(arr))
def test_hash_array_mixed(arr2): result1 = hash_array(np.array(["3", "4", "All"])) result2 = hash_array(arr2) tm.assert_numpy_array_equal(result1, result2)
def test_strftime(self, period_index): arr = PeriodArray(period_index) result = arr.strftime("%Y") expected = np.array([per.strftime("%Y") for per in arr], dtype=object) tm.assert_numpy_array_equal(result, expected)
def test_constructor_datetime64_tzformat(self, freq):
    """``date_range`` with offset-bearing strings matches fixed-offset ranges.

    Each case builds a range from start/end strings that carry a UTC offset
    and compares it with a naive range localized to the equivalent
    ``pytz.FixedOffset``; the ``asi8`` values are additionally compared with
    a range in a named zone.
    """
    naive_start = "2013-01-01T00:00:00"
    naive_end = "2016-01-01T23:59:59"
    cases = [
        # see GH#6572: ISO 8601 format results in pytz.FixedOffset
        ("2013-01-01T00:00:00-05:00", "2016-01-01T23:59:59-05:00", -300, "America/Lima"),
        ("2013-01-01T00:00:00+09:00", "2016-01-01T23:59:59+09:00", 540, "Asia/Tokyo"),
        # Non ISO 8601 format results in dateutil.tz.tzoffset
        ("2013/1/1 0:00:00-5:00", "2016/1/1 23:59:59-5:00", -300, "America/Lima"),
        ("2013/1/1 0:00:00+9:00", "2016/1/1 23:59:59+09:00", 540, "Asia/Tokyo"),
    ]
    for start, end, offset_minutes, zone in cases:
        idx = date_range(start, end, freq=freq)
        expected = date_range(
            naive_start,
            naive_end,
            freq=freq,
            tz=pytz.FixedOffset(offset_minutes),
        )
        tm.assert_index_equal(idx, expected)
        # Unable to use `US/Eastern` because of DST
        expected_i8 = date_range(naive_start, naive_end, freq=freq, tz=zone)
        tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8)
def test_compare_categorical_dtype(self, arr1d, as_index, reverse, ordered): other = pd.Categorical(arr1d, ordered=ordered) if as_index: other = pd.CategoricalIndex(other) left, right = arr1d, other if reverse: left, right = right, left ones = np.ones(arr1d.shape, dtype=bool) zeros = ~ones result = left == right tm.assert_numpy_array_equal(result, ones) result = left != right tm.assert_numpy_array_equal(result, zeros) if not reverse and not as_index: # Otherwise Categorical raises TypeError bc it is not ordered # TODO: we should probably get the same behavior regardless? result = left < right tm.assert_numpy_array_equal(result, zeros) result = left <= right tm.assert_numpy_array_equal(result, ones) result = left > right tm.assert_numpy_array_equal(result, zeros) result = left >= right tm.assert_numpy_array_equal(result, ones)
def test_array_interface(self, timedelta_index): arr = TimedeltaArray(timedelta_index) # default asarray gives the same underlying data result = np.asarray(arr) expected = arr._data assert result is expected tm.assert_numpy_array_equal(result, expected) result = np.array(arr, copy=False) assert result is expected tm.assert_numpy_array_equal(result, expected) # specifying m8[ns] gives the same result as default result = np.asarray(arr, dtype="timedelta64[ns]") expected = arr._data assert result is expected tm.assert_numpy_array_equal(result, expected) result = np.array(arr, dtype="timedelta64[ns]", copy=False) assert result is expected tm.assert_numpy_array_equal(result, expected) result = np.array(arr, dtype="timedelta64[ns]") assert result is not expected tm.assert_numpy_array_equal(result, expected) # to object dtype result = np.asarray(arr, dtype=object) expected = np.array(list(arr), dtype=object) tm.assert_numpy_array_equal(result, expected) # to other dtype always copies result = np.asarray(arr, dtype="int64") assert result is not arr.asi8 assert not np.may_share_memory(arr, result) expected = arr.asi8.copy() tm.assert_numpy_array_equal(result, expected) # other dtypes handled by numpy for dtype in ["float64", str]: result = np.asarray(arr, dtype=dtype) expected = np.asarray(arr).astype(dtype) tm.assert_numpy_array_equal(result, expected)
def test_strftime(self, datetime_index): arr = DatetimeArray(datetime_index) result = arr.strftime("%Y %b") expected = np.array([ts.strftime("%Y %b") for ts in arr], dtype=object) tm.assert_numpy_array_equal(result, expected)
def test_order(self):
    """Check ``TimedeltaIndex.sort_values`` with and without a frequency.

    The first loop covers already-sorted indexes that carry a ``freq``;
    the second covers unsorted indexes with a repeated value, for which the
    sorted result is expected to have ``freq is None``.
    """
    # GH 10295
    idx1 = TimedeltaIndex(["1 day", "2 day", "3 day"], freq="D", name="idx")
    idx2 = TimedeltaIndex(["1 hour", "2 hour", "3 hour"], freq="H", name="idx")

    for idx in [idx1, idx2]:
        ordered = idx.sort_values()
        tm.assert_index_equal(ordered, idx)
        assert ordered.freq == idx.freq

        ordered = idx.sort_values(ascending=False)
        expected = idx[::-1]
        tm.assert_index_equal(ordered, expected)
        assert ordered.freq == expected.freq
        assert ordered.freq.n == -1

        ordered, indexer = idx.sort_values(return_indexer=True)
        tm.assert_index_equal(ordered, idx)
        tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False)
        assert ordered.freq == idx.freq

        ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
        tm.assert_index_equal(ordered, idx[::-1])
        # The values are unique and already ascending, so reversing them is
        # the only permutation that yields descending order.
        tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]), check_dtype=False)
        assert ordered.freq == expected.freq
        assert ordered.freq.n == -1

    idx1 = TimedeltaIndex(
        ["1 hour", "3 hour", "5 hour", "2 hour ", "1 hour"], name="idx1"
    )
    exp1 = TimedeltaIndex(
        ["1 hour", "1 hour", "2 hour", "3 hour", "5 hour"], name="idx1"
    )

    idx2 = TimedeltaIndex(
        ["1 day", "3 day", "5 day", "2 day", "1 day"], name="idx2"
    )
    exp2 = TimedeltaIndex(
        ["1 day", "1 day", "2 day", "3 day", "5 day"], name="idx2"
    )

    # idx2 has the same relative ordering as idx1 (duplicate smallest value
    # at positions 0 and 4), so the same indexers are expected for both.
    # An index containing NaT is not exercised here.
    for idx, expected in [(idx1, exp1), (idx2, exp2)]:
        ordered = idx.sort_values()
        tm.assert_index_equal(ordered, expected)
        assert ordered.freq is None

        ordered = idx.sort_values(ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])
        assert ordered.freq is None

        ordered, indexer = idx.sort_values(return_indexer=True)
        tm.assert_index_equal(ordered, expected)

        exp = np.array([0, 4, 3, 1, 2])
        tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
        assert ordered.freq is None

        ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])

        exp = np.array([2, 1, 3, 4, 0])
        tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
        assert ordered.freq is None
def test_get_indexer_with_missing_value(index_arr, labels, expected): # issue 19132 idx = MultiIndex.from_arrays(index_arr) result = idx.get_indexer(labels) tm.assert_numpy_array_equal(result, expected)
def test_xtick_barPlot(self):
    """A bar plot given explicit ``xticks`` reports those ticks on its axes."""
    # GH28172
    labels = [f"P{i:02d}" for i in range(10)]
    s = pd.Series(range(10), index=labels)
    ax = s.plot.bar(xticks=range(0, 11, 2))
    requested = np.array(list(range(0, 11, 2)))
    tm.assert_numpy_array_equal(requested, ax.get_xticks())
def test_set_dtype_new_categories(self): c = Categorical(["a", "b", "c"]) result = c._set_dtype(CategoricalDtype(list("abcd"))) tm.assert_numpy_array_equal(result.codes, c.codes) tm.assert_index_equal(result.dtype.categories, Index(list("abcd")))
def test_shift(self, datetime_series):
    """Exercise ``Series.shift`` on datetime, period, integer and tz-aware data."""

    def check_round_trip(ser):
        # Shift forward then back without a freq: the index is unchanged and
        # the surviving values are the original ones minus the last.
        forward = ser.shift(1)
        back = forward.shift(-1)
        tm.assert_index_equal(forward.index, ser.index)
        tm.assert_index_equal(back.index, ser.index)
        tm.assert_numpy_array_equal(back.dropna().values, ser.values[:-1])

    check_round_trip(datetime_series)

    offset = BDay()
    restored = datetime_series.shift(1, freq=offset).shift(-1, freq=offset)
    tm.assert_series_equal(restored, datetime_series)

    tm.assert_series_equal(datetime_series.shift(0, freq=offset), datetime_series)

    restored = datetime_series.shift(1, freq="B").shift(-1, freq="B")
    tm.assert_series_equal(restored, datetime_series)

    # corner case
    tm.assert_series_equal(datetime_series.shift(0), datetime_series)

    # Shifting with PeriodIndex
    ps = tm.makePeriodSeries()
    check_round_trip(ps)

    shifted2 = ps.shift(1, "B")
    shifted3 = ps.shift(1, BDay())
    tm.assert_series_equal(shifted2, shifted3)
    tm.assert_series_equal(ps, shifted2.shift(-1, "B"))

    msg = "Given freq D does not match PeriodIndex freq B"
    with pytest.raises(ValueError, match=msg):
        ps.shift(freq="D")

    # legacy support
    shifted4 = ps.shift(1, freq="B")
    tm.assert_series_equal(shifted2, shifted4)

    shifted5 = ps.shift(1, freq=BDay())
    tm.assert_series_equal(shifted5, shifted4)

    # 32-bit taking
    # GH#8129
    index = date_range("2000-01-01", periods=5)
    for dtype in ["int32", "int64"]:
        s1 = Series(np.arange(5, dtype=dtype), index=index)
        periods = s1.iloc[1]
        tm.assert_series_equal(
            s1.shift(periods=periods),
            Series([np.nan, 0, 1, 2, 3], index=index),
        )

    # GH#8260
    # with tz
    eastern = date_range("2000-01-01 09:00:00", periods=5, tz="US/Eastern")
    s = Series(eastern, name="foo")
    diff = s - s.shift()
    exp = Series(TimedeltaIndex(["NaT"] + ["1 days"] * 4), name="foo")
    tm.assert_series_equal(diff, exp)

    # incompat tz
    s2 = Series(date_range("2000-01-01 09:00:00", periods=5, tz="CET"), name="foo")
    msg = "DatetimeArray subtraction must have the same timezones or no timezones"
    with pytest.raises(TypeError, match=msg):
        s - s2
def test_isin(self, closed):
    """Check ``isin`` on the index from ``self.create_index`` against several inputs.

    Each candidate collection is tried both as an index-like object and as
    a plain list.
    """
    index = self.create_index(closed=closed)
    size = len(index)

    only_first = np.array([True] + [False] * (size - 1))
    tm.assert_numpy_array_equal(index.isin(index[:1]), only_first)
    tm.assert_numpy_array_equal(index.isin([index[0]]), only_first)

    other = IntervalIndex.from_breaks(np.arange(-2, 10), closed=closed)
    all_but_last = np.array([True] * (size - 1) + [False])
    tm.assert_numpy_array_equal(index.isin(other), all_but_last)
    tm.assert_numpy_array_equal(index.isin(other.tolist()), all_but_last)

    for other_closed in {"right", "left", "both", "neither"}:
        other = self.create_index(closed=other_closed)
        # Membership is expected for every element or for none, depending
        # only on whether the closed sides agree.
        expected = np.repeat(closed == other_closed, len(index))
        tm.assert_numpy_array_equal(index.isin(other), expected)
        tm.assert_numpy_array_equal(index.isin(other.tolist()), expected)
def test_searchsorted_numeric_dtypes_vector(self, any_real_dtype): arr = pd.array([1, 3, 90], dtype=any_real_dtype) result = arr.searchsorted([2, 30]) expected = np.array([1, 2], dtype=np.intp) tm.assert_numpy_array_equal(result, expected)
def test_comparison(self):
    """Check comparison operators between ``self.index`` and other operands.

    Covers scalar ``Interval`` operands, the index itself, its ``.values``,
    and operands for which ordering comparisons are expected to raise.
    """
    both_true = np.array([True, True])
    both_false = np.array([False, False])
    second_only = np.array([False, True])

    tm.assert_numpy_array_equal(Interval(0, 1) < self.index, second_only)
    tm.assert_numpy_array_equal(Interval(0.5, 1.5) < self.index, second_only)
    tm.assert_numpy_array_equal(self.index > Interval(0.5, 1.5), second_only)

    tm.assert_numpy_array_equal(self.index == self.index, both_true)
    tm.assert_numpy_array_equal(self.index <= self.index, both_true)
    tm.assert_numpy_array_equal(self.index >= self.index, both_true)

    tm.assert_numpy_array_equal(self.index < self.index, both_false)
    tm.assert_numpy_array_equal(self.index > self.index, both_false)

    left_closed = IntervalIndex.from_breaks([0, 1, 2], "left")
    tm.assert_numpy_array_equal(self.index == left_closed, both_false)

    tm.assert_numpy_array_equal(
        self.index == self.index.values, np.array([True, True])
    )
    tm.assert_numpy_array_equal(
        self.index.values == self.index, np.array([True, True])
    )
    tm.assert_numpy_array_equal(
        self.index <= self.index.values, np.array([True, True])
    )
    tm.assert_numpy_array_equal(
        self.index != self.index.values, np.array([False, False])
    )
    tm.assert_numpy_array_equal(
        self.index > self.index.values, np.array([False, False])
    )
    tm.assert_numpy_array_equal(
        self.index.values > self.index, np.array([False, False])
    )

    # invalid comparisons
    tm.assert_numpy_array_equal(self.index == 0, np.array([False, False]))
    tm.assert_numpy_array_equal(
        self.index == self.index.left, np.array([False, False])
    )

    with pytest.raises(TypeError, match="unorderable types"):
        self.index > 0
    with pytest.raises(TypeError, match="unorderable types"):
        self.index <= 0

    msg = r"unorderable types: Interval\(\) > int\(\)"
    with pytest.raises(TypeError, match=msg):
        self.index > np.arange(2)

    msg = "Lengths must match to compare"
    with pytest.raises(ValueError, match=msg):
        self.index > np.arange(3)
def test_constructor(self):
    """Exercise the ``Categorical`` constructor across many input forms.

    Covers arrays with and without explicit categories, duplicate
    categories (must raise), Categorical and Series inputs, the dtype of
    inferred categories, single-element and scalar inputs, and calls that
    must not emit warnings.
    """
    exp_arr = np.array(["a", "b", "c", "a", "b", "c"], dtype=np.object_)
    tm.assert_numpy_array_equal(Categorical(exp_arr).__array__(), exp_arr)
    for order in (["a", "b", "c"], ["c", "b", "a"]):
        built = Categorical(exp_arr, categories=order)
        tm.assert_numpy_array_equal(built.__array__(), exp_arr)

    # categories must be unique
    msg = "Categorical categories must be unique"
    with pytest.raises(ValueError, match=msg):
        Categorical([1, 2], [1, 2, 2])
    with pytest.raises(ValueError, match=msg):
        Categorical(["a", "b"], ["a", "b", "b"])

    # The default should be unordered
    assert not Categorical(["a", "b", "c", "a"]).ordered

    # Categorical as input
    for kwargs in (
        {},
        {"categories": ["a", "b", "c", "d"]},
        {"categories": ["a", "c", "b"]},
    ):
        source = Categorical(["a", "b", "c", "a"], **kwargs)
        tm.assert_categorical_equal(source, Categorical(source))

    c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"])
    c2 = Categorical(c1, categories=["a", "b", "c"])
    tm.assert_numpy_array_equal(c1.__array__(), c2.__array__())
    tm.assert_index_equal(c2.categories, Index(["a", "b", "c"]))

    # Series of dtype category
    for order in (["a", "b", "c", "d"], ["a", "c", "b"]):
        source = Categorical(["a", "b", "c", "a"], categories=order)
        tm.assert_categorical_equal(source, Categorical(Series(source)))

    # Series
    tm.assert_categorical_equal(
        Categorical(["a", "b", "c", "a"]),
        Categorical(Series(["a", "b", "c", "a"])),
    )
    tm.assert_categorical_equal(
        Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]),
        Categorical(Series(["a", "b", "c", "a"]), categories=["a", "b", "c", "d"]),
    )

    # This should result in integer categories, not float!
    cat = Categorical([1, 2, 3, np.nan], categories=[1, 2, 3])
    assert is_integer_dtype(cat.categories)

    # https://github.com/pandas-dev/pandas/issues/3678
    cat = Categorical([np.nan, 1, 2, 3])
    assert is_integer_dtype(cat.categories)

    # this should result in floats
    for float_values in ([np.nan, 1, 2.0, 3], [np.nan, 1.0, 2.0, 3.0]):
        cat = Categorical(float_values)
        assert is_float_dtype(cat.categories)

    # This doesn't work -> this would probably need some kind of "remember
    # the original type" feature to try to cast the array interface result
    # to...

    # vals = np.asarray(cat[cat.notna()])
    # assert is_integer_dtype(vals)

    # corner cases; the last entry checks that scalars are converted to lists
    for values, only_category in (([1], 1), (["a"], "a"), (1, 1)):
        cat = Categorical(values)
        assert len(cat.categories) == 1
        assert cat.categories[0] == only_category
        assert len(cat.codes) == 1
        assert cat.codes[0] == 0

    # two arrays
    #  - when the first is an integer dtype and the second is not
    #  - when the resulting codes are all -1/NaN
    with tm.assert_produces_warning(None):
        Categorical([0, 1, 2, 0, 1, 2], categories=["a", "b", "c"])

    with tm.assert_produces_warning(None):
        Categorical([0, 1, 2, 0, 1, 2], categories=[3, 4, 5])

    # the next one are from the old docs
    with tm.assert_produces_warning(None):
        Categorical([0, 1, 2, 0, 1, 2], [1, 2, 3])
        cat = Categorical([1, 2], categories=[1, 2, 3])

    # this is a legitimate constructor
    with tm.assert_produces_warning(None):
        Categorical(
            np.array([], dtype="int64"), categories=[3, 2, 1], ordered=True
        )
def test_join_multi():
    """Join a two-level MultiIndex with a single Index named after one level.

    Inner, left and right joins are checked in both call directions,
    including the returned indexers.
    """
    # GH 10665
    levels = [np.arange(4), np.arange(4)]
    midx = pd.MultiIndex.from_product(levels, names=["a", "b"])
    idx = Index([1, 2, 5], name="b")

    def check(joined, lidx, ridx, exp_joined, exp_lidx, exp_ridx):
        # ``exp_lidx is None`` means no left indexer is expected at all.
        tm.assert_index_equal(joined, exp_joined)
        if exp_lidx is None:
            assert lidx is None
        else:
            tm.assert_numpy_array_equal(lidx, exp_lidx)
        tm.assert_numpy_array_equal(ridx, exp_ridx)

    # inner
    exp_idx = pd.MultiIndex.from_product([np.arange(4), [1, 2]], names=["a", "b"])
    exp_lidx = np.array([1, 2, 5, 6, 9, 10, 13, 14], dtype=np.intp)
    exp_ridx = np.array([0, 1, 0, 1, 0, 1, 0, 1], dtype=np.intp)
    jidx, lidx, ridx = midx.join(idx, how="inner", return_indexers=True)
    check(jidx, lidx, ridx, exp_idx, exp_lidx, exp_ridx)
    # flip
    jidx, ridx, lidx = idx.join(midx, how="inner", return_indexers=True)
    check(jidx, lidx, ridx, exp_idx, exp_lidx, exp_ridx)

    # keep MultiIndex
    exp_ridx = np.array(
        [-1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1], dtype=np.intp
    )
    jidx, lidx, ridx = midx.join(idx, how="left", return_indexers=True)
    check(jidx, lidx, ridx, midx, None, exp_ridx)
    # flip
    jidx, ridx, lidx = idx.join(midx, how="right", return_indexers=True)
    check(jidx, lidx, ridx, midx, None, exp_ridx)
def test_list():
    """``to_numeric`` converts a list of numeric strings to a float array."""
    raw = ["1", "-3.14", "7"]
    converted = to_numeric(raw)
    tm.assert_numpy_array_equal(converted, np.array([1, -3.14, 7]))
def test_get_indexer_date_objs(self): rng = date_range("1/1/2000", periods=20) result = rng.get_indexer(rng.map(lambda x: x.date())) expected = rng.get_indexer(rng) tm.assert_numpy_array_equal(result, expected)
def test_downcast_basic(data, kwargs, exp_dtype): # see gh-13352 result = to_numeric(data, **kwargs) expected = np.array([1, 2, 3], dtype=exp_dtype) tm.assert_numpy_array_equal(result, expected)
def test_get_indexer(self):
    """Check ``DatetimeIndex.get_indexer`` for each fill method and tolerance form."""
    idx = date_range("2000-01-01", periods=3)
    tm.assert_numpy_array_equal(
        idx.get_indexer(idx), np.array([0, 1, 2], dtype=np.intp)
    )

    target = idx[0] + pd.to_timedelta(["-1 hour", "12 hours", "1 day 1 hour"])
    by_method = (
        ("pad", [-1, 0, 1]),
        ("backfill", [0, 1, 2]),
        ("nearest", [0, 1, 1]),
    )
    for method, positions in by_method:
        tm.assert_numpy_array_equal(
            idx.get_indexer(target, method), np.array(positions, dtype=np.intp)
        )

    tm.assert_numpy_array_equal(
        idx.get_indexer(target, "nearest", tolerance=pd.Timedelta("1 hour")),
        np.array([0, -1, 1], dtype=np.intp),
    )

    # A per-element tolerance may mix Timedelta and timedelta64 objects.
    one_hour = pd.Timedelta("1 hour")
    tol_raw = [one_hour, pd.Timedelta("1 hour"), one_hour.to_timedelta64()]
    tm.assert_numpy_array_equal(
        idx.get_indexer(
            target, "nearest", tolerance=[np.timedelta64(x) for x in tol_raw]
        ),
        np.array([0, -1, 1], dtype=np.intp),
    )

    tol_bad = [
        pd.Timedelta("2 hour").to_timedelta64(),
        pd.Timedelta("1 hour").to_timedelta64(),
        "foo",
    ]
    msg = "Could not convert 'foo' to NumPy timedelta"
    with pytest.raises(ValueError, match=msg):
        idx.get_indexer(target, "nearest", tolerance=tol_bad)
    with pytest.raises(ValueError, match="abbreviation w/o a number"):
        idx.get_indexer(idx[[0]], method="nearest", tolerance="foo")
def test_duplicated(idx_dup, keep, expected): result = idx_dup.duplicated(keep=keep) tm.assert_numpy_array_equal(result, expected)
def test_parsing_valid_dates(data, expected):
    """``tslib.array_to_datetime`` on ``data`` yields ``expected`` as ``M8[ns]``."""
    raw = np.array(data, dtype=object)
    # The second element of the returned pair is not checked here.
    parsed, _ = tslib.array_to_datetime(raw)
    wanted = np_array_datetime64_compat(expected, dtype="M8[ns]")
    tm.assert_numpy_array_equal(parsed, wanted)