def test_astype_object(self): tdi = pd.TimedeltaIndex(['1 Day', '3 Hours']) arr = TimedeltaArrayMixin(tdi) asobj = arr.astype('O') assert isinstance(asobj, np.ndarray) assert asobj.dtype == 'O' assert list(asobj) == list(tdi)
def test_astype_object(self): tdi = pd.TimedeltaIndex(['1 Day', '3 Hours']) arr = TimedeltaArray(tdi) asobj = arr.astype('O') assert isinstance(asobj, np.ndarray) assert asobj.dtype == 'O' assert list(asobj) == list(tdi)
def test_to_pytimedelta(self, timedelta_index): tdi = timedelta_index arr = TimedeltaArray(tdi) expected = tdi.to_pytimedelta() result = arr.to_pytimedelta() tm.assert_numpy_array_equal(result, expected)
def test_to_pytimedelta(self, timedelta_index): tdi = timedelta_index arr = TimedeltaArrayMixin(tdi) expected = tdi.to_pytimedelta() result = arr.to_pytimedelta() tm.assert_numpy_array_equal(result, expected)
def test_total_seconds(self, timedelta_index): tdi = timedelta_index arr = TimedeltaArrayMixin(tdi) expected = tdi.total_seconds() result = arr.total_seconds() tm.assert_numpy_array_equal(result, expected.values)
def test_total_seconds(self, timedelta_index): tdi = timedelta_index arr = TimedeltaArray(tdi) expected = tdi.total_seconds() result = arr.total_seconds() tm.assert_numpy_array_equal(result, expected.values)
def _add_delta(self, delta): """ Add a timedelta-like, DateOffset, or TimedeltaIndex-like object to self. Parameters ---------- delta : {timedelta, np.timedelta64, DateOffset, TimedeltaIndex, ndarray[timedelta64]} Returns ------- result : same type as self Notes ----- The result's name is set outside of _add_delta by the calling method (__add__ or __sub__) """ from pandas.core.arrays import TimedeltaArrayMixin if isinstance(delta, (Tick, timedelta, np.timedelta64)): new_values = self._add_delta_td(delta) elif is_timedelta64_dtype(delta): if not isinstance(delta, TimedeltaArrayMixin): delta = TimedeltaArrayMixin(delta) new_values = self._add_delta_tdi(delta) else: new_values = self.astype('O') + delta tz = 'UTC' if self.tz is not None else None result = type(self)(new_values, tz=tz, freq='infer') if self.tz is not None and self.tz is not utc: result = result.tz_convert(self.tz) return result
def test_min_max_empty(self, skipna): arr = TimedeltaArray._from_sequence([]) result = arr.min(skipna=skipna) assert result is pd.NaT result = arr.max(skipna=skipna) assert result is pd.NaT
def __sub__(self, other): other = lib.item_from_zerodim(other) if isinstance(other, (ABCSeries, ABCDataFrame)): return NotImplemented # scalar others elif other is NaT: result = self._sub_nat() elif isinstance(other, (Tick, timedelta, np.timedelta64)): result = self._add_delta(-other) elif isinstance(other, DateOffset): # specifically _not_ a Tick result = self._add_offset(-other) elif isinstance(other, (datetime, np.datetime64)): result = self._sub_datetimelike_scalar(other) elif lib.is_integer(other): # This check must come after the check for np.timedelta64 # as is_integer returns True for these if not is_period_dtype(self): maybe_integer_op_deprecated(self) result = self._time_shift(-other) elif isinstance(other, Period): result = self._sub_period(other) # array-like others elif is_timedelta64_dtype(other): # TimedeltaIndex, ndarray[timedelta64] result = self._add_delta(-other) elif is_offsetlike(other): # Array/Index of DateOffset objects result = self._addsub_offset_array(other, operator.sub) elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other): # DatetimeIndex, ndarray[datetime64] result = self._sub_datetime_arraylike(other) elif is_period_dtype(other): # PeriodIndex result = self._sub_period_array(other) elif is_integer_dtype(other): if not is_period_dtype(self): maybe_integer_op_deprecated(self) result = self._addsub_int_array(other, operator.sub) elif isinstance(other, ABCIndexClass): raise TypeError("cannot subtract {cls} and {typ}".format( cls=type(self).__name__, typ=type(other).__name__)) elif is_float_dtype(other): # Explicitly catch invalid dtypes raise TypeError("cannot subtract {dtype}-dtype from {cls}".format( dtype=other.dtype, cls=type(self).__name__)) elif is_extension_array_dtype(other): # Categorical op will raise; defer explicitly return NotImplemented else: # pragma: no cover return NotImplemented if is_timedelta64_dtype(result) and isinstance(result, np.ndarray): from pandas.core.arrays import TimedeltaArrayMixin # TODO: infer freq? return TimedeltaArrayMixin(result) return result
def test_int_properties(self, timedelta_index, propname): tdi = timedelta_index arr = TimedeltaArrayMixin(tdi) result = getattr(arr, propname) expected = np.array(getattr(tdi, propname), dtype=result.dtype) tm.assert_numpy_array_equal(result, expected)
def __add__(self, other): other = lib.item_from_zerodim(other) if isinstance(other, (ABCSeries, ABCDataFrame)): return NotImplemented # scalar others elif other is NaT: result = self._add_nat() elif isinstance(other, (Tick, timedelta, np.timedelta64)): result = self._add_delta(other) elif isinstance(other, DateOffset): # specifically _not_ a Tick result = self._add_offset(other) elif isinstance(other, (datetime, np.datetime64)): result = self._add_datetimelike_scalar(other) elif lib.is_integer(other): # This check must come after the check for np.timedelta64 # as is_integer returns True for these if not is_period_dtype(self): maybe_integer_op_deprecated(self) result = self._time_shift(other) # array-like others elif is_timedelta64_dtype(other): # TimedeltaIndex, ndarray[timedelta64] result = self._add_delta(other) elif is_offsetlike(other): # Array/Index of DateOffset objects result = self._addsub_offset_array(other, operator.add) elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other): # DatetimeIndex, ndarray[datetime64] return self._add_datetime_arraylike(other) elif is_integer_dtype(other): if not is_period_dtype(self): maybe_integer_op_deprecated(self) result = self._addsub_int_array(other, operator.add) elif is_float_dtype(other): # Explicitly catch invalid dtypes raise TypeError("cannot add {dtype}-dtype to {cls}" .format(dtype=other.dtype, cls=type(self).__name__)) elif is_period_dtype(other): # if self is a TimedeltaArray and other is a PeriodArray with # a timedelta-like (i.e. Tick) freq, this operation is valid. # Defer to the PeriodArray implementation. # In remaining cases, this will end up raising TypeError. return NotImplemented elif is_extension_array_dtype(other): # Categorical op will raise; defer explicitly return NotImplemented else: # pragma: no cover return NotImplemented if is_timedelta64_dtype(result) and isinstance(result, np.ndarray): from pandas.core.arrays import TimedeltaArrayMixin # TODO: infer freq? return TimedeltaArrayMixin(result) return result
def test_from_tdi(self): tdi = pd.TimedeltaIndex(['1 Day', '3 Hours']) arr = TimedeltaArrayMixin(tdi) assert list(arr) == list(tdi) # Check that Index.__new__ knows what to do with TimedeltaArray tdi2 = pd.Index(arr) assert isinstance(tdi2, pd.TimedeltaIndex) assert list(tdi2) == list(arr)
def test_astype_int(self, dtype): arr = TimedeltaArray._from_sequence([pd.Timedelta('1H'), pd.Timedelta('2H')]) result = arr.astype(dtype) if np.dtype(dtype).kind == 'u': expected_dtype = np.dtype('uint64') else: expected_dtype = np.dtype('int64') expected = arr.astype(expected_dtype) assert result.dtype == expected_dtype tm.assert_numpy_array_equal(result, expected)
def test_astype_int(self, dtype): arr = TimedeltaArray._from_sequence( [pd.Timedelta('1H'), pd.Timedelta('2H')]) result = arr.astype(dtype) if np.dtype(dtype).kind == 'u': expected_dtype = np.dtype('uint64') else: expected_dtype = np.dtype('int64') expected = arr.astype(expected_dtype) assert result.dtype == expected_dtype tm.assert_numpy_array_equal(result, expected)
def test_min_max(self): arr = TimedeltaArray._from_sequence([ '3H', '3H', 'NaT', '2H', '5H', '4H', ]) result = arr.min() expected = pd.Timedelta('2H') assert result == expected result = arr.max() expected = pd.Timedelta('5H') assert result == expected result = arr.min(skipna=False) assert result is pd.NaT result = arr.max(skipna=False) assert result is pd.NaT
def to_perioddelta(self, freq): """ Calculate TimedeltaArray of difference between index values and index converted to PeriodArray at specified freq. Used for vectorized offsets Parameters ---------- freq: Period frequency Returns ------- TimedeltaArray/Index """ # TODO: consider privatizing (discussion in GH#23113) from pandas.core.arrays.timedeltas import TimedeltaArrayMixin i8delta = self.asi8 - self.to_period(freq).to_timestamp().asi8 return TimedeltaArrayMixin(i8delta)
def test_take_fill_valid(self, timedelta_index): tdi = timedelta_index arr = TimedeltaArray(tdi) td1 = pd.Timedelta(days=1) result = arr.take([-1, 1], allow_fill=True, fill_value=td1) assert result[0] == td1 now = pd.Timestamp.now() with pytest.raises(ValueError): # fill_value Timestamp invalid arr.take([0, 1], allow_fill=True, fill_value=now) with pytest.raises(ValueError): # fill_value Period invalid arr.take([0, 1], allow_fill=True, fill_value=now.to_period('D'))
def test_from_sequence_dtype(self): msg = r"Only timedelta64\[ns\] dtype is valid" with pytest.raises(ValueError, match=msg): TimedeltaArray._from_sequence([], dtype=object) with pytest.raises(ValueError, match=msg): TimedeltaArray([], dtype=object)
def array(data, # type: Sequence[object] dtype=None, # type: Optional[Union[str, np.dtype, ExtensionDtype]] copy=True, # type: bool ): # type: (...) -> ExtensionArray """ Create an array. .. versionadded:: 0.24.0 Parameters ---------- data : Sequence of objects The scalars inside `data` should be instances of the scalar type for `dtype`. It's expected that `data` represents a 1-dimensional array of data. When `data` is an Index or Series, the underlying array will be extracted from `data`. dtype : str, np.dtype, or ExtensionDtype, optional The dtype to use for the array. This may be a NumPy dtype or an extension type registered with pandas using :meth:`pandas.api.extensions.register_extension_dtype`. If not specified, there are two possibilities: 1. When `data` is a :class:`Series`, :class:`Index`, or :class:`ExtensionArray`, the `dtype` will be taken from the data. 2. Otherwise, pandas will attempt to infer the `dtype` from the data. Note that when `data` is a NumPy array, ``data.dtype`` is *not* used for inferring the array type. This is because NumPy cannot represent all the types of data that can be held in extension arrays. Currently, pandas will infer an extension dtype for sequences of ============================== ===================================== scalar type Array Type ============================= ===================================== * :class:`pandas.Interval` :class:`pandas.IntervalArray` * :class:`pandas.Period` :class:`pandas.arrays.PeriodArray` * :class:`datetime.datetime` :class:`pandas.arrays.DatetimeArray` * :class:`datetime.timedelta` :class:`pandas.arrays.TimedeltaArray` ============================= ===================================== For all other cases, NumPy's usual inference rules will be used. copy : bool, default True Whether to copy the data, even if not necessary. Depending on the type of `data`, creating the new array may require copying data, even if ``copy=False``. Returns ------- ExtensionArray The newly created array. Raises ------ ValueError When `data` is not 1-dimensional. See Also -------- numpy.array : Construct a NumPy array. arrays.PandasArray : ExtensionArray wrapping a NumPy array. Series : Construct a pandas Series. Index : Construct a pandas Index. Notes ----- Omitting the `dtype` argument means pandas will attempt to infer the best array type from the values in the data. As new array types are added by pandas and 3rd party libraries, the "best" array type may change. We recommend specifying `dtype` to ensure that 1. the correct array type for the data is returned 2. the returned array type doesn't change as new extension types are added by pandas and third-party libraries Additionally, if the underlying memory representation of the returned array matters, we recommend specifying the `dtype` as a concrete object rather than a string alias or allowing it to be inferred. For example, a future version of pandas or a 3rd-party library may include a dedicated ExtensionArray for string data. In this event, the following would no longer return a :class:`arrays.PandasArray` backed by a NumPy array. >>> pd.array(['a', 'b'], dtype=str) <PandasArray> ['a', 'b'] Length: 2, dtype: str32 This would instead return the new ExtensionArray dedicated for string data. If you really need the new array to be backed by a NumPy array, specify that in the dtype. >>> pd.array(['a', 'b'], dtype=np.dtype("<U1")) <PandasArray> ['a', 'b'] Length: 2, dtype: str32 Or use the dedicated constructor for the array you're expecting, and wrap that in a PandasArray >>> pd.array(np.array(['a', 'b'], dtype='<U1')) <PandasArray> ['a', 'b'] Length: 2, dtype: str32 Examples -------- If a dtype is not specified, `data` is passed through to :meth:`numpy.array`, and a :class:`arrays.PandasArray` is returned. >>> pd.array([1, 2]) <PandasArray> [1, 2] Length: 2, dtype: int64 Or the NumPy dtype can be specified >>> pd.array([1, 2], dtype=np.dtype("int32")) <PandasArray> [1, 2] Length: 2, dtype: int32 You can use the string alias for `dtype` >>> pd.array(['a', 'b', 'a'], dtype='category') [a, b, a] Categories (2, object): [a, b] Or specify the actual dtype >>> pd.array(['a', 'b', 'a'], ... dtype=pd.CategoricalDtype(['a', 'b', 'c'], ordered=True)) [a, b, a] Categories (3, object): [a < b < c] Because omitting the `dtype` passes the data through to NumPy, a mixture of valid integers and NA will return a floating-point NumPy array. >>> pd.array([1, 2, np.nan]) <PandasArray> [1.0, 2.0, nan] Length: 3, dtype: float64 To use pandas' nullable :class:`pandas.arrays.IntegerArray`, specify the dtype: >>> pd.array([1, 2, np.nan], dtype='Int64') <IntegerArray> [1, 2, NaN] Length: 3, dtype: Int64 Pandas will infer an ExtensionArray for some types of data: >>> pd.array([pd.Period('2000', freq="D"), pd.Period("2000", freq="D")]) <PeriodArray> ['2000-01-01', '2000-01-01'] Length: 2, dtype: period[D] `data` must be 1-dimensional. A ValueError is raised when the input has the wrong dimensionality. >>> pd.array(1) Traceback (most recent call last): ... ValueError: Cannot pass scalar '1' to 'pandas.array'. """ from pandas.core.arrays import ( period_array, ExtensionArray, IntervalArray, PandasArray, DatetimeArrayMixin, TimedeltaArrayMixin, ) from pandas.core.internals.arrays import extract_array if lib.is_scalar(data): msg = ( "Cannot pass scalar '{}' to 'pandas.array'." ) raise ValueError(msg.format(data)) data = extract_array(data, extract_numpy=True) if dtype is None and isinstance(data, ExtensionArray): dtype = data.dtype # this returns None for not-found dtypes. if isinstance(dtype, compat.string_types): dtype = registry.find(dtype) or dtype if is_extension_array_dtype(dtype): cls = dtype.construct_array_type() return cls._from_sequence(data, dtype=dtype, copy=copy) if dtype is None: inferred_dtype = lib.infer_dtype(data) if inferred_dtype == 'period': try: return period_array(data, copy=copy) except tslibs.IncompatibleFrequency: # We may have a mixture of frequencies. # We choose to return an ndarray, rather than raising. pass elif inferred_dtype == 'interval': try: return IntervalArray(data, copy=copy) except ValueError: # We may have a mixture of `closed` here. # We choose to return an ndarray, rather than raising. pass elif inferred_dtype.startswith('datetime'): # datetime, datetime64 try: return DatetimeArrayMixin._from_sequence(data, copy=copy) except ValueError: # Mixture of timezones, fall back to PandasArray pass elif inferred_dtype.startswith('timedelta'): # timedelta, timedelta64 return TimedeltaArrayMixin._from_sequence(data, copy=copy) # TODO(BooleanArray): handle this type result = PandasArray._from_sequence(data, dtype=dtype, copy=copy) return result