Example #1
def objects_to_td64ns(data, unit="ns", errors="raise"):
    """
    Convert an object-dtyped or string-dtyped array into a
    timedelta64[ns]-dtyped array.

    Parameters
    ----------
    data : ndarray or Index
    unit : str, default "ns"
        The timedelta unit to treat integers as multiples of.
    errors : {"raise", "coerce", "ignore"}, default "raise"
        How to handle elements that cannot be converted to timedelta64[ns].
        See ``pandas.to_timedelta`` for details.

    Returns
    -------
    numpy.ndarray : timedelta64[ns] array converted from data

    Raises
    ------
    ValueError : Data cannot be converted to timedelta64[ns].

    Notes
    -----
    Unlike `pandas.to_timedelta`, setting `errors=ignore` here will not cause
    errors to be ignored; they are caught and subsequently ignored at a
    higher level.
    """
    # coerce Index to np.ndarray, converting string-dtype if necessary
    values = np.array(data, dtype=np.object_, copy=False)

    result = array_to_timedelta64(values,
                                  unit=unit, errors=errors)
    return result.view('timedelta64[ns]')
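
A minimal usage sketch of the same conversion path, using only the public
pd.to_timedelta API (the input data here is hypothetical):

import numpy as np
import pandas as pd

# object-dtyped array of timedelta strings, as objects_to_td64ns would receive
data = np.array(["1 days", "00:00:05", "500ms"], dtype=object)

# pd.to_timedelta parses each element and returns a timedelta64[ns]-backed index
result = pd.to_timedelta(data)
print(result.dtype)  # timedelta64[ns]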
Example #2
def _convert_listlike(arg, unit='ns', box=True, errors='raise', name=None):
    """Convert a list of objects to a timedelta index object."""

    if isinstance(arg, (list, tuple)) or not hasattr(arg, 'dtype'):
        arg = np.array(list(arg), dtype='O')

    # these are shortcut-able
    if is_timedelta64_dtype(arg):
        value = arg.astype('timedelta64[ns]')
    elif is_integer_dtype(arg):
        value = arg.astype('timedelta64[{unit}]'.format(unit=unit)).astype(
            'timedelta64[ns]', copy=False)
    else:
        try:
            value = array_to_timedelta64(ensure_object(arg),
                                         unit=unit, errors=errors)
            value = value.astype('timedelta64[ns]', copy=False)
        except ValueError:
            if errors == 'ignore':
                return arg
            else:
                # This else-block accounts for the cases when errors='raise'
                # and errors='coerce'. If errors == 'raise', these errors
                # should be raised. If errors == 'coerce', we shouldn't
                # expect any errors to be raised, since all parsing errors
                # cause coercion to pd.NaT. However, if an error / bug is
                # introduced that causes an Exception to be raised, we would
                # like to surface it.
                raise

    if box:
        from pandas import TimedeltaIndex
        value = TimedeltaIndex(value, unit='ns', name=name)
    return value
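
The shortcut branches above can be exercised through the public
pd.to_timedelta wrapper; a small sketch with made-up inputs:

import numpy as np
import pandas as pd

# integer dtype: values are treated as multiples of `unit`
pd.to_timedelta(np.array([1, 2, 3]), unit="s")

# object dtype: elements are parsed; with errors="coerce", unparseable
# entries become NaT instead of raising
pd.to_timedelta(["1 days", "not a timedelta"], errors="coerce")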
Example #3
    def __new__(cls, data=None, unit=None, freq=None, start=None, end=None,
                periods=None, closed=None, dtype=None, copy=False,
                name=None, verify_integrity=True):

        if isinstance(data, TimedeltaIndex) and freq is None and name is None:
            if copy:
                return data.copy()
            else:
                return data._shallow_copy()

        freq, freq_infer = dtl.maybe_infer_freq(freq)

        if data is None:
            # TODO: Remove this block and associated kwargs; GH#20535
            result = cls._generate_range(start, end, periods, freq,
                                         closed=closed)
            result.name = name
            return result

        if unit is not None:
            data = to_timedelta(data, unit=unit, box=False)

        if is_scalar(data):
            raise ValueError('TimedeltaIndex() must be called with a '
                             'collection of some kind, {data} was passed'
                             .format(data=repr(data)))

        # convert if not already
        if getattr(data, 'dtype', None) != _TD_DTYPE:
            data = to_timedelta(data, unit=unit, box=False)
        elif copy:
            data = np.array(data, copy=True)

        data = np.array(data, copy=False)
        if data.dtype == np.object_:
            data = array_to_timedelta64(data)
        if data.dtype != _TD_DTYPE:
            if is_timedelta64_dtype(data):
                # non-nano unit
                # TODO: watch out for overflows
                data = data.astype(_TD_DTYPE)
            else:
                data = ensure_int64(data).view(_TD_DTYPE)

        assert data.dtype == 'm8[ns]', data.dtype

        subarr = cls._simple_new(data, name=name, freq=freq)
        # check that we are matching freqs
        if verify_integrity and len(subarr) > 0:
            if freq is not None and not freq_infer:
                cls._validate_frequency(subarr, freq)

        if freq_infer:
            inferred = subarr.inferred_freq
            if inferred:
                subarr.freq = to_offset(inferred)

        return subarr
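
For reference, the normalization steps above correspond to ordinary public
construction calls like the following (hypothetical data):

import numpy as np
import pandas as pd

# strings go through the object-dtype branch (array_to_timedelta64)
tdi = pd.TimedeltaIndex(["1 days", "2 days", "3 days"])

# an existing timedelta64 array is accepted directly; in the version shown
# above, non-nanosecond units are cast to m8[ns]
arr = np.arange(3) * np.timedelta64(1, "D")
tdi2 = pd.TimedeltaIndex(arr)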
Example #4
    def _simple_new(cls, values, name=None, freq=None, **kwargs):
        values = np.array(values, copy=False)
        if values.dtype == np.object_:
            values = array_to_timedelta64(values)
        if values.dtype != _TD_DTYPE:
            values = _ensure_int64(values).view(_TD_DTYPE)

        result = object.__new__(cls)
        result._data = values
        result.name = name
        result.freq = freq
        result._reset_identity()
        return result
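
The `_ensure_int64(...).view(_TD_DTYPE)` fallback relies on a plain NumPy
reinterpretation; a standalone illustration:

import numpy as np

ints = np.array([1_000_000_000, 2_000_000_000], dtype="int64")
# viewing int64 as timedelta64[ns] reinterprets the same 8-byte integers
# without copying: 1_000_000_000 ns == 1 second
tds = ints.view("m8[ns]")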
Example #5
    def __new__(cls, values, freq=None):

        freq, freq_infer = dtl.maybe_infer_freq(freq)

        values = np.array(values, copy=False)
        if values.dtype == np.object_:
            values = array_to_timedelta64(values)

        result = cls._simple_new(values, freq=freq)
        if freq_infer:
            result.freq = to_offset(result.inferred_freq)

        return result
Example #6
    def _simple_new(cls, values, freq=None, **kwargs):
        values = np.array(values, copy=False)
        if values.dtype == np.object_:
            values = array_to_timedelta64(values)
        if values.dtype != _TD_DTYPE:
            if is_timedelta64_dtype(values):
                # non-nano unit
                values = values.astype(_TD_DTYPE)
            else:
                values = _ensure_int64(values).view(_TD_DTYPE)

        result = object.__new__(cls)
        result._data = values
        result._freq = freq
        return result
Example #7
    def __new__(cls, values, freq=None):

        freq, freq_infer = dtl.maybe_infer_freq(freq)

        values = np.array(values, copy=False)
        if values.dtype == np.object_:
            values = array_to_timedelta64(values)

        result = cls._simple_new(values, freq=freq)
        if freq_infer:
            inferred = result.inferred_freq
            if inferred:
                result.freq = to_offset(inferred)

        return result
Example #8
    def _simple_new(cls, values, freq=None, **kwargs):
        values = np.array(values, copy=False)
        if values.dtype == np.object_:
            values = array_to_timedelta64(values)
        if values.dtype != _TD_DTYPE:
            if is_timedelta64_dtype(values):
                # non-nano unit
                values = values.astype(_TD_DTYPE)
            else:
                values = ensure_int64(values).view(_TD_DTYPE)

        result = object.__new__(cls)
        result._data = values
        result._freq = freq
        return result
Example #9
def astype_nansafe(
    arr: np.ndarray, dtype: DtypeObj, copy: bool = True, skipna: bool = False
) -> ArrayLike:
    """
    Cast the elements of an array to a given dtype in a nan-safe manner.

    Parameters
    ----------
    arr : ndarray
    dtype : np.dtype or ExtensionDtype
    copy : bool, default True
        If False, a view will be attempted but may fail, if
        e.g. the item sizes don't align.
    skipna : bool, default False
        Whether or not we should skip NaN when casting as a string-type.

    Raises
    ------
    ValueError
        The dtype was a datetime64/timedelta64 dtype, but it had no unit.
    """

    # We get here with 0-dim from sparse
    arr = np.atleast_1d(arr)

    # dispatch on extension dtype if needed
    if isinstance(dtype, ExtensionDtype):
        return dtype.construct_array_type()._from_sequence(arr, dtype=dtype, copy=copy)

    elif not isinstance(dtype, np.dtype):  # pragma: no cover
        raise ValueError("dtype must be np.dtype or ExtensionDtype")

    if arr.dtype.kind in ["m", "M"] and (
        issubclass(dtype.type, str) or dtype == _dtype_obj
    ):
        from pandas.core.construction import ensure_wrapped_if_datetimelike

        arr = ensure_wrapped_if_datetimelike(arr)
        return arr.astype(dtype, copy=copy)

    if issubclass(dtype.type, str):
        shape = arr.shape
        if arr.ndim > 1:
            arr = arr.ravel()
        return lib.ensure_string_array(
            arr, skipna=skipna, convert_na_value=False
        ).reshape(shape)

    elif is_datetime64_dtype(arr.dtype):
        if dtype == np.int64:
            if isna(arr).any():
                raise ValueError("Cannot convert NaT values to integer")
            return arr.view(dtype)

        # allow frequency conversions
        if dtype.kind == "M":
            return arr.astype(dtype)

        raise TypeError(f"cannot astype a datetimelike from [{arr.dtype}] to [{dtype}]")

    elif is_timedelta64_dtype(arr.dtype):
        if dtype == np.int64:
            if isna(arr).any():
                raise ValueError("Cannot convert NaT values to integer")
            return arr.view(dtype)

        elif dtype.kind == "m":
            return astype_td64_unit_conversion(arr, dtype, copy=copy)

        raise TypeError(f"cannot astype a timedelta from [{arr.dtype}] to [{dtype}]")

    elif np.issubdtype(arr.dtype, np.floating) and is_integer_dtype(dtype):
        return _astype_float_to_int_nansafe(arr, dtype, copy)

    elif is_object_dtype(arr.dtype):

        # if we have a datetime/timedelta array of objects
        # then coerce to a proper dtype and recall astype_nansafe

        if is_datetime64_dtype(dtype):
            from pandas import to_datetime

            return astype_nansafe(
                to_datetime(arr.ravel()).values.reshape(arr.shape),
                dtype,
                copy=copy,
            )
        elif is_timedelta64_dtype(dtype):
            # bc we know arr.dtype == object, this is equivalent to
            #  `np.asarray(to_timedelta(arr))`, but using a lower-level API that
            #  does not require a circular import.
            return array_to_timedelta64(arr).view("m8[ns]").astype(dtype, copy=False)

    if dtype.name in ("datetime64", "timedelta64"):
        msg = (
            f"The '{dtype.name}' dtype has no unit. Please pass in "
            f"'{dtype.name}[ns]' instead."
        )
        raise ValueError(msg)

    if copy or is_object_dtype(arr.dtype) or is_object_dtype(dtype):
        # Explicit copy, or required since NumPy can't view from / to object.
        return arr.astype(dtype, copy=True)

    return arr.astype(dtype, copy=copy)
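
The object-dtype timedelta branch above is what a public astype call ends up
in; a small sketch (hypothetical data, behaviour as of the pandas version
this example is taken from):

import pandas as pd

s = pd.Series(["1 days", "2 days", None], dtype=object)
# strings are parsed via array_to_timedelta64; the missing value becomes NaT
converted = s.astype("timedelta64[ns]")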
Example #10
    def test_array_to_timedelta64_string_with_unit_2d_raises(self):
        # check that the 'unit is not None and errors != "coerce"' path
        # in array_to_timedelta64 raises correctly with 2D values
        values = np.array([["1", 2], [3, "4"]], dtype=object)
        with pytest.raises(ValueError, match="unit must not be specified"):
            array_to_timedelta64(values, unit="s")
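
Roughly the public-facing behaviour this test guards, assuming a pandas
version where the same validation is applied in to_timedelta:

import pandas as pd

# combining an explicit unit with string inputs is rejected
pd.to_timedelta(["1", "2"], unit="s")   # ValueError: unit must not be specified ...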