예제 #1
0
def _new_DatetimeIndex(cls, d):
    """
    This is called upon unpickling, rather than the default which doesn't
    have arguments and breaks __new__
    """
    if "data" in d and not isinstance(d["data"], DatetimeIndex):
        # Avoid need to verify integrity by calling simple_new directly
        data = d.pop("data")
        if not isinstance(data, DatetimeArray):
            # For backward compat with older pickles, we may need to construct
            #  a DatetimeArray to adapt to the newer _simple_new signature
            tz = d.pop("tz")
            freq = d.pop("freq")
            dta = DatetimeArray._simple_new(data, dtype=tz_to_dtype(tz), freq=freq)
        else:
            dta = data
            for key in ["tz", "freq"]:
                # These are already stored in our DatetimeArray; if they are
                #  also in the pickle and don't match, we have a problem.
                if key in d:
                    assert d.pop(key) == getattr(dta, key)
        result = cls._simple_new(dta, **d)
    else:
        with warnings.catch_warnings():
            # TODO: If we knew what was going in to **d, we might be able to
            #  go through _simple_new instead
            warnings.simplefilter("ignore")
            result = cls.__new__(cls, **d)

    return result
예제 #2
0
파일: datetimes.py 프로젝트: dwhu/pandas
    def _simple_new(cls, values, name=None, freq=None, tz=None, dtype=None):
        """
        Build a new instance of ``cls`` around ``values``.

        The values are expected to be dtype-compatible already. A
        DatetimeArray input is first passed through the DatetimeArray
        constructor; ``dtype`` is only consulted on that path, since the
        dtype finally handed to DatetimeArray._simple_new is derived from
        ``tz``.
        """
        if isinstance(values, DatetimeArray):
            # Settle the dtype used for coercion: a truthy tz takes
            # priority, otherwise default to the naive nanosecond dtype.
            if tz:
                tz = validate_tz_from_dtype(dtype, tz)
                dtype = DatetimeTZDtype(tz=tz)
            elif dtype is None:
                dtype = _NS_DTYPE

            coerced = DatetimeArray(values, freq=freq, dtype=dtype)
            # From here on, trust what the coerced array reports.
            tz, freq, values = coerced.tz, coerced.freq, coerced._data

        # DatetimeArray._simple_new will accept either i8 or M8[ns] dtypes
        if isinstance(values, DatetimeIndex):
            values = values._data

        dtarr = DatetimeArray._simple_new(
            values, freq=freq, dtype=tz_to_dtype(tz)
        )
        assert isinstance(dtarr, DatetimeArray)

        new_index = object.__new__(cls)
        new_index._data = dtarr
        new_index.name = name
        new_index._no_setting_name = False
        # For groupby perf. See note in indexes/base about _index_data
        new_index._index_data = dtarr._data
        new_index._reset_identity()
        return new_index
예제 #3
0
파일: datetimes.py 프로젝트: vinks4u/pandas
    def __setstate__(self, state):
        """
        Necessary for making this object picklable.
        """
        if isinstance(state, dict):
            super().__setstate__(state)

        elif isinstance(state, tuple):

            # < 0.15 compat
            if len(state) == 2:
                nd_state, own_state = state
                data = np.empty(nd_state[1], dtype=nd_state[2])
                np.ndarray.__setstate__(data, nd_state)

                freq = own_state[1]
                tz = timezones.tz_standardize(own_state[2])
                dtype = tz_to_dtype(tz)
                dtarr = DatetimeArray._simple_new(data, freq=freq, dtype=dtype)

                self.name = own_state[0]

            else:  # pragma: no cover
                data = np.empty(state)
                np.ndarray.__setstate__(data, state)
                dtarr = DatetimeArray(data)

            self._data = dtarr
            self._reset_identity()

        else:
            raise Exception("invalid pickle state")
예제 #4
0
def _to_datetime_with_format(
    arg,
    orig_arg,
    name,
    tz,
    fmt: str,
    exact: bool,
    errors: Optional[str],
    infer_datetime_format: bool,
) -> Optional[Index]:
    """
    Parse ``arg`` using the explicit format ``fmt``.

    Returns whatever the strptime fallback helper returns, which may be
    None when parsing with the format did not produce a result.
    """
    try:
        if fmt == "%Y%m%d":
            # Dedicated shortcut for this format. It works on orig_arg
            # because a float-dtype arg may have been converted to
            # datetime64[ns] already.
            orig_arg = ensure_object(orig_arg)
            try:
                # may return None without raising
                ymd_values = _attempt_YYYYMMDD(orig_arg, errors=errors)
            except (ValueError, TypeError, OutOfBoundsDatetime) as err:
                raise ValueError(
                    "cannot convert the input to '%Y%m%d' date format"
                ) from err
            if ymd_values is not None:
                return _box_as_indexlike(ymd_values, utc=tz == "utc", name=name)

        # The shortcut did not apply or produced nothing: general path.
        return _array_strptime_with_fallback(
            arg, name, tz, fmt, exact, errors, infer_datetime_format
        )

    except ValueError as parse_error:
        # Fallback to try to convert datetime objects if timezone-aware
        #  datetime objects are found without passing `utc=True`
        try:
            values, tz = conversion.datetime_to_datetime64(arg)
            dta = DatetimeArray(values, dtype=tz_to_dtype(tz))
            return DatetimeIndex._simple_new(dta, name=name)
        except (ValueError, TypeError):
            # The rescue attempt failed too; surface the original error.
            raise parse_error
예제 #5
0
def _to_datetime_with_format(
    arg,
    orig_arg,
    name,
    tz,
    fmt: str,
    exact: bool,
    errors: str,
    infer_datetime_format: bool,
) -> Index | None:
    """
    Parse ``arg`` with the given format string.

    The "%Y%m%d" format gets a dedicated shortcut; everything else (and a
    shortcut that yields None) goes through the strptime fallback helper,
    whose result is returned as-is.
    """
    try:
        if fmt == "%Y%m%d":
            # orig_arg is used here because a float-dtype arg may already
            # have been converted to datetime64[ns]
            orig_arg = ensure_object(orig_arg)
            try:
                # may return None without raising
                shortcut = _attempt_YYYYMMDD(orig_arg, errors=errors)
            except (ValueError, TypeError, OutOfBoundsDatetime) as exc:
                raise ValueError(
                    "cannot convert the input to '%Y%m%d' date format"
                ) from exc
            if shortcut is not None:
                return _box_as_indexlike(shortcut, utc=(tz == "utc"), name=name)

        # fallback
        return _array_strptime_with_fallback(
            arg, name, tz, fmt, exact, errors, infer_datetime_format
        )

    except ValueError as parse_err:
        # Fallback to try to convert datetime objects if timezone-aware
        #  datetime objects are found without passing `utc=True`
        try:
            values, found_tz = conversion.datetime_to_datetime64(arg)
            dta = DatetimeArray(values, dtype=tz_to_dtype(found_tz))
            return DatetimeIndex._simple_new(dta, name=name)
        except (ValueError, TypeError):
            # Rescue failed as well: re-raise the original parsing error.
            raise parse_err
예제 #6
0
def _convert_listlike_datetimes(
    arg,
    format: str | None,
    name: Hashable = None,
    tz: Timezone | None = None,
    unit: str | None = None,
    errors: str = "raise",
    infer_datetime_format: bool = False,
    dayfirst: bool | None = None,
    yearfirst: bool | None = None,
    exact: bool = True,
):
    """
    Helper function for to_datetime. Performs the conversions of 1D listlike
    of dates

    Parameters
    ----------
    arg : list, tuple, ndarray, Series, Index
        date to be parsed
    name : object
        None or string for the Index name
    tz : object
        None or 'utc'
    unit : str
        None or string of the frequency of the passed data
    errors : str
        error handing behaviors from to_datetime, 'raise', 'coerce', 'ignore'
    infer_datetime_format : bool, default False
        inferring format behavior from to_datetime
    dayfirst : bool
        dayfirst parsing behavior from to_datetime
    yearfirst : bool
        yearfirst parsing behavior from to_datetime
    exact : bool, default True
        exact format matching behavior from to_datetime

    Returns
    -------
    Index-like of parsed dates
    """

    if isinstance(arg, (list, tuple)):
        arg = np.array(arg, dtype="O")

    arg_dtype = getattr(arg, "dtype", None)
    # these are shortcutable
    if is_datetime64tz_dtype(arg_dtype):
        if not isinstance(arg, (DatetimeArray, DatetimeIndex)):
            return DatetimeIndex(arg, tz=tz, name=name)
        if tz == "utc":
            arg = arg.tz_convert(None).tz_localize(tz)
        return arg

    elif is_datetime64_ns_dtype(arg_dtype):
        if not isinstance(arg, (DatetimeArray, DatetimeIndex)):
            try:
                return DatetimeIndex(arg, tz=tz, name=name)
            except ValueError:
                pass
        elif tz:
            # DatetimeArray, DatetimeIndex
            return arg.tz_localize(tz)

        return arg

    elif unit is not None:
        if format is not None:
            raise ValueError("cannot specify both format and unit")
        return _to_datetime_with_unit(arg, unit, name, tz, errors)
    elif getattr(arg, "ndim", 1) > 1:
        raise TypeError(
            "arg must be a string, datetime, list, tuple, 1-d array, or Series"
        )

    # warn if passing timedelta64, raise for PeriodDtype
    # NB: this must come after unit transformation
    orig_arg = arg
    try:
        arg, _ = maybe_convert_dtype(arg, copy=False)
    except TypeError:
        if errors == "coerce":
            npvalues = np.array(["NaT"],
                                dtype="datetime64[ns]").repeat(len(arg))
            return DatetimeIndex(npvalues, name=name)
        elif errors == "ignore":
            idx = Index(arg, name=name)
            return idx
        raise

    arg = ensure_object(arg)
    require_iso8601 = False

    if infer_datetime_format and format is None:
        format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)

    if format is not None:
        # There is a special fast-path for iso8601 formatted
        # datetime strings, so in those cases don't use the inferred
        # format because this path makes process slower in this
        # special case
        format_is_iso8601 = format_is_iso(format)
        if format_is_iso8601:
            require_iso8601 = not infer_datetime_format
            format = None

    if format is not None:
        res = _to_datetime_with_format(arg, orig_arg, name, tz, format, exact,
                                       errors, infer_datetime_format)
        if res is not None:
            return res

    assert format is None or infer_datetime_format
    utc = tz == "utc"
    result, tz_parsed = objects_to_datetime64ns(
        arg,
        dayfirst=dayfirst,
        yearfirst=yearfirst,
        utc=utc,
        errors=errors,
        require_iso8601=require_iso8601,
        allow_object=True,
    )

    if tz_parsed is not None:
        # We can take a shortcut since the datetime64 numpy array
        # is in UTC
        dta = DatetimeArray(result, dtype=tz_to_dtype(tz_parsed))
        return DatetimeIndex._simple_new(dta, name=name)

    utc = tz == "utc"
    return _box_as_indexlike(result, utc=utc, name=name)
예제 #7
0
def _convert_listlike_datetimes(
    arg,
    format,
    name=None,
    tz=None,
    unit=None,
    errors=None,
    infer_datetime_format=None,
    dayfirst=None,
    yearfirst=None,
    exact=None,
):
    """
    Helper function for to_datetime. Performs the conversions of 1D listlike
    of dates

    Parameters
    ----------
    arg : list, tuple, ndarray, Series, Index
        date to be parsed
    format : string
        None or format string used to parse the dates (e.g. "%Y%m%d")
    name : object
        None or string for the Index name
    tz : object
        None or 'utc'
    unit : string
        None or string of the frequency of the passed data
    errors : string
        error handling behaviors from to_datetime, 'raise', 'coerce', 'ignore'
    infer_datetime_format : boolean
        inferring format behavior from to_datetime
    dayfirst : boolean
        dayfirst parsing behavior from to_datetime
    yearfirst : boolean
        yearfirst parsing behavior from to_datetime
    exact : boolean
        exact format matching behavior from to_datetime

    Returns
    -------
    Index-like of parsed dates

    Raises
    ------
    ValueError
        If both ``format`` and ``unit`` are given, or if parsing with an
        explicit format fails and the timezone-aware fallback fails too.
    TypeError
        If ``arg`` has more than one dimension.
    """

    # Plain Python sequences are turned into an object-dtype ndarray first.
    if isinstance(arg, (list, tuple)):
        arg = np.array(arg, dtype="O")

    arg_dtype = getattr(arg, "dtype", None)
    # these are shortcutable
    if is_datetime64tz_dtype(arg_dtype):
        if not isinstance(arg, (DatetimeArray, DatetimeIndex)):
            return DatetimeIndex(arg, tz=tz, name=name)
        if tz == "utc":
            # error: Item "DatetimeIndex" of "Union[DatetimeArray, DatetimeIndex]" has
            # no attribute "tz_convert"
            arg = arg.tz_convert(None).tz_localize(tz)  # type: ignore
        return arg

    elif is_datetime64_ns_dtype(arg_dtype):
        if not isinstance(arg, (DatetimeArray, DatetimeIndex)):
            try:
                return DatetimeIndex(arg, tz=tz, name=name)
            except ValueError:
                # Best effort: if boxing fails, fall through and return
                # the input unchanged below.
                pass
        elif tz:
            # DatetimeArray, DatetimeIndex
            # error: Item "DatetimeIndex" of "Union[DatetimeArray, DatetimeIndex]" has
            # no attribute "tz_localize"
            return arg.tz_localize(tz)  # type: ignore

        return arg

    elif unit is not None:
        if format is not None:
            raise ValueError("cannot specify both format and unit")
        # Use the object's ``_values`` attribute when it has one.
        arg = getattr(arg, "_values", arg)

        # GH 30050 pass an ndarray to tslib.array_with_unit_to_datetime
        # because it expects an ndarray argument
        if isinstance(arg, IntegerArray):
            result = arg.astype(f"datetime64[{unit}]")
            tz_parsed = None
        else:

            result, tz_parsed = tslib.array_with_unit_to_datetime(
                arg, unit, errors=errors)

        if errors == "ignore":

            result = Index(result, name=name)
        else:
            result = DatetimeIndex(result, name=name)
        # GH 23758: We may still need to localize the result with tz
        # GH 25546: Apply tz_parsed first (from arg), then tz (from caller)
        # result will be naive but in UTC
        try:
            result = result.tz_localize("UTC").tz_convert(tz_parsed)
        except AttributeError:
            # Regular Index from 'ignore' path
            return result
        if tz is not None:
            if result.tz is None:
                result = result.tz_localize(tz)
            else:
                result = result.tz_convert(tz)
        return result
    elif getattr(arg, "ndim", 1) > 1:
        raise TypeError(
            "arg must be a string, datetime, list, tuple, 1-d array, or Series"
        )

    # warn if passing timedelta64, raise for PeriodDtype
    # NB: this must come after unit transformation
    # Keep the pre-conversion input: the "%Y%m%d" shortcut below uses it.
    orig_arg = arg
    try:
        arg, _ = maybe_convert_dtype(arg, copy=False)
    except TypeError:
        if errors == "coerce":
            # One NaT per input element.
            result = np.array(["NaT"], dtype="datetime64[ns]").repeat(len(arg))
            return DatetimeIndex(result, name=name)
        elif errors == "ignore":
            result = Index(arg, name=name)
            return result
        raise

    arg = ensure_object(arg)
    require_iso8601 = False

    if infer_datetime_format and format is None:
        format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)

    if format is not None:
        # There is a special fast-path for iso8601 formatted
        # datetime strings, so in those cases don't use the inferred
        # format because this path makes process slower in this
        # special case
        format_is_iso8601 = _format_is_iso(format)
        if format_is_iso8601:
            require_iso8601 = not infer_datetime_format
            format = None

    # ``result`` staying None after the format block means "not parsed yet".
    tz_parsed = None
    result = None

    if format is not None:
        try:
            # shortcut formatting here
            if format == "%Y%m%d":
                try:
                    # pass orig_arg as float-dtype may have been converted to
                    # datetime64[ns]
                    orig_arg = ensure_object(orig_arg)
                    result = _attempt_YYYYMMDD(orig_arg, errors=errors)
                except (ValueError, TypeError,
                        tslibs.OutOfBoundsDatetime) as err:
                    raise ValueError(
                        "cannot convert the input to '%Y%m%d' date format"
                    ) from err

            # fallback
            if result is None:
                try:
                    # NOTE(review): the local name ``timezones`` would shadow
                    # any module-level ``timezones`` inside this function --
                    # confirm against the file's imports.
                    result, timezones = array_strptime(arg,
                                                       format,
                                                       exact=exact,
                                                       errors=errors)
                    if "%Z" in format or "%z" in format:
                        return _return_parsed_timezone_results(
                            result, timezones, tz, name)
                except tslibs.OutOfBoundsDatetime:
                    if errors == "raise":
                        raise
                    elif errors == "coerce":
                        # Build an all-NaT M8[ns] array shaped like arg by
                        # filling its int64 view with iNaT.
                        result = np.empty(arg.shape, dtype="M8[ns]")
                        iresult = result.view("i8")
                        iresult.fill(tslibs.iNaT)
                    else:
                        result = arg
                except ValueError:
                    # if format was inferred, try falling back
                    # to array_to_datetime - terminate here
                    # for specified formats
                    if not infer_datetime_format:
                        if errors == "raise":
                            raise
                        elif errors == "coerce":
                            # Same all-NaT construction as above.
                            result = np.empty(arg.shape, dtype="M8[ns]")
                            iresult = result.view("i8")
                            iresult.fill(tslibs.iNaT)
                        else:
                            result = arg
        except ValueError as e:
            # Fallback to try to convert datetime objects if timezone-aware
            #  datetime objects are found without passing `utc=True`
            try:
                # Note: this rebinds the ``tz`` parameter.
                values, tz = conversion.datetime_to_datetime64(arg)
                dta = DatetimeArray(values, dtype=tz_to_dtype(tz))
                return DatetimeIndex._simple_new(dta, name=name)
            except (ValueError, TypeError):
                # The fallback failed as well: re-raise the original error.
                raise e

    if result is None:
        # Nothing was parsed with an explicit format; use the general parser.
        assert format is None or infer_datetime_format
        utc = tz == "utc"
        result, tz_parsed = objects_to_datetime64ns(
            arg,
            dayfirst=dayfirst,
            yearfirst=yearfirst,
            utc=utc,
            errors=errors,
            require_iso8601=require_iso8601,
            allow_object=True,
        )

    if tz_parsed is not None:
        # We can take a shortcut since the datetime64 numpy array
        # is in UTC
        dta = DatetimeArray(result, dtype=tz_to_dtype(tz_parsed))
        return DatetimeIndex._simple_new(dta, name=name)

    utc = tz == "utc"
    return _box_as_indexlike(result, utc=utc, name=name)