def _new_DatetimeIndex(cls, d):
    """
    Rebuild an index from the pickled state ``d``.

    This is called upon unpickling, rather than the default which doesn't
    have arguments and breaks __new__.

    Parameters
    ----------
    cls : type
        Class whose ``_simple_new`` / ``__new__`` is used to build the result.
    d : dict
        Pickled state. It is modified in place: "data" is popped when it is
        present and not a DatetimeIndex, and "tz" / "freq" are popped when
        they are consumed or cross-checked here. Whatever remains is
        forwarded as keyword arguments.

    Returns
    -------
    The object returned by ``cls._simple_new`` or ``cls.__new__``.

    Raises
    ------
    KeyError
        If "data" is not a DatetimeArray and "tz" or "freq" is missing.
    AssertionError
        If "tz" / "freq" in the pickle disagree with the DatetimeArray
        (only when assertions are enabled).
    """
    if "data" in d and not isinstance(d["data"], DatetimeIndex):
        # Avoid need to verify integrity by calling simple_new directly
        data = d.pop("data")
        if not isinstance(data, DatetimeArray):
            # For backward compat with older pickles, we may need to construct
            # a DatetimeArray to adapt to the newer _simple_new signature
            tz = d.pop("tz")
            freq = d.pop("freq")
            dta = DatetimeArray._simple_new(data, dtype=tz_to_dtype(tz), freq=freq)
        else:
            dta = data
            for key in ["tz", "freq"]:
                # These are already stored in our DatetimeArray; if they are
                # also in the pickle and don't match, we have a problem.
                if key in d:
                    # Pop *outside* the assert: under ``python -O`` assert
                    # statements are stripped, and the key must still be
                    # removed so it is not forwarded to _simple_new below.
                    pickled_value = d.pop(key)
                    assert pickled_value == getattr(dta, key)
        result = cls._simple_new(dta, **d)
    else:
        with warnings.catch_warnings():
            # TODO: If we knew what was going in to **d, we might be able to
            #  go through _simple_new instead
            warnings.simplefilter("ignore")
            result = cls.__new__(cls, **d)

    return result
def _simple_new(cls, values, name=None, freq=None, tz=None, dtype=None):
    """
    Build an instance of ``cls`` around ``values`` without going through
    the regular constructor.

    Parameters
    ----------
    values : DatetimeArray, DatetimeIndex, or array
        A DatetimeArray is re-wrapped through the DatetimeArray constructor
        using ``freq`` and the resolved dtype, and its ``tz`` / ``freq``
        then override the arguments. A DatetimeIndex is unwrapped to its
        ``_data``. Anything else is handed to ``DatetimeArray._simple_new``
        as-is.
    name : object, optional
        Stored as ``result.name``.
    freq : optional
        Frequency forwarded to the DatetimeArray.
    tz : optional
        Timezone; the final dtype is always derived from it.
    dtype : optional
        Only consulted when ``values`` is a DatetimeArray.

    Returns
    -------
    New instance of ``cls``.
    """
    if isinstance(values, DatetimeArray):
        if tz:
            tz = validate_tz_from_dtype(dtype, tz)
            dtype = DatetimeTZDtype(tz=tz)
        elif dtype is None:
            dtype = _NS_DTYPE

        rewrapped = DatetimeArray(values, freq=freq, dtype=dtype)
        tz, freq = rewrapped.tz, rewrapped.freq
        values = rewrapped._data

    # DatetimeArray._simple_new will accept either i8 or M8[ns] dtypes
    if isinstance(values, DatetimeIndex):
        values = values._data

    dtarr = DatetimeArray._simple_new(values, freq=freq, dtype=tz_to_dtype(tz))
    assert isinstance(dtarr, DatetimeArray)

    # Bypass __new__/__init__ of cls entirely and populate the attributes
    # by hand, in the same order as before.
    obj = object.__new__(cls)
    obj._data = dtarr
    obj.name = name
    obj._no_setting_name = False
    # For groupby perf. See note in indexes/base about _index_data
    obj._index_data = dtarr._data
    obj._reset_identity()
    return obj
def __setstate__(self, state):
    """
    Restore this object from pickled ``state``.

    A dict is delegated to the parent class. A tuple is treated as a legacy
    (< 0.15) pickle layout and is decoded here. Any other type raises
    ``Exception("invalid pickle state")``.
    """
    if isinstance(state, dict):
        super().__setstate__(state)
        return

    if not isinstance(state, tuple):
        raise Exception("invalid pickle state")

    # < 0.15 compat
    if len(state) == 2:
        ndarray_state, index_state = state

        # Recreate the raw ndarray from its own pickled state.
        raw = np.empty(ndarray_state[1], dtype=ndarray_state[2])
        np.ndarray.__setstate__(raw, ndarray_state)

        # index_state is laid out as (name, freq, tz).
        freq = index_state[1]
        dtype = tz_to_dtype(timezones.tz_standardize(index_state[2]))
        dtarr = DatetimeArray._simple_new(raw, freq=freq, dtype=dtype)

        self.name = index_state[0]
    else:  # pragma: no cover
        raw = np.empty(state)
        np.ndarray.__setstate__(raw, state)
        dtarr = DatetimeArray(raw)

    self._data = dtarr
    self._reset_identity()
def _to_datetime_with_format(
    arg,
    orig_arg,
    name,
    tz,
    fmt: str,
    exact: bool,
    errors: Optional[str],
    infer_datetime_format: bool,
) -> Optional[Index]:
    """
    Parse ``arg`` using the explicit format ``fmt``.

    Returns None when the strptime-based fallback yields None; callers are
    expected to try another parsing strategy in that case.

    A ValueError raised while parsing triggers one more attempt that treats
    ``arg`` as datetime objects; if that attempt fails with ValueError or
    TypeError, the original ValueError is raised.
    """
    try:
        if fmt == "%Y%m%d":
            # Work from orig_arg: a float-dtype input may already have been
            # converted to datetime64[ns] in ``arg``.
            as_objects = ensure_object(orig_arg)
            try:
                # may return None without raising
                ymd = _attempt_YYYYMMDD(as_objects, errors=errors)
            except (ValueError, TypeError, OutOfBoundsDatetime) as err:
                raise ValueError(
                    "cannot convert the input to '%Y%m%d' date format"
                ) from err
            if ymd is not None:
                return _box_as_indexlike(ymd, utc=(tz == "utc"), name=name)

        # Either not the YYYYMMDD shortcut, or the shortcut produced nothing.
        return _array_strptime_with_fallback(
            arg, name, tz, fmt, exact, errors, infer_datetime_format
        )
    except ValueError as parse_err:
        # Fallback to try to convert datetime objects if timezone-aware
        # datetime objects are found without passing `utc=True`
        try:
            values, tz = conversion.datetime_to_datetime64(arg)
            dta = DatetimeArray(values, dtype=tz_to_dtype(tz))
            return DatetimeIndex._simple_new(dta, name=name)
        except (ValueError, TypeError):
            raise parse_err
def _to_datetime_with_format(
    arg,
    orig_arg,
    name,
    tz,
    fmt: str,
    exact: bool,
    errors: str,
    infer_datetime_format: bool,
) -> Index | None:
    """
    Parse ``arg`` with the explicit format ``fmt``.

    For ``fmt == "%Y%m%d"`` a dedicated shortcut is tried first on
    ``orig_arg``. Otherwise, or when the shortcut returns None, the result
    of the strptime-based fallback is returned, which may itself be None.

    If a ValueError surfaces, ``arg`` is retried as datetime objects; when
    that retry fails with ValueError or TypeError the original ValueError
    is raised.
    """
    try:
        ymd_values = None
        if fmt == "%Y%m%d":
            # orig_arg is used because a float-dtype input may already have
            # been converted to datetime64[ns] in ``arg``.
            object_values = ensure_object(orig_arg)
            try:
                # may return None without raising
                ymd_values = _attempt_YYYYMMDD(object_values, errors=errors)
            except (ValueError, TypeError, OutOfBoundsDatetime) as err:
                raise ValueError(
                    "cannot convert the input to '%Y%m%d' date format"
                ) from err

        if ymd_values is not None:
            return _box_as_indexlike(ymd_values, utc=(tz == "utc"), name=name)

        # fallback
        return _array_strptime_with_fallback(
            arg, name, tz, fmt, exact, errors, infer_datetime_format
        )
    except ValueError as outer_err:
        # Fallback to try to convert datetime objects if timezone-aware
        # datetime objects are found without passing `utc=True`
        try:
            values, tz = conversion.datetime_to_datetime64(arg)
            dta = DatetimeArray(values, dtype=tz_to_dtype(tz))
            return DatetimeIndex._simple_new(dta, name=name)
        except (ValueError, TypeError):
            raise outer_err
def _convert_listlike_datetimes(
    arg,
    format: str | None,
    name: Hashable = None,
    tz: Timezone | None = None,
    unit: str | None = None,
    errors: str = "raise",
    infer_datetime_format: bool = False,
    dayfirst: bool | None = None,
    yearfirst: bool | None = None,
    exact: bool = True,
):
    """
    Helper function for to_datetime. Converts a 1D list-like of dates.

    Parameters
    ----------
    arg : list, tuple, ndarray, Series, Index
        date to be parsed
    format : str or None
        explicit format string, or None
    name : object
        None or string for the Index name
    tz : object
        None or 'utc'
    unit : str
        None or string of the frequency of the passed data
    errors : str
        error handling behaviors from to_datetime, 'raise', 'coerce', 'ignore'
    infer_datetime_format : bool, default False
        inferring format behavior from to_datetime
    dayfirst : bool
        dayfirst parsing behavior from to_datetime
    yearfirst : bool
        yearfirst parsing behavior from to_datetime
    exact : bool, default True
        exact format matching behavior from to_datetime

    Returns
    -------
    Index-like of parsed dates

    Raises
    ------
    ValueError
        If both ``format`` and ``unit`` are given.
    TypeError
        If ``arg`` has more than one dimension.
    """
    if isinstance(arg, (list, tuple)):
        arg = np.array(arg, dtype="O")

    arg_dtype = getattr(arg, "dtype", None)

    # Shortcut 1: already tz-aware datetime64 data.
    if is_datetime64tz_dtype(arg_dtype):
        if isinstance(arg, (DatetimeArray, DatetimeIndex)):
            if tz == "utc":
                return arg.tz_convert(None).tz_localize(tz)
            return arg
        return DatetimeIndex(arg, tz=tz, name=name)

    # Shortcut 2: already naive datetime64[ns] data.
    if is_datetime64_ns_dtype(arg_dtype):
        if isinstance(arg, (DatetimeArray, DatetimeIndex)):
            return arg.tz_localize(tz) if tz else arg
        try:
            return DatetimeIndex(arg, tz=tz, name=name)
        except ValueError:
            # Could not be wrapped; hand the input back untouched.
            return arg

    if unit is not None:
        if format is not None:
            raise ValueError("cannot specify both format and unit")
        return _to_datetime_with_unit(arg, unit, name, tz, errors)

    if getattr(arg, "ndim", 1) > 1:
        raise TypeError(
            "arg must be a string, datetime, list, tuple, 1-d array, or Series"
        )

    # warn if passing timedelta64, raise for PeriodDtype
    # NB: this must come after unit transformation
    orig_arg = arg
    try:
        arg, _ = maybe_convert_dtype(arg, copy=False)
    except TypeError:
        if errors == "coerce":
            all_nat = np.array(["NaT"], dtype="datetime64[ns]").repeat(len(arg))
            return DatetimeIndex(all_nat, name=name)
        if errors == "ignore":
            return Index(arg, name=name)
        raise

    arg = ensure_object(arg)

    if infer_datetime_format and format is None:
        format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)

    # There is a special fast-path for iso8601 formatted datetime strings,
    # so in those cases the (possibly inferred) format is dropped because
    # the format-based path is slower in that special case.
    require_iso8601 = False
    if format is not None and format_is_iso(format):
        require_iso8601 = not infer_datetime_format
        format = None

    if format is not None:
        parsed = _to_datetime_with_format(
            arg, orig_arg, name, tz, format, exact, errors, infer_datetime_format
        )
        if parsed is not None:
            return parsed

    assert format is None or infer_datetime_format
    result, tz_parsed = objects_to_datetime64ns(
        arg,
        dayfirst=dayfirst,
        yearfirst=yearfirst,
        utc=tz == "utc",
        errors=errors,
        require_iso8601=require_iso8601,
        allow_object=True,
    )

    if tz_parsed is not None:
        # We can take a shortcut since the datetime64 numpy array
        # is in UTC
        dta = DatetimeArray(result, dtype=tz_to_dtype(tz_parsed))
        return DatetimeIndex._simple_new(dta, name=name)

    return _box_as_indexlike(result, utc=tz == "utc", name=name)
def _convert_listlike_datetimes(
    arg,
    format,
    name=None,
    tz=None,
    unit=None,
    errors=None,
    infer_datetime_format=None,
    dayfirst=None,
    yearfirst=None,
    exact=None,
):
    """
    Helper function for to_datetime. Converts a 1D list-like of dates.

    Parameters
    ----------
    arg : list, tuple, ndarray, Series, Index
        date to be parsed
    format : string
        explicit format string, or None
    name : object
        None or string for the Index name
    tz : object
        None or 'utc'
    unit : string
        None or string of the frequency of the passed data
    errors : string
        error handling behaviors from to_datetime, 'raise', 'coerce', 'ignore'
    infer_datetime_format : boolean
        inferring format behavior from to_datetime
    dayfirst : boolean
        dayfirst parsing behavior from to_datetime
    yearfirst : boolean
        yearfirst parsing behavior from to_datetime
    exact : boolean
        exact format matching behavior from to_datetime

    Returns
    -------
    Index-like of parsed dates

    Raises
    ------
    ValueError
        If both ``format`` and ``unit`` are given.
    TypeError
        If ``arg`` has more than one dimension.
    """
    if isinstance(arg, (list, tuple)):
        arg = np.array(arg, dtype="O")

    arg_dtype = getattr(arg, "dtype", None)

    # Shortcut 1: already tz-aware datetime64 data.
    if is_datetime64tz_dtype(arg_dtype):
        if isinstance(arg, (DatetimeArray, DatetimeIndex)):
            if tz == "utc":
                # error: Item "DatetimeIndex" of "Union[DatetimeArray,
                # DatetimeIndex]" has no attribute "tz_convert"
                return arg.tz_convert(None).tz_localize(tz)  # type: ignore
            return arg
        return DatetimeIndex(arg, tz=tz, name=name)

    # Shortcut 2: already naive datetime64[ns] data.
    if is_datetime64_ns_dtype(arg_dtype):
        if isinstance(arg, (DatetimeArray, DatetimeIndex)):
            if tz:
                # error: Item "DatetimeIndex" of "Union[DatetimeArray,
                # DatetimeIndex]" has no attribute "tz_localize"
                return arg.tz_localize(tz)  # type: ignore
            return arg
        try:
            return DatetimeIndex(arg, tz=tz, name=name)
        except ValueError:
            # Could not be wrapped; hand the input back untouched.
            return arg

    if unit is not None:
        if format is not None:
            raise ValueError("cannot specify both format and unit")
        values = getattr(arg, "_values", arg)

        # GH 30050 pass an ndarray to tslib.array_with_unit_to_datetime
        # because it expects an ndarray argument
        if isinstance(values, IntegerArray):
            converted = values.astype(f"datetime64[{unit}]")
            tz_parsed = None
        else:
            converted, tz_parsed = tslib.array_with_unit_to_datetime(
                values, unit, errors=errors
            )

        if errors == "ignore":
            boxed = Index(converted, name=name)
        else:
            boxed = DatetimeIndex(converted, name=name)

        # GH 23758: We may still need to localize the result with tz
        # GH 25546: Apply tz_parsed first (from arg), then tz (from caller)
        # result will be naive but in UTC
        try:
            boxed = boxed.tz_localize("UTC").tz_convert(tz_parsed)
        except AttributeError:
            # Regular Index from 'ignore' path
            return boxed

        if tz is not None:
            if boxed.tz is None:
                boxed = boxed.tz_localize(tz)
            else:
                boxed = boxed.tz_convert(tz)
        return boxed

    if getattr(arg, "ndim", 1) > 1:
        raise TypeError(
            "arg must be a string, datetime, list, tuple, 1-d array, or Series"
        )

    # warn if passing timedelta64, raise for PeriodDtype
    # NB: this must come after unit transformation
    orig_arg = arg
    try:
        arg, _ = maybe_convert_dtype(arg, copy=False)
    except TypeError:
        if errors == "coerce":
            all_nat = np.array(["NaT"], dtype="datetime64[ns]").repeat(len(arg))
            return DatetimeIndex(all_nat, name=name)
        if errors == "ignore":
            return Index(arg, name=name)
        raise

    arg = ensure_object(arg)

    if infer_datetime_format and format is None:
        format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)

    # There is a special fast-path for iso8601 formatted datetime strings,
    # so in those cases the (possibly inferred) format is dropped because
    # the format-based path is slower in that special case.
    require_iso8601 = False
    if format is not None and _format_is_iso(format):
        require_iso8601 = not infer_datetime_format
        format = None

    tz_parsed = None
    result = None

    if format is not None:
        try:
            # shortcut formatting here
            if format == "%Y%m%d":
                try:
                    # pass orig_arg as float-dtype may have been converted to
                    # datetime64[ns]
                    orig_arg = ensure_object(orig_arg)
                    result = _attempt_YYYYMMDD(orig_arg, errors=errors)
                except (ValueError, TypeError, tslibs.OutOfBoundsDatetime) as err:
                    raise ValueError(
                        "cannot convert the input to '%Y%m%d' date format"
                    ) from err

            # fallback
            if result is None:
                try:
                    result, parsed_tzs = array_strptime(
                        arg, format, exact=exact, errors=errors
                    )
                    if "%Z" in format or "%z" in format:
                        return _return_parsed_timezone_results(
                            result, parsed_tzs, tz, name
                        )
                except tslibs.OutOfBoundsDatetime:
                    if errors == "raise":
                        raise
                    if errors == "coerce":
                        # All-NaT array with the same shape as the input.
                        result = np.empty(arg.shape, dtype="M8[ns]")
                        result.view("i8").fill(tslibs.iNaT)
                    else:
                        result = arg
                except ValueError:
                    # if format was inferred, try falling back
                    # to array_to_datetime - terminate here
                    # for specified formats
                    if not infer_datetime_format:
                        if errors == "raise":
                            raise
                        if errors == "coerce":
                            # All-NaT array with the same shape as the input.
                            result = np.empty(arg.shape, dtype="M8[ns]")
                            result.view("i8").fill(tslibs.iNaT)
                        else:
                            result = arg
        except ValueError as parse_err:
            # Fallback to try to convert datetime objects if timezone-aware
            # datetime objects are found without passing `utc=True`
            try:
                values, tz = conversion.datetime_to_datetime64(arg)
                dta = DatetimeArray(values, dtype=tz_to_dtype(tz))
                return DatetimeIndex._simple_new(dta, name=name)
            except (ValueError, TypeError):
                raise parse_err

    if result is None:
        assert format is None or infer_datetime_format
        result, tz_parsed = objects_to_datetime64ns(
            arg,
            dayfirst=dayfirst,
            yearfirst=yearfirst,
            utc=tz == "utc",
            errors=errors,
            require_iso8601=require_iso8601,
            allow_object=True,
        )

    if tz_parsed is not None:
        # We can take a shortcut since the datetime64 numpy array
        # is in UTC
        dta = DatetimeArray(result, dtype=tz_to_dtype(tz_parsed))
        return DatetimeIndex._simple_new(dta, name=name)

    return _box_as_indexlike(result, utc=tz == "utc", name=name)