Ejemplo n.º 1
0
def _to_datetime_with_format(
    arg,
    orig_arg,
    name,
    tz,
    fmt: str,
    exact: bool,
    errors: Optional[str],
    infer_datetime_format: bool,
) -> Optional[Index]:
    """
    Parse ``arg`` according to the explicit format ``fmt``.

    Returns the parsed index-like, or ``None`` when the strptime-based
    parser hands back ``None`` instead of raising.

    Raises
    ------
    ValueError
        If parsing fails and the input also cannot be converted through
        ``conversion.datetime_to_datetime64``; the original parsing error
        is the one re-raised.
    """
    try:
        if fmt == "%Y%m%d":
            # Dedicated fast path.  ``orig_arg`` is used here because a
            # float-dtype input may already have been converted to
            # datetime64[ns] in ``arg``.
            object_values = ensure_object(orig_arg)
            try:
                # may hand back None without raising
                ymd_parsed = _attempt_YYYYMMDD(object_values, errors=errors)
            except (ValueError, TypeError, OutOfBoundsDatetime) as err:
                raise ValueError(
                    "cannot convert the input to '%Y%m%d' date format"
                ) from err
            if ymd_parsed is not None:
                return _box_as_indexlike(
                    ymd_parsed, utc=(tz == "utc"), name=name
                )

        # Either the format is not '%Y%m%d' or the fast path declined:
        # defer to the generic strptime parser (its result may be None).
        return _array_strptime_with_fallback(
            arg, name, tz, fmt, exact, errors, infer_datetime_format
        )

    except ValueError as parse_error:
        # Last resort: timezone-aware datetime objects passed without
        # `utc=True` can still be converted directly.
        try:
            values, found_tz = conversion.datetime_to_datetime64(arg)
            dta = DatetimeArray(values, dtype=tz_to_dtype(found_tz))
            return DatetimeIndex._simple_new(dta, name=name)
        except (ValueError, TypeError):
            # Surface the original parsing failure, not the fallback's.
            raise parse_error
Ejemplo n.º 2
0
def _to_datetime_with_format(
    arg,
    orig_arg,
    name,
    tz,
    fmt: str,
    exact: bool,
    errors: str,
    infer_datetime_format: bool,
) -> Index | None:
    """
    Parse ``arg`` with the explicit format ``fmt``.

    The result of the strptime-based parser is returned as-is, so the
    return value is ``None`` whenever that parser returns ``None``.

    Raises
    ------
    ValueError
        If parsing fails and ``conversion.datetime_to_datetime64`` cannot
        convert the input either; the original parsing error is re-raised.
    """
    try:
        if fmt == "%Y%m%d":
            # Special-cased format.  Work from ``orig_arg`` since a
            # float-dtype input may have been converted to datetime64[ns].
            as_objects = ensure_object(orig_arg)
            try:
                # may return None without raising
                ymd = _attempt_YYYYMMDD(as_objects, errors=errors)
            except (ValueError, TypeError, OutOfBoundsDatetime) as exc:
                raise ValueError(
                    "cannot convert the input to '%Y%m%d' date format"
                ) from exc
            if ymd is not None:
                return _box_as_indexlike(ymd, utc=(tz == "utc"), name=name)

        # General case, also used when the '%Y%m%d' path yielded nothing.
        return _array_strptime_with_fallback(
            arg, name, tz, fmt, exact, errors, infer_datetime_format
        )

    except ValueError as original_error:
        # Timezone-aware datetime objects given without `utc=True` may
        # still be convertible directly.
        try:
            values, detected_tz = conversion.datetime_to_datetime64(arg)
            dta = DatetimeArray(values, dtype=tz_to_dtype(detected_tz))
            return DatetimeIndex._simple_new(dta, name=name)
        except (ValueError, TypeError):
            # Report the first failure rather than the fallback's.
            raise original_error
Ejemplo n.º 3
0
def _convert_listlike_datetimes(
    arg,
    format: str | None,
    name: Hashable = None,
    tz: Timezone | None = None,
    unit: str | None = None,
    errors: str = "raise",
    infer_datetime_format: bool = False,
    dayfirst: bool | None = None,
    yearfirst: bool | None = None,
    exact: bool = True,
):
    """
    Convert a 1D list-like of dates on behalf of to_datetime.

    Parameters
    ----------
    arg : list, tuple, ndarray, Series, Index
        Dates to be parsed.
    format : str or None
        Explicit format to parse with. Cannot be combined with ``unit``.
    name : object
        None or string for the Index name.
    tz : object
        None or 'utc'.
    unit : str
        None or string of the frequency of the passed data.
    errors : str
        Error handling behavior from to_datetime: 'raise', 'coerce'
        or 'ignore'.
    infer_datetime_format : bool, default False
        Inferring format behavior from to_datetime.
    dayfirst : bool
        dayfirst parsing behavior from to_datetime.
    yearfirst : bool
        yearfirst parsing behavior from to_datetime.
    exact : bool, default True
        Exact format matching behavior from to_datetime.

    Returns
    -------
    Index-like of parsed dates

    Raises
    ------
    ValueError
        If both ``format`` and ``unit`` are given for non-datetime64 input.
    TypeError
        If ``arg`` has more than one dimension.
    """
    if isinstance(arg, (list, tuple)):
        arg = np.array(arg, dtype="O")

    arg_dtype = getattr(arg, "dtype", None)
    is_dt_container = isinstance(arg, (DatetimeArray, DatetimeIndex))

    # --- inputs that already hold datetimes need no parsing ---
    if is_datetime64tz_dtype(arg_dtype):
        if not is_dt_container:
            return DatetimeIndex(arg, tz=tz, name=name)
        return arg.tz_convert(None).tz_localize(tz) if tz == "utc" else arg

    if is_datetime64_ns_dtype(arg_dtype):
        if is_dt_container:
            # DatetimeArray, DatetimeIndex
            return arg.tz_localize(tz) if tz else arg
        try:
            return DatetimeIndex(arg, tz=tz, name=name)
        except ValueError:
            # boxing failed: hand the input back unchanged
            return arg

    if unit is not None:
        if format is not None:
            raise ValueError("cannot specify both format and unit")
        return _to_datetime_with_unit(arg, unit, name, tz, errors)

    if getattr(arg, "ndim", 1) > 1:
        raise TypeError(
            "arg must be a string, datetime, list, tuple, 1-d array, or Series"
        )

    # warn if passing timedelta64, raise for PeriodDtype
    # NB: this must come after unit transformation
    orig_arg = arg
    try:
        arg, _ = maybe_convert_dtype(arg, copy=False)
    except TypeError:
        if errors == "coerce":
            all_nat = np.array(["NaT"], dtype="datetime64[ns]").repeat(len(arg))
            return DatetimeIndex(all_nat, name=name)
        if errors == "ignore":
            return Index(arg, name=name)
        raise

    arg = ensure_object(arg)

    if infer_datetime_format and format is None:
        format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)

    # ISO8601 strings have a dedicated fast path in the generic parser, so
    # an ISO format (given or inferred) is dropped here; going through the
    # format-based path would only be slower.
    require_iso8601 = False
    if format is not None and format_is_iso(format):
        require_iso8601 = not infer_datetime_format
        format = None

    if format is not None:
        parsed = _to_datetime_with_format(
            arg, orig_arg, name, tz, format, exact, errors, infer_datetime_format
        )
        if parsed is not None:
            return parsed

    # Reaching this point with a format means it was inferred and the
    # format-based parse returned nothing.
    assert format is None or infer_datetime_format
    utc = tz == "utc"
    result, tz_parsed = objects_to_datetime64ns(
        arg,
        dayfirst=dayfirst,
        yearfirst=yearfirst,
        utc=utc,
        errors=errors,
        require_iso8601=require_iso8601,
        allow_object=True,
    )

    if tz_parsed is None:
        return _box_as_indexlike(result, utc=utc, name=name)

    # We can take a shortcut since the datetime64 numpy array is in UTC
    dta = DatetimeArray(result, dtype=tz_to_dtype(tz_parsed))
    return DatetimeIndex._simple_new(dta, name=name)
Ejemplo n.º 4
0
 def to_timestamp(self, freq=None, how='start'):
     """
     Convert to a DatetimeIndex.

     Delegates to ``self._data.to_timestamp(freq=freq, how=how)`` and
     wraps the integer (``asi8``) values of that result in a
     DatetimeIndex carrying this object's name and the result's freq.
     """
     # Imported locally, as in the original implementation.
     from pandas import DatetimeIndex

     converted = self._data.to_timestamp(freq=freq, how=how)
     return DatetimeIndex._simple_new(
         converted.asi8, name=self.name, freq=converted.freq
     )
Ejemplo n.º 5
0
 def to_timestamp(self, freq=None, how="start") -> DatetimeIndex:
     """
     Convert to a DatetimeIndex.

     Delegates to ``self._data.to_timestamp(freq, how)`` and wraps the
     returned array in a DatetimeIndex that keeps this object's name.
     """
     timestamps = self._data.to_timestamp(freq, how)
     return DatetimeIndex._simple_new(timestamps, name=self.name)
Ejemplo n.º 6
0
def _convert_listlike_datetimes(
    arg,
    format,
    name=None,
    tz=None,
    unit=None,
    errors=None,
    infer_datetime_format=None,
    dayfirst=None,
    yearfirst=None,
    exact=None,
):
    """
    Helper function for to_datetime. Performs the conversions of 1D listlike
    of dates

    Parameters
    ----------
    arg : list, tuple, ndarray, Series, Index
        date to be parsed
    format : string or None
        explicit format to parse with; cannot be combined with ``unit``
    name : object
        None or string for the Index name
    tz : object
        None or 'utc'
    unit : string
        None or string of the frequency of the passed data
    errors : string
        error handling behaviors from to_datetime, 'raise', 'coerce', 'ignore'
    infer_datetime_format : boolean
        inferring format behavior from to_datetime
    dayfirst : boolean
        dayfirst parsing behavior from to_datetime
    yearfirst : boolean
        yearfirst parsing behavior from to_datetime
    exact : boolean
        exact format matching behavior from to_datetime

    Returns
    -------
    Index-like of parsed dates

    Raises
    ------
    ValueError
        If both ``format`` and ``unit`` are given, or if parsing with an
        explicit format fails and the datetime-object fallback fails too.
    TypeError
        If ``arg`` has more than one dimension.
    """

    # Lists and tuples are normalized to an object ndarray up front.
    if isinstance(arg, (list, tuple)):
        arg = np.array(arg, dtype="O")

    arg_dtype = getattr(arg, "dtype", None)
    # these are shortcutable
    if is_datetime64tz_dtype(arg_dtype):
        if not isinstance(arg, (DatetimeArray, DatetimeIndex)):
            return DatetimeIndex(arg, tz=tz, name=name)
        if tz == "utc":
            # error: Item "DatetimeIndex" of "Union[DatetimeArray, DatetimeIndex]" has
            # no attribute "tz_convert"
            arg = arg.tz_convert(None).tz_localize(tz)  # type: ignore
        return arg

    elif is_datetime64_ns_dtype(arg_dtype):
        if not isinstance(arg, (DatetimeArray, DatetimeIndex)):
            try:
                return DatetimeIndex(arg, tz=tz, name=name)
            except ValueError:
                # boxing failed; fall through and return ``arg`` unchanged
                pass
        elif tz:
            # DatetimeArray, DatetimeIndex
            # error: Item "DatetimeIndex" of "Union[DatetimeArray, DatetimeIndex]" has
            # no attribute "tz_localize"
            return arg.tz_localize(tz)  # type: ignore

        return arg

    elif unit is not None:
        # Numeric input interpreted as multiples of ``unit``.
        if format is not None:
            raise ValueError("cannot specify both format and unit")
        # Unwrap to the underlying values when ``arg`` exposes ``_values``.
        arg = getattr(arg, "_values", arg)

        # GH 30050 pass an ndarray to tslib.array_with_unit_to_datetime
        # because it expects an ndarray argument
        if isinstance(arg, IntegerArray):
            result = arg.astype(f"datetime64[{unit}]")
            tz_parsed = None
        else:

            result, tz_parsed = tslib.array_with_unit_to_datetime(
                arg, unit, errors=errors)

        if errors == "ignore":

            # plain Index here, not a DatetimeIndex
            result = Index(result, name=name)
        else:
            result = DatetimeIndex(result, name=name)
        # GH 23758: We may still need to localize the result with tz
        # GH 25546: Apply tz_parsed first (from arg), then tz (from caller)
        # result will be naive but in UTC
        try:
            result = result.tz_localize("UTC").tz_convert(tz_parsed)
        except AttributeError:
            # Regular Index from 'ignore' path
            return result
        if tz is not None:
            if result.tz is None:
                result = result.tz_localize(tz)
            else:
                result = result.tz_convert(tz)
        return result
    elif getattr(arg, "ndim", 1) > 1:
        raise TypeError(
            "arg must be a string, datetime, list, tuple, 1-d array, or Series"
        )

    # warn if passing timedelta64, raise for PeriodDtype
    # NB: this must come after unit transformation
    # ``orig_arg`` keeps the pre-conversion input for the '%Y%m%d' path below.
    orig_arg = arg
    try:
        arg, _ = maybe_convert_dtype(arg, copy=False)
    except TypeError:
        if errors == "coerce":
            # all-NaT result with one entry per input element
            result = np.array(["NaT"], dtype="datetime64[ns]").repeat(len(arg))
            return DatetimeIndex(result, name=name)
        elif errors == "ignore":
            # hand the input back, merely wrapped in an Index
            result = Index(arg, name=name)
            return result
        raise

    arg = ensure_object(arg)
    require_iso8601 = False

    if infer_datetime_format and format is None:
        format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)

    if format is not None:
        # There is a special fast-path for iso8601 formatted
        # datetime strings, so in those cases don't use the inferred
        # format because this path makes process slower in this
        # special case
        format_is_iso8601 = _format_is_iso(format)
        if format_is_iso8601:
            # only enforce ISO8601 when the format was given explicitly
            require_iso8601 = not infer_datetime_format
            format = None

    # ``result`` staying None after the format block below means "not parsed
    # yet" and triggers the generic parser further down.
    tz_parsed = None
    result = None

    if format is not None:
        try:
            # shortcut formatting here
            if format == "%Y%m%d":
                try:
                    # pass orig_arg as float-dtype may have been converted to
                    # datetime64[ns]
                    orig_arg = ensure_object(orig_arg)
                    result = _attempt_YYYYMMDD(orig_arg, errors=errors)
                except (ValueError, TypeError,
                        tslibs.OutOfBoundsDatetime) as err:
                    raise ValueError(
                        "cannot convert the input to '%Y%m%d' date format"
                    ) from err

            # fallback
            if result is None:
                try:
                    result, timezones = array_strptime(arg,
                                                       format,
                                                       exact=exact,
                                                       errors=errors)
                    # formats with a timezone directive are boxed separately
                    if "%Z" in format or "%z" in format:
                        return _return_parsed_timezone_results(
                            result, timezones, tz, name)
                except tslibs.OutOfBoundsDatetime:
                    if errors == "raise":
                        raise
                    elif errors == "coerce":
                        # build an all-NaT array of the input's shape
                        result = np.empty(arg.shape, dtype="M8[ns]")
                        iresult = result.view("i8")
                        iresult.fill(tslibs.iNaT)
                    else:
                        # any other ``errors`` value: return the input as-is
                        result = arg
                except ValueError:
                    # if format was inferred, try falling back
                    # to array_to_datetime - terminate here
                    # for specified formats
                    # (with an inferred format ``result`` stays None, so the
                    # generic parser below takes over)
                    if not infer_datetime_format:
                        if errors == "raise":
                            raise
                        elif errors == "coerce":
                            # build an all-NaT array of the input's shape
                            result = np.empty(arg.shape, dtype="M8[ns]")
                            iresult = result.view("i8")
                            iresult.fill(tslibs.iNaT)
                        else:
                            # any other ``errors`` value: return the input as-is
                            result = arg
        except ValueError as e:
            # Fallback to try to convert datetime objects if timezone-aware
            #  datetime objects are found without passing `utc=True`
            try:
                values, tz = conversion.datetime_to_datetime64(arg)
                dta = DatetimeArray(values, dtype=tz_to_dtype(tz))
                return DatetimeIndex._simple_new(dta, name=name)
            except (ValueError, TypeError):
                # re-raise the original parsing error, not the fallback's
                raise e

    if result is None:
        # No explicit format, an ISO format, or an inferred format that
        # failed to parse: use the generic object parser.
        assert format is None or infer_datetime_format
        utc = tz == "utc"
        result, tz_parsed = objects_to_datetime64ns(
            arg,
            dayfirst=dayfirst,
            yearfirst=yearfirst,
            utc=utc,
            errors=errors,
            require_iso8601=require_iso8601,
            allow_object=True,
        )

    if tz_parsed is not None:
        # We can take a shortcut since the datetime64 numpy array
        # is in UTC
        dta = DatetimeArray(result, dtype=tz_to_dtype(tz_parsed))
        return DatetimeIndex._simple_new(dta, name=name)

    utc = tz == "utc"
    return _box_as_indexlike(result, utc=utc, name=name)
Ejemplo n.º 7
0
    def _convert_listlike(arg, box, format, name=None, tz=tz):
        """
        Convert a 1-d list-like of dates to datetimes.

        Besides its parameters, this function reads ``utc``, ``unit``,
        ``errors``, ``infer_datetime_format``, ``dayfirst``, ``yearfirst``
        and ``exact`` from the enclosing scope (not visible in this block).

        Parameters
        ----------
        arg : list, tuple, ndarray or other array-like
            Values to convert; lists and tuples become object ndarrays.
        box : bool
            When true, datetime64 results are wrapped in a DatetimeIndex
            (or a plain Index for the unit / errors='ignore' path).
        format : str or None
            Explicit format to parse with; cannot be combined with ``unit``.
        name : object, default None
            Name given to a boxed result.
        tz : object
            Timezone passed to DatetimeIndex; defaults to the enclosing
            scope's ``tz`` at definition time.

        Returns
        -------
        DatetimeIndex, Index or the raw parser output, depending on ``box``
        and ``errors``.

        Raises
        ------
        ValueError
            If both ``format`` and ``unit`` are given, if '%Y%m%d' parsing
            fails, or if parsing fails and the datetime-object fallback
            fails as well.
        TypeError
            If ``arg`` has more than one dimension.
        """

        if isinstance(arg, (list, tuple)):
            arg = np.array(arg, dtype='O')

        # these are shortcutable
        if is_datetime64tz_dtype(arg):
            if not isinstance(arg, DatetimeIndex):
                return DatetimeIndex(arg, tz=tz, name=name)
            if utc:
                arg = arg.tz_convert(None).tz_localize('UTC')
            return arg

        elif is_datetime64_ns_dtype(arg):
            if box and not isinstance(arg, DatetimeIndex):
                try:
                    return DatetimeIndex(arg, tz=tz, name=name)
                except ValueError:
                    # boxing failed; return the input unchanged below
                    pass

            return arg

        elif unit is not None:
            if format is not None:
                raise ValueError("cannot specify both format and unit")
            arg = getattr(arg, 'values', arg)
            result = tslib.array_with_unit_to_datetime(arg, unit,
                                                       errors=errors)
            if box:
                if errors == 'ignore':
                    from pandas import Index
                    # keep the name here too, as the DatetimeIndex path does
                    return Index(result, name=name)

                return DatetimeIndex(result, tz=tz, name=name)
            return result
        elif getattr(arg, 'ndim', 1) > 1:
            raise TypeError('arg must be a string, datetime, list, tuple, '
                            '1-d array, or Series')

        arg = _ensure_object(arg)
        require_iso8601 = False

        if infer_datetime_format and format is None:
            format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)

        if format is not None:
            # There is a special fast-path for iso8601 formatted
            # datetime strings, so in those cases don't use the inferred
            # format because this path makes process slower in this
            # special case
            format_is_iso8601 = _format_is_iso(format)
            if format_is_iso8601:
                require_iso8601 = not infer_datetime_format
                format = None

        try:
            # ``result`` staying None means "not parsed yet"
            result = None

            if format is not None:
                # shortcut formatting here
                if format == '%Y%m%d':
                    try:
                        result = _attempt_YYYYMMDD(arg, errors=errors)
                    except Exception:
                        # Exception rather than a bare ``except`` so that
                        # KeyboardInterrupt / SystemExit still propagate
                        # instead of being turned into a ValueError.
                        raise ValueError("cannot convert the input to "
                                         "'%Y%m%d' date format")

                # fallback
                if result is None:
                    try:
                        result = array_strptime(arg, format, exact=exact,
                                                errors=errors)
                    except tslib.OutOfBoundsDatetime:
                        if errors == 'raise':
                            raise
                        # non-raising modes hand the input back unchanged
                        result = arg
                    except ValueError:
                        # if format was inferred, try falling back
                        # to array_to_datetime - terminate here
                        # for specified formats
                        if not infer_datetime_format:
                            if errors == 'raise':
                                raise
                            result = arg

            if result is None and (format is None or infer_datetime_format):
                result = tslib.array_to_datetime(
                    arg,
                    errors=errors,
                    utc=utc,
                    dayfirst=dayfirst,
                    yearfirst=yearfirst,
                    require_iso8601=require_iso8601
                )

            if is_datetime64_dtype(result) and box:
                result = DatetimeIndex(result, tz=tz, name=name)
            return result

        except ValueError as e:
            # Last resort: try converting datetime objects directly; if
            # that fails too, re-raise the original parsing error.
            try:
                values, tz = tslib.datetime_to_datetime64(arg)
                return DatetimeIndex._simple_new(values, name=name, tz=tz)
            except (ValueError, TypeError):
                raise e
Ejemplo n.º 8
0
    def _convert_listlike(arg, box, format, name=None, tz=tz):
        """
        Convert a 1-d list-like of dates to datetimes.

        Besides its parameters, this function reads ``utc``, ``unit``,
        ``errors``, ``infer_datetime_format``, ``dayfirst``, ``yearfirst``
        and ``exact`` from the enclosing scope (not visible in this block).

        Parameters
        ----------
        arg : list, tuple, ndarray or other array-like
            Values to convert; lists and tuples become object ndarrays.
        box : bool
            When true, datetime64 results are wrapped in a DatetimeIndex
            (or a plain Index for the unit / errors='ignore' path).
        format : str or None
            Explicit format to parse with; cannot be combined with ``unit``.
        name : object, default None
            Name given to a boxed result.
        tz : object
            Timezone passed to DatetimeIndex; defaults to the enclosing
            scope's ``tz`` at definition time.

        Returns
        -------
        DatetimeIndex, Index or the raw parser output, depending on ``box``
        and ``errors``.

        Raises
        ------
        ValueError
            If both ``format`` and ``unit`` are given, if '%Y%m%d' parsing
            fails, or if parsing fails and the datetime-object fallback
            fails as well.
        TypeError
            If ``arg`` has more than one dimension.
        """

        if isinstance(arg, (list, tuple)):
            arg = np.array(arg, dtype='O')

        # these are shortcutable
        if is_datetime64tz_dtype(arg):
            if not isinstance(arg, DatetimeIndex):
                return DatetimeIndex(arg, tz=tz, name=name)
            if utc:
                arg = arg.tz_convert(None).tz_localize('UTC')
            return arg

        elif is_datetime64_ns_dtype(arg):
            if box and not isinstance(arg, DatetimeIndex):
                try:
                    return DatetimeIndex(arg, tz=tz, name=name)
                except ValueError:
                    # boxing failed; return the input unchanged below
                    pass

            return arg

        elif unit is not None:
            if format is not None:
                raise ValueError("cannot specify both format and unit")
            arg = getattr(arg, 'values', arg)
            result = tslib.array_with_unit_to_datetime(arg,
                                                       unit,
                                                       errors=errors)
            if box:
                if errors == 'ignore':
                    from pandas import Index
                    # keep the name here too, as the DatetimeIndex path does
                    return Index(result, name=name)

                return DatetimeIndex(result, tz=tz, name=name)
            return result
        elif getattr(arg, 'ndim', 1) > 1:
            raise TypeError('arg must be a string, datetime, list, tuple, '
                            '1-d array, or Series')

        arg = _ensure_object(arg)
        require_iso8601 = False

        if infer_datetime_format and format is None:
            format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)

        if format is not None:
            # There is a special fast-path for iso8601 formatted
            # datetime strings, so in those cases don't use the inferred
            # format because this path makes process slower in this
            # special case
            format_is_iso8601 = _format_is_iso(format)
            if format_is_iso8601:
                require_iso8601 = not infer_datetime_format
                format = None

        try:
            # ``result`` staying None means "not parsed yet"
            result = None

            if format is not None:
                # shortcut formatting here
                if format == '%Y%m%d':
                    try:
                        result = _attempt_YYYYMMDD(arg, errors=errors)
                    except Exception:
                        # Exception rather than a bare ``except`` so that
                        # KeyboardInterrupt / SystemExit still propagate
                        # instead of being turned into a ValueError.
                        raise ValueError("cannot convert the input to "
                                         "'%Y%m%d' date format")

                # fallback
                if result is None:
                    try:
                        result = array_strptime(arg,
                                                format,
                                                exact=exact,
                                                errors=errors)
                    except tslib.OutOfBoundsDatetime:
                        if errors == 'raise':
                            raise
                        # non-raising modes hand the input back unchanged
                        result = arg
                    except ValueError:
                        # if format was inferred, try falling back
                        # to array_to_datetime - terminate here
                        # for specified formats
                        if not infer_datetime_format:
                            if errors == 'raise':
                                raise
                            result = arg

            if result is None and (format is None or infer_datetime_format):
                result = tslib.array_to_datetime(
                    arg,
                    errors=errors,
                    utc=utc,
                    dayfirst=dayfirst,
                    yearfirst=yearfirst,
                    require_iso8601=require_iso8601)

            if is_datetime64_dtype(result) and box:
                result = DatetimeIndex(result, tz=tz, name=name)
            return result

        except ValueError as e:
            # Last resort: try converting datetime objects directly; if
            # that fails too, re-raise the original parsing error.
            try:
                values, tz = tslib.datetime_to_datetime64(arg)
                return DatetimeIndex._simple_new(values, name=name, tz=tz)
            except (ValueError, TypeError):
                raise e