예제 #1
0
    def test_shift2(self):
        """Check the index produced by ``shift`` with a ``freq`` argument,
        and the error raised when shifting an index that has no freq."""
        hourly_index = date_range('1/1/2000', periods=5, freq='H')
        series = Series(np.random.randn(5), index=hourly_index)

        # Shifting the Series by a freq must match shifting its index.
        shifted = series.shift(1, freq='5T')
        tm.assert_index_equal(shifted.index,
                              series.index.shift(1, freq='5T'))

        # GH #1063, multiple of same base
        shifted = series.shift(1, freq='4H')
        tm.assert_index_equal(shifted.index,
                              series.index + offsets.Hour(4))

        # Irregularly spaced dates: shifting by periods alone must raise.
        irregular = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-04'])
        with pytest.raises(NullFrequencyError,
                           match="Cannot shift with no freq"):
            irregular.shift(1)
예제 #2
0
 def to_timestamp(self, freq=None, how='start'):
     """
     Build a DatetimeIndex from this object's underlying data.

     ``freq`` and ``how`` are forwarded as keywords to
     ``self._data.to_timestamp``; the returned index carries ``self.name``
     and the frequency of the converted data.
     """
     from pandas import DatetimeIndex

     stamps = self._data.to_timestamp(freq=freq, how=how)
     i8_values, stamp_freq = stamps.asi8, stamps.freq
     return DatetimeIndex._simple_new(i8_values, name=self.name,
                                      freq=stamp_freq)
예제 #3
0
파일: datetimes.py 프로젝트: Axik/pandas
    def _convert_listlike(arg, box, format, name=None, tz=tz):
        """
        Convert a list-like of date-likes to datetimes.

        Besides its parameters, this closure reads ``utc``, ``unit``,
        ``errors``, ``infer_datetime_format``, ``dayfirst``, ``yearfirst``
        and ``exact`` from the enclosing scope.

        Parameters
        ----------
        arg : list, tuple, ndarray, Series or Index
            Values to convert; lists and tuples are first turned into
            object ndarrays.
        box : bool
            If True, datetime64 results are wrapped in a DatetimeIndex
            (or a plain Index on the ``unit`` path with errors='ignore').
        format : str or None
            strptime-style format; cannot be combined with ``unit``.
        name : object, default None
            Name given to any index that is constructed.
        tz : object
            Timezone handed to the DatetimeIndex constructor.

        Returns
        -------
        DatetimeIndex, Index or array-like
            Already-datetime input may be returned unchanged.

        Raises
        ------
        ValueError
            If both ``format`` and ``unit`` are given, or the values
            cannot be converted.
        TypeError
            If ``arg`` has more than one dimension.
        """

        if isinstance(arg, (list, tuple)):
            arg = np.array(arg, dtype='O')

        # these are shortcutable
        if is_datetime64tz_dtype(arg):
            if not isinstance(arg, DatetimeIndex):
                return DatetimeIndex(arg, tz=tz, name=name)
            if utc:
                arg = arg.tz_convert(None).tz_localize('UTC')
            return arg

        elif is_datetime64_ns_dtype(arg):
            if box and not isinstance(arg, DatetimeIndex):
                try:
                    return DatetimeIndex(arg, tz=tz, name=name)
                except ValueError:
                    # best effort: fall through and return the input as-is
                    pass

            return arg

        elif unit is not None:
            if format is not None:
                raise ValueError("cannot specify both format and unit")
            arg = getattr(arg, 'values', arg)
            result = tslib.array_with_unit_to_datetime(arg, unit,
                                                       errors=errors)
            if box:
                if errors == 'ignore':
                    # the result may not be datetime-like, so use a plain
                    # Index rather than a DatetimeIndex
                    from pandas import Index
                    return Index(result)

                return DatetimeIndex(result, tz=tz, name=name)
            return result
        elif getattr(arg, 'ndim', 1) > 1:
            raise TypeError('arg must be a string, datetime, list, tuple, '
                            '1-d array, or Series')

        arg = _ensure_object(arg)
        require_iso8601 = False

        if infer_datetime_format and format is None:
            format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)

        if format is not None:
            # There is a special fast-path for iso8601 formatted
            # datetime strings, so in those cases don't use the inferred
            # format because this path makes process slower in this
            # special case
            format_is_iso8601 = _format_is_iso(format)
            if format_is_iso8601:
                require_iso8601 = not infer_datetime_format
                format = None

        try:
            result = None

            if format is not None:
                # shortcut formatting here
                if format == '%Y%m%d':
                    try:
                        result = _attempt_YYYYMMDD(arg, errors=errors)
                    except Exception:
                        # Exception rather than a bare except, so that
                        # KeyboardInterrupt/SystemExit are not re-labelled
                        # as a ValueError.
                        raise ValueError("cannot convert the input to "
                                         "'%Y%m%d' date format")

                # fallback
                if result is None:
                    try:
                        result = array_strptime(arg, format, exact=exact,
                                                errors=errors)
                    except tslib.OutOfBoundsDatetime:
                        if errors == 'raise':
                            raise
                        result = arg
                    except ValueError:
                        # if format was inferred, try falling back
                        # to array_to_datetime - terminate here
                        # for specified formats
                        if not infer_datetime_format:
                            if errors == 'raise':
                                raise
                            result = arg

            if result is None and (format is None or infer_datetime_format):
                result = tslib.array_to_datetime(
                    arg,
                    errors=errors,
                    utc=utc,
                    dayfirst=dayfirst,
                    yearfirst=yearfirst,
                    require_iso8601=require_iso8601
                )

            if is_datetime64_dtype(result) and box:
                result = DatetimeIndex(result, tz=tz, name=name)
            return result

        except ValueError as e:
            # Last resort: try converting datetime objects directly; if
            # that fails too, surface the original error.
            try:
                values, tz = tslib.datetime_to_datetime64(arg)
                return DatetimeIndex._simple_new(values, name=name, tz=tz)
            except (ValueError, TypeError):
                raise e
예제 #4
0
    def test_to_datetime_unit(self):
        """Exercise ``to_datetime(..., unit=...)`` on ints, floats, NaT-likes
        and unconvertible values."""
        epoch = 1370745748
        origin = Timestamp('2013-06-09 02:42:28')

        def raw(steps):
            # epoch values offset by each step (in seconds)
            return [epoch + step for step in steps]

        def stamps(steps):
            # timestamps the raw values above are expected to convert to
            return [origin + timedelta(seconds=step) for step in steps]

        # integer seconds
        converted = to_datetime(Series(raw(range(20))), unit='s')
        assert_series_equal(converted, Series(stamps(range(20))))

        # the same values as floats
        converted = to_datetime(Series(raw(range(20))).astype(float),
                                unit='s')
        assert_series_equal(converted, Series(stamps(range(20))))

        # trailing iNaT, integer dtype
        converted = to_datetime(Series(raw(range(20)) + [iNaT]), unit='s')
        assert_series_equal(converted, Series(stamps(range(20)) + [NaT]))

        # trailing iNaT, cast to float
        converted = to_datetime(
            Series(raw(range(20)) + [iNaT]).astype(float), unit='s')
        assert_series_equal(converted, Series(stamps(range(20)) + [NaT]))

        # GH13834
        quarter_steps = np.arange(0, 2, .25)
        converted = to_datetime(
            Series(raw(quarter_steps) + [iNaT]).astype(float), unit='s')
        assert_series_equal(converted,
                            Series(stamps(quarter_steps) + [NaT]))

        # float values followed by an explicit NaN
        float_part = Series(raw(range(20))).astype(float)
        with_nan = concat([float_part, Series([np.nan])], ignore_index=True)
        converted = to_datetime(with_nan, unit='s')
        assert_series_equal(converted, Series(stamps(range(20)) + [NaT]))

        # list input mixing numbers with several NaT spellings
        first_days = [Timestamp('1970-01-02'), Timestamp('1970-01-03')]
        converted = to_datetime([1, 2, 'NaT', pd.NaT, np.nan], unit='D')
        tm.assert_index_equal(converted,
                              DatetimeIndex(first_days + ['NaT'] * 3))

        bad_inputs = ([1, 2, 'foo'], [1, 2, 111111111])
        for values in bad_inputs:
            with pytest.raises(ValueError):
                to_datetime(values, unit='D')

        # coerce we can process
        coerced_expected = DatetimeIndex(first_days + ['NaT'] * 1)
        for values in bad_inputs:
            converted = to_datetime(values, unit='D', errors='coerce')
            tm.assert_index_equal(converted, coerced_expected)
예제 #5
0
    def _convert_listlike(arg, box, format, name=None, tz=tz):
        """
        Convert a list-like of date-likes to datetimes.

        This is a closure: ``utc``, ``unit``, ``errors``,
        ``infer_datetime_format``, ``dayfirst``, ``yearfirst`` and ``exact``
        are taken from the enclosing scope.

        Parameters
        ----------
        arg : list, tuple, ndarray, Series or Index
            Values to convert; lists and tuples are converted to object
            ndarrays before anything else happens.
        box : bool
            Whether datetime64 results are wrapped in a DatetimeIndex
            (a plain Index on the ``unit`` path when errors='ignore').
        format : str or None
            strptime-style format; mutually exclusive with ``unit``.
        name : object, default None
            Name for any index that gets constructed.
        tz : object
            Timezone passed on to the DatetimeIndex constructor.

        Returns
        -------
        DatetimeIndex, Index or array-like
            Input that is already datetime-typed may come back unchanged.

        Raises
        ------
        ValueError
            When ``format`` and ``unit`` are both supplied, or conversion
            fails.
        TypeError
            When ``arg`` has more than one dimension.
        """

        if isinstance(arg, (list, tuple)):
            arg = np.array(arg, dtype='O')

        # these are shortcutable
        if is_datetime64tz_dtype(arg):
            if not isinstance(arg, DatetimeIndex):
                return DatetimeIndex(arg, tz=tz, name=name)
            if utc:
                arg = arg.tz_convert(None).tz_localize('UTC')
            return arg

        elif is_datetime64_ns_dtype(arg):
            if box and not isinstance(arg, DatetimeIndex):
                try:
                    return DatetimeIndex(arg, tz=tz, name=name)
                except ValueError:
                    # best effort: fall through and return the input as-is
                    pass

            return arg

        elif unit is not None:
            if format is not None:
                raise ValueError("cannot specify both format and unit")
            arg = getattr(arg, 'values', arg)
            result = tslib.array_with_unit_to_datetime(arg,
                                                       unit,
                                                       errors=errors)
            if box:
                if errors == 'ignore':
                    # the result may not be datetime-like, so use a plain
                    # Index rather than a DatetimeIndex
                    from pandas import Index
                    return Index(result)

                return DatetimeIndex(result, tz=tz, name=name)
            return result
        elif getattr(arg, 'ndim', 1) > 1:
            raise TypeError('arg must be a string, datetime, list, tuple, '
                            '1-d array, or Series')

        arg = _ensure_object(arg)
        require_iso8601 = False

        if infer_datetime_format and format is None:
            format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)

        if format is not None:
            # There is a special fast-path for iso8601 formatted
            # datetime strings, so in those cases don't use the inferred
            # format because this path makes process slower in this
            # special case
            format_is_iso8601 = _format_is_iso(format)
            if format_is_iso8601:
                require_iso8601 = not infer_datetime_format
                format = None

        try:
            result = None

            if format is not None:
                # shortcut formatting here
                if format == '%Y%m%d':
                    try:
                        result = _attempt_YYYYMMDD(arg, errors=errors)
                    except Exception:
                        # Exception rather than a bare except, so that
                        # KeyboardInterrupt/SystemExit are not re-labelled
                        # as a ValueError.
                        raise ValueError("cannot convert the input to "
                                         "'%Y%m%d' date format")

                # fallback
                if result is None:
                    try:
                        result = tslib.array_strptime(arg,
                                                      format,
                                                      exact=exact,
                                                      errors=errors)
                    except tslib.OutOfBoundsDatetime:
                        if errors == 'raise':
                            raise
                        result = arg
                    except ValueError:
                        # if format was inferred, try falling back
                        # to array_to_datetime - terminate here
                        # for specified formats
                        if not infer_datetime_format:
                            if errors == 'raise':
                                raise
                            result = arg

            if result is None and (format is None or infer_datetime_format):
                result = tslib.array_to_datetime(
                    arg,
                    errors=errors,
                    utc=utc,
                    dayfirst=dayfirst,
                    yearfirst=yearfirst,
                    require_iso8601=require_iso8601)

            if is_datetime64_dtype(result) and box:
                result = DatetimeIndex(result, tz=tz, name=name)
            return result

        except ValueError as e:
            # Last resort: try converting datetime objects directly; if
            # that fails too, surface the original error.
            try:
                values, tz = tslib.datetime_to_datetime64(arg)
                return DatetimeIndex._simple_new(values, name=name, tz=tz)
            except (ValueError, TypeError):
                raise e
예제 #6
0
파일: period.py 프로젝트: silpa21/Pandas
 def to_timestamp(self, freq=None, how="start") -> DatetimeIndex:
     """
     Build a DatetimeIndex from this object's underlying data.

     ``freq`` and ``how`` are passed positionally to
     ``self._data.to_timestamp``; the returned index carries ``self.name``.
     """
     timestamps = self._data.to_timestamp(freq, how)
     index_name = self.name
     return DatetimeIndex._simple_new(timestamps, name=index_name)
예제 #7
0
    def test_constructor_with_datetime_tz(self):
        """Check Series built from tz-aware datetimes: dtype, export to
        ndarray, indexing, concat, astype, repr and dtype inference."""

        # 8260
        # support datetime64 with tz

        dr = date_range('20130101', periods=3, tz='US/Eastern')
        s = Series(dr)
        self.assertTrue(s.dtype.name == 'datetime64[ns, US/Eastern]')
        self.assertTrue(s.dtype == 'datetime64[ns, US/Eastern]')
        self.assertTrue(is_datetime64tz_dtype(s.dtype))
        self.assertTrue('datetime64[ns, US/Eastern]' in str(s))

        # export: .values is a plain datetime64[ns] ndarray
        result = s.values
        assert isinstance(result, np.ndarray)
        self.assertTrue(result.dtype == 'datetime64[ns]')

        # localizing the exported values to UTC and converting back to the
        # Series' tz must reproduce the original range
        exp = pd.DatetimeIndex(result)
        exp = exp.tz_localize('UTC').tz_convert(tz=s.dt.tz)
        tm.assert_index_equal(dr, exp)

        # indexing: positional and [] access both give the first Timestamp
        result = s.iloc[0]
        self.assertEqual(
            result,
            Timestamp('2013-01-01 00:00:00-0500', tz='US/Eastern', freq='D'))
        result = s[0]
        self.assertEqual(
            result,
            Timestamp('2013-01-01 00:00:00-0500', tz='US/Eastern', freq='D'))

        # boolean-mask indexing
        result = s[Series([True, True, False], index=s.index)]
        assert_series_equal(result, s[0:2])

        # positional slicing
        result = s.iloc[0:1]
        assert_series_equal(result, Series(dr[0:1]))

        # concat: re-joining the pieces gives back the original Series
        result = pd.concat([s.iloc[0:1], s.iloc[1:]])
        assert_series_equal(result, s)

        # astype to object
        result = s.astype(object)
        expected = Series(DatetimeIndex(s._values).asobject)
        assert_series_equal(result, expected)

        # round trip through the exported values via the .dt accessor
        result = Series(s.values).dt.tz_localize('UTC').dt.tz_convert(s.dt.tz)
        assert_series_equal(result, s)

        # astype - datetime64[ns, tz]
        result = Series(s.values).astype('datetime64[ns, US/Eastern]')
        assert_series_equal(result, s)

        result = Series(s.values).astype(s.dtype)
        assert_series_equal(result, s)

        # astype to a different timezone
        result = s.astype('datetime64[ns, CET]')
        expected = Series(date_range('20130101 06:00:00', periods=3, tz='CET'))
        assert_series_equal(result, expected)

        # short str
        self.assertTrue('datetime64[ns, US/Eastern]' in str(s))

        # formatting with NaT
        result = s.shift()
        self.assertTrue('datetime64[ns, US/Eastern]' in str(result))
        self.assertTrue('NaT' in str(result))

        # long str
        t = Series(date_range('20130101', periods=1000, tz='US/Eastern'))
        self.assertTrue('datetime64[ns, US/Eastern]' in str(t))

        # rebuilding an index with an inferred freq matches the source range
        result = pd.DatetimeIndex(s, freq='infer')
        tm.assert_index_equal(result, dr)

        # inference: Timestamps sharing one tz give a tz-aware dtype
        s = Series([
            pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'),
            pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Pacific')
        ])
        self.assertTrue(s.dtype == 'datetime64[ns, US/Pacific]')
        self.assertTrue(lib.infer_dtype(s) == 'datetime64')

        # inference: mixed timezones give object dtype
        s = Series([
            pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'),
            pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Eastern')
        ])
        self.assertTrue(s.dtype == 'object')
        self.assertTrue(lib.infer_dtype(s) == 'datetime')

        # with all NaT
        s = Series(pd.NaT, index=[0, 1], dtype='datetime64[ns, US/Eastern]')
        expected = Series(pd.DatetimeIndex(['NaT', 'NaT'], tz='US/Eastern'))
        assert_series_equal(s, expected)
예제 #8
0
def _convert_listlike_datetimes(
    arg,
    format: Optional[str],
    name: Hashable = None,
    tz: Optional[Timezone] = None,
    unit: Optional[str] = None,
    errors: Optional[str] = None,
    infer_datetime_format: bool = False,
    dayfirst: Optional[bool] = None,
    yearfirst: Optional[bool] = None,
    exact: bool = True,
):
    """
    Helper function for to_datetime. Performs the conversions of 1D listlike
    of dates

    Parameters
    ----------
    arg : list, tuple, ndarray, Series, Index
        date to be parsed
    format : str or None
        format string for parsing; cannot be combined with ``unit``. When
        None and ``infer_datetime_format`` is set, a format is guessed
        from ``arg``.
    name : object
        None or string for the Index name
    tz : object
        None or 'utc'
    unit : string
        None or string of the frequency of the passed data
    errors : string
        error handling behaviors from to_datetime, 'raise', 'coerce', 'ignore'
    infer_datetime_format : bool, default False
        inferring format behavior from to_datetime
    dayfirst : boolean
        dayfirst parsing behavior from to_datetime
    yearfirst : boolean
        yearfirst parsing behavior from to_datetime
    exact : bool, default True
        exact format matching behavior from to_datetime

    Returns
    -------
    Index-like of parsed dates

    Raises
    ------
    ValueError
        If both ``format`` and ``unit`` are specified.
    TypeError
        If ``arg`` has more than one dimension, or if
        ``maybe_convert_dtype`` rejects it and ``errors`` is neither
        'coerce' nor 'ignore'.
    """

    # Lists and tuples are handled as object ndarrays from here on.
    if isinstance(arg, (list, tuple)):
        arg = np.array(arg, dtype="O")

    arg_dtype = getattr(arg, "dtype", None)
    # these are shortcutable
    if is_datetime64tz_dtype(arg_dtype):
        # already tz-aware
        if not isinstance(arg, (DatetimeArray, DatetimeIndex)):
            return DatetimeIndex(arg, tz=tz, name=name)
        if tz == "utc":
            arg = arg.tz_convert(None).tz_localize(tz)
        return arg

    elif is_datetime64_ns_dtype(arg_dtype):
        # already datetime64[ns]
        if not isinstance(arg, (DatetimeArray, DatetimeIndex)):
            try:
                return DatetimeIndex(arg, tz=tz, name=name)
            except ValueError:
                # best effort: fall through and return ``arg`` unchanged
                pass
        elif tz:
            # DatetimeArray, DatetimeIndex
            return arg.tz_localize(tz)

        return arg

    elif unit is not None:
        if format is not None:
            raise ValueError("cannot specify both format and unit")
        return _to_datetime_with_unit(arg, unit, name, tz, errors)
    elif getattr(arg, "ndim", 1) > 1:
        raise TypeError(
            "arg must be a string, datetime, list, tuple, 1-d array, or Series"
        )

    # warn if passing timedelta64, raise for PeriodDtype
    # NB: this must come after unit transformation
    orig_arg = arg
    try:
        arg, _ = maybe_convert_dtype(arg, copy=False)
    except TypeError:
        if errors == "coerce":
            # every element becomes NaT
            result = np.array(["NaT"], dtype="datetime64[ns]").repeat(len(arg))
            return DatetimeIndex(result, name=name)
        elif errors == "ignore":
            # hand the input back, wrapped in a plain Index
            # error: Incompatible types in assignment (expression has type
            # "Index", variable has type "ExtensionArray")
            result = Index(arg, name=name)  # type: ignore[assignment]
            return result
        raise

    arg = ensure_object(arg)
    require_iso8601 = False

    if infer_datetime_format and format is None:
        format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)

    if format is not None:
        # There is a special fast-path for iso8601 formatted
        # datetime strings, so in those cases don't use the inferred
        # format because this path makes process slower in this
        # special case
        format_is_iso8601 = format_is_iso(format)
        if format_is_iso8601:
            # only insist on ISO 8601 when the format was given explicitly
            require_iso8601 = not infer_datetime_format
            format = None

    # error: Incompatible types in assignment (expression has type "None", variable has
    # type "ExtensionArray")
    result = None  # type: ignore[assignment]

    if format is not None:
        # error: Incompatible types in assignment (expression has type
        # "Optional[Index]", variable has type "ndarray")
        result = _to_datetime_with_format(  # type: ignore[assignment]
            arg, orig_arg, name, tz, format, exact, errors, infer_datetime_format
        )
        if result is not None:
            return result

    # Reached when there is no format left to apply or the format-based
    # helper returned None.
    if result is None:
        assert format is None or infer_datetime_format
        utc = tz == "utc"
        result, tz_parsed = objects_to_datetime64ns(
            arg,
            dayfirst=dayfirst,
            yearfirst=yearfirst,
            utc=utc,
            errors=errors,
            require_iso8601=require_iso8601,
            allow_object=True,
        )

        if tz_parsed is not None:
            # We can take a shortcut since the datetime64 numpy array
            # is in UTC
            dta = DatetimeArray(result, dtype=tz_to_dtype(tz_parsed))
            return DatetimeIndex._simple_new(dta, name=name)

    utc = tz == "utc"
    return _box_as_indexlike(result, utc=utc, name=name)
예제 #9
0
def _convert_listlike_datetimes(
    arg,
    format: Optional[str],
    name: Hashable = None,
    tz: Optional[Timezone] = None,
    unit: Optional[str] = None,
    errors: Optional[str] = None,
    infer_datetime_format: Optional[bool] = None,
    dayfirst: Optional[bool] = None,
    yearfirst: Optional[bool] = None,
    exact: Optional[bool] = None,
):
    """
    Helper function for to_datetime. Performs the conversions of 1D listlike
    of dates

    Parameters
    ----------
    arg : list, tuple, ndarray, Series, Index
        date to be parsed
    format : str or None
        format string for parsing; cannot be combined with ``unit``. When
        None and ``infer_datetime_format`` is set, a format is guessed
        from ``arg``.
    name : object
        None or string for the Index name
    tz : object
        None or 'utc'
    unit : string
        None or string of the frequency of the passed data
    errors : string
        error handling behaviors from to_datetime, 'raise', 'coerce', 'ignore'
    infer_datetime_format : boolean
        inferring format behavior from to_datetime
    dayfirst : boolean
        dayfirst parsing behavior from to_datetime
    yearfirst : boolean
        yearfirst parsing behavior from to_datetime
    exact : boolean
        exact format matching behavior from to_datetime

    Returns
    -------
    Index-like of parsed dates

    Raises
    ------
    ValueError
        If both ``format`` and ``unit`` are specified, or parsing with the
        given format fails and ``errors`` is 'raise'.
    TypeError
        If ``arg`` has more than one dimension, or if
        ``maybe_convert_dtype`` rejects it and ``errors`` is neither
        'coerce' nor 'ignore'.
    """

    # Lists and tuples are handled as object ndarrays from here on.
    if isinstance(arg, (list, tuple)):
        arg = np.array(arg, dtype="O")

    arg_dtype = getattr(arg, "dtype", None)
    # these are shortcutable
    if is_datetime64tz_dtype(arg_dtype):
        # already tz-aware
        if not isinstance(arg, (DatetimeArray, DatetimeIndex)):
            return DatetimeIndex(arg, tz=tz, name=name)
        if tz == "utc":
            arg = arg.tz_convert(None).tz_localize(tz)
        return arg

    elif is_datetime64_ns_dtype(arg_dtype):
        # already datetime64[ns]
        if not isinstance(arg, (DatetimeArray, DatetimeIndex)):
            try:
                return DatetimeIndex(arg, tz=tz, name=name)
            except ValueError:
                # best effort: fall through and return ``arg`` unchanged
                pass
        elif tz:
            # DatetimeArray, DatetimeIndex
            return arg.tz_localize(tz)

        return arg

    elif unit is not None:
        if format is not None:
            raise ValueError("cannot specify both format and unit")
        # unwrap Series/Index to the underlying array when there is one
        arg = getattr(arg, "_values", arg)

        # GH 30050 pass an ndarray to tslib.array_with_unit_to_datetime
        # because it expects an ndarray argument
        if isinstance(arg, IntegerArray):
            result = arg.astype(f"datetime64[{unit}]")
            tz_parsed = None
        else:

            result, tz_parsed = tslib.array_with_unit_to_datetime(
                arg, unit, errors=errors)

        if errors == "ignore":

            # the result may not be datetime-like, so use a plain Index
            result = Index(result, name=name)
        else:
            result = DatetimeIndex(result, name=name)
        # GH 23758: We may still need to localize the result with tz
        # GH 25546: Apply tz_parsed first (from arg), then tz (from caller)
        # result will be naive but in UTC
        try:
            result = result.tz_localize("UTC").tz_convert(tz_parsed)
        except AttributeError:
            # Regular Index from 'ignore' path
            return result
        if tz is not None:
            # localize a naive result, convert an already-aware one
            if result.tz is None:
                result = result.tz_localize(tz)
            else:
                result = result.tz_convert(tz)
        return result
    elif getattr(arg, "ndim", 1) > 1:
        raise TypeError(
            "arg must be a string, datetime, list, tuple, 1-d array, or Series"
        )

    # warn if passing timedelta64, raise for PeriodDtype
    # NB: this must come after unit transformation
    orig_arg = arg
    try:
        arg, _ = maybe_convert_dtype(arg, copy=False)
    except TypeError:
        if errors == "coerce":
            # every element becomes NaT
            result = np.array(["NaT"], dtype="datetime64[ns]").repeat(len(arg))
            return DatetimeIndex(result, name=name)
        elif errors == "ignore":
            # hand the input back, wrapped in a plain Index
            result = Index(arg, name=name)
            return result
        raise

    arg = ensure_object(arg)
    require_iso8601 = False

    if infer_datetime_format and format is None:
        format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)

    if format is not None:
        # There is a special fast-path for iso8601 formatted
        # datetime strings, so in those cases don't use the inferred
        # format because this path makes process slower in this
        # special case
        format_is_iso8601 = format_is_iso(format)
        if format_is_iso8601:
            # only insist on ISO 8601 when the format was given explicitly
            require_iso8601 = not infer_datetime_format
            format = None

    tz_parsed = None
    result = None

    if format is not None:
        try:
            # shortcut formatting here
            if format == "%Y%m%d":
                try:
                    # pass orig_arg as float-dtype may have been converted to
                    # datetime64[ns]
                    orig_arg = ensure_object(orig_arg)
                    result = _attempt_YYYYMMDD(orig_arg, errors=errors)
                except (ValueError, TypeError, OutOfBoundsDatetime) as err:
                    raise ValueError(
                        "cannot convert the input to '%Y%m%d' date format"
                    ) from err

            # fallback
            if result is None:
                try:
                    result, timezones = array_strptime(arg,
                                                       format,
                                                       exact=exact,
                                                       errors=errors)
                    # timezone directives in the format get dedicated
                    # handling of the parsed timezones
                    if "%Z" in format or "%z" in format:
                        return _return_parsed_timezone_results(
                            result, timezones, tz, name)
                except OutOfBoundsDatetime:
                    if errors == "raise":
                        raise
                    elif errors == "coerce":
                        # all-NaT array of the same shape as the input
                        result = np.empty(arg.shape, dtype="M8[ns]")
                        iresult = result.view("i8")
                        iresult.fill(iNaT)
                    else:
                        # errors == "ignore": return the input unparsed
                        result = arg
                except ValueError:
                    # if format was inferred, try falling back
                    # to array_to_datetime - terminate here
                    # for specified formats
                    if not infer_datetime_format:
                        if errors == "raise":
                            raise
                        elif errors == "coerce":
                            # all-NaT array of the same shape as the input
                            result = np.empty(arg.shape, dtype="M8[ns]")
                            iresult = result.view("i8")
                            iresult.fill(iNaT)
                        else:
                            # errors == "ignore": return the input unparsed
                            result = arg
        except ValueError as e:
            # Fallback to try to convert datetime objects if timezone-aware
            #  datetime objects are found without passing `utc=True`
            try:
                values, tz = conversion.datetime_to_datetime64(arg)
                dta = DatetimeArray(values, dtype=tz_to_dtype(tz))
                return DatetimeIndex._simple_new(dta, name=name)
            except (ValueError, TypeError):
                # the fallback failed too: surface the original error
                raise e

    # Reached when there is no format left to apply, or an inferred format
    # failed to parse.
    if result is None:
        assert format is None or infer_datetime_format
        utc = tz == "utc"
        result, tz_parsed = objects_to_datetime64ns(
            arg,
            dayfirst=dayfirst,
            yearfirst=yearfirst,
            utc=utc,
            errors=errors,
            require_iso8601=require_iso8601,
            allow_object=True,
        )

    if tz_parsed is not None:
        # We can take a shortcut since the datetime64 numpy array
        # is in UTC
        dta = DatetimeArray(result, dtype=tz_to_dtype(tz_parsed))
        return DatetimeIndex._simple_new(dta, name=name)

    utc = tz == "utc"
    return _box_as_indexlike(result, utc=utc, name=name)
예제 #10
0
    def test_to_datetime_unit(self):
        """Exercise ``to_datetime(..., unit=...)`` on ints, floats, NaT-likes
        and unconvertible values."""
        epoch = 1370745748
        origin = Timestamp("2013-06-09 02:42:28")

        def raw(steps):
            # epoch values offset by each step (in seconds)
            return [epoch + step for step in steps]

        def stamps(steps):
            # timestamps the raw values above are expected to convert to
            return [origin + timedelta(seconds=step) for step in steps]

        # integer seconds
        converted = to_datetime(Series(raw(range(20))), unit="s")
        tm.assert_series_equal(converted, Series(stamps(range(20))))

        # the same values as floats
        converted = to_datetime(Series(raw(range(20))).astype(float),
                                unit="s")
        tm.assert_series_equal(converted, Series(stamps(range(20))))

        # trailing iNaT, integer dtype
        converted = to_datetime(Series(raw(range(20)) + [iNaT]), unit="s")
        tm.assert_series_equal(converted, Series(stamps(range(20)) + [NaT]))

        # trailing iNaT, cast to float
        converted = to_datetime(
            Series(raw(range(20)) + [iNaT]).astype(float), unit="s")
        tm.assert_series_equal(converted, Series(stamps(range(20)) + [NaT]))

        # GH13834
        quarter_steps = np.arange(0, 2, 0.25)
        converted = to_datetime(
            Series(raw(quarter_steps) + [iNaT]).astype(float), unit="s")
        tm.assert_series_equal(converted,
                               Series(stamps(quarter_steps) + [NaT]))

        # float values followed by an explicit NaN
        float_part = Series(raw(range(20))).astype(float)
        with_nan = concat([float_part, Series([np.nan])], ignore_index=True)
        converted = to_datetime(with_nan, unit="s")
        tm.assert_series_equal(converted, Series(stamps(range(20)) + [NaT]))

        # list input mixing numbers with several NaT spellings
        first_days = [Timestamp("1970-01-02"), Timestamp("1970-01-03")]
        converted = to_datetime([1, 2, "NaT", pd.NaT, np.nan], unit="D")
        tm.assert_index_equal(converted,
                              DatetimeIndex(first_days + ["NaT"] * 3))

        # unconvertible inputs raise, each with its own error type/message
        failing_cases = [
            ([1, 2, "foo"], ValueError,
             "non convertible value foo with the unit 'D'"),
            ([1, 2, 111111111], OutOfBoundsDatetime,
             "cannot convert input 111111111 with the unit 'D'"),
        ]
        for values, exc_type, msg in failing_cases:
            with pytest.raises(exc_type, match=msg):
                to_datetime(values, unit="D")

        # coerce we can process
        coerced_expected = DatetimeIndex(first_days + ["NaT"] * 1)
        for values, _, _ in failing_cases:
            converted = to_datetime(values, unit="D", errors="coerce")
            tm.assert_index_equal(converted, coerced_expected)
예제 #11
0
 def to_timestamp(self, freq=None, how='start'):
     """
     Return a DatetimeIndex built from ``self._data.to_timestamp``.

     ``freq`` and ``how`` are forwarded as keywords; the new index is
     assembled from the converted data's ``asi8`` values and ``freq``
     and is given ``self.name``.
     """
     from pandas import DatetimeIndex

     converted = self._data.to_timestamp(freq=freq, how=how)
     index_kwargs = {'name': self.name, 'freq': converted.freq}
     return DatetimeIndex._simple_new(converted.asi8, **index_kwargs)
예제 #12
0
    def _get_time_bins(self, ax):
        """
        Compute the bin edges, bin positions and bin labels for ``ax``.

        Parameters
        ----------
        ax : DatetimeIndex
            Axis whose values are to be binned.

        Returns
        -------
        tuple
            ``(binner, bins, labels)``; for an empty ``ax`` both indexes
            are the same empty DatetimeIndex and ``bins`` is ``[]``.

        Raises
        ------
        TypeError
            If ``ax`` is not a DatetimeIndex.
        """
        if not isinstance(ax, DatetimeIndex):
            raise TypeError('axis must be a DatetimeIndex, but got '
                            'an instance of %r' % type(ax).__name__)

        if len(ax) == 0:
            empty = DatetimeIndex(data=[], freq=self.freq, name=ax.name)
            return empty, [], empty

        range_start, range_end = _get_range_edges(ax.min(),
                                                  ax.max(),
                                                  self.freq,
                                                  closed=self.closed,
                                                  base=self.base)
        # GH #12037
        # use the range edges directly instead of calling replace() on them
        # because replace() will swallow the nanosecond part
        # thus last bin maybe slightly before the end if the end contains
        # nanosecond part and lead to `Values falls after last bin` error
        edges = DatetimeIndex(freq=self.freq,
                              start=range_start,
                              end=range_end,
                              tz=ax.tz,
                              name=ax.name)
        binner = labels = edges

        closed_right = self.closed == 'right'

        # a little hack
        trimmed = False
        if closed_right and len(binner) > 2 and binner[-2] == range_end:
            binner = binner[:-1]
            trimmed = True

        i8_values = ax.asi8
        binner, bin_edges = self._adjust_bin_edges(binner, i8_values)

        # general version, knowing nothing about relative frequencies
        bins = lib.generate_bins_dt64(i8_values,
                                      bin_edges,
                                      self.closed,
                                      hasnans=ax.hasnans)

        # right-closed bins take their labels from the adjusted binner
        if closed_right:
            labels = binner

        # the trimming rule is the same whichever side is closed
        if self.label == 'right':
            labels = labels[1:]
        elif not trimmed:
            labels = labels[:-1]

        if ax.hasnans:
            binner = binner.insert(0, tslib.NaT)
            labels = labels.insert(0, tslib.NaT)

        # if we end up with more labels than bins
        # adjust the labels
        # GH4076
        if len(bins) < len(labels):
            labels = labels[:len(bins)]

        return binner, bins, labels