def _get_time_bins(self, ax):
    """
    Build the bin edges, bin positions and labels used to group ``ax``.

    Parameters
    ----------
    ax : DatetimeIndex
        axis whose values are assigned to bins of width ``self.freq``

    Returns
    -------
    binner : DatetimeIndex
        bin edges (NaT is prepended when ``ax`` has missing values)
    bins : result of ``lib.generate_bins_dt64`` (an empty list when ``ax``
        is empty)
    labels : DatetimeIndex
        labels selected according to ``self.closed`` / ``self.label``

    Raises
    ------
    TypeError
        if ``ax`` is not a DatetimeIndex
    """
    if not isinstance(ax, DatetimeIndex):
        raise TypeError('axis must be a DatetimeIndex, but got '
                        'an instance of %r' % type(ax).__name__)

    if len(ax) == 0:
        binner = labels = DatetimeIndex(data=[], freq=self.freq,
                                        name=ax.name)
        return binner, [], labels

    first, last = ax.min(), ax.max()
    first, last = _get_range_edges(first, last, self.freq,
                                   closed=self.closed,
                                   base=self.base)
    tz = ax.tz
    # GH #12037
    # pass first/last through untouched: calling replace(tzinfo=None) on
    # them swallows the nanosecond part, so the last bin edge could end up
    # slightly before the last value
    binner = labels = DatetimeIndex(freq=self.freq,
                                    start=first,
                                    end=last,
                                    tz=tz,
                                    name=ax.name)

    # a little hack
    trimmed = False
    if (len(binner) > 2 and binner[-2] == last and
            self.closed == 'right'):

        binner = binner[:-1]
        trimmed = True

    ax_values = ax.asi8
    binner, bin_edges = self._adjust_bin_edges(binner, ax_values)

    # general version, knowing nothing about relative frequencies
    bins = lib.generate_bins_dt64(ax_values, bin_edges, self.closed,
                                  hasnans=ax.hasnans)

    if self.closed == 'right':
        labels = binner
        if self.label == 'right':
            labels = labels[1:]
        elif not trimmed:
            labels = labels[:-1]
    else:
        if self.label == 'right':
            labels = labels[1:]
        elif not trimmed:
            labels = labels[:-1]

    if ax.hasnans:
        binner = binner.insert(0, tslib.NaT)
        labels = labels.insert(0, tslib.NaT)

    # if we end up with more labels than bins
    # adjust the labels
    # GH4076
    if len(bins) < len(labels):
        labels = labels[:len(bins)]

    return binner, bins, labels
def _concat_datetime(to_concat, axis=0, typs=None):
    """
    Concatenate datetimelike arrays, each of which holds a single
    M8[ns], datetime64[ns, tz] or m8[ns] dtype.

    Parameters
    ----------
    to_concat : array of arrays
    axis : axis to provide concatenation
    typs : set of to_concat dtypes; looked up with ``get_dtype_kinds``
        when not given

    Returns
    -------
    a single array, preserving the combined dtypes
    """

    def _as_object(values, axis):
        # box datetime / timedelta values into python objects so that
        # mixed inputs can be concatenated as object dtype
        if values.dtype.kind == _NS_DTYPE.kind:
            if getattr(values, 'tz', None) is None:
                boxed = tslib.ints_to_pydatetime(
                    values.view(np.int64).ravel())
                values = boxed.reshape(values.shape)
            else:
                values = values.asobject.values
        elif values.dtype == _TD_DTYPE:
            boxed = tslib.ints_to_pytimedelta(
                values.view(np.int64).ravel())
            values = boxed.reshape(values.shape)

        return np.atleast_2d(values) if axis == 1 else values

    if typs is None:
        typs = get_dtype_kinds(to_concat)

    # a single kind can be concatenated on the integer representation
    if len(typs) == 1:
        if 'datetimetz' in typs:
            # naive datetimes are reported as "datetime" in typs, so only
            # tz-aware inputs get here; every input must share one tz
            tzs = set(str(item.tz) for item in to_concat)
            if len(tzs) == 1:
                from pandas.tseries.index import DatetimeIndex
                pieces = [item.tz_localize(None).asi8 for item in to_concat]
                return DatetimeIndex(np.concatenate(pieces),
                                     tz=list(tzs)[0])
        elif 'datetime' in typs or 'timedelta' in typs:
            target = _NS_DTYPE if 'datetime' in typs else _TD_DTYPE
            pieces = [item.view(np.int64) for item in to_concat]
            return np.concatenate(pieces, axis=axis).view(target)

    # otherwise fall back to object dtype
    boxed_pieces = [_as_object(item, axis) for item in to_concat]
    return np.concatenate(boxed_pieces, axis=axis)
if arg is None: return arg elif isinstance(arg, datetime): return arg elif isinstance(arg, Series): values = _convert_f(arg.values) return Series(values, index=arg.index, name=arg.name) elif isinstance(arg, (np.ndarray, list)): if isinstance(arg, list): arg = np.array(arg, dtype='O') if com.is_datetime64_dtype(arg): if box and not isinstance(arg, DatetimeIndex): try: return DatetimeIndex(arg, tz='utc' if utc else None) except ValueError, e: try: values, tz = lib.datetime_to_datetime64(arg) return DatetimeIndex._simple_new(values, None, tz=tz) except (ValueError, TypeError): raise e return arg try: return _convert_f(arg) except ValueError: raise return arg try:
def _convert_listlike(arg, box, format):
    # Convert a list-like ``arg`` to datetimes.
    #
    # arg    : list, tuple or array-like of values to convert
    # box    : if True, wrap datetime64 results in a DatetimeIndex
    # format : strftime format, or None
    #
    # utc, infer_datetime_format, dayfirst, coerce, errors and unit are
    # read from the enclosing scope.

    if isinstance(arg, (list, tuple)):
        arg = np.array(arg, dtype='O')

    if com.is_datetime64_ns_dtype(arg):
        if box and not isinstance(arg, DatetimeIndex):
            try:
                return DatetimeIndex(arg, tz='utc' if utc else None)
            except ValueError:
                pass

        return arg

    arg = com._ensure_object(arg)

    if infer_datetime_format and format is None:
        format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)

        if format is not None:
            # There is a special fast-path for iso8601 formatted
            # datetime strings, so in those cases don't use the inferred
            # format because this path makes process slower in this
            # special case
            format_is_iso8601 = (
                '%Y-%m-%dT%H:%M:%S.%f'.startswith(format) or
                '%Y-%m-%d %H:%M:%S.%f'.startswith(format)
            )
            if format_is_iso8601:
                format = None

    try:
        result = None

        if format is not None:
            # shortcut formatting here
            if format == '%Y%m%d':
                try:
                    result = _attempt_YYYYMMDD(arg)
                except Exception:
                    # a bare ``except:`` here would also trap
                    # KeyboardInterrupt / SystemExit and re-label them
                    raise ValueError("cannot convert the input to '%Y%m%d' date format")

            # fallback
            if result is None:
                try:
                    result = tslib.array_strptime(
                        arg, format, coerce=coerce
                    )
                except (tslib.OutOfBoundsDatetime):
                    if errors == 'raise':
                        raise
                    result = arg
                except ValueError:
                    # Only raise this error if the user provided the
                    # datetime format, and not when it was inferred
                    if not infer_datetime_format:
                        raise

        if result is None and (format is None or infer_datetime_format):
            result = tslib.array_to_datetime(arg, raise_=errors == 'raise',
                                             utc=utc, dayfirst=dayfirst,
                                             coerce=coerce, unit=unit)

        if com.is_datetime64_dtype(result) and box:
            result = DatetimeIndex(result, tz='utc' if utc else None)
        return result

    except ValueError as e:
        # last resort: try to interpret arg as datetime objects; if that
        # fails as well, surface the original error
        try:
            values, tz = tslib.datetime_to_datetime64(arg)
            return DatetimeIndex._simple_new(values, None, tz=tz)
        except (ValueError, TypeError):
            raise e
def test_cdaterange_holidays(self):
    # a holiday on the start date is skipped, so the range starts on the
    # following day
    rng = cdate_range('2013-05-01', periods=3,
                      holidays=['2013-05-01'])
    xp = DatetimeIndex(['2013-05-02', '2013-05-03', '2013-05-06'])
    # assertTrue instead of the deprecated ``assert_`` alias
    self.assertTrue(xp.equals(rng))
def maybe_to_datetimelike(data, copy=False):
    """
    return a DelegatedClass of a Series that is datetimelike
      (e.g. datetime64[ns],timedelta64[ns] dtype or a Series of Periods)
    raise TypeError if this is not possible.

    Parameters
    ----------
    data : Series
    copy : boolean, default False
           copy the input data

    Returns
    -------
    DelegatedClass

    Raises
    ------
    TypeError
        if data is not a Series, or is not datetimelike
    """
    from pandas import Series

    if not isinstance(data, Series):
        raise TypeError("cannot convert an object of type {0} to a "
                        "datetimelike index".format(type(data)))

    # remember the Series' own index and name: for categorical input
    # ``data`` is rebound to the categories below
    index = data.index
    name = data.name
    orig = data if is_categorical_dtype(data) else None
    if orig is not None:
        data = orig.values.categories

    if is_datetime64_dtype(data.dtype):
        return DatetimeProperties(DatetimeIndex(data, copy=copy, freq='infer'),
                                  index, name=name, orig=orig)
    elif is_datetime64tz_dtype(data.dtype):
        # use the saved ``name`` (not ``data.name``) so that the Series
        # name survives the categorical rebinding, like the other branches
        return DatetimeProperties(DatetimeIndex(data, copy=copy, freq='infer',
                                                ambiguous='infer'),
                                  index, name=name, orig=orig)
    elif is_timedelta64_dtype(data.dtype):
        return TimedeltaProperties(TimedeltaIndex(data, copy=copy,
                                                  freq='infer'),
                                   index, name=name, orig=orig)
    else:
        if is_period_arraylike(data):
            return PeriodProperties(PeriodIndex(data, copy=copy),
                                    index, name=name, orig=orig)
        if is_datetime_arraylike(data):
            return DatetimeProperties(DatetimeIndex(data, copy=copy,
                                                    freq='infer'),
                                      index, name=name, orig=orig)

    raise TypeError("cannot convert an object of type {0} to a "
                    "datetimelike index".format(type(data)))
def to_datetime(arg, errors='ignore', dayfirst=False, utc=None, box=True,
                format=None, coerce=False, unit='ns'):
    """
    Convert argument to datetime

    Parameters
    ----------
    arg : string, datetime, array of strings (with possible NAs)
    errors : {'ignore', 'raise'}, default 'ignore'
        Errors are ignored by default (values left untouched)
    dayfirst : boolean, default False
        If True parses dates with the day first, eg 20/01/2005
        Warning: dayfirst=True is not strict, but will prefer to parse
        with day first (this is a known bug).
    utc : boolean, default None
        Return UTC DatetimeIndex if True (converting any tz-aware
        datetime.datetime objects as well)
    box : boolean, default True
        If True returns a DatetimeIndex, if False returns ndarray of values
    format : string, default None
        strftime to parse time, eg "%d/%m/%Y"
    coerce : force errors to NaT (False by default)
    unit : unit of the arg (D,s,ms,us,ns) denote the unit in epoch
        (e.g. a unix timestamp), which is an integer/float number

    Returns
    -------
    ret : datetime if parsing succeeded
    """
    from pandas import Timestamp
    from pandas.core.series import Series
    from pandas.tseries.index import DatetimeIndex

    def _convert_listlike(arg, box):
        # convert a list-like of values; utc, format, errors, dayfirst,
        # coerce and unit come from the enclosing call

        if isinstance(arg, (list, tuple)):
            arg = np.array(arg, dtype='O')

        if com.is_datetime64_dtype(arg):
            if box and not isinstance(arg, DatetimeIndex):
                try:
                    return DatetimeIndex(arg, tz='utc' if utc else None)
                except ValueError:
                    values, tz = tslib.datetime_to_datetime64(arg)
                    return DatetimeIndex._simple_new(values, None, tz=tz)

            return arg

        arg = com._ensure_object(arg)
        try:
            if format is not None:
                result = tslib.array_strptime(arg, format)
            else:
                result = tslib.array_to_datetime(arg, raise_=errors == 'raise',
                                                 utc=utc, dayfirst=dayfirst,
                                                 coerce=coerce, unit=unit)
            if com.is_datetime64_dtype(result) and box:
                result = DatetimeIndex(result, tz='utc' if utc else None)
            return result

        except ValueError as e:
            # ``except X as e`` is valid on Python 2.6+ and Python 3
            # (the comma form is Python 2 only)
            try:
                values, tz = tslib.datetime_to_datetime64(arg)
                return DatetimeIndex._simple_new(values, None, tz=tz)
            except (ValueError, TypeError):
                raise e
def test_cdaterange(self):
    # three consecutive custom business days starting 2013-05-01
    rng = cdate_range('2013-05-01', periods=3)
    xp = DatetimeIndex(['2013-05-01', '2013-05-02', '2013-05-03'])
    # assertTrue instead of the deprecated ``assert_`` alias
    self.assertTrue(xp.equals(rng))
def test_to_datetime_unit(self):
    # epoch seconds, as ints or floats and with or without missing
    # values, must convert to the matching Timestamps
    epoch = 1370745748
    origin = Timestamp('2013-06-09 02:42:28')

    def stamps(offsets):
        return [origin + timedelta(seconds=t) for t in offsets]

    whole = [epoch + t for t in range(20)]

    # int / float input, first without then with a trailing iNaT
    for tail, exp_tail in (([], []), ([iNaT], [NaT])):
        for as_float in (False, True):
            s = Series(whole + tail)
            if as_float:
                s = s.astype(float)
            result = to_datetime(s, unit='s')
            expected = Series(stamps(range(20)) + exp_tail)
            assert_series_equal(result, expected)

    # GH13834
    fractional = np.arange(0, 2, .25)
    s = Series([epoch + t for t in fractional] + [iNaT]).astype(float)
    result = to_datetime(s, unit='s')
    expected = Series(stamps(fractional) + [NaT])
    assert_series_equal(result, expected)

    # float values followed by an explicit NaN
    s = concat([Series(whole).astype(float), Series([np.nan])],
               ignore_index=True)
    result = to_datetime(s, unit='s')
    expected = Series(stamps(range(20)) + [NaT])
    assert_series_equal(result, expected)

    # list input with several spellings of "missing"
    first_two = [Timestamp('1970-01-02'), Timestamp('1970-01-03')]
    result = to_datetime([1, 2, 'NaT', pd.NaT, np.nan], unit='D')
    expected = DatetimeIndex(first_two + ['NaT'] * 3)
    tm.assert_index_equal(result, expected)

    unparseable = ([1, 2, 'foo'], [1, 2, 111111111])
    for values in unparseable:
        with self.assertRaises(ValueError):
            to_datetime(values, unit='D')

    # coerce we can process
    expected = DatetimeIndex(first_two + ['NaT'] * 1)
    for values in unparseable:
        result = to_datetime(values, unit='D', errors='coerce')
        tm.assert_index_equal(result, expected)
def tsplot(series, plotf, *args, **kwargs):
    """
    Plot a time Series with the given plotting function

    Parameters
    ----------
    series : Series
        must have (or be convertible to) a PeriodIndex
    plotf : callable
        invoked as ``plotf(ax, *args, **kwargs)``
    style : popped from kwargs and passed to ``_check_plot_params``
    ax : Axes, popped from kwargs; ``plt.gca()`` is used when absent

    Returns
    -------
    whatever ``plotf`` returns

    Raises
    ------
    TypeError
        if the index is neither a DatetimeIndex, a PeriodIndex nor of
        inferred type 'datetime'

    Notes
    -----
    Supports same args and kwargs as Axes.plot
    """
    # use the explicit freq when there is one, otherwise the inferred one
    index = series.index
    freq = getattr(index, 'freq', None)
    if freq is None and hasattr(index, 'inferred_freq'):
        freq = index.inferred_freq

    if isinstance(freq, DateOffset):
        alias = freq.rule_code
    else:
        alias = frequencies.get_base_alias(freq)
    freq = frequencies.to_calendar_freq(alias)

    # a DatetimeIndex is converted to periods of that frequency
    if isinstance(series.index, DatetimeIndex):
        periods = series.index.to_period(freq=freq)
        series = Series(series.values, periods, name=series.name)

    if not isinstance(series.index, PeriodIndex):
        # last chance: datetime-like values in a generic index
        if series.index.inferred_type != 'datetime':
            raise TypeError('series argument to tsplot must have '
                            'DatetimeIndex or PeriodIndex')
        periods = DatetimeIndex(series.index).to_period(freq=freq)
        series = Series(series.values, periods, name=series.name)

    if freq != series.index.freq:
        series = series.asfreq(freq)

    series = series.dropna()

    style = kwargs.pop('style', None)
    # plt.gca() is only called when no axes object was handed in
    ax = kwargs.pop('ax') if 'ax' in kwargs else plt.gca()

    # attach the time series plotting attributes to the axes
    ax.freq = freq
    x_axis = ax.get_xaxis()
    x_axis.freq = freq
    x_axis.converter = DateConverter
    ax.legendlabels = [kwargs.get('label', None)]
    ax.view_interval = None
    ax.date_axis_info = None

    # validate the arguments, then plot
    plot_args = _check_plot_params(series, series.index, freq, style, *args)
    plotted = plotf(ax, *plot_args, **kwargs)

    format_dateaxis(ax, ax.freq)

    # when adding a right axis (using add_yaxis), for some reason the
    # x axis limits don't get properly set. This gets around the problem
    lower, upper = ax.get_xlim()[0], ax.get_xlim()[1]
    if lower == 0.0 and upper == 1.0:
        # limits are still the defaults, so autoscale the axis
        ax.autoscale_view()

    ax.set_xlim(series.index[0], series.index[-1])

    return plotted
def makeDateIndex(k=10, freq='B'):
    """
    Return a DatetimeIndex of ``k`` dates with frequency ``freq``,
    generated by ``bdate_range`` starting from 2000-01-01.
    """
    start = datetime(2000, 1, 1)
    return DatetimeIndex(bdate_range(start, periods=k, freq=freq))
def test_parsers(self):
    # parse_time_string, to_datetime, Timestamp, DatetimeIndex and
    # date_range must agree on every date string listed below

    # https://github.com/dateutil/dateutil/issues/217
    import dateutil
    yearfirst = dateutil.__version__ >= LooseVersion('2.5.0')

    cases = {'2011-01-01': datetime.datetime(2011, 1, 1),
             '2Q2005': datetime.datetime(2005, 4, 1),
             '2Q05': datetime.datetime(2005, 4, 1),
             '2005Q1': datetime.datetime(2005, 1, 1),
             '05Q1': datetime.datetime(2005, 1, 1),
             '2011Q3': datetime.datetime(2011, 7, 1),
             '11Q3': datetime.datetime(2011, 7, 1),
             '3Q2011': datetime.datetime(2011, 7, 1),
             '3Q11': datetime.datetime(2011, 7, 1),

             # quarterly without space
             '2000Q4': datetime.datetime(2000, 10, 1),
             '00Q4': datetime.datetime(2000, 10, 1),
             '4Q2000': datetime.datetime(2000, 10, 1),
             '4Q00': datetime.datetime(2000, 10, 1),
             '2000q4': datetime.datetime(2000, 10, 1),
             '2000-Q4': datetime.datetime(2000, 10, 1),
             '00-Q4': datetime.datetime(2000, 10, 1),
             '4Q-2000': datetime.datetime(2000, 10, 1),
             '4Q-00': datetime.datetime(2000, 10, 1),
             '00q4': datetime.datetime(2000, 10, 1),
             '2005': datetime.datetime(2005, 1, 1),
             '2005-11': datetime.datetime(2005, 11, 1),
             '2005 11': datetime.datetime(2005, 11, 1),
             '11-2005': datetime.datetime(2005, 11, 1),
             '11 2005': datetime.datetime(2005, 11, 1),
             '200511': datetime.datetime(2020, 5, 11),
             '20051109': datetime.datetime(2005, 11, 9),
             '20051109 10:15': datetime.datetime(2005, 11, 9, 10, 15),
             '20051109 08H': datetime.datetime(2005, 11, 9, 8, 0),
             '2005-11-09 10:15': datetime.datetime(2005, 11, 9, 10, 15),
             '2005-11-09 08H': datetime.datetime(2005, 11, 9, 8, 0),
             '2005/11/09 10:15': datetime.datetime(2005, 11, 9, 10, 15),
             '2005/11/09 08H': datetime.datetime(2005, 11, 9, 8, 0),
             "Thu Sep 25 10:36:28 2003": datetime.datetime(2003, 9, 25, 10,
                                                           36, 28),
             "Thu Sep 25 2003": datetime.datetime(2003, 9, 25),
             "Sep 25 2003": datetime.datetime(2003, 9, 25),
             "January 1 2014": datetime.datetime(2014, 1, 1),

             # GH 10537
             '2014-06': datetime.datetime(2014, 6, 1),
             '06-2014': datetime.datetime(2014, 6, 1),
             '2014-6': datetime.datetime(2014, 6, 1),
             '6-2014': datetime.datetime(2014, 6, 1),

             '20010101 12': datetime.datetime(2001, 1, 1, 12),
             '20010101 1234': datetime.datetime(2001, 1, 1, 12, 34),
             '20010101 123456': datetime.datetime(2001, 1, 1, 12, 34, 56),
             }

    for date_str, expected in compat.iteritems(cases):
        result1, _, _ = tools.parse_time_string(date_str,
                                                yearfirst=yearfirst)
        result2 = to_datetime(date_str, yearfirst=yearfirst)
        result3 = to_datetime([date_str], yearfirst=yearfirst)
        # result5 is used below
        result4 = to_datetime(np.array([date_str], dtype=object),
                              yearfirst=yearfirst)
        result6 = DatetimeIndex([date_str], yearfirst=yearfirst)
        # result7 is used below
        result8 = DatetimeIndex(Index([date_str]), yearfirst=yearfirst)
        result9 = DatetimeIndex(Series([date_str]), yearfirst=yearfirst)

        # scalar results
        for res in [result1, result2]:
            self.assertEqual(res, expected)
        # index results
        for res in [result3, result4, result6, result8, result9]:
            exp = DatetimeIndex([pd.Timestamp(expected)])
            tm.assert_index_equal(res, exp)

        # these really need to have yearfirst, but we don't support
        if not yearfirst:
            result5 = Timestamp(date_str)
            self.assertEqual(result5, expected)
            result7 = date_range(date_str, freq='S', periods=1,
                                 yearfirst=yearfirst)
            self.assertEqual(result7, expected)

    # NaT
    result1, _, _ = tools.parse_time_string('NaT')
    result2 = to_datetime('NaT')
    result3 = Timestamp('NaT')
    result4 = DatetimeIndex(['NaT'])[0]
    # check each result (previously result1 was asserted four times)
    self.assertTrue(result1 is tslib.NaT)
    self.assertTrue(result2 is tslib.NaT)
    self.assertTrue(result3 is tslib.NaT)
    self.assertTrue(result4 is tslib.NaT)
def test_parsers_dayfirst_yearfirst(self):
    tm._skip_if_no_dateutil()

    # Results of dateutil's parser per version, for reference:
    #
    # OK
    # 2.5.1 10-11-12   [dayfirst=0, yearfirst=0] -> 2012-10-11 00:00:00
    # 2.5.2 10-11-12   [dayfirst=0, yearfirst=0] -> 2012-10-11 00:00:00
    # 2.5.3 10-11-12   [dayfirst=0, yearfirst=0] -> 2012-10-11 00:00:00

    # OK
    # 2.5.1 10-11-12   [dayfirst=0, yearfirst=1] -> 2010-11-12 00:00:00
    # 2.5.2 10-11-12   [dayfirst=0, yearfirst=1] -> 2010-11-12 00:00:00
    # 2.5.3 10-11-12   [dayfirst=0, yearfirst=1] -> 2010-11-12 00:00:00

    # bug fix in 2.5.2
    # 2.5.1 10-11-12   [dayfirst=1, yearfirst=1] -> 2010-11-12 00:00:00
    # 2.5.2 10-11-12   [dayfirst=1, yearfirst=1] -> 2010-12-11 00:00:00
    # 2.5.3 10-11-12   [dayfirst=1, yearfirst=1] -> 2010-12-11 00:00:00

    # OK
    # 2.5.1 10-11-12   [dayfirst=1, yearfirst=0] -> 2012-11-10 00:00:00
    # 2.5.2 10-11-12   [dayfirst=1, yearfirst=0] -> 2012-11-10 00:00:00
    # 2.5.3 10-11-12   [dayfirst=1, yearfirst=0] -> 2012-11-10 00:00:00

    # OK
    # 2.5.1 20/12/21   [dayfirst=0, yearfirst=0] -> 2021-12-20 00:00:00
    # 2.5.2 20/12/21   [dayfirst=0, yearfirst=0] -> 2021-12-20 00:00:00
    # 2.5.3 20/12/21   [dayfirst=0, yearfirst=0] -> 2021-12-20 00:00:00

    # OK
    # 2.5.1 20/12/21   [dayfirst=0, yearfirst=1] -> 2020-12-21 00:00:00
    # 2.5.2 20/12/21   [dayfirst=0, yearfirst=1] -> 2020-12-21 00:00:00
    # 2.5.3 20/12/21   [dayfirst=0, yearfirst=1] -> 2020-12-21 00:00:00

    # revert of bug in 2.5.2
    # 2.5.1 20/12/21   [dayfirst=1, yearfirst=1] -> 2020-12-21 00:00:00
    # 2.5.2 20/12/21   [dayfirst=1, yearfirst=1] -> month must be in 1..12
    # 2.5.3 20/12/21   [dayfirst=1, yearfirst=1] -> 2020-12-21 00:00:00

    # OK
    # 2.5.1 20/12/21   [dayfirst=1, yearfirst=0] -> 2021-12-20 00:00:00
    # 2.5.2 20/12/21   [dayfirst=1, yearfirst=0] -> 2021-12-20 00:00:00
    # 2.5.3 20/12/21   [dayfirst=1, yearfirst=0] -> 2021-12-20 00:00:00

    import dateutil
    from dateutil.parser import parse
    is_lt_253 = dateutil.__version__ < LooseVersion('2.5.3')

    # date string -> [(dayfirst, yearfirst, expected), ...]
    cases = {
        '10-11-12': [
            (False, False, datetime.datetime(2012, 10, 11)),
            (True, False, datetime.datetime(2012, 11, 10)),
            (False, True, datetime.datetime(2010, 11, 12)),
            (True, True, datetime.datetime(2010, 12, 11)),
        ],
        '20/12/21': [
            (False, False, datetime.datetime(2021, 12, 20)),
            (True, False, datetime.datetime(2021, 12, 20)),
            (False, True, datetime.datetime(2020, 12, 21)),
            (True, True, datetime.datetime(2020, 12, 21)),
        ],
    }

    for date_str, variants in compat.iteritems(cases):
        for dayfirst, yearfirst, expected in variants:

            # results differ across dateutil versions when both flags
            # are set, so those combinations are skipped on old versions
            if dayfirst and yearfirst and is_lt_253:
                continue

            flags = dict(dayfirst=dayfirst, yearfirst=yearfirst)

            # the expectation itself must match what dateutil returns
            from_dateutil = parse(date_str, **flags)
            self.assertEqual(from_dateutil, expected)

            parsed, _, _ = tools.parse_time_string(date_str, **flags)

            # Timestamp accepts neither dayfirst nor yearfirst
            if not (dayfirst or yearfirst):
                stamp = Timestamp(date_str)
                self.assertEqual(stamp, expected)

            converted = to_datetime(date_str, **flags)
            indexed = DatetimeIndex([date_str], **flags)[0]

            self.assertEqual(parsed, expected)
            self.assertEqual(converted, expected)
            self.assertEqual(indexed, expected)
def makeDateIndex(k):
    """
    Return a DatetimeIndex of ``k`` dates generated by ``bdate_range``
    starting from 2000-01-01.
    """
    start = datetime(2000, 1, 1)
    return DatetimeIndex(bdate_range(start, periods=k))
def test_dt_namespace_accessor(self):

    # GH 7207, 11128
    # test .dt namespace accessor

    ok_for_base = ['year', 'month', 'day', 'hour', 'minute', 'second',
                   'weekofyear', 'week', 'dayofweek', 'weekday',
                   'dayofyear', 'quarter', 'freq', 'days_in_month',
                   'daysinmonth', 'is_leap_year']
    ok_for_period = ok_for_base + ['qyear', 'start_time', 'end_time']
    ok_for_period_methods = ['strftime', 'to_timestamp', 'asfreq']
    ok_for_dt = ok_for_base + ['date', 'time', 'microsecond', 'nanosecond',
                               'is_month_start', 'is_month_end',
                               'is_quarter_start', 'is_quarter_end',
                               'is_year_start', 'is_year_end', 'tz',
                               'weekday_name']
    ok_for_dt_methods = ['to_period', 'to_pydatetime', 'tz_localize',
                         'tz_convert', 'normalize', 'strftime', 'round',
                         'floor', 'ceil', 'weekday_name']
    ok_for_td = ['days', 'seconds', 'microseconds', 'nanoseconds']
    ok_for_td_methods = ['components', 'to_pytimedelta', 'total_seconds',
                         'round', 'floor', 'ceil']

    def get_expected(s, name):
        # expected value of ``s.dt.<name>``, computed through an Index
        # built from the same values; uses the ``name`` argument rather
        # than a loop variable of the enclosing test
        result = getattr(Index(s._values), name)
        if isinstance(result, np.ndarray):
            if is_integer_dtype(result):
                result = result.astype('int64')
        elif not is_list_like(result):
            return result
        return Series(result, index=s.index, name=s.name)

    def compare(s, name):
        # compare ``s.dt.<name>`` against the Index-based expectation
        a = getattr(s.dt, name)
        b = get_expected(s, name)
        if not (is_list_like(a) and is_list_like(b)):
            self.assertEqual(a, b)
        else:
            tm.assert_series_equal(a, b)

    # datetimeindex
    cases = [Series(date_range('20130101', periods=5), name='xxx'),
             Series(date_range('20130101', periods=5, freq='s'),
                    name='xxx'),
             Series(date_range('20130101 00:00:00', periods=5, freq='ms'),
                    name='xxx')]
    for s in cases:
        for prop in ok_for_dt:
            # we test freq below
            if prop != 'freq':
                compare(s, prop)

        for prop in ok_for_dt_methods:
            getattr(s.dt, prop)

        result = s.dt.to_pydatetime()
        self.assertIsInstance(result, np.ndarray)
        self.assertTrue(result.dtype == object)

        result = s.dt.tz_localize('US/Eastern')
        exp_values = DatetimeIndex(s.values).tz_localize('US/Eastern')
        expected = Series(exp_values, index=s.index, name='xxx')
        tm.assert_series_equal(result, expected)

        tz_result = result.dt.tz
        self.assertEqual(str(tz_result), 'US/Eastern')
        freq_result = s.dt.freq
        self.assertEqual(freq_result, DatetimeIndex(s.values,
                                                    freq='infer').freq)

        # let's localize, then convert
        result = s.dt.tz_localize('UTC').dt.tz_convert('US/Eastern')
        exp_values = (DatetimeIndex(s.values).tz_localize('UTC')
                      .tz_convert('US/Eastern'))
        expected = Series(exp_values, index=s.index, name='xxx')
        tm.assert_series_equal(result, expected)

    # round
    s = Series(pd.to_datetime(['2012-01-01 13:00:00',
                               '2012-01-01 12:01:00',
                               '2012-01-01 08:00:00']), name='xxx')
    result = s.dt.round('D')
    expected = Series(pd.to_datetime(['2012-01-02', '2012-01-02',
                                      '2012-01-01']), name='xxx')
    tm.assert_series_equal(result, expected)

    # round with tz
    result = (s.dt.tz_localize('UTC')
              .dt.tz_convert('US/Eastern')
              .dt.round('D'))
    exp_values = pd.to_datetime(['2012-01-01', '2012-01-01',
                                 '2012-01-01']).tz_localize('US/Eastern')
    expected = Series(exp_values, name='xxx')
    tm.assert_series_equal(result, expected)

    # floor
    s = Series(pd.to_datetime(['2012-01-01 13:00:00',
                               '2012-01-01 12:01:00',
                               '2012-01-01 08:00:00']), name='xxx')
    result = s.dt.floor('D')
    expected = Series(pd.to_datetime(['2012-01-01', '2012-01-01',
                                      '2012-01-01']), name='xxx')
    tm.assert_series_equal(result, expected)

    # ceil
    s = Series(pd.to_datetime(['2012-01-01 13:00:00',
                               '2012-01-01 12:01:00',
                               '2012-01-01 08:00:00']), name='xxx')
    result = s.dt.ceil('D')
    expected = Series(pd.to_datetime(['2012-01-02', '2012-01-02',
                                      '2012-01-02']), name='xxx')
    tm.assert_series_equal(result, expected)

    # datetimeindex with tz
    s = Series(date_range('20130101', periods=5, tz='US/Eastern'),
               name='xxx')
    for prop in ok_for_dt:

        # we test freq below
        if prop != 'freq':
            compare(s, prop)

    for prop in ok_for_dt_methods:
        getattr(s.dt, prop)

    result = s.dt.to_pydatetime()
    self.assertIsInstance(result, np.ndarray)
    self.assertTrue(result.dtype == object)

    result = s.dt.tz_convert('CET')
    expected = Series(s._values.tz_convert('CET'),
                      index=s.index, name='xxx')
    tm.assert_series_equal(result, expected)

    tz_result = result.dt.tz
    self.assertEqual(str(tz_result), 'CET')
    freq_result = s.dt.freq
    self.assertEqual(freq_result, DatetimeIndex(s.values,
                                                freq='infer').freq)

    # timedeltaindex
    cases = [Series(timedelta_range('1 day', periods=5),
                    index=list('abcde'), name='xxx'),
             Series(timedelta_range('1 day 01:23:45', periods=5,
                                    freq='s'), name='xxx'),
             Series(timedelta_range('2 days 01:23:45.012345', periods=5,
                                    freq='ms'), name='xxx')]
    for s in cases:
        for prop in ok_for_td:
            # we test freq below
            if prop != 'freq':
                compare(s, prop)

        for prop in ok_for_td_methods:
            getattr(s.dt, prop)

        result = s.dt.components
        self.assertIsInstance(result, DataFrame)
        tm.assert_index_equal(result.index, s.index)

        result = s.dt.to_pytimedelta()
        self.assertIsInstance(result, np.ndarray)
        self.assertTrue(result.dtype == object)

        result = s.dt.total_seconds()
        self.assertIsInstance(result, pd.Series)
        self.assertTrue(result.dtype == 'float64')

        freq_result = s.dt.freq
        self.assertEqual(freq_result, TimedeltaIndex(s.values,
                                                     freq='infer').freq)

    # both
    index = date_range('20130101', periods=3, freq='D')
    s = Series(date_range('20140204', periods=3, freq='s'),
               index=index, name='xxx')
    exp = Series(np.array([2014, 2014, 2014], dtype='int64'),
                 index=index, name='xxx')
    tm.assert_series_equal(s.dt.year, exp)

    exp = Series(np.array([2, 2, 2], dtype='int64'),
                 index=index, name='xxx')
    tm.assert_series_equal(s.dt.month, exp)

    exp = Series(np.array([0, 1, 2], dtype='int64'),
                 index=index, name='xxx')
    tm.assert_series_equal(s.dt.second, exp)

    exp = pd.Series([s[0]] * 3, index=index, name='xxx')
    tm.assert_series_equal(s.dt.normalize(), exp)

    # periodindex
    cases = [Series(period_range('20130101', periods=5, freq='D'),
                    name='xxx')]
    for s in cases:
        for prop in ok_for_period:
            # we test freq below
            if prop != 'freq':
                compare(s, prop)

        for prop in ok_for_period_methods:
            getattr(s.dt, prop)

        freq_result = s.dt.freq
        self.assertEqual(freq_result, PeriodIndex(s.values).freq)

    # test limited display api
    def get_dir(s):
        results = [r for r in s.dt.__dir__() if not r.startswith('_')]
        return list(sorted(set(results)))

    s = Series(date_range('20130101', periods=5, freq='D'), name='xxx')
    results = get_dir(s)
    tm.assert_almost_equal(
        results, list(sorted(set(ok_for_dt + ok_for_dt_methods))))

    s = Series(period_range('20130101', periods=5,
                            freq='D', name='xxx').asobject)
    results = get_dir(s)
    tm.assert_almost_equal(
        results, list(sorted(set(ok_for_period + ok_for_period_methods))))

    # 11295
    # ambiguous time error on the conversions
    s = Series(pd.date_range('2015-01-01', '2016-01-01',
                             freq='T'), name='xxx')
    s = s.dt.tz_localize('UTC').dt.tz_convert('America/Chicago')
    results = get_dir(s)
    tm.assert_almost_equal(
        results, list(sorted(set(ok_for_dt + ok_for_dt_methods))))
    exp_values = pd.date_range('2015-01-01', '2016-01-01', freq='T',
                               tz='UTC').tz_convert('America/Chicago')
    expected = Series(exp_values, name='xxx')
    tm.assert_series_equal(s, expected)

    # no setting allowed
    s = Series(date_range('20130101', periods=5, freq='D'), name='xxx')
    with tm.assertRaisesRegexp(ValueError, "modifications"):
        s.dt.hour = 5

    # trying to set a copy
    with pd.option_context('chained_assignment', 'raise'):

        def f():
            s.dt.hour[0] = 5

        self.assertRaises(com.SettingWithCopyError, f)
def _get_time_bins(self, ax):
    """
    Build the bin edges, bin positions and labels used to group ``ax``.

    Parameters
    ----------
    ax : DatetimeIndex

    Returns
    -------
    binner : DatetimeIndex of bin edges
    bins : result of ``lib.generate_bins_dt64`` (an empty list when ``ax``
        is empty)
    labels : DatetimeIndex of bin labels

    Raises
    ------
    TypeError
        if ``ax`` is not a DatetimeIndex
    """
    if not isinstance(ax, DatetimeIndex):
        raise TypeError('axis must be a DatetimeIndex, but got '
                        'an instance of %r' % type(ax).__name__)

    # nothing to bin: edges and labels are the same empty index
    if len(ax) == 0:
        empty = DatetimeIndex(data=[], freq=self.freq, name=ax.name)
        return empty, [], empty

    first, last = _get_range_edges(ax.min(), ax.max(), self.freq,
                                   closed=self.closed,
                                   base=self.base)

    # GH #12037
    # first/last are used as they are: calling replace() on them would
    # swallow the nanosecond part, so the last bin could end slightly
    # before the end and lead to a `Values falls after last bin` error
    binner = labels = DatetimeIndex(freq=self.freq,
                                    start=first,
                                    end=last,
                                    tz=ax.tz,
                                    name=ax.name)

    # a little hack
    trimmed = (len(binner) > 2 and binner[-2] == last and
               self.closed == 'right')
    if trimmed:
        binner = binner[:-1]

    ax_values = ax.asi8
    binner, bin_edges = self._adjust_bin_edges(binner, ax_values)

    # general version, knowing nothing about relative frequencies
    bins = lib.generate_bins_dt64(ax_values, bin_edges, self.closed,
                                  hasnans=ax.hasnans)

    # right-closed bins are labelled from the adjusted edges; apart from
    # that the label selection is the same for both closures
    if self.closed == 'right':
        labels = binner
    if self.label == 'right':
        labels = labels[1:]
    elif not trimmed:
        labels = labels[:-1]

    if ax.hasnans:
        binner = binner.insert(0, tslib.NaT)
        labels = labels.insert(0, tslib.NaT)

    # GH4076: never hand back more labels than bins
    n_bins = len(bins)
    if n_bins < len(labels):
        labels = labels[:n_bins]

    return binner, bins, labels
def _convert_listlike(arg, box, format, name=None, tz=tz):
    # Convert a list-like ``arg`` to datetimes.
    #
    # arg    : list, tuple or array-like of values to convert
    # box    : if True, wrap results in an Index / DatetimeIndex
    # format : strftime format, or None
    # name   : name given to a resulting DatetimeIndex
    # tz     : timezone given to a resulting DatetimeIndex
    #
    # utc, unit, errors, exact, dayfirst, yearfirst and
    # infer_datetime_format are read from the enclosing scope.

    if isinstance(arg, (list, tuple)):
        arg = np.array(arg, dtype='O')

    # these are shortcutable
    if is_datetime64tz_dtype(arg):
        if not isinstance(arg, DatetimeIndex):
            return DatetimeIndex(arg, tz=tz, name=name)
        if utc:
            arg = arg.tz_convert(None).tz_localize('UTC')
        return arg

    elif is_datetime64_ns_dtype(arg):
        if box and not isinstance(arg, DatetimeIndex):
            try:
                return DatetimeIndex(arg, tz=tz, name=name)
            except ValueError:
                pass

        return arg

    elif unit is not None:
        if format is not None:
            raise ValueError("cannot specify both format and unit")
        arg = getattr(arg, 'values', arg)
        result = tslib.array_with_unit_to_datetime(arg, unit,
                                                   errors=errors)
        if box:
            if errors == 'ignore':
                from pandas import Index
                return Index(result)

            return DatetimeIndex(result, tz=tz, name=name)
        return result
    elif getattr(arg, 'ndim', 1) > 1:
        raise TypeError('arg must be a string, datetime, list, tuple, '
                        '1-d array, or Series')

    arg = _ensure_object(arg)
    require_iso8601 = False

    if infer_datetime_format and format is None:
        format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)

    if format is not None:
        # There is a special fast-path for iso8601 formatted
        # datetime strings, so in those cases don't use the inferred
        # format because this path makes process slower in this
        # special case
        format_is_iso8601 = _format_is_iso(format)
        if format_is_iso8601:
            require_iso8601 = not infer_datetime_format
            format = None

    try:
        result = None

        if format is not None:
            # shortcut formatting here
            if format == '%Y%m%d':
                try:
                    result = _attempt_YYYYMMDD(arg, errors=errors)
                except Exception:
                    # a bare ``except:`` here would also trap
                    # KeyboardInterrupt / SystemExit and re-label them
                    raise ValueError("cannot convert the input to "
                                     "'%Y%m%d' date format")

            # fallback
            if result is None:
                try:
                    result = tslib.array_strptime(arg, format, exact=exact,
                                                  errors=errors)
                except tslib.OutOfBoundsDatetime:
                    if errors == 'raise':
                        raise
                    result = arg
                except ValueError:
                    # if format was inferred, try falling back
                    # to array_to_datetime - terminate here
                    # for specified formats
                    if not infer_datetime_format:
                        if errors == 'raise':
                            raise
                        result = arg

        if result is None and (format is None or infer_datetime_format):
            result = tslib.array_to_datetime(
                arg,
                errors=errors,
                utc=utc,
                dayfirst=dayfirst,
                yearfirst=yearfirst,
                require_iso8601=require_iso8601)

        if is_datetime64_dtype(result) and box:
            result = DatetimeIndex(result, tz=tz, name=name)
        return result

    except ValueError as e:
        # last resort: try to interpret arg as datetime objects; if that
        # fails as well, surface the original error
        try:
            values, tz = tslib.datetime_to_datetime64(arg)
            return DatetimeIndex._simple_new(values, name=name, tz=tz)
        except (ValueError, TypeError):
            raise e
def test_cdaterange_weekmask(self):
    # with a Sun-Thu week mask, Friday and Saturday are skipped
    rng = cdate_range('2013-05-01', periods=3,
                      weekmask='Sun Mon Tue Wed Thu')
    xp = DatetimeIndex(['2013-05-01', '2013-05-02', '2013-05-05'])
    # assertTrue instead of the deprecated ``assert_`` alias
    self.assertTrue(xp.equals(rng))
def test_constructor_with_datetime_tz(self):
    """Series built from tz-aware datetimes keeps a datetime64[ns, tz] dtype.

    Covers construction, export via ``.values``, indexing, concat, astype,
    string formatting, dtype inference and the all-NaT case (GH 8260).
    """
    eastern_dtype = 'datetime64[ns, US/Eastern]'

    eastern_idx = date_range('20130101', periods=3, tz='US/Eastern')
    ser = Series(eastern_idx)
    self.assertTrue(ser.dtype.name == eastern_dtype)
    self.assertTrue(ser.dtype == eastern_dtype)
    self.assertTrue(is_datetime64tz_dtype(ser.dtype))
    self.assertTrue(eastern_dtype in str(ser))

    # exporting through .values gives a plain datetime64[ns] ndarray
    raw = ser.values
    self.assertIsInstance(raw, np.ndarray)
    self.assertTrue(raw.dtype == 'datetime64[ns]')

    roundtrip = pd.DatetimeIndex(raw)
    roundtrip = roundtrip.tz_localize('UTC').tz_convert(tz=ser.dt.tz)
    self.assert_index_equal(eastern_idx, roundtrip)

    # scalar indexing, positional and via __getitem__
    first_stamp = Timestamp('2013-01-01 00:00:00-0500',
                            tz='US/Eastern', freq='D')
    by_iloc = ser.iloc[0]
    self.assertEqual(by_iloc, first_stamp)
    by_getitem = ser[0]
    self.assertEqual(by_getitem, first_stamp)

    # boolean-mask and slice indexing
    mask = Series([True, True, False], index=ser.index)
    masked = ser[mask]
    assert_series_equal(masked, ser[0:2])

    head = ser.iloc[0:1]
    assert_series_equal(head, Series(eastern_idx[0:1]))

    # concat of the two halves restores the original
    pieces = [ser.iloc[0:1], ser.iloc[1:]]
    glued = pd.concat(pieces)
    assert_series_equal(glued, ser)

    # astype(object)
    as_object = ser.astype(object)
    object_expected = Series(DatetimeIndex(ser._values).asobject)
    assert_series_equal(as_object, object_expected)

    relocalized = Series(ser.values).dt.tz_localize('UTC')
    relocalized = relocalized.dt.tz_convert(ser.dt.tz)
    assert_series_equal(relocalized, ser)

    # astype to datetime64[ns, tz], by string and by dtype object
    cast_by_str = Series(ser.values).astype('datetime64[ns, US/Eastern]')
    assert_series_equal(cast_by_str, ser)

    cast_by_dtype = Series(ser.values).astype(ser.dtype)
    assert_series_equal(cast_by_dtype, ser)

    as_cet = ser.astype('datetime64[ns, CET]')
    cet_expected = Series(
        date_range('20130101 06:00:00', periods=3, tz='CET'))
    assert_series_equal(as_cet, cet_expected)

    # short repr
    self.assertTrue(eastern_dtype in str(ser))

    # repr once a NaT has been shifted in
    shifted = ser.shift()
    shifted_repr = str(shifted)
    self.assertTrue(eastern_dtype in shifted_repr)
    self.assertTrue('NaT' in str(shifted))

    # long repr
    long_ser = Series(date_range('20130101', periods=1000,
                                 tz='US/Eastern'))
    self.assertTrue(eastern_dtype in str(long_ser))

    inferred_idx = pd.DatetimeIndex(ser, freq='infer')
    tm.assert_index_equal(inferred_idx, eastern_idx)

    # inference: a single shared tz gives a tz-aware dtype
    same_tz = Series([
        pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'),
        pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Pacific'),
    ])
    self.assertTrue(same_tz.dtype == 'datetime64[ns, US/Pacific]')
    self.assertTrue(lib.infer_dtype(same_tz) == 'datetime64')

    # inference: mixed tzs fall back to object
    mixed_tz = Series([
        pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'),
        pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Eastern'),
    ])
    self.assertTrue(mixed_tz.dtype == 'object')
    self.assertTrue(lib.infer_dtype(mixed_tz) == 'datetime')

    # with all NaT
    all_nat = Series(pd.NaT, index=[0, 1],
                     dtype='datetime64[ns, US/Eastern]')
    nat_expected = Series(pd.DatetimeIndex(['NaT', 'NaT'],
                                           tz='US/Eastern'))
    assert_series_equal(all_nat, nat_expected)
def _convert_listlike(arg, box, format, name=None):
    """
    Convert a list-like of datetime-like values, taking the cheapest
    conversion path available for its dtype.

    Besides its parameters, this function reads ``utc``, ``unit``,
    ``errors``, ``infer_datetime_format``, ``dayfirst``, ``yearfirst``,
    ``freq`` and ``exact`` as free variables from the surrounding scope.

    Parameters
    ----------
    arg : list-like
        Values to convert. A list or tuple is first turned into an object
        ndarray.
    box : boolean
        When True, datetime64 results are wrapped in a DatetimeIndex.
    format : string or None
        strptime-style format. When None and ``infer_datetime_format`` is
        set, a format is guessed from ``arg``.
    name : object, default None
        Name passed to any DatetimeIndex constructed here.

    Returns
    -------
    DatetimeIndex, ndarray, or ``arg`` itself, depending on the path
    taken and on ``box``.

    Raises
    ------
    ValueError
        If parsing fails and the final ``tslib.datetime_to_datetime64``
        fallback fails as well (the original parsing error is the one
        re-raised).
    """
    if isinstance(arg, (list, tuple)):
        arg = np.array(arg, dtype='O')

    # every DatetimeIndex built here is UTC-localized only when utc is set
    index_tz = 'utc' if utc else None

    # these are shortcutable
    if com.is_datetime64_ns_dtype(arg):
        if box and not isinstance(arg, DatetimeIndex):
            try:
                return DatetimeIndex(arg, tz=index_tz, name=name)
            except ValueError:
                # boxing failed; hand the values back unboxed below
                pass

        return arg
    elif com.is_datetime64tz_dtype(arg):
        if not isinstance(arg, DatetimeIndex):
            # pass ``name`` through like every other branch does
            return DatetimeIndex(arg, tz=index_tz, name=name)
        if utc:
            arg = arg.tz_convert(None)
        return arg

    elif format is None and com.is_integer_dtype(arg) and unit == 'ns':
        # integers with unit 'ns' are reinterpreted directly
        result = arg.astype('datetime64[ns]')
        if box:
            return DatetimeIndex(result, tz=index_tz, name=name)

        return result

    arg = com._ensure_object(arg)
    require_iso8601 = False

    if infer_datetime_format and format is None:
        format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)

    if format is not None:
        # There is a special fast-path for iso8601 formatted
        # datetime strings, so in those cases don't use the inferred
        # format because this path makes process slower in this
        # special case
        format_is_iso8601 = (
            ('%Y-%m-%dT%H:%M:%S.%f'.startswith(format) or
             '%Y-%m-%d %H:%M:%S.%f'.startswith(format)) and
            format != '%Y')
        if format_is_iso8601:
            require_iso8601 = not infer_datetime_format
            format = None

    try:
        result = None

        if format is not None:
            # shortcut formatting here
            if format == '%Y%m%d':
                try:
                    result = _attempt_YYYYMMDD(arg, errors=errors)
                except Exception:
                    # Exception (not a bare except) so that
                    # KeyboardInterrupt / SystemExit are not disguised
                    # as a parsing ValueError
                    raise ValueError(
                        "cannot convert the input to '%Y%m%d' date format")

            # fallback
            if result is None:
                try:
                    result = tslib.array_strptime(arg, format, exact=exact,
                                                  errors=errors)
                except (tslib.OutOfBoundsDatetime):
                    if errors == 'raise':
                        raise
                    result = arg
                except ValueError:
                    # if format was inferred, try falling back
                    # to array_to_datetime - terminate here
                    # for specified formats
                    if not infer_datetime_format:
                        if errors == 'raise':
                            raise
                        result = arg

        if result is None and (format is None or infer_datetime_format):
            result = tslib.array_to_datetime(
                arg,
                errors=errors,
                utc=utc,
                dayfirst=dayfirst,
                yearfirst=yearfirst,
                freq=freq,
                unit=unit,
                require_iso8601=require_iso8601)

        if com.is_datetime64_dtype(result) and box:
            result = DatetimeIndex(result, tz=index_tz, name=name)
        return result

    except ValueError as e:
        # last resort: let datetime_to_datetime64 handle the input
        try:
            values, tz = tslib.datetime_to_datetime64(arg)
            return DatetimeIndex._simple_new(values, name=name, tz=tz)
        except (ValueError, TypeError):
            # surface the original parsing error, not the fallback's
            raise e
def test_parsers(self):
    """Every datetime-string entry point parses the same inputs alike.

    Each string in ``cases`` is fed through ``tools.parse_time_string``,
    ``to_datetime`` (scalar, list and object array), ``Timestamp``,
    ``DatetimeIndex`` and ``date_range``; each result is compared with
    the expected ``datetime``. The string 'NaT' is then checked to give
    ``tslib.NaT`` from each entry point that is exercised for it.
    """
    cases = {
        '2011-01-01': datetime.datetime(2011, 1, 1),
        '2Q2005': datetime.datetime(2005, 4, 1),
        '2Q05': datetime.datetime(2005, 4, 1),
        '2005Q1': datetime.datetime(2005, 1, 1),
        '05Q1': datetime.datetime(2005, 1, 1),
        '2011Q3': datetime.datetime(2011, 7, 1),
        '11Q3': datetime.datetime(2011, 7, 1),
        '3Q2011': datetime.datetime(2011, 7, 1),
        '3Q11': datetime.datetime(2011, 7, 1),

        # quarterly without space
        '2000Q4': datetime.datetime(2000, 10, 1),
        '00Q4': datetime.datetime(2000, 10, 1),
        '4Q2000': datetime.datetime(2000, 10, 1),
        '4Q00': datetime.datetime(2000, 10, 1),
        '2000q4': datetime.datetime(2000, 10, 1),

        '2000-Q4': datetime.datetime(2000, 10, 1),
        '00-Q4': datetime.datetime(2000, 10, 1),
        '4Q-2000': datetime.datetime(2000, 10, 1),
        '4Q-00': datetime.datetime(2000, 10, 1),

        # ('2000q4' is already listed above; the duplicate key was dropped)
        '00q4': datetime.datetime(2000, 10, 1),

        '2005': datetime.datetime(2005, 1, 1),
        '2005-11': datetime.datetime(2005, 11, 1),
        '2005 11': datetime.datetime(2005, 11, 1),
        '11-2005': datetime.datetime(2005, 11, 1),
        '11 2005': datetime.datetime(2005, 11, 1),
        '200511': datetime.datetime(2020, 5, 11),
        '20051109': datetime.datetime(2005, 11, 9),

        '20051109 10:15': datetime.datetime(2005, 11, 9, 10, 15),
        '20051109 08H': datetime.datetime(2005, 11, 9, 8, 0),

        '2005-11-09 10:15': datetime.datetime(2005, 11, 9, 10, 15),
        '2005-11-09 08H': datetime.datetime(2005, 11, 9, 8, 0),
        '2005/11/09 10:15': datetime.datetime(2005, 11, 9, 10, 15),
        '2005/11/09 08H': datetime.datetime(2005, 11, 9, 8, 0),

        "Thu Sep 25 10:36:28 2003": datetime.datetime(2003, 9, 25, 10,
                                                      36, 28),
        "Thu Sep 25 2003": datetime.datetime(2003, 9, 25),
        "Sep 25 2003": datetime.datetime(2003, 9, 25),
        "January 1 2014": datetime.datetime(2014, 1, 1),

        # GH 10537
        '2014-06': datetime.datetime(2014, 6, 1),
        '06-2014': datetime.datetime(2014, 6, 1),
        '2014-6': datetime.datetime(2014, 6, 1),
        '6-2014': datetime.datetime(2014, 6, 1),
    }

    for date_str, expected in compat.iteritems(cases):
        result1, _, _ = tools.parse_time_string(date_str)
        result2 = to_datetime(date_str)
        result3 = to_datetime([date_str])
        result4 = to_datetime(np.array([date_str], dtype=object))
        result5 = Timestamp(date_str)
        result6 = DatetimeIndex([date_str])[0]
        result7 = date_range(date_str, freq='S', periods=1)
        self.assertEqual(result1, expected)
        self.assertEqual(result2, expected)
        self.assertEqual(result3, expected)
        self.assertEqual(result4, expected)
        self.assertEqual(result5, expected)
        self.assertEqual(result6, expected)
        self.assertEqual(result7, expected)

    # NaT
    result1, _, _ = tools.parse_time_string('NaT')
    result2 = to_datetime('NaT')
    result3 = Timestamp('NaT')
    result4 = DatetimeIndex(['NaT'])[0]
    # check each result; previously result1 was asserted four times and
    # result2..result4 were never verified
    self.assertTrue(result1 is tslib.NaT)
    self.assertTrue(result2 is tslib.NaT)
    self.assertTrue(result3 is tslib.NaT)
    self.assertTrue(result4 is tslib.NaT)