def __init__(self, left, right, name, na_op):
    """Capture operands and per-side dtype flags for a datetime/timedelta op.

    Parameters
    ----------
    left, right : operands; when both are Series they are aligned first
    name : str, the op name (e.g. '__add__') — presumably used by _validate
        and _convert_for_datetime to decide legality; confirm in those helpers
    na_op : callable, the NA-aware elementwise operation
    """
    # need to make sure that we are aligning the data
    if isinstance(left, pd.Series) and isinstance(right, pd.Series):
        left, right = left.align(right, copy=False)

    # normalize both operands to array form; rvalues conversion may depend
    # on what lvalues turned out to be (passed via ``other``)
    lvalues = self._convert_to_array(left, name=name)
    rvalues = self._convert_to_array(right, name=name, other=lvalues)

    self.name = name
    self.na_op = na_op

    # left
    self.left = left
    self.is_offset_lhs = self._is_offset(left)
    self.is_timedelta_lhs = is_timedelta64_dtype(lvalues)
    self.is_datetime64_lhs = is_datetime64_dtype(lvalues)
    self.is_datetime64tz_lhs = is_datetime64tz_dtype(lvalues)
    # either naive or tz-aware datetimes count as "datetime"
    self.is_datetime_lhs = self.is_datetime64_lhs or self.is_datetime64tz_lhs
    # NOTE: integer/float flags are taken from the ORIGINAL operand's dtype,
    # not the converted array — intentional asymmetry with the rhs below?
    self.is_integer_lhs = left.dtype.kind in ['i', 'u']
    self.is_floating_lhs = left.dtype.kind == 'f'

    # right
    self.right = right
    self.is_offset_rhs = self._is_offset(right)
    self.is_datetime64_rhs = is_datetime64_dtype(rvalues)
    self.is_datetime64tz_rhs = is_datetime64tz_dtype(rvalues)
    self.is_datetime_rhs = self.is_datetime64_rhs or self.is_datetime64tz_rhs
    self.is_timedelta_rhs = is_timedelta64_dtype(rvalues)
    # rhs numeric flags use the converted array's dtype (rvalues), unlike lhs
    self.is_integer_rhs = rvalues.dtype.kind in ('i', 'u')
    self.is_floating_rhs = rvalues.dtype.kind == 'f'

    # reject unsupported dtype combinations, then coerce both sides to the
    # representation the datetime arithmetic path expects
    self._validate(lvalues, rvalues, name)
    self.lvalues, self.rvalues = self._convert_for_datetime(lvalues, rvalues)
def __init__(self, left, right, name, na_op):
    """_TimeOp constructor: delegate common setup to the base op, then
    record datetime/timedelta/offset dtype flags for both operands.

    Parameters
    ----------
    left, right : operands (alignment, if any, is handled by the base class)
    name : str, the op name (e.g. '__add__')
    na_op : callable, the NA-aware elementwise operation
    """
    # base class stores left/right/name/na_op (and presumably aligns Series
    # operands — confirm in the parent __init__)
    super(_TimeOp, self).__init__(left, right, name, na_op)

    lvalues = self._convert_to_array(left, name=name)
    rvalues = self._convert_to_array(right, name=name, other=lvalues)

    # left
    self.is_offset_lhs = self._is_offset(left)
    self.is_timedelta_lhs = is_timedelta64_dtype(lvalues)
    self.is_datetime64_lhs = is_datetime64_dtype(lvalues)
    self.is_datetime64tz_lhs = is_datetime64tz_dtype(lvalues)
    # naive or tz-aware both count as "datetime"
    self.is_datetime_lhs = (self.is_datetime64_lhs or
                            self.is_datetime64tz_lhs)
    # numeric lhs flags come from the original operand's dtype
    self.is_integer_lhs = left.dtype.kind in ['i', 'u']
    self.is_floating_lhs = left.dtype.kind == 'f'

    # right
    self.is_offset_rhs = self._is_offset(right)
    self.is_datetime64_rhs = is_datetime64_dtype(rvalues)
    self.is_datetime64tz_rhs = is_datetime64tz_dtype(rvalues)
    self.is_datetime_rhs = (self.is_datetime64_rhs or
                            self.is_datetime64tz_rhs)
    self.is_timedelta_rhs = is_timedelta64_dtype(rvalues)
    # numeric rhs flags come from the converted array's dtype
    self.is_integer_rhs = rvalues.dtype.kind in ('i', 'u')
    self.is_floating_rhs = rvalues.dtype.kind == 'f'

    # validate the dtype combination, then coerce both sides for the
    # datetime arithmetic path
    self._validate(lvalues, rvalues, name)
    self.lvalues, self.rvalues = self._convert_for_datetime(lvalues, rvalues)
def get_op(cls, left, right, name, na_op):
    """
    Get op dispatcher, returns _Op or _TimeOp.

    If ``left`` and ``right`` are appropriate for datetime arithmetic with
    operation ``name``, processes them and returns a ``_TimeOp`` object
    that stores all the required values.  Otherwise, it will generate
    either a ``_Op``, indicating that the operation is performed via
    normal numpy path.
    """
    lhs_is_timedelta = is_timedelta64_dtype(left)
    lhs_is_datetime = (is_datetime64_dtype(left) or
                       is_datetime64tz_dtype(left))

    both_series = isinstance(left, ABCSeries) and isinstance(right, ABCSeries)
    if both_series and not left.index.equals(right.index):
        # avoid repeated alignment
        left, right = left.align(right, copy=False)

        joined, _, _ = left.index.join(right.index, how='outer',
                                       return_indexers=True)
        # if DatetimeIndex have different tz, convert to UTC
        left.index = joined
        right.index = joined

    if lhs_is_datetime or lhs_is_timedelta:
        return _TimeOp(left, right, name, na_op)
    return _Op(left, right, name, na_op)
def na_value_for_dtype(dtype):
    """
    Return a dtype compat na value

    Parameters
    ----------
    dtype : string / dtype

    Returns
    -------
    dtype compat na value
    """
    from pandas.core import common as com
    from pandas import NaT

    dtype = pandas_dtype(dtype)

    # any datetimelike flavour shares NaT as its missing value
    is_datetimelike = (com.is_datetime64_dtype(dtype) or
                       com.is_datetime64tz_dtype(dtype) or
                       com.is_timedelta64_dtype(dtype))
    if is_datetimelike:
        return NaT
    if com.is_float_dtype(dtype):
        return np.nan
    if com.is_integer_dtype(dtype):
        return 0
    if com.is_bool_dtype(dtype):
        return False
    # fallback for everything else (object, etc.)
    return np.nan
def nonempty_sample_df(empty):
    """
    Create a dataframe from the given empty dataframe that contains one
    row of fake data (generated from the empty dataframe's dtypes).

    Parameters
    ----------
    empty : pd.DataFrame
        Zero-row frame whose column dtypes determine the fake values.

    Returns
    -------
    pd.DataFrame
        A one-row frame with the same columns and dtypes as ``empty``.

    Raises
    ------
    TypeError
        For extension dtypes and any other dtype with no known fake value.
    """
    nonempty = {}
    idx = pd.RangeIndex(start=0, stop=1, step=1)
    for key, dtype in empty.dtypes.iteritems():
        if is_datetime64tz_dtype(dtype):
            # BUGFIX: use the column's own timezone instead of a
            # hard-coded 'America/New_York', which silently changed the
            # dtype of any tz-aware column with a different tz
            entry = pd.Timestamp('1970-01-01', tz=dtype.tz)
        elif pd.core.common.is_categorical_dtype(dtype):
            # pick the first existing category so categories/order survive
            accessor = empty[key].cat
            example = accessor.categories[0]
            cat = pd.Categorical([example], categories=accessor.categories,
                                 ordered=accessor.ordered)
            entry = pd.Series(cat, name=key)
        elif dtype.kind in _simple_fake_mapping:
            # plain numpy kinds map to canned scalar examples
            entry = _simple_fake_mapping[dtype.kind]
        elif is_extension_type(dtype):
            raise TypeError("Can't handle extension dtype: {}".format(dtype))
        elif dtype.name == 'object':
            entry = 'foo'
        else:
            raise TypeError("Can't handle dtype: {}".format(dtype))
        if not isinstance(entry, pd.Series):
            entry = pd.Series([entry], name=key, index=idx)
        nonempty[key] = entry
    df = pd.DataFrame(nonempty)
    return df
def maybe_to_datetimelike(data, copy=False):
    """ return a DelegatedClass of a Series that is datetimelike
      (e.g. datetime64[ns],timedelta64[ns] dtype or a Series of Periods)
    raise TypeError if this is not possible.

    Parameters
    ----------
    data : Series
    copy : boolean, default False
           copy the input data

    Returns
    -------
    DelegatedClass
    """
    from pandas import Series

    if not isinstance(data, Series):
        raise TypeError("cannot convert an object of type {0} to a datetimelike index".format(type(data)))

    index = data.index
    series_name = data.name

    # dtype-based fast paths first
    if is_datetime64_dtype(data.dtype) or is_datetime64tz_dtype(data.dtype):
        return DatetimeProperties(DatetimeIndex(data, copy=copy, freq='infer'),
                                  index, name=series_name)
    if is_timedelta64_dtype(data.dtype):
        return TimedeltaProperties(TimedeltaIndex(data, copy=copy,
                                                  freq='infer'),
                                   index, name=series_name)

    # fall back to value-based inference for object-dtype data
    if is_period_arraylike(data):
        return PeriodProperties(PeriodIndex(data, copy=copy), index,
                                name=series_name)
    if is_datetime_arraylike(data):
        return DatetimeProperties(DatetimeIndex(data, copy=copy, freq='infer'),
                                  index, name=series_name)

    raise TypeError("cannot convert an object of type {0} to a datetimelike index".format(type(data)))
def nonempty_sample_df(empty):
    """
    Create a dataframe from the given empty dataframe that contains one
    row of fake data (generated from the empty dataframe's dtypes).

    Parameters
    ----------
    empty : pd.DataFrame
        Zero-row frame whose column dtypes determine the fake values.

    Returns
    -------
    pd.DataFrame
        A one-row frame with the same columns and dtypes as ``empty``.

    Raises
    ------
    TypeError
        For extension dtypes and any other dtype with no known fake value.
    """
    nonempty = {}
    idx = pd.RangeIndex(start=0, stop=1, step=1)
    for key, dtype in empty.dtypes.iteritems():
        if is_datetime64tz_dtype(dtype):
            # BUGFIX: use the column's own timezone instead of a
            # hard-coded 'America/New_York', which silently changed the
            # dtype of any tz-aware column with a different tz
            entry = pd.Timestamp('1970-01-01', tz=dtype.tz)
        elif pd.core.common.is_categorical_dtype(dtype):
            # pick the first existing category so categories/order survive
            accessor = empty[key].cat
            example = accessor.categories[0]
            cat = pd.Categorical([example], categories=accessor.categories,
                                 ordered=accessor.ordered)
            entry = pd.Series(cat, name=key)
        elif dtype.kind in _simple_fake_mapping:
            # plain numpy kinds map to canned scalar examples
            entry = _simple_fake_mapping[dtype.kind]
        elif is_extension_type(dtype):
            raise TypeError("Can't handle extension dtype: {}".format(dtype))
        elif dtype.name == 'object':
            entry = 'foo'
        else:
            raise TypeError("Can't handle dtype: {}".format(dtype))
        if not isinstance(entry, pd.Series):
            entry = pd.Series([entry], name=key, index=idx)
        nonempty[key] = entry
    df = pd.DataFrame(nonempty)
    return df
def test_basic(self):
    """Sanity-check dtype predicates for a tz-aware Series."""
    self.assertTrue(is_datetime64tz_dtype(self.dtype))

    ser = Series(date_range('20130101', periods=3, tz='US/Eastern'),
                 name='A')

    # dtypes
    self.assertTrue(is_datetime64tz_dtype(ser.dtype))
    self.assertTrue(is_datetime64tz_dtype(ser))
    self.assertFalse(is_datetime64tz_dtype(np.dtype('float64')))
    self.assertFalse(is_datetime64tz_dtype(1.0))

    self.assertTrue(is_datetimetz(ser))
    self.assertTrue(is_datetimetz(ser.dtype))
    self.assertFalse(is_datetimetz(np.dtype('float64')))
    self.assertFalse(is_datetimetz(1.0))
def test_basic(self):
    """Check is_datetime64tz_dtype / is_datetimetz on a tz-aware Series."""
    self.assertTrue(is_datetime64tz_dtype(self.dtype))

    dr = date_range("20130101", periods=3, tz="US/Eastern")
    s = Series(dr, name="A")

    # positives: the Series and its dtype both register as tz-aware
    for obj in (s.dtype, s):
        self.assertTrue(is_datetime64tz_dtype(obj))
    # negatives: plain numpy dtype and a bare scalar do not
    for obj in (np.dtype("float64"), 1.0):
        self.assertFalse(is_datetime64tz_dtype(obj))

    for obj in (s, s.dtype):
        self.assertTrue(is_datetimetz(obj))
    for obj in (np.dtype("float64"), 1.0):
        self.assertFalse(is_datetimetz(obj))
def maybe_to_datetimelike(data, copy=False):
    """ return a DelegatedClass of a Series that is datetimelike
      (e.g. datetime64[ns],timedelta64[ns] dtype or a Series of Periods)
    raise TypeError if this is not possible.

    Parameters
    ----------
    data : Series
    copy : boolean, default False
           copy the input data

    Returns
    -------
    DelegatedClass
    """
    from pandas import Series

    if not isinstance(data, Series):
        raise TypeError(
            "cannot convert an object of type {0} to a datetimelike index".
            format(type(data)))

    index = data.index
    name = data.name

    if is_datetime64_dtype(data.dtype):
        return DatetimeProperties(
            DatetimeIndex(data, copy=copy, freq='infer'), index, name=name)
    if is_datetime64tz_dtype(data.dtype):
        # tz-aware data may straddle a DST transition, so let the index
        # resolve ambiguous local times itself
        return DatetimeProperties(
            DatetimeIndex(data, copy=copy, freq='infer', ambiguous='infer'),
            index, name=name)
    if is_timedelta64_dtype(data.dtype):
        return TimedeltaProperties(
            TimedeltaIndex(data, copy=copy, freq='infer'), index, name=name)

    # value-based inference for object-dtype data
    if is_period_arraylike(data):
        return PeriodProperties(PeriodIndex(data, copy=copy), index, name=name)
    if is_datetime_arraylike(data):
        return DatetimeProperties(
            DatetimeIndex(data, copy=copy, freq='infer'), index, name=name)

    raise TypeError(
        "cannot convert an object of type {0} to a datetimelike index".format(
            type(data)))
def _factorize_keys(lk, rk, sort=True):
    """Factorize left/right join keys into integer labels that share a
    single mapping; returns (left_labels, right_labels, group_count)."""
    # tz-aware keys are compared on their underlying i8 values
    if com.is_datetime64tz_dtype(lk) and com.is_datetime64tz_dtype(rk):
        lk = lk.values
        rk = rk.values

    if com.is_int_or_datetime_dtype(lk) and com.is_int_or_datetime_dtype(rk):
        klass = _hash.Int64Factorizer
        lk = com._ensure_int64(com._values_from_object(lk))
        rk = com._ensure_int64(com._values_from_object(rk))
    else:
        klass = _hash.Factorizer
        lk = com._ensure_object(lk)
        rk = com._ensure_object(rk)

    rizer = klass(max(len(lk), len(rk)))

    llab = rizer.factorize(lk)
    rlab = rizer.factorize(rk)
    count = rizer.get_count()

    if sort:
        uniques = rizer.uniques.to_array()
        llab, rlab = _sort_labels(uniques, llab, rlab)

    # NA group: -1 sentinels from the factorizer become their own group id
    lmask = llab == -1
    rmask = rlab == -1
    left_has_na = lmask.any()
    right_has_na = rmask.any()
    if left_has_na or right_has_na:
        if left_has_na:
            np.putmask(llab, lmask, count)
        if right_has_na:
            np.putmask(rlab, rmask, count)
        count += 1

    return llab, rlab, count
def _nonempty_series(s, idx): dtype = s.dtype if is_datetime64tz_dtype(dtype): entry = pd.Timestamp('1970-01-01', tz=dtype.tz) data = [entry, entry] elif is_categorical_dtype(dtype): entry = s.cat.categories[0] data = pd.Categorical([entry, entry], categories=s.cat.categories, ordered=s.cat.ordered) else: entry = _scalar_from_dtype(dtype) data = [entry, entry] return pd.Series(data, name=s.name, index=idx)
def _nonempty_series(s, idx): dtype = s.dtype if is_datetime64tz_dtype(dtype): entry = pd.Timestamp('1970-01-01', tz=dtype.tz) data = [entry, entry] elif is_categorical_dtype(dtype): entry = s.cat.categories[0] data = pd.Categorical([entry, entry], categories=s.cat.categories, ordered=s.cat.ordered) else: entry = _scalar_from_dtype(dtype) data = np.array([entry, entry], dtype=dtype) return pd.Series(data, name=s.name, index=idx)
def __init__(self, left, right, name, na_op):
    """Store operands and per-side dtype flags for a datetime/timedelta op.

    Parameters
    ----------
    left, right : operands; Series operands are aligned before conversion
    name : str, the op name (e.g. '__add__')
    na_op : callable, the NA-aware elementwise operation
    """
    # need to make sure that we are aligning the data
    if isinstance(left, ABCSeries) and isinstance(right, ABCSeries):
        left, right = left.align(right, copy=False)

    # normalize both operands to array form; the rhs conversion may depend
    # on what the lhs turned out to be (passed via ``other``)
    lvalues = self._convert_to_array(left, name=name)
    rvalues = self._convert_to_array(right, name=name, other=lvalues)

    self.name = name
    self.na_op = na_op

    # left
    self.left = left
    self.is_offset_lhs = self._is_offset(left)
    self.is_timedelta_lhs = is_timedelta64_dtype(lvalues)
    self.is_datetime64_lhs = is_datetime64_dtype(lvalues)
    self.is_datetime64tz_lhs = is_datetime64tz_dtype(lvalues)
    # naive or tz-aware both count as "datetime"
    self.is_datetime_lhs = (self.is_datetime64_lhs or
                            self.is_datetime64tz_lhs)
    # NOTE(review): numeric lhs flags use the ORIGINAL operand's dtype
    # while the rhs flags below use the converted array — confirm this
    # asymmetry is intentional
    self.is_integer_lhs = left.dtype.kind in ['i', 'u']
    self.is_floating_lhs = left.dtype.kind == 'f'

    # right
    self.right = right
    self.is_offset_rhs = self._is_offset(right)
    self.is_datetime64_rhs = is_datetime64_dtype(rvalues)
    self.is_datetime64tz_rhs = is_datetime64tz_dtype(rvalues)
    self.is_datetime_rhs = (self.is_datetime64_rhs or
                            self.is_datetime64tz_rhs)
    self.is_timedelta_rhs = is_timedelta64_dtype(rvalues)
    self.is_integer_rhs = rvalues.dtype.kind in ('i', 'u')
    self.is_floating_rhs = rvalues.dtype.kind == 'f'

    # reject unsupported dtype combinations, then coerce both sides to the
    # representation the datetime arithmetic path expects
    self._validate(lvalues, rvalues, name)
    self.lvalues, self.rvalues = self._convert_for_datetime(lvalues, rvalues)
def _nonempty_series(s, idx): dtype = s.dtype if is_datetime64tz_dtype(dtype): entry = pd.Timestamp('1970-01-01', tz=dtype.tz) elif is_categorical_dtype(dtype): entry = pd.Categorical([s.cat.categories[0]], categories=s.cat.categories, ordered=s.cat.ordered) elif dtype.kind in ['i', 'f', 'u']: entry = dtype.type(1) elif dtype.kind in _simple_fake_mapping: entry = _simple_fake_mapping[dtype.kind] else: raise TypeError("Can't handle dtype: {0}".format(dtype)) return pd.Series([entry, entry], name=s.name, index=idx)
def as_json_table_type(x):
    """
    Convert a NumPy / pandas type to its corresponding json_table.

    Parameters
    ----------
    x : array or dtype

    Returns
    -------
    t : str
        the Table Schema data types

    Notes
    -----
    This table shows the relationship between NumPy / pandas dtypes,
    and Table Schema dtypes.

    ==============  =================
    Pandas type     Table Schema type
    ==============  =================
    int64           integer
    float64         number
    bool            boolean
    datetime64[ns]  datetime
    timedelta64[ns] duration
    object          str
    categorical     any
    ==============  =================
    """
    # order matters: integer/bool are numeric too, so test them first
    if is_integer_dtype(x):
        return 'integer'
    if is_bool_dtype(x):
        return 'boolean'
    if is_numeric_dtype(x):
        return 'number'
    if is_datetime64_dtype(x) or is_datetime64tz_dtype(x):
        return 'datetime'
    if is_timedelta64_dtype(x):
        return 'duration'
    if is_categorical_dtype(x):
        return 'any'
    if is_string_dtype(x):
        return 'string'
    return 'any'
def maybe_convert_for_time_op(cls, left, right, name, na_op):
    """
    if ``left`` and ``right`` are appropriate for datetime arithmetic with
    operation ``name``, processes them and returns a ``_TimeOp`` object
    that stores all the required values.  Otherwise, it will generate
    either a ``NotImplementedError`` or ``None``, indicating that the
    operation is unsupported for datetimes (e.g., an unsupported r_op) or
    that the data is not the right type for time ops.
    """
    # decide if we can do it: only datetime/timedelta lhs qualifies
    lhs_is_datetimelike = (is_timedelta64_dtype(left) or
                           is_datetime64_dtype(left) or
                           is_datetime64tz_dtype(left))
    if not lhs_is_datetimelike:
        return None
    return cls(left, right, name, na_op)
def as_json_table_type(x):
    """
    Convert a NumPy / pandas type to its corresponding json_table.

    Parameters
    ----------
    x : array or dtype

    Returns
    -------
    t : str
        the Table Schema data types

    Notes
    -----
    This table shows the relationship between NumPy / pandas dtypes,
    and Table Schema dtypes.

    ==============  =================
    Pandas type     Table Schema type
    ==============  =================
    int64           integer
    float64         number
    bool            boolean
    datetime64[ns]  datetime
    timedelta64[ns] duration
    object          str
    categorical     any
    ==============  =================
    """
    # ordered (predicate, label) dispatch; first match wins, so the
    # narrower checks (integer, bool) precede the broad numeric check
    dispatch = (
        (is_integer_dtype, 'integer'),
        (is_bool_dtype, 'boolean'),
        (is_numeric_dtype, 'number'),
        (lambda v: is_datetime64_dtype(v) or is_datetime64tz_dtype(v),
         'datetime'),
        (is_timedelta64_dtype, 'duration'),
        (is_categorical_dtype, 'any'),
        (is_string_dtype, 'string'),
    )
    for predicate, label in dispatch:
        if predicate(x):
            return label
    return 'any'
def maybe_convert_for_time_op(cls, left, right, name, na_op):
    """
    if ``left`` and ``right`` are appropriate for datetime arithmetic with
    operation ``name``, processes them and returns a ``_TimeOp`` object
    that stores all the required values.  Otherwise, it will generate
    either a ``NotImplementedError`` or ``None``, indicating that the
    operation is unsupported for datetimes (e.g., an unsupported r_op) or
    that the data is not the right type for time ops.
    """
    # decide if we can do it
    timedelta_lhs = is_timedelta64_dtype(left)
    datetime_lhs = is_datetime64_dtype(left) or is_datetime64tz_dtype(left)

    if datetime_lhs or timedelta_lhs:
        return cls(left, right, name, na_op)
    return None
def _nonempty_series(s, idx): dtype = s.dtype if is_datetime64tz_dtype(dtype): entry = pd.Timestamp('1970-01-01', tz=dtype.tz) data = [entry, entry] elif is_categorical_dtype(dtype): if len(s.cat.categories): data = [s.cat.categories[0]] * 2 cats = s.cat.categories else: data = _nonempty_index(s.cat.categories) cats = None data = pd.Categorical(data, categories=cats, ordered=s.cat.ordered) else: entry = _scalar_from_dtype(dtype) data = np.array([entry, entry], dtype=dtype) return pd.Series(data, name=s.name, index=idx)
def make_field(arr, dtype=None):
    """Build a Table Schema field dict (name, type, plus categorical
    constraints or a timezone) for a Series or Index-like ``arr``."""
    dtype = dtype or arr.dtype
    name = 'values' if arr.name is None else arr.name

    field = {'name': name, 'type': as_json_table_type(dtype)}

    if is_categorical_dtype(arr):
        # Index-likes expose categories directly; Series go through .cat
        cat = arr if hasattr(arr, 'categories') else arr.cat
        field['constraints'] = {"enum": list(cat.categories)}
        field['ordered'] = cat.ordered
    elif is_datetime64tz_dtype(arr):
        tz = arr.dt.tz if hasattr(arr, 'dt') else arr.tz
        field['tz'] = tz.zone
    return field
def make_field(arr, dtype=None):
    """Return the Table Schema field descriptor for ``arr``: its name,
    JSON table type, and any categorical/timezone metadata."""
    dtype = dtype or arr.dtype
    if arr.name is None:
        field_name = 'values'
    else:
        field_name = arr.name

    field = {'name': field_name, 'type': as_json_table_type(dtype)}

    if is_categorical_dtype(arr):
        if hasattr(arr, 'categories'):
            cats, ordered = arr.categories, arr.ordered
        else:
            cats, ordered = arr.cat.categories, arr.cat.ordered
        field['constraints'] = {"enum": list(cats)}
        field['ordered'] = ordered
    elif is_datetime64tz_dtype(arr):
        # Series carry tz under .dt; DatetimeIndex exposes .tz directly
        if hasattr(arr, 'dt'):
            field['tz'] = arr.dt.tz.zone
        else:
            field['tz'] = arr.tz.zone
    return field
def _convert_listlike(arg, box, format, name=None):
    """Convert a list-like of datetime-like values.

    Closure over the enclosing ``to_datetime`` locals (``utc``, ``unit``,
    ``errors``, ``exact``, ``infer_datetime_format``, ``dayfirst``,
    ``yearfirst``, ``freq``).  Returns a DatetimeIndex when ``box`` is
    true, otherwise the converted array.
    """
    if isinstance(arg, (list, tuple)):
        arg = np.array(arg, dtype='O')

    # these are shortcutable
    if com.is_datetime64_ns_dtype(arg):
        if box and not isinstance(arg, DatetimeIndex):
            try:
                return DatetimeIndex(arg, tz='utc' if utc else None,
                                     name=name)
            except ValueError:
                pass
        return arg
    elif com.is_datetime64tz_dtype(arg):
        if not isinstance(arg, DatetimeIndex):
            return DatetimeIndex(arg, tz='utc' if utc else None)
        if utc:
            arg = arg.tz_convert(None).tz_localize('UTC')
        return arg
    elif unit is not None:
        if format is not None:
            raise ValueError("cannot specify both format and unit")
        arg = getattr(arg, 'values', arg)
        result = tslib.array_with_unit_to_datetime(arg, unit, errors=errors)
        if box:
            if errors == 'ignore':
                from pandas import Index
                return Index(result)
            return DatetimeIndex(result, tz='utc' if utc else None,
                                 name=name)
        return result
    elif getattr(arg, 'ndim', 1) > 1:
        raise TypeError('arg must be a string, datetime, list, tuple, '
                        '1-d array, or Series')

    arg = com._ensure_object(arg)
    require_iso8601 = False

    if infer_datetime_format and format is None:
        format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)

    if format is not None:
        # There is a special fast-path for iso8601 formatted
        # datetime strings, so in those cases don't use the inferred
        # format because this path makes process slower in this
        # special case
        format_is_iso8601 = _format_is_iso(format)
        if format_is_iso8601:
            require_iso8601 = not infer_datetime_format
            format = None

    try:
        result = None
        if format is not None:
            # shortcut formatting here
            if format == '%Y%m%d':
                try:
                    result = _attempt_YYYYMMDD(arg, errors=errors)
                # BUGFIX: was a bare ``except:`` which also swallowed
                # SystemExit/KeyboardInterrupt and re-raised them as
                # a bogus ValueError
                except Exception:
                    raise ValueError("cannot convert the input to "
                                     "'%Y%m%d' date format")

            # fallback
            if result is None:
                try:
                    result = tslib.array_strptime(arg, format, exact=exact,
                                                  errors=errors)
                except tslib.OutOfBoundsDatetime:
                    if errors == 'raise':
                        raise
                    result = arg
                except ValueError:
                    # if format was inferred, try falling back
                    # to array_to_datetime - terminate here
                    # for specified formats
                    if not infer_datetime_format:
                        if errors == 'raise':
                            raise
                        result = arg

        if result is None and (format is None or infer_datetime_format):
            result = tslib.array_to_datetime(
                arg, errors=errors, utc=utc, dayfirst=dayfirst,
                yearfirst=yearfirst, freq=freq,
                require_iso8601=require_iso8601)

        if com.is_datetime64_dtype(result) and box:
            result = DatetimeIndex(result, tz='utc' if utc else None,
                                   name=name)
        return result

    except ValueError as e:
        # last resort: maybe the values were tz-aware datetime objects
        try:
            values, tz = tslib.datetime_to_datetime64(arg)
            return DatetimeIndex._simple_new(values, name=name, tz=tz)
        except (ValueError, TypeError):
            raise e
def test_constructor_with_datetime_tz(self):
    """GH#8260: Series construction, indexing, astype, formatting and
    inference for tz-aware datetime64 data."""
    # 8260
    # support datetime64 with tz

    dr = date_range('20130101', periods=3, tz='US/Eastern')
    s = Series(dr)
    self.assertTrue(s.dtype.name == 'datetime64[ns, US/Eastern]')
    self.assertTrue(s.dtype == 'datetime64[ns, US/Eastern]')
    self.assertTrue(com.is_datetime64tz_dtype(s.dtype))
    self.assertTrue('datetime64[ns, US/Eastern]' in str(s))

    # export: .values drops the tz, returning naive UTC-based i8 data
    result = s.values
    self.assertIsInstance(result, np.ndarray)
    self.assertTrue(result.dtype == 'datetime64[ns]')
    self.assertTrue(dr.equals(pd.DatetimeIndex(result).tz_localize(
        'UTC').tz_convert(tz=s.dt.tz)))

    # indexing
    result = s.iloc[0]
    self.assertEqual(result, Timestamp('2013-01-01 00:00:00-0500',
                                       tz='US/Eastern', offset='D'))
    result = s[0]
    self.assertEqual(result, Timestamp('2013-01-01 00:00:00-0500',
                                       tz='US/Eastern', offset='D'))

    result = s[Series([True, True, False], index=s.index)]
    assert_series_equal(result, s[0:2])

    result = s.iloc[0:1]
    assert_series_equal(result, Series(dr[0:1]))

    # concat
    result = pd.concat([s.iloc[0:1], s.iloc[1:]])
    assert_series_equal(result, s)

    # astype
    result = s.astype(object)
    expected = Series(DatetimeIndex(s._values).asobject)
    assert_series_equal(result, expected)

    # round-trip through naive values + localize/convert
    result = Series(s.values).dt.tz_localize('UTC').dt.tz_convert(s.dt.tz)
    assert_series_equal(result, s)

    # astype - datetime64[ns, tz]
    result = Series(s.values).astype('datetime64[ns, US/Eastern]')
    assert_series_equal(result, s)

    result = Series(s.values).astype(s.dtype)
    assert_series_equal(result, s)

    result = s.astype('datetime64[ns, CET]')
    expected = Series(date_range('20130101 06:00:00', periods=3, tz='CET'))
    assert_series_equal(result, expected)

    # short str
    self.assertTrue('datetime64[ns, US/Eastern]' in str(s))

    # formatting with NaT
    result = s.shift()
    self.assertTrue('datetime64[ns, US/Eastern]' in str(result))
    self.assertTrue('NaT' in str(result))

    # long str
    t = Series(date_range('20130101', periods=1000, tz='US/Eastern'))
    self.assertTrue('datetime64[ns, US/Eastern]' in str(t))

    result = pd.DatetimeIndex(s, freq='infer')
    tm.assert_index_equal(result, dr)

    # inference
    s = Series([pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'),
                pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Pacific')])
    self.assertTrue(s.dtype == 'datetime64[ns, US/Pacific]')
    self.assertTrue(lib.infer_dtype(s) == 'datetime64')

    # mixed timezones fall back to object dtype
    s = Series([pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'),
                pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Eastern')])
    self.assertTrue(s.dtype == 'object')
    self.assertTrue(lib.infer_dtype(s) == 'datetime')

    # with all NaT
    s = Series(pd.NaT, index=[0, 1], dtype='datetime64[ns, US/Eastern]')
    expected = Series(pd.DatetimeIndex(['NaT', 'NaT'], tz='US/Eastern'))
    assert_series_equal(s, expected)
def _convert_listlike(arg, box, format, name=None):
    """Convert a list-like of datetime-like values.

    Closure over the enclosing ``to_datetime`` locals (``utc``, ``unit``,
    ``errors``, ``exact``, ``infer_datetime_format``, ``dayfirst``,
    ``yearfirst``, ``freq``).  Returns a DatetimeIndex when ``box`` is
    true, otherwise the converted array.
    """
    if isinstance(arg, (list, tuple)):
        arg = np.array(arg, dtype='O')

    # these are shortcutable
    if com.is_datetime64_ns_dtype(arg):
        if box and not isinstance(arg, DatetimeIndex):
            try:
                return DatetimeIndex(arg, tz='utc' if utc else None,
                                     name=name)
            except ValueError:
                pass
        return arg
    elif com.is_datetime64tz_dtype(arg):
        if not isinstance(arg, DatetimeIndex):
            return DatetimeIndex(arg, tz='utc' if utc else None)
        if utc:
            arg = arg.tz_convert(None)
        return arg
    elif format is None and com.is_integer_dtype(arg) and unit == 'ns':
        # integer nanoseconds: a direct view is enough
        result = arg.astype('datetime64[ns]')
        if box:
            return DatetimeIndex(result, tz='utc' if utc else None,
                                 name=name)
        return result
    elif getattr(arg, 'ndim', 1) > 1:
        raise TypeError('arg must be a string, datetime, list, tuple, 1-d array, or Series')

    arg = com._ensure_object(arg)
    require_iso8601 = False

    if infer_datetime_format and format is None:
        format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)

    if format is not None:
        # There is a special fast-path for iso8601 formatted
        # datetime strings, so in those cases don't use the inferred
        # format because this path makes process slower in this
        # special case
        format_is_iso8601 = (
            ('%Y-%m-%dT%H:%M:%S.%f'.startswith(format) or
             '%Y-%m-%d %H:%M:%S.%f'.startswith(format)) and
            format != '%Y')
        if format_is_iso8601:
            require_iso8601 = not infer_datetime_format
            format = None

    try:
        result = None
        if format is not None:
            # shortcut formatting here
            if format == '%Y%m%d':
                try:
                    result = _attempt_YYYYMMDD(arg, errors=errors)
                # BUGFIX: was a bare ``except:`` which also swallowed
                # SystemExit/KeyboardInterrupt and re-raised them as
                # a bogus ValueError
                except Exception:
                    raise ValueError("cannot convert the input to "
                                     "'%Y%m%d' date format")

            # fallback
            if result is None:
                try:
                    result = tslib.array_strptime(
                        arg, format, exact=exact, errors=errors)
                except tslib.OutOfBoundsDatetime:
                    if errors == 'raise':
                        raise
                    result = arg
                except ValueError:
                    # if format was inferred, try falling back
                    # to array_to_datetime - terminate here
                    # for specified formats
                    if not infer_datetime_format:
                        if errors == 'raise':
                            raise
                        result = arg

        if result is None and (format is None or infer_datetime_format):
            result = tslib.array_to_datetime(
                arg, errors=errors, utc=utc, dayfirst=dayfirst,
                yearfirst=yearfirst, freq=freq, unit=unit,
                require_iso8601=require_iso8601)

        if com.is_datetime64_dtype(result) and box:
            result = DatetimeIndex(result, tz='utc' if utc else None,
                                   name=name)
        return result

    except ValueError as e:
        # last resort: maybe the values were tz-aware datetime objects
        try:
            values, tz = tslib.datetime_to_datetime64(arg)
            return DatetimeIndex._simple_new(values, name=name, tz=tz)
        except (ValueError, TypeError):
            raise e
def _convert_listlike(arg, box, format, name=None):
    """Convert a list-like of datetime-like values.

    Closure over the enclosing ``to_datetime`` locals (``utc``, ``unit``,
    ``errors``, ``exact``, ``infer_datetime_format``, ``dayfirst``,
    ``yearfirst``, ``freq``).  Returns a DatetimeIndex when ``box`` is
    true, otherwise the converted array.
    """
    if isinstance(arg, (list, tuple)):
        arg = np.array(arg, dtype='O')

    # these are shortcutable
    if com.is_datetime64_ns_dtype(arg):
        if box and not isinstance(arg, DatetimeIndex):
            try:
                return DatetimeIndex(arg, tz='utc' if utc else None,
                                     name=name)
            except ValueError:
                pass
        return arg
    elif com.is_datetime64tz_dtype(arg):
        if not isinstance(arg, DatetimeIndex):
            return DatetimeIndex(arg, tz='utc' if utc else None)
        if utc:
            arg = arg.tz_convert(None)
        return arg
    elif format is None and com.is_integer_dtype(arg) and unit == 'ns':
        # integer nanoseconds: a direct view is enough
        result = arg.astype('datetime64[ns]')
        if box:
            return DatetimeIndex(result, tz='utc' if utc else None,
                                 name=name)
        return result

    arg = com._ensure_object(arg)
    require_iso8601 = False

    if infer_datetime_format and format is None:
        format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)

    if format is not None:
        # There is a special fast-path for iso8601 formatted
        # datetime strings, so in those cases don't use the inferred
        # format because this path makes process slower in this
        # special case
        format_is_iso8601 = (
            ('%Y-%m-%dT%H:%M:%S.%f'.startswith(format) or
             '%Y-%m-%d %H:%M:%S.%f'.startswith(format)) and
            format != '%Y')
        if format_is_iso8601:
            require_iso8601 = not infer_datetime_format
            format = None

    try:
        result = None
        if format is not None:
            # shortcut formatting here
            if format == '%Y%m%d':
                try:
                    result = _attempt_YYYYMMDD(arg, errors=errors)
                # BUGFIX: was a bare ``except:`` which also swallowed
                # SystemExit/KeyboardInterrupt and re-raised them as
                # a bogus ValueError
                except Exception:
                    raise ValueError("cannot convert the input to '%Y%m%d' date format")

            # fallback
            if result is None:
                try:
                    result = tslib.array_strptime(
                        arg, format, exact=exact, errors=errors)
                except (tslib.OutOfBoundsDatetime):
                    if errors == 'raise':
                        raise
                    result = arg
                except ValueError:
                    # if format was inferred, try falling back
                    # to array_to_datetime - terminate here
                    # for specified formats
                    if not infer_datetime_format:
                        if errors == 'raise':
                            raise
                        result = arg

        if result is None and (format is None or infer_datetime_format):
            result = tslib.array_to_datetime(
                arg, errors=errors, utc=utc, dayfirst=dayfirst,
                yearfirst=yearfirst, freq=freq, unit=unit,
                require_iso8601=require_iso8601)

        if com.is_datetime64_dtype(result) and box:
            result = DatetimeIndex(result, tz='utc' if utc else None,
                                   name=name)
        return result

    except ValueError as e:
        # last resort: maybe the values were tz-aware datetime objects
        try:
            values, tz = tslib.datetime_to_datetime64(arg)
            return DatetimeIndex._simple_new(values, name=name, tz=tz)
        except (ValueError, TypeError):
            raise e
def test_constructor_with_datetime_tz(self):
    """GH#8260: Series construction, indexing, astype, formatting and
    inference for tz-aware datetime64 data."""
    # 8260
    # support datetime64 with tz

    dr = date_range('20130101', periods=3, tz='US/Eastern')
    s = Series(dr)
    self.assertTrue(s.dtype.name == 'datetime64[ns, US/Eastern]')
    self.assertTrue(s.dtype == 'datetime64[ns, US/Eastern]')
    self.assertTrue(com.is_datetime64tz_dtype(s.dtype))
    self.assertTrue('datetime64[ns, US/Eastern]' in str(s))

    # export: .values drops the tz, returning naive UTC-based data
    result = s.values
    self.assertIsInstance(result, np.ndarray)
    self.assertTrue(result.dtype == 'datetime64[ns]')
    self.assertTrue(
        dr.equals(
            pd.DatetimeIndex(result).tz_localize('UTC').tz_convert(
                tz=s.dt.tz)))

    # indexing
    result = s.iloc[0]
    self.assertEqual(
        result,
        Timestamp('2013-01-01 00:00:00-0500', tz='US/Eastern', offset='D'))
    result = s[0]
    self.assertEqual(
        result,
        Timestamp('2013-01-01 00:00:00-0500', tz='US/Eastern', offset='D'))

    result = s[Series([True, True, False], index=s.index)]
    assert_series_equal(result, s[0:2])

    result = s.iloc[0:1]
    assert_series_equal(result, Series(dr[0:1]))

    # concat
    result = pd.concat([s.iloc[0:1], s.iloc[1:]])
    assert_series_equal(result, s)

    # astype
    result = s.astype(object)
    expected = Series(DatetimeIndex(s._values).asobject)
    assert_series_equal(result, expected)

    # round-trip through naive values + localize/convert
    result = Series(s.values).dt.tz_localize('UTC').dt.tz_convert(s.dt.tz)
    assert_series_equal(result, s)

    # astype - datetime64[ns, tz]
    result = Series(s.values).astype('datetime64[ns, US/Eastern]')
    assert_series_equal(result, s)

    result = Series(s.values).astype(s.dtype)
    assert_series_equal(result, s)

    result = s.astype('datetime64[ns, CET]')
    expected = Series(date_range('20130101 06:00:00', periods=3, tz='CET'))
    assert_series_equal(result, expected)

    # short str
    self.assertTrue('datetime64[ns, US/Eastern]' in str(s))

    # formatting with NaT
    result = s.shift()
    self.assertTrue('datetime64[ns, US/Eastern]' in str(result))
    self.assertTrue('NaT' in str(result))

    # long str
    t = Series(date_range('20130101', periods=1000, tz='US/Eastern'))
    self.assertTrue('datetime64[ns, US/Eastern]' in str(t))

    result = pd.DatetimeIndex(s, freq='infer')
    tm.assert_index_equal(result, dr)

    # inference
    s = Series([
        pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'),
        pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Pacific')
    ])
    self.assertTrue(s.dtype == 'datetime64[ns, US/Pacific]')
    self.assertTrue(lib.infer_dtype(s) == 'datetime64')

    # mixed timezones fall back to object dtype
    s = Series([
        pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'),
        pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Eastern')
    ])
    self.assertTrue(s.dtype == 'object')
    self.assertTrue(lib.infer_dtype(s) == 'datetime')

    # with all NaT
    s = Series(pd.NaT, index=[0, 1], dtype='datetime64[ns, US/Eastern]')
    expected = Series(pd.DatetimeIndex(['NaT', 'NaT'], tz='US/Eastern'))
    assert_series_equal(s, expected)