def test_to_timestamp_to_period_astype(self): idx = DatetimeIndex([pd.NaT, '2011-01-01', '2011-02-01'], name='idx') res = idx.astype('period[M]') exp = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='M', name='idx') tm.assert_index_equal(res, exp) res = idx.astype('period[3M]') exp = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='3M', name='idx') tm.assert_index_equal(res, exp)
def test_dti_astype_period(self): idx = DatetimeIndex([NaT, "2011-01-01", "2011-02-01"], name="idx") res = idx.astype("period[M]") exp = PeriodIndex(["NaT", "2011-01", "2011-02"], freq="M", name="idx") tm.assert_index_equal(res, exp) res = idx.astype("period[3M]") exp = PeriodIndex(["NaT", "2011-01", "2011-02"], freq="3M", name="idx") tm.assert_index_equal(res, exp)
def test_astype(self): # GH 13149, GH 13209 idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN]) result = idx.astype(object) expected = Index([Timestamp('2016-05-16')] + [NaT] * 3, dtype=object) tm.assert_index_equal(result, expected) result = idx.astype(int) expected = Int64Index([1463356800000000000] + [-9223372036854775808] * 3, dtype=np.int64) tm.assert_index_equal(result, expected) rng = date_range('1/1/2000', periods=10) result = rng.astype('i8') self.assert_numpy_array_equal(result, rng.asi8)
def date_to_timestamp(date: pd.DatetimeIndex): """Convert a pandas datetime to a javascript timestamp. This aims to match the timezone handling semantics used in Vega and Vega-Lite. Parameters ---------- timestamp : float The unix epoch timestamp. Returns ------- date : pd.DatetimeIndex The timestamps to be converted See Also -------- date_to_timestamp : opposite of this function """ if date.tzinfo is None: date = date.tz_localize(tzlocal()) try: # Works for pd.Timestamp return date.timestamp() * 1000 except AttributeError: # Works for pd.DatetimeIndex return date.astype('int64') * 1E-6
def get_fwd_rates(d1, d2, fwd_curve, cal, roll_conv): # Get historical fixings historical_dates = d1[d1 <= fwd_curve.curve_date] historical_fixings = [] historical_fixing_dates = [] for i,d in enumerate(historical_dates): if d in fwd_curve.historical_fixings.keys(): historical_fixings.append(fwd_curve.historical_fixings[d]/100.0) historical_fixing_dates.append(d) else: j = 1 while j < 10: d = DatetimeIndex([d]) d = pd.Timestamp(np.busday_offset(dates=d.astype(str), offsets=j, roll=roll_conv, busdaycal=cal)[0]) if d in fwd_curve.historical_fixings.keys(): historical_fixings.append(fwd_curve.historical_fixings[d]/100.0) historical_fixing_dates.append(d) break j += 1 # Calculate future fixings d2 = d2[d1 > fwd_curve.curve_date] d1 = d1[d1 > fwd_curve.curve_date] t = pd.Series(sch.year_fraction(d1,d2,fwd_curve.day_count_basis)) future_fixings = list((1 / t) * (fwd_curve.get_dcf(d1) / fwd_curve.get_dcf(d2) - 1)) return pd.Series(historical_fixing_dates + d1.to_list()), pd.Series(historical_fixings + future_fixings)
def test_astype_datetime64(self): # GH 13149, GH 13209 idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.NaN]) result = idx.astype("datetime64[ns]") tm.assert_index_equal(result, idx) assert result is not idx result = idx.astype("datetime64[ns]", copy=False) tm.assert_index_equal(result, idx) assert result is idx idx_tz = DatetimeIndex(["2016-05-16", "NaT", NaT, np.NaN], tz="EST") result = idx_tz.astype("datetime64[ns]") expected = DatetimeIndex(["2016-05-16 05:00:00", "NaT", "NaT", "NaT"], dtype="datetime64[ns]") tm.assert_index_equal(result, expected)
def test_astype_datetime64(self): # GH 13149, GH 13209 idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN]) result = idx.astype('datetime64[ns]') tm.assert_index_equal(result, idx) assert result is not idx result = idx.astype('datetime64[ns]', copy=False) tm.assert_index_equal(result, idx) assert result is idx idx_tz = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN], tz='EST') result = idx_tz.astype('datetime64[ns]') expected = DatetimeIndex(['2016-05-16 05:00:00', 'NaT', 'NaT', 'NaT'], dtype='datetime64[ns]') tm.assert_index_equal(result, expected)
def test_astype(self): # GH 13149, GH 13209 idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN]) result = idx.astype(object) expected = Index([Timestamp('2016-05-16')] + [NaT] * 3, dtype=object) tm.assert_index_equal(result, expected) result = idx.astype(int) expected = Int64Index([1463356800000000000] + [-9223372036854775808] * 3, dtype=np.int64) tm.assert_index_equal(result, expected) rng = date_range('1/1/2000', periods=10) result = rng.astype('i8') tm.assert_index_equal(result, Index(rng.asi8)) tm.assert_numpy_array_equal(result.values, rng.asi8)
def test_astype_str_compat(self): # GH 13149, GH 13209 # verify that we are returning NaT as a string (and not unicode) idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN]) result = idx.astype(str) expected = Index(['2016-05-16', 'NaT', 'NaT', 'NaT'], dtype=object) tm.assert_index_equal(result, expected)
def test_astype_object(self): idx = pd.date_range(start='2013-01-01', periods=4, freq='M', name='idx') expected_list = [ Timestamp('2013-01-31'), Timestamp('2013-02-28'), Timestamp('2013-03-31'), Timestamp('2013-04-30') ] expected = pd.Index(expected_list, dtype=object, name='idx') result = idx.astype(object) assert isinstance(result, Index) assert result.dtype == object tm.assert_index_equal(result, expected) assert result.name == expected.name assert idx.tolist() == expected_list idx = pd.date_range(start='2013-01-01', periods=4, freq='M', name='idx', tz='Asia/Tokyo') expected_list = [ Timestamp('2013-01-31', tz='Asia/Tokyo'), Timestamp('2013-02-28', tz='Asia/Tokyo'), Timestamp('2013-03-31', tz='Asia/Tokyo'), Timestamp('2013-04-30', tz='Asia/Tokyo') ] expected = pd.Index(expected_list, dtype=object, name='idx') result = idx.astype(object) assert isinstance(result, Index) assert result.dtype == object tm.assert_index_equal(result, expected) assert result.name == expected.name assert idx.tolist() == expected_list idx = DatetimeIndex([ datetime(2013, 1, 1), datetime(2013, 1, 2), pd.NaT, datetime(2013, 1, 4) ], name='idx') expected_list = [ Timestamp('2013-01-01'), Timestamp('2013-01-02'), pd.NaT, Timestamp('2013-01-04') ] expected = pd.Index(expected_list, dtype=object, name='idx') result = idx.astype(object) assert isinstance(result, Index) assert result.dtype == object tm.assert_index_equal(result, expected) assert result.name == expected.name assert idx.tolist() == expected_list
def test_astype_object_with_nat(self): idx = DatetimeIndex([datetime(2013, 1, 1), datetime(2013, 1, 2), pd.NaT, datetime(2013, 1, 4)], name='idx') expected_list = [Timestamp('2013-01-01'), Timestamp('2013-01-02'), pd.NaT, Timestamp('2013-01-04')] expected = pd.Index(expected_list, dtype=object, name='idx') result = idx.astype(object) tm.assert_index_equal(result, expected) assert idx.tolist() == expected_list
def test_astype_datetime64(self): # GH 13149, GH 13209 idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.NaN], name="idx") result = idx.astype("datetime64[ns]") tm.assert_index_equal(result, idx) assert result is not idx result = idx.astype("datetime64[ns]", copy=False) tm.assert_index_equal(result, idx) assert result is idx idx_tz = DatetimeIndex(["2016-05-16", "NaT", NaT, np.NaN], tz="EST", name="idx") with tm.assert_produces_warning(FutureWarning): # dt64tz->dt64 deprecated result = idx_tz.astype("datetime64[ns]") expected = DatetimeIndex( ["2016-05-16 05:00:00", "NaT", "NaT", "NaT"], dtype="datetime64[ns]", name="idx", ) tm.assert_index_equal(result, expected)
def test_astype(self): # GH 13149, GH 13209 idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.NaN], name="idx") result = idx.astype(object) expected = Index( [Timestamp("2016-05-16")] + [NaT] * 3, dtype=object, name="idx" ) tm.assert_index_equal(result, expected) result = idx.astype(int) expected = Int64Index( [1463356800000000000] + [-9223372036854775808] * 3, dtype=np.int64, name="idx", ) tm.assert_index_equal(result, expected) rng = date_range("1/1/2000", periods=10, name="idx") result = rng.astype("i8") tm.assert_index_equal(result, Index(rng.asi8, name="idx")) tm.assert_numpy_array_equal(result.values, rng.asi8)
def test_astype_object_with_nat(self): idx = DatetimeIndex( [datetime(2013, 1, 1), datetime(2013, 1, 2), NaT, datetime(2013, 1, 4)], name="idx", ) expected_list = [ Timestamp("2013-01-01"), Timestamp("2013-01-02"), NaT, Timestamp("2013-01-04"), ] expected = Index(expected_list, dtype=object, name="idx") result = idx.astype(object) tm.assert_index_equal(result, expected) assert idx.tolist() == expected_list
def test_equals(self): # GH 13107 idx = DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"]) assert idx.equals(idx) assert idx.equals(idx.copy()) assert idx.equals(idx.astype(object)) assert idx.astype(object).equals(idx) assert idx.astype(object).equals(idx.astype(object)) assert not idx.equals(list(idx)) assert not idx.equals(Series(idx)) idx2 = DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"], tz="US/Pacific") assert not idx.equals(idx2) assert not idx.equals(idx2.copy()) assert not idx.equals(idx2.astype(object)) assert not idx.astype(object).equals(idx2) assert not idx.equals(list(idx2)) assert not idx.equals(Series(idx2)) # same internal, different tz idx3 = DatetimeIndex(idx.asi8, tz="US/Pacific") tm.assert_numpy_array_equal(idx.asi8, idx3.asi8) assert not idx.equals(idx3) assert not idx.equals(idx3.copy()) assert not idx.equals(idx3.astype(object)) assert not idx.astype(object).equals(idx3) assert not idx.equals(list(idx3)) assert not idx.equals(Series(idx3)) # check that we do not raise when comparing with OutOfBounds objects oob = Index([datetime(2500, 1, 1)] * 3, dtype=object) assert not idx.equals(oob) assert not idx2.equals(oob) assert not idx3.equals(oob) # check that we do not raise when comparing with OutOfBounds dt64 oob2 = oob.map(np.datetime64) assert not idx.equals(oob2) assert not idx2.equals(oob2) assert not idx3.equals(oob2)
def test_astype_object(self): idx = pd.date_range(start='2013-01-01', periods=4, freq='M', name='idx') expected_list = [Timestamp('2013-01-31'), Timestamp('2013-02-28'), Timestamp('2013-03-31'), Timestamp('2013-04-30')] expected = pd.Index(expected_list, dtype=object, name='idx') result = idx.astype(object) assert isinstance(result, Index) assert result.dtype == object tm.assert_index_equal(result, expected) assert result.name == expected.name assert idx.tolist() == expected_list idx = pd.date_range(start='2013-01-01', periods=4, freq='M', name='idx', tz='Asia/Tokyo') expected_list = [Timestamp('2013-01-31', tz='Asia/Tokyo'), Timestamp('2013-02-28', tz='Asia/Tokyo'), Timestamp('2013-03-31', tz='Asia/Tokyo'), Timestamp('2013-04-30', tz='Asia/Tokyo')] expected = pd.Index(expected_list, dtype=object, name='idx') result = idx.astype(object) assert isinstance(result, Index) assert result.dtype == object tm.assert_index_equal(result, expected) assert result.name == expected.name assert idx.tolist() == expected_list idx = DatetimeIndex([datetime(2013, 1, 1), datetime(2013, 1, 2), pd.NaT, datetime(2013, 1, 4)], name='idx') expected_list = [Timestamp('2013-01-01'), Timestamp('2013-01-02'), pd.NaT, Timestamp('2013-01-04')] expected = pd.Index(expected_list, dtype=object, name='idx') result = idx.astype(object) assert isinstance(result, Index) assert result.dtype == object tm.assert_index_equal(result, expected) assert result.name == expected.name assert idx.tolist() == expected_list
def test_astype_raises(self, dtype): # GH 13149, GH 13209 idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN]) msg = 'Cannot cast DatetimeArrayMixin to dtype' with pytest.raises(TypeError, match=msg): idx.astype(dtype)
def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None): """ Encode input values as an enumerated type or categorical variable Parameters ---------- values : ndarray (1-d) Sequence sort : boolean, default False Sort by values na_sentinel : int, default -1 Value to mark "not found" size_hint : hint to the hashtable sizer Returns ------- labels : the indexer to the original array uniques : ndarray (1-d) or Index the unique values. Index is returned when passed values is Index or Series note: an array of Periods will ignore sort as it returns an always sorted PeriodIndex """ from pandas import Index, Series, DatetimeIndex vals = np.asarray(values) # localize to UTC is_datetimetz_type = is_datetimetz(values) if is_datetimetz_type: values = DatetimeIndex(values) vals = values.tz_localize(None) is_datetime = is_datetime64_dtype(vals) is_timedelta = is_timedelta64_dtype(vals) (hash_klass, vec_klass), vals = _get_data_algo(vals, _hashtables) table = hash_klass(size_hint or len(vals)) uniques = vec_klass() labels = table.get_labels(vals, uniques, 0, na_sentinel, True) labels = _ensure_platform_int(labels) uniques = uniques.to_array() if sort and len(uniques) > 0: uniques, labels = safe_sort(uniques, labels, na_sentinel=na_sentinel, assume_unique=True) if is_datetimetz_type: # reset tz uniques = DatetimeIndex(uniques.astype('M8[ns]')).tz_localize( values.tz) elif is_datetime: uniques = uniques.astype('M8[ns]') elif is_timedelta: uniques = uniques.astype('m8[ns]') if isinstance(values, Index): uniques = values._shallow_copy(uniques, name=None) elif isinstance(values, Series): uniques = Index(uniques) return labels, uniques
def test_astype_raises(self, dtype): # GH 13149, GH 13209 idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN]) msg = 'Cannot cast DatetimeIndex to dtype' with tm.assert_raises_regex(TypeError, msg): idx.astype(dtype)
def value_counts(values, sort=True, ascending=False, normalize=False, bins=None, dropna=True): """ Compute a histogram of the counts of non-null values. Parameters ---------- values : ndarray (1-d) sort : boolean, default True Sort by values ascending : boolean, default False Sort in ascending order normalize: boolean, default False If True then compute a relative histogram bins : integer, optional Rather than count values, group them into half-open bins, convenience for pd.cut, only works with numeric data dropna : boolean, default True Don't include counts of NaN Returns ------- value_counts : Series """ from pandas.core.series import Series from pandas.tools.tile import cut from pandas import Index, PeriodIndex, DatetimeIndex name = getattr(values, 'name', None) values = Series(values).values if bins is not None: try: cat, bins = cut(values, bins, retbins=True) except TypeError: raise TypeError("bins argument only works with numeric data.") values = cat.codes if com.is_categorical_dtype(values.dtype): result = values.value_counts(dropna) else: dtype = values.dtype is_period = com.is_period_arraylike(values) is_datetimetz = com.is_datetimetz(values) if com.is_datetime_or_timedelta_dtype( dtype) or is_period or is_datetimetz: if is_period: values = PeriodIndex(values) elif is_datetimetz: tz = getattr(values, 'tz', None) values = DatetimeIndex(values).tz_localize(None) values = values.view(np.int64) keys, counts = htable.value_count_scalar64(values, dropna) if dropna: from pandas.tslib import iNaT msk = keys != iNaT keys, counts = keys[msk], counts[msk] # localize to the original tz if necessary if is_datetimetz: keys = DatetimeIndex(keys).tz_localize(tz) # convert the keys back to the dtype we came in else: keys = keys.astype(dtype) elif com.is_integer_dtype(dtype): values = com._ensure_int64(values) keys, counts = htable.value_count_scalar64(values, dropna) elif com.is_float_dtype(dtype): values = com._ensure_float64(values) keys, counts = htable.value_count_scalar64(values, dropna) else: values = com._ensure_object(values) mask = com.isnull(values) keys, counts = htable.value_count_object(values, mask) if not dropna and mask.any(): keys = np.insert(keys, 0, np.NaN) counts = np.insert(counts, 0, mask.sum()) if not isinstance(keys, Index): keys = Index(keys) result = Series(counts, index=keys, name=name) if bins is not None: # TODO: This next line should be more efficient result = result.reindex(np.arange(len(cat.categories)), fill_value=0) result.index = bins[:-1] if sort: result = result.sort_values(ascending=ascending) if normalize: result = result / float(values.size) return result
def test_astype_raises(self, dtype): # GH 13149, GH 13209 idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN]) msg = 'Cannot cast DatetimeArray to dtype' with pytest.raises(TypeError, match=msg): idx.astype(dtype)
def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None): """ Encode input values as an enumerated type or categorical variable Parameters ---------- values : ndarray (1-d) Sequence sort : boolean, default False Sort by values na_sentinel : int, default -1 Value to mark "not found" size_hint : hint to the hashtable sizer Returns ------- labels : the indexer to the original array uniques : ndarray (1-d) or Index the unique values. Index is returned when passed values is Index or Series note: an array of Periods will ignore sort as it returns an always sorted PeriodIndex """ from pandas import Index, Series, DatetimeIndex vals = np.asarray(values) # localize to UTC is_datetimetz = com.is_datetimetz(values) if is_datetimetz: values = DatetimeIndex(values) vals = values.tz_localize(None) is_datetime = com.is_datetime64_dtype(vals) is_timedelta = com.is_timedelta64_dtype(vals) (hash_klass, vec_klass), vals = _get_data_algo(vals, _hashtables) table = hash_klass(size_hint or len(vals)) uniques = vec_klass() labels = table.get_labels(vals, uniques, 0, na_sentinel, True) labels = com._ensure_platform_int(labels) uniques = uniques.to_array() if sort and len(uniques) > 0: try: sorter = uniques.argsort() except: # unorderable in py3 if mixed str/int t = hash_klass(len(uniques)) t.map_locations(com._ensure_object(uniques)) # order ints before strings ordered = np.concatenate([ np.sort(np.array([e for i, e in enumerate(uniques) if f(e)], dtype=object)) for f in [lambda x: not isinstance(x, string_types), lambda x: isinstance(x, string_types)]]) sorter = com._ensure_platform_int(t.lookup( com._ensure_object(ordered))) reverse_indexer = np.empty(len(sorter), dtype=np.int_) reverse_indexer.put(sorter, np.arange(len(sorter))) mask = labels < 0 labels = reverse_indexer.take(labels) np.putmask(labels, mask, -1) uniques = uniques.take(sorter) if is_datetimetz: # reset tz uniques = DatetimeIndex(uniques.astype('M8[ns]')).tz_localize( values.tz) elif is_datetime: uniques = uniques.astype('M8[ns]') elif is_timedelta: uniques = uniques.astype('m8[ns]') if isinstance(values, Index): uniques = values._shallow_copy(uniques, name=None) elif isinstance(values, Series): uniques = Index(uniques) return labels, uniques
def value_counts(values, sort=True, ascending=False, normalize=False, bins=None, dropna=True): """ Compute a histogram of the counts of non-null values. Parameters ---------- values : ndarray (1-d) sort : boolean, default True Sort by values ascending : boolean, default False Sort in ascending order normalize: boolean, default False If True then compute a relative histogram bins : integer, optional Rather than count values, group them into half-open bins, convenience for pd.cut, only works with numeric data dropna : boolean, default True Don't include counts of NaN Returns ------- value_counts : Series """ from pandas.core.series import Series from pandas.tools.tile import cut from pandas import Index, PeriodIndex, DatetimeIndex name = getattr(values, 'name', None) values = Series(values).values if bins is not None: try: cat, bins = cut(values, bins, retbins=True) except TypeError: raise TypeError("bins argument only works with numeric data.") values = cat.codes if com.is_categorical_dtype(values.dtype): result = values.value_counts(dropna) else: dtype = values.dtype is_period = com.is_period_arraylike(values) is_datetimetz = com.is_datetimetz(values) if com.is_datetime_or_timedelta_dtype(dtype) or is_period or \ is_datetimetz: if is_period: values = PeriodIndex(values) elif is_datetimetz: tz = getattr(values, 'tz', None) values = DatetimeIndex(values).tz_localize(None) values = values.view(np.int64) keys, counts = htable.value_count_scalar64(values, dropna) if dropna: msk = keys != iNaT keys, counts = keys[msk], counts[msk] # localize to the original tz if necessary if is_datetimetz: keys = DatetimeIndex(keys).tz_localize(tz) # convert the keys back to the dtype we came in else: keys = keys.astype(dtype) elif com.is_integer_dtype(dtype): values = com._ensure_int64(values) keys, counts = htable.value_count_scalar64(values, dropna) elif com.is_float_dtype(dtype): values = com._ensure_float64(values) keys, counts = htable.value_count_scalar64(values, dropna) else: values = com._ensure_object(values) mask = com.isnull(values) keys, counts = htable.value_count_object(values, mask) if not dropna and mask.any(): keys = np.insert(keys, 0, np.NaN) counts = np.insert(counts, 0, mask.sum()) if not isinstance(keys, Index): keys = Index(keys) result = Series(counts, index=keys, name=name) if bins is not None: # TODO: This next line should be more efficient result = result.reindex(np.arange(len(cat.categories)), fill_value=0) result.index = bins[:-1] if sort: result = result.sort_values(ascending=ascending) if normalize: result = result / float(values.size) return result
def test_astype_raises(self, dtype): # GH 13149, GH 13209 idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.NaN]) msg = "Cannot cast DatetimeArray to dtype" with pytest.raises(TypeError, match=msg): idx.astype(dtype)