def test_view_asi8(self): idx = PeriodIndex([], freq="M") exp = np.array([], dtype=np.int64) tm.assert_numpy_array_equal(idx.view("i8"), exp) tm.assert_numpy_array_equal(idx.asi8, exp) idx = PeriodIndex(["2011-01", NaT], freq="M") exp = np.array([492, -9223372036854775808], dtype=np.int64) tm.assert_numpy_array_equal(idx.view("i8"), exp) tm.assert_numpy_array_equal(idx.asi8, exp) exp = np.array([14975, -9223372036854775808], dtype=np.int64) idx = PeriodIndex(["2011-01-01", NaT], freq="D") tm.assert_numpy_array_equal(idx.view("i8"), exp) tm.assert_numpy_array_equal(idx.asi8, exp)
def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None): """ Encode input values as an enumerated type or categorical variable Parameters ---------- values : ndarray (1-d) Sequence sort : boolean, default False Sort by values na_sentinel : int, default -1 Value to mark "not found" size_hint : hint to the hashtable sizer Returns ------- labels : the indexer to the original array uniques : ndarray (1-d) or Index the unique values. Index is returned when passed values is Index or Series note: an array of Periods will ignore sort as it returns an always sorted PeriodIndex """ from pandas import Index, Series, DatetimeIndex, PeriodIndex # handling two possibilities here # - for a numpy datetimelike simply view as i8 then cast back # - for an extension datetimelike view as i8 then # reconstruct from boxed values to transfer metadata dtype = None if needs_i8_conversion(values): if is_period_dtype(values): values = PeriodIndex(values) vals = values.asi8 elif is_datetimetz(values): values = DatetimeIndex(values) vals = values.asi8 else: # numpy dtype dtype = values.dtype vals = values.view(np.int64) else: vals = np.asarray(values) (hash_klass, vec_klass), vals = _get_data_algo(vals, _hashtables) table = hash_klass(size_hint or len(vals)) uniques = vec_klass() labels = table.get_labels(vals, uniques, 0, na_sentinel, True) labels = _ensure_platform_int(labels) uniques = uniques.to_array() if sort and len(uniques) > 0: uniques, labels = safe_sort(uniques, labels, na_sentinel=na_sentinel, assume_unique=True) if dtype is not None: uniques = uniques.astype(dtype) if isinstance(values, Index): uniques = values._shallow_copy(uniques, name=None) elif isinstance(values, Series): uniques = Index(uniques) return labels, uniques
def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None): """ Encode input values as an enumerated type or categorical variable Parameters ---------- values : ndarray (1-d) Sequence sort : boolean, default False Sort by values na_sentinel : int, default -1 Value to mark "not found" size_hint : hint to the hashtable sizer Returns ------- labels : the indexer to the original array uniques : ndarray (1-d) or Index the unique values. Index is returned when passed values is Index or Series note: an array of Periods will ignore sort as it returns an always sorted PeriodIndex """ from pandas import Index, Series, DatetimeIndex, PeriodIndex # handling two possibilities here # - for a numpy datetimelike simply view as i8 then cast back # - for an extension datetimelike view as i8 then # reconstruct from boxed values to transfer metadata dtype = None if needs_i8_conversion(values): if is_period_dtype(values): values = PeriodIndex(values) vals = values.asi8 elif is_datetimetz(values): values = DatetimeIndex(values) vals = values.asi8 else: # numpy dtype dtype = values.dtype vals = values.view(np.int64) else: vals = np.asarray(values) (hash_klass, vec_klass), vals = _get_data_algo(vals, _hashtables) table = hash_klass(size_hint or len(vals)) uniques = vec_klass() check_nulls = not is_integer_dtype(values) labels = table.get_labels(vals, uniques, 0, na_sentinel, check_nulls) labels = _ensure_platform_int(labels) uniques = uniques.to_array() if sort and len(uniques) > 0: uniques, labels = safe_sort(uniques, labels, na_sentinel=na_sentinel, assume_unique=True) if dtype is not None: uniques = uniques.astype(dtype) if isinstance(values, Index): uniques = values._shallow_copy(uniques, name=None) elif isinstance(values, Series): uniques = Index(uniques) return labels, uniques