def _factorize_keys(lk, rk, sort=True): if com._is_int_or_datetime_dtype(lk) and com._is_int_or_datetime_dtype(rk): klass = lib.Int64Factorizer lk = com._ensure_int64(lk) rk = com._ensure_int64(rk) else: klass = lib.Factorizer lk = com._ensure_object(lk) rk = com._ensure_object(rk) rizer = klass(max(len(lk), len(rk))) llab = rizer.factorize(lk) rlab = rizer.factorize(rk) count = rizer.get_count() if sort: uniques = rizer.uniques.to_array() llab, rlab = _sort_labels(uniques, llab, rlab) # NA group lmask = llab == -1; lany = lmask.any() rmask = rlab == -1; rany = rmask.any() if lany or rany: if lany: np.putmask(llab, lmask, count) if rany: np.putmask(rlab, rmask, count) count += 1 return llab, rlab, count
def _factorize_keys(lk, rk, sort=True): if com._is_int_or_datetime_dtype(lk) and com._is_int_or_datetime_dtype(rk): klass = _hash.Int64Factorizer lk = com._ensure_int64(lk) rk = com._ensure_int64(rk) else: klass = _hash.Factorizer lk = com._ensure_object(lk) rk = com._ensure_object(rk) rizer = klass(max(len(lk), len(rk))) llab = rizer.factorize(lk) rlab = rizer.factorize(rk) count = rizer.get_count() if sort: uniques = rizer.uniques.to_array() llab, rlab = _sort_labels(uniques, llab, rlab) # NA group lmask = llab == -1 lany = lmask.any() rmask = rlab == -1 rany = rmask.any() if lany or rany: if lany: np.putmask(llab, lmask, count) if rany: np.putmask(rlab, rmask, count) count += 1 return llab, rlab, count
def _na_ok_dtype(dtype): return not _is_int_or_datetime_dtype(dtype)