def _factorize_int64(left_index, right_index, sort=True):
    """Factorize two integer key arrays against one shared set of uniques.

    Both arrays are fed through the same ``lib.Int64Factorizer`` so that equal
    values on either side receive the same label.

    Parameters
    ----------
    left_index, right_index : array-like exposing ``dtype`` and ``astype``
        Integer keys. Anything that is not already ``np.int64`` is converted
        before factorizing.
    sort : bool, default True
        When true, both label arrays are passed through ``_sort_labels``
        together with the factorizer's uniques.

    Returns
    -------
    tuple
        ``(left_labels, right_labels, count)`` where ``count`` is
        ``rizer.get_count()``.
    """
    rizer = lib.Int64Factorizer(max(len(left_index), len(right_index)))

    # 32-bit compatibility: on platforms whose default integer is narrower
    # than 64 bits the keys can arrive as int32. Presumably the factorizer
    # only accepts int64 buffers, so widen explicitly before handing them over.
    if left_index.dtype != np.int64:  # pragma: no cover
        left_index = left_index.astype('i8')

    if right_index.dtype != np.int64:  # pragma: no cover
        right_index = right_index.astype('i8')

    llab, _ = rizer.factorize(left_index)
    rlab, _ = rizer.factorize(right_index)

    if sort:
        llab, rlab = _sort_labels(np.array(rizer.uniques), llab, rlab)

    return llab, rlab, rizer.get_count()
def _factorize_int64(left_index, right_index, sort=True):
    """Label two integer key arrays using a single shared factorizer.

    Parameters
    ----------
    left_index, right_index : array-like exposing ``dtype`` and ``astype``
        Integer keys; converted to ``np.int64`` first when they have any
        other dtype.
    sort : bool, default True
        If true, the labels of both sides are run through ``_sort_labels``
        along with the uniques collected by the factorizer.

    Returns
    -------
    tuple
        ``(left_labels, right_labels, count)``, with ``count`` taken from the
        factorizer's ``get_count()``.
    """
    size_hint = max(len(left_index), len(right_index))
    factorizer = lib.Int64Factorizer(size_hint)

    # 32-bit compatibility
    if left_index.dtype != np.int64:  # pragma: no cover
        left_index = left_index.astype('i8')
    if right_index.dtype != np.int64:  # pragma: no cover
        right_index = right_index.astype('i8')

    # Left is factorized first, then right, on the same factorizer instance.
    left_labels, _ = factorizer.factorize(left_index)
    right_labels, _ = factorizer.factorize(right_index)

    if not sort:
        return left_labels, right_labels, factorizer.get_count()

    uniques = np.array(factorizer.uniques)
    left_labels, right_labels = _sort_labels(uniques, left_labels,
                                             right_labels)
    return left_labels, right_labels, factorizer.get_count()
def algo4():
    """Group-add ``data`` over the combination of two label sets.

    Reads the module-level names ``labels``, ``labels2`` and ``data`` (not
    visible in this chunk). Each label set is factorized on its own, the two
    code arrays are combined into one group id, and the result is handed to
    ``lib.group_add``. Nothing is returned; ``out`` and ``counts`` are locals
    that are discarded when the function exits.
    """
    # Factorize the first key; the number of uniques is captured before the
    # factorizer name is rebound below.
    factorizer = lib.DictFactorizer()
    first_codes, _ = factorizer.factorize(labels, sort=False)
    n_first = len(factorizer.uniques)

    factorizer = lib.DictFactorizer()
    second_codes, _ = factorizer.factorize(labels2, sort=False)
    n_second = len(factorizer.uniques)

    # One id per (first, second) pair, bounded above by n_first * n_second.
    combined_id = first_codes * n_second + second_codes
    n_groups = n_first * n_second

    # When the id space exceeds a million slots, re-factorize the ids so the
    # output buffers are sized by the number of distinct ids instead.
    if n_groups > 1e6:
        factorizer = lib.Int64Factorizer(len(combined_id))
        combined_id, _ = factorizer.factorize(combined_id.astype('i8'),
                                              sort=True)
        n_groups = len(factorizer.uniques)

    # np.empty leaves ``out`` uninitialised; presumably group_add fills it.
    out = np.empty(n_groups)
    counts = np.zeros(n_groups, dtype='i4')
    lib.group_add(out, counts, data, combined_id)