Example #1
0
def _factorize_int64(left_index, right_index, sort=True):
    """
    Factorize two integer key arrays with one shared Int64Factorizer.

    Parameters
    ----------
    left_index, right_index : array-like exposing ``dtype`` and ``astype``
        Keys to encode. Inputs whose dtype is not int64 are cast to int64
        before being factorized.
    sort : bool, default True
        If True, both label arrays are passed through ``_sort_labels``
        together with the factorizer's uniques.

    Returns
    -------
    tuple
        ``(llab, rlab, count)`` where ``llab`` / ``rlab`` are the labels
        produced for the left and right keys and ``count`` is
        ``rizer.get_count()``.
    """
    rizer = lib.Int64Factorizer(max(len(left_index), len(right_index)))

    # Integer arrays are not guaranteed to be int64 (e.g. on platforms whose
    # default integer is 32-bit), so coerce before handing them to the
    # int64-specific factorizer. Already-int64 inputs are left untouched.
    if left_index.dtype != np.int64:
        left_index = left_index.astype('i8')

    if right_index.dtype != np.int64:
        right_index = right_index.astype('i8')

    # Both sides go through the same factorizer instance.
    llab, _ = rizer.factorize(left_index)
    rlab, _ = rizer.factorize(right_index)

    if sort:
        llab, rlab = _sort_labels(np.array(rizer.uniques), llab, rlab)

    return llab, rlab, rizer.get_count()
Example #2
0
File: merge.py Project: lahi/pandas
def _factorize_int64(left_index, right_index, sort=True):
    """
    Encode two integer key arrays using a single ``lib.Int64Factorizer``.

    Either input whose dtype is not ``np.int64`` is cast with
    ``astype('i8')`` before factorizing. When ``sort`` is true, the two
    label arrays are passed through ``_sort_labels`` along with the
    factorizer's uniques.

    Returns a 3-tuple: left labels, right labels, and the factorizer's
    ``get_count()``.
    """
    longest = max(len(left_index), len(right_index))
    factorizer = lib.Int64Factorizer(longest)

    # 32-bit compatibility: the factorizer is fed int64 data only.
    left_keys = left_index
    if left_keys.dtype != np.int64:  # pragma: no cover
        left_keys = left_keys.astype('i8')

    right_keys = right_index
    if right_keys.dtype != np.int64:  # pragma: no cover
        right_keys = right_keys.astype('i8')

    # Left first, then right, through the same factorizer instance.
    left_labels, _ = factorizer.factorize(left_keys)
    right_labels, _ = factorizer.factorize(right_keys)

    if not sort:
        return left_labels, right_labels, factorizer.get_count()

    uniques = np.array(factorizer.uniques)
    left_labels, right_labels = _sort_labels(uniques, left_labels,
                                             right_labels)
    return left_labels, right_labels, factorizer.get_count()
Example #3
0
def algo4():
    """
    Group-add ``data`` by the combination of ``labels`` and ``labels2``.

    Each label set is factorized with its own ``lib.DictFactorizer``; the
    two code arrays are combined into one id as ``codes1 * k2 + codes2``.
    If the product of the two unique counts exceeds 1e6, the combined ids
    are re-factorized with ``lib.Int64Factorizer`` and the group count is
    taken from that factorizer's uniques instead.

    Reads the module-level names ``labels``, ``labels2`` and ``data``.
    Returns nothing; the output and count arrays are local to the call.
    """
    def _dict_codes(values):
        # One fresh DictFactorizer per label set, unsorted.
        factorizer = lib.DictFactorizer()
        codes, _ = factorizer.factorize(values, sort=False)
        return codes, len(factorizer.uniques)

    codes_first, n_first = _dict_codes(labels)
    codes_second, n_second = _dict_codes(labels2)

    n_groups = n_first * n_second
    combined = codes_first * n_second + codes_second

    if n_groups > 1e6:
        # Too many potential groups: re-encode the combined ids and size
        # the outputs by the re-encoded uniques.
        compressor = lib.Int64Factorizer(len(combined))
        combined, _ = compressor.factorize(combined.astype('i8'), sort=True)
        n_groups = len(compressor.uniques)

    sums = np.empty(n_groups)
    tallies = np.zeros(n_groups, dtype='i4')
    lib.group_add(sums, tallies, data, combined)