예제 #1
0
def algo4():
    """Run a two-key group-add over the module-level ``data`` array.

    The module-level ``labels`` and ``labels2`` are each factorized
    (unsorted) with their own ``lib.DictFactorizer``; the two resulting
    label arrays are then folded into one combined id per row and handed
    to ``lib.group_add``.  Nothing is returned: the output buffers are
    local and are discarded when the function exits.
    """
    def _encode(values):
        # A new DictFactorizer is created on every call.
        factorizer = lib.DictFactorizer()
        ids, _ = factorizer.factorize(values, sort=False)
        return ids, len(factorizer.uniques)

    first_ids, n_first = _encode(labels)
    second_ids, n_second = _encode(labels2)

    # One combined id per row: first_id * n_second + second_id.
    # NOTE(review): presumably each id lies in [0, n_uniques), which would
    # make the combined id unique per (first, second) pair -- confirm
    # against lib.DictFactorizer.
    combined = first_ids * n_second + second_ids
    n_groups = n_first * n_second

    # When the product of the two cardinalities exceeds one million, the
    # combined ids are re-factorized (sorted) and the group count becomes
    # the number of combined ids actually observed.
    if n_groups > 1e6:
        compressor = lib.Int64Factorizer(len(combined))
        combined, _ = compressor.factorize(combined.astype('i8'), sort=True)
        n_groups = len(compressor.uniques)

    # `out` is allocated uninitialized.
    # NOTE(review): assumes lib.group_add writes every slot of `out` --
    # confirm.
    out = np.empty(n_groups)
    counts = np.zeros(n_groups, dtype='i4')
    lib.group_add(out, counts, data, combined)
예제 #2
0
def dict_unique(values, expected_K, sort=False, memory=False):
    """Compute the unique entries of ``values`` with ``lib.DictFactorizer``.

    Parameters
    ----------
    values
        Input passed straight to ``DictFactorizer.unique_int64``.
    expected_K
        Number of unique entries the caller expects; checked with an
        ``assert`` when ``memory`` is false.
    sort
        When true (and ``memory`` is false), the result is sorted in place
        before it is returned.
    memory
        When true, return the change in ``proc``'s RSS across the call
        instead of the unique values.  ``sort`` and ``expected_K`` are
        ignored in that mode.

    Returns
    -------
    The object returned by ``unique_int64``, or an RSS difference when
    ``memory`` is true.
    """
    if memory:
        # Collect garbage first, then take the baseline RSS reading.
        gc.collect()
        before_mem = proc.get_memory_info().rss

    factorizer = lib.DictFactorizer()
    uniques = factorizer.unique_int64(values)

    if memory:
        # `factorizer` and `uniques` are both still referenced here, so
        # whatever they hold is part of the measured difference.
        return proc.get_memory_info().rss - before_mem

    if sort:
        uniques.sort()
    assert len(uniques) == expected_K
    return uniques
예제 #3
0
def f2():
    """Factorize the module-level ``xarr`` (unsorted) and run ``lib.group_add``.

    The labels and counts returned by ``DictFactorizer.factorize`` are
    passed to ``lib.group_add`` together with the module-level ``data`` and
    an output buffer with one slot per unique value.  Nothing is returned.
    """
    factorizer = lib.DictFactorizer()
    label_ids, counts = factorizer.factorize(xarr, sort=False)
    n_uniques = len(factorizer.uniques)

    # Uninitialized buffer with one slot per unique value.
    # NOTE(review): assumes lib.group_add fills every slot -- confirm.
    result_buf = np.empty(n_uniques)
    lib.group_add(result_buf, counts, data, label_ids)
예제 #4
0
def algo3_sort():
    """Factorize the module-level ``labels`` with ``sort=True`` and group-add.

    The labels and counts returned by ``DictFactorizer.factorize`` are
    passed to ``lib.group_add`` together with the module-level ``data`` and
    an output buffer with one slot per unique label.  Nothing is returned.
    """
    factorizer = lib.DictFactorizer()
    label_ids, counts = factorizer.factorize(labels, sort=True)
    n_uniques = len(factorizer.uniques)

    # Uninitialized buffer with one slot per unique label.
    # NOTE(review): assumes lib.group_add fills every slot -- confirm.
    result_buf = np.empty(n_uniques)
    lib.group_add(result_buf, counts, data, label_ids)