def algo4():
    """Run a two-key group aggregation using a combined integer group id.

    Factorizes the module-level ``labels`` and ``labels2`` separately,
    combines the two label arrays into a single id, and passes the result to
    ``lib.group_add`` together with the module-level ``data``.  Nothing is
    returned; the output buffers are discarded when the function exits.
    """
    factorizer = lib.DictFactorizer()
    first_labs, _ = factorizer.factorize(labels, sort=False)
    n_first = len(factorizer.uniques)

    factorizer = lib.DictFactorizer()
    second_labs, _ = factorizer.factorize(labels2, sort=False)
    n_second = len(factorizer.uniques)

    # Mixed-radix encoding: each (first, second) label pair gets its own id
    # in the range [0, n_first * n_second).
    group_id = first_labs * n_second + second_labs
    max_group = n_first * n_second

    if max_group > 1e6:
        # The full cross product is large, so re-factorize the combined ids
        # and size the output buffers by the number of ids actually observed.
        # NOTE(review): the constructor argument looks like a size hint --
        # confirm against lib.Int64Factorizer.
        factorizer = lib.Int64Factorizer(len(group_id))
        group_id, _ = factorizer.factorize(group_id.astype('i8'), sort=True)
        max_group = len(factorizer.uniques)

    # `out` is allocated uninitialized; presumably group_add writes every
    # slot -- TODO confirm.
    out = np.empty(max_group)
    counts = np.zeros(max_group, dtype='i4')
    lib.group_add(out, counts, data, group_id)
def dict_unique(values, expected_K, sort=False, memory=False):
    """Compute unique values with ``lib.DictFactorizer.unique_int64``.

    Parameters
    ----------
    values
        Input handed unchanged to ``unique_int64``.
    expected_K
        Number of unique values the caller expects; checked with ``assert``
        when ``memory`` is false (the check vanishes under ``python -O``).
    sort
        When true (and ``memory`` is false), sort the result in place before
        returning it.
    memory
        When true, return the change in the process's resident set size
        measured across the ``unique_int64`` call instead of the uniques.

    Returns
    -------
    The unique values, or the RSS difference when ``memory`` is true.
    """
    if memory:
        # Collect garbage first so leftovers from earlier work do not count
        # against the baseline reading.
        gc.collect()
        rss_before = proc.get_memory_info().rss

    factorizer = lib.DictFactorizer()
    uniques = factorizer.unique_int64(values)

    if memory:
        # The factorizer and its result are still referenced here, so their
        # footprint is part of the second reading.
        return proc.get_memory_info().rss - rss_before

    if sort:
        uniques.sort()
    assert len(uniques) == expected_K
    return uniques
def f2():
    """Factorize the module-level ``xarr`` (unsorted) and aggregate ``data``.

    The labels and counts returned by ``DictFactorizer.factorize`` are passed
    straight to ``lib.group_add`` together with a freshly allocated output
    buffer holding one slot per unique value.  Nothing is returned.
    """
    factorizer = lib.DictFactorizer()
    group_labels, group_counts = factorizer.factorize(xarr, sort=False)

    n_groups = len(factorizer.uniques)
    # Uninitialized buffer; presumably filled by group_add -- TODO confirm.
    result = np.empty(n_groups)
    lib.group_add(result, group_counts, data, group_labels)
def algo3_sort():
    """Factorize the module-level ``labels`` with sorting and aggregate ``data``.

    Same shape as an unsorted factorize-then-``group_add`` run, except that
    ``factorize`` is called with ``sort=True``.  Nothing is returned.
    """
    factorizer = lib.DictFactorizer()
    sorted_labels, group_counts = factorizer.factorize(labels, sort=True)

    # One output slot per unique label; the buffer starts uninitialized and
    # is presumably filled by group_add -- TODO confirm.
    n_groups = len(factorizer.uniques)
    result = np.empty(n_groups)
    lib.group_add(result, group_counts, data, sorted_labels)