def test_unique(self):
    """Check that ``unique_int64`` yields each distinct input value once."""
    import numpy as np
    from cykhash import unique_int64

    a = np.array([1, 2, 3, 3, 2, 1], dtype=np.int64)
    # Original note: wrapping the result this way involves no reallocation.
    u = np.ctypeslib.as_array(unique_int64(a))
    print(u)  # [1,2,3] or any permutation of it

    # The order of the unique values is unspecified, so compare sorted
    # contents instead of asserting a trivially-true condition.
    self.assertEqual(sorted(u.tolist()), [1, 2, 3])
def test_quick_tutorial_2(self):
    """Exercise the quick-tutorial usage of ``unique_int64`` and verify it."""
    import numpy as np

    a = np.array([1, 2, 3, 3, 2, 1], dtype=np.int64)

    # actual usage:
    from cykhash import unique_int64

    # The unique elements are exposed via the buffer protocol ...
    unique_buffer = unique_int64(a)
    # ... and, per the tutorial, can be converted to a numpy array
    # without copying:
    unique_array = np.ctypeslib.as_array(unique_buffer)

    # Order of the result is unspecified; check the contents rather than
    # asserting a condition that can never fail.
    self.assertEqual(sorted(unique_array.tolist()), [1, 2, 3])
# Measure the peak-memory overhead of computing the unique values of an
# int64 array, either with pandas or with cykhash.
#
# Command line: <script> <fun_name> <N>
#   fun_name  "pandas" selects pd.unique; any other value selects
#             cykhash's unique_int64.
#   N         number of elements in the generated input (np.arange(N)).
#
# Prints "<number of unique values> <overhead / input size>" when the peak
# RSS grew during the call, otherwise "<number of unique values> too small".
import resource
import sys

import numpy as np
import pandas as pd
import psutil
from cykhash import unique_int64, unique_int32

fun_name = sys.argv[1]
N = int(sys.argv[2])

a = np.arange(N, dtype=np.int64)

# Baselines taken after the input exists, so only the unique() call counts.
process = psutil.Process()
old = process.memory_info().rss
# NOTE(review): ru_maxrss is scaled by 1024, i.e. treated as kilobytes (the
# Linux convention). Some platforms (e.g. macOS) report bytes -- confirm the
# target platform before trusting the numbers elsewhere.
old_max = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss * 1024

if fun_name == "pandas":
    b = pd.unique(a)
else:
    # np.frombuffer defaults to float64; state the dtype explicitly so the
    # int64 values are not reinterpreted as floats.
    b = np.frombuffer(memoryview(unique_int64(a)), dtype=np.int64)

new_max = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss * 1024

if new_max > old_max:
    # Overhead is the new peak relative to the RSS measured before the call,
    # expressed as a multiple of the input size (N elements * 8 bytes).
    overhead_in_bytes = new_max - old
    print(len(b), overhead_in_bytes / float(N * 8))
else:
    # The peak did not move, so the call's footprint cannot be resolved.
    print(len(b), "too small")
def cykhash_unique64(bufs):
    """Call ``unique_int64`` on the first entry of *bufs*.

    The result of the call is discarded; this function returns ``None``.
    """
    first_buffer = bufs[0]
    unique_int64(first_buffer)