Exemple #1
0
    def test_unique(self):
        """Check that ``unique_int64`` yields each distinct input value once.

        The result is wrapped with ``np.ctypeslib.as_array`` as in the usage
        example. The order of the unique values is not assumed, so the
        comparison is done on the sorted values.
        """
        import numpy as np
        from cykhash import unique_int64
        a = np.array([1,2,3,3,2,1], dtype=np.int64)
        u = np.ctypeslib.as_array(unique_int64(a)) # there will be no reallocation
        print(u) # [1,2,3] or any permutation of it

        # The former `assertTrue(True)` could never fail; verify the actual
        # content of the result instead (order-independent).
        self.assertEqual(sorted(u.tolist()), [1, 2, 3])
Exemple #2
0
    def test_quick_tutorial_2(self):
        """Run the quick-tutorial ``unique_int64`` example and check its result."""
        import numpy as np
        from cykhash import unique_int64

        a = np.array([1, 2, 3, 3, 2, 1], dtype=np.int64)

        # actual usage:
        # unique elements are exposed via buffer-protocol
        unique_buffer = unique_int64(a)

        # can be converted to a numpy-array without copying via
        unique_array = np.ctypeslib.as_array(unique_buffer)

        # The converted array used to be left unchecked behind a vacuous
        # `assertTrue(True)`; compare its values (in any order) with the
        # distinct values of the input.
        self.assertEqual(sorted(unique_array.tolist()), [1, 2, 3])
Exemple #3
0
import numpy as np
import pandas as pd
import sys

import resource
import psutil

from cykhash import unique_int64, unique_int32

# Command line: <script> <fun_name> <N>
#   fun_name -- "pandas" selects pd.unique; any other value selects
#               cykhash's unique_int64
#   N        -- number of int64 elements in the input array
fun_name = sys.argv[1]
N = int(sys.argv[2])

a = np.arange(N, dtype=np.int64)

# ru_maxrss is reported in kilobytes on Linux but in bytes on macOS, so the
# factor that converts it to bytes depends on the platform.
maxrss_unit = 1 if sys.platform == "darwin" else 1024

process = psutil.Process()
old = process.memory_info().rss
old_max = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss * maxrss_unit

if fun_name == "pandas":
    b = pd.unique(a)
else:
    # np.frombuffer defaults to float64; pass the dtype explicitly so the
    # int64 values are not reinterpreted as floats.
    b = np.frombuffer(memoryview(unique_int64(a)), dtype=np.int64)

new_max = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss * maxrss_unit

if new_max > old_max:
    # The peak RSS grew while computing the unique values: report the growth
    # (peak minus the RSS measured just before the call) as a multiple of the
    # input size in bytes (N elements of 8 bytes each).
    overhead_in_bytes = new_max - old
    print(len(b), overhead_in_bytes / float(N * 8))
else:
    # The peak did not move, so the overhead cannot be derived from it.
    print(len(b), "too small")
def cykhash_unique64(bufs):
    """Run ``unique_int64`` on the first entry of *bufs*; the result is discarded."""
    first_buffer = bufs[0]
    unique_int64(first_buffer)