Exemple #1
0
def test_mmhash3_int_array():
    rng = np.random.RandomState(42)
    keys = rng.randint(-5342534, 345345, size=3 * 2 * 1).astype(np.int32)
    keys = keys.reshape((3, 2, 1))

    for seed in [0, 42]:
        expected = np.array([murmurhash3_32(int(k), seed) for k in keys.flat])
        expected = expected.reshape(keys.shape)
        assert_array_equal(murmurhash3_32(keys, seed), expected)

    for seed in [0, 42]:
        expected = np.array(
            [murmurhash3_32(k, seed, positive=True) for k in keys.flat])
        expected = expected.reshape(keys.shape)
        assert_array_equal(murmurhash3_32(keys, seed, positive=True), expected)
Exemple #2
0
def test_mmhash3_int():
    assert murmurhash3_32(3) == 847579505
    assert murmurhash3_32(3, seed=0) == 847579505
    assert murmurhash3_32(3, seed=42) == -1823081949

    assert murmurhash3_32(3, positive=False) == 847579505
    assert murmurhash3_32(3, seed=0, positive=False) == 847579505
    assert murmurhash3_32(3, seed=42, positive=False) == -1823081949

    assert murmurhash3_32(3, positive=True) == 847579505
    assert murmurhash3_32(3, seed=0, positive=True) == 847579505
    assert murmurhash3_32(3, seed=42, positive=True) == 2471885347
Exemple #3
0
def test_uniform_distribution():
    n_bins, n_samples = 10, 100000
    bins = np.zeros(n_bins, dtype=np.float64)

    for i in range(n_samples):
        bins[murmurhash3_32(i, positive=True) % n_bins] += 1

    means = bins / n_samples
    expected = np.full(n_bins, 1. / n_bins)

    assert_array_almost_equal(means / expected, np.ones(n_bins), 2)
Exemple #4
0
def test_no_collision_on_byte_range():
    previous_hashes = set()
    for i in range(100):
        h = murmurhash3_32(' ' * i, 0)
        assert h not in previous_hashes, \
            "Found collision on growing empty string"
Exemple #5
0
def test_mmhash3_unicode():
    assert murmurhash3_32('foo', 0) == -156908512
    assert murmurhash3_32('foo', 42) == -1322301282

    assert murmurhash3_32('foo', 0, positive=True) == 4138058784
    assert murmurhash3_32('foo', 42, positive=True) == 2972666014