Example #1
0
def test_weights():
    """Assert that ``ogdlr_after`` and ``hash_after`` weights match ``ftrl_after``.

    The weights reported by ``ftrl_after`` act as the reference; the other two
    models must be numerically close to it (``np.allclose``).
    """
    ogdlr_w = ogdlr_after.weights()
    reference = ftrl_after.weights()

    # Translate every key known to ftrl_after into the slot index used to
    # query hash_after: hash with SEED, then reduce modulo NDIMS.
    slots = []
    for feature in ftrl_after.keys():
        slots.append(mmh(feature, seed=SEED) % NDIMS)
    hashed_w = hash_after._get_w(slots)

    for candidate in (ogdlr_w, hashed_w):
        assert np.allclose(candidate, reference)
Example #2
0
def test_weights():
    """Verify that all three fitted models report numerically close weights.

    ``ftrl_after`` is compared against ``ogdlr_after`` directly, and against
    ``hash_after`` after mapping its keys to hashed indices.
    """
    def bucket(key):
        # Hashed index for a key: murmur hash with SEED, modulo NDIMS.
        return mmh(key, seed=SEED) % NDIMS

    weights_ogdlr = ogdlr_after.weights()
    weights_ftrl = ftrl_after.weights()
    weights_hash = hash_after._get_w(list(map(bucket, ftrl_after.keys())))

    assert np.allclose(weights_ogdlr, weights_ftrl)
    assert np.allclose(weights_hash, weights_ftrl)
Example #3
0
 def hash_bin_to_bin(self, bin_id, attempt, seed):
     """Hash an (attempt, bin) pair and reduce the result modulo ``self.K``.

     The hashed key is the text ``"<attempt>.<bin_id>"``; ``seed`` is passed
     straight through to ``mmh``, which is called with ``positive=True``.
     """
     composite_key = ".".join((str(attempt), str(bin_id)))
     hashed = mmh(key=composite_key, seed=seed, positive=True)
     return hashed % self.K
Example #4
0
 def hash_func(self, seed):
     """Build a one-argument hasher bound to ``seed``.

     The returned callable hashes its argument with ``mmh`` (``positive=True``)
     and reduces the result modulo ``self.D``; ``self.D`` is read each time
     the callable is invoked, not when it is created.
     """
     def hasher(x):
         return mmh(key=x, seed=seed, positive=True) % self.D

     return hasher
Example #5
0
if __name__ == '__main__':
    # Demo: print the exact Jaccard similarity of two small sets, then the
    # mean and standard deviation of a sketch-based score over many
    # independently seeded Densified_MinHash instances.
    s1 = [480, 923, 106]
    s2 = [480, 106, 373]
    D = 1000  # NOTE(review): not used in this block; kept in case other code reads it.
    HD = 100
    K = 10

    # Exact Jaccard similarity: |A & B| / |A | B|. Set operations compute the
    # intersection once instead of rebuilding it with list scans.
    set1, set2 = set(s1), set(s2)
    print("Jaccard", len(set1 & set2) / len(set1 | set2))

    # Number of independently seeded sketches to average over.
    num_trials = 1000

    vs = []
    for i in range(num_trials):
        # Derive both seeds from the trial index (with different hash seeds,
        # 30 and 20) so every trial gets its own pair of seeds.
        DMH = Densified_MinHash(K,
                                HD,
                                seed=mmh(i, 30, positive=True),
                                num_seed=mmh(i, 20, positive=True),
                                hashFull=True)
        # Earlier experiments used get_hashed + convert_to_bit_array and
        # get_hashed_faster here; the 4-universal variant is the one in use.
        xs1 = DMH.get_hashed_4universal(s1)
        xs2 = DMH.get_hashed_4universal(s2)
        # Dot product of the two sketches, normalised by K.
        # Presumably this estimates the similarity printed above -- confirm.
        vs.append(np.dot(xs1, xs2) / K)
    print(np.mean(vs), np.std(vs))
Example #6
0
def ft_mmh(text):
    """Hash ``text`` with ``mmh`` (``positive=True``) modulo the module-level ``m``."""
    # ``m`` is only read here, so no ``global`` declaration is needed.
    return mmh(text, positive=True) % m