def test_weights():
    """Assert that the OGDLR and hashed models agree with the FTRL weights.

    ``ftrl_after.weights()`` serves as the reference.  It is compared against
    ``ogdlr_after.weights()`` directly, and against the weights that
    ``hash_after`` stores at the hashed positions of ``ftrl_after``'s keys.
    """
    ogdlr_w = ogdlr_after.weights()
    reference_w = ftrl_after.weights()

    # Map each FTRL key to a slot index in [0, NDIMS) exactly as done here:
    # hash with the shared SEED, then reduce modulo NDIMS.
    slots = [mmh(name, seed=SEED) % NDIMS for name in ftrl_after.keys()]
    hashed_w = hash_after._get_w(slots)

    assert np.allclose(ogdlr_w, reference_w)
    assert np.allclose(hashed_w, reference_w)
def hash_bin_to_bin(self, bin_id, attempt, seed):
    """Hash a ``(bin_id, attempt)`` pair to a value reduced modulo ``self.K``.

    The hashed key is the string ``"<attempt>.<bin_id>"``; ``seed`` is passed
    straight through to ``mmh``.
    """
    # NOTE(review): positive=True presumably makes mmh return a non-negative
    # value, so the result would lie in [0, self.K) - confirm against mmh.
    label = ".".join((str(attempt), str(bin_id)))
    digest = mmh(key=label, seed=seed, positive=True)
    return digest % self.K
def hash_func(self, seed):
    """Return a one-argument hash function bound to ``seed``.

    The returned callable hashes its argument ``x`` with ``mmh`` (using
    ``positive=True``) and reduces the result modulo ``self.D``.  ``self.D``
    is looked up each time the callable runs, not when it is created.
    """
    def hashed(x):
        return mmh(key=x, seed=seed, positive=True) % self.D

    return hashed
if __name__ == '__main__':
    # Demo: print the exact Jaccard similarity of two small sets, then the
    # mean and standard deviation of a hashed estimate over 1000 runs.
    s1 = [480, 923, 106]
    s2 = [480, 106, 373]
    D = 1000
    HD = 100
    K = 10

    # Exact Jaccard similarity: size of the intersection over size of the
    # union, with the union size obtained by inclusion-exclusion.
    shared = [a for a in s1 if a in s2]
    print("Jaccard", len(shared) / (len(s1) + len(s2) - len(shared)))

    vs = []
    for trial in range(1000):
        # Both seeds are derived from the trial index by hashing it with two
        # different fixed seeds (30 and 20).
        dmh = Densified_MinHash(
            K,
            HD,
            seed=mmh(trial, 30, positive=True),
            num_seed=mmh(trial, 20, positive=True),
            hashFull=True,
        )

        # Earlier experiments (since disabled) constructed the hasher with
        # seed=trial and used get_hashed + convert_to_bit_array, or
        # get_hashed_faster, in place of get_hashed_4universal.
        sketch1 = dmh.get_hashed_4universal(s1)
        sketch2 = dmh.get_hashed_4universal(s2)

        # NOTE(review): dot product normalised by K; presumably the fraction
        # of bins on which both sets agree - confirm the sketch format.
        vs.append(np.dot(sketch1, sketch2) / K)

    print(np.mean(vs), np.std(vs))
def ft_mmh(text):
    """Hash ``text`` with ``mmh`` and reduce it modulo the global ``m``.

    ``m`` is a module-level value defined outside this function.
    """
    # ``m`` is only read, never rebound, so no ``global`` statement is needed.
    return mmh(text, positive=True) % m