class TestLshLookupTable(TestCase):
    """Checks on the size and content of ``Lsh.lookup_table`` results."""

    # Shared fixtures, built once when the class body is evaluated.
    hash = hdhash.HdHash(WINDOW_SIZE / BAND * NR_BIT_PER_WORD, 8)
    l = lsh.Lsh(WINDOW_SIZE, BAND, ROW, hash)

    def test_one_signature(self):
        # given: a signature holding a single zero word
        sig = [0]

        # when
        table = self.l.lookup_table(sig)

        # then: exactly one entry, with the expected key/value pair
        self.assertEqual(1, len(table))
        self.assertEqual(['s0b0', [-1]], table[0])

    def test_n_signature(self):
        # given: two distinct signature words
        sig = [0xff, 0]

        # when
        table = self.l.lookup_table(sig)

        # then: one entry per signature word
        self.assertEqual(2, len(table))

    def test_n_band_signature(self):
        # given: the same run of distinct values repeated BAND times
        nr_unique = 9
        sig = list(range(nr_unique)) * BAND

        # when
        table = self.l.lookup_table(sig)

        # then: the table has as many entries as the signature has words
        self.assertEqual(nr_unique * BAND, len(table))

    def test_lookup_table_with_None(self):
        # given: no signature at all
        sig = None

        # when
        table = self.l.lookup_table(sig)

        # then: an empty table is returned
        self.assertEqual([], table)

    # NOTE(review): "tabke" is a typo for "table"; the name is kept as-is so
    # existing test selectors that reference it keep working.
    def test_lookup_tabke_with_empty(self):
        # given: an empty signature
        sig = []

        # when
        table = self.l.lookup_table(sig)

        # then: an empty table is returned
        self.assertEqual([], table)
def test_index_retrieval(feat_dim=10, sig_dim=100):
    """Test lsh index and retrieval.

    Indexes 100 objects built from random bit vectors, then runs 5
    retrieval queries with fresh random objects and prints every result.

    Args:
        feat_dim: Passed to ``gen_random_bit_vector`` and as the first
            argument of ``lsh.Lsh`` (presumably the feature dimension).
        sig_dim: Passed as the second argument of ``lsh.Lsh``
            (presumably the signature length).
    """
    sim = lsh.JaccardSimilarity()
    lshash = lsh.Lsh(feat_dim, sig_dim, sim)

    # Populate the index.  ``range`` replaces the Python 2-only ``xrange``.
    for _ in range(100):
        lshash.index(lsh.LshObject(gen_random_bit_vector(feat_dim)))

    # Query with new random objects.  Single-argument ``print(...)`` emits the
    # same text on Python 2 and is valid syntax on Python 3.
    for _ in range(5):
        obj = lsh.LshObject(gen_random_bit_vector(feat_dim))
        results = lshash.retrieve(obj)
        print("Data retrieved for %s:" % obj)
        for result in results:
            print(result)
class TestLshHashing(TestCase):
    """Checks the length and content of ``Lsh.hashing`` output."""

    # Shared fixtures, built once when the class body is evaluated.
    hash = hdhash.HdHash(WINDOW_SIZE / BAND * NR_BIT_PER_WORD, 8)
    l = lsh.Lsh(WINDOW_SIZE, BAND, ROW, hash)

    def test_hashing(self):
        # given: two windows' worth of zeros
        zeros = [0] * WINDOW_SIZE * 2
        expected_len = BAND * 2

        # when
        hashed = self.l.hashing(zeros)

        # then: BAND values per window, each rendering as "0"
        self.assertEqual(expected_len, len(hashed))
        self.assertEqual('0' * expected_len, "".join(map(str, hashed)))
def test_random_projection(feat_dim=5, sig_dim=1000):
    """Test lsh for cosine similarity.

    Builds two objects from random vectors, generates a signature for each,
    and prints both feature vectors followed by the similarity computed from
    the features and the one approximated from the signatures.

    Args:
        feat_dim: Passed to ``gen_random_vector`` and as the first argument
            of ``lsh.Lsh`` (presumably the feature dimension).
        sig_dim: Passed as the second argument of ``lsh.Lsh``
            (presumably the signature length).
    """
    sim = lsh.CosineSimilarity()
    lshash = lsh.Lsh(feat_dim, sig_dim, sim)

    x = lsh.LshObject(gen_random_vector(feat_dim))
    y = lsh.LshObject(gen_random_vector(feat_dim))
    lshash.generate_signature(x)
    lshash.generate_signature(y)

    # Single-argument ``print(...)`` emits the same text on Python 2 and is
    # valid syntax on Python 3 (the bare print statement is not).
    print(x.feature)
    print(y.feature)
    print("cosine similarity: " +
          str(sim.compute_similarity(x.feature, y.feature)))
    print("approximated similarity: " +
          str(sim.approximate_similarity(x.signature, y.signature)))
def test_minhash(feat_dim=5, sig_dim=1000):
    """Test lsh for jaccard similarity.

    Builds two objects from random bit vectors, generates a signature for
    each, and prints both feature vectors followed by the similarity computed
    from the features and the one approximated from the signatures.

    Args:
        feat_dim: Passed to ``gen_random_bit_vector`` and as the first
            argument of ``lsh.Lsh`` (presumably the feature dimension).
        sig_dim: Passed as the second argument of ``lsh.Lsh``
            (presumably the signature length).
    """
    sim = lsh.JaccardSimilarity()
    lshash = lsh.Lsh(feat_dim, sig_dim, sim)

    x = lsh.LshObject(gen_random_bit_vector(feat_dim))
    y = lsh.LshObject(gen_random_bit_vector(feat_dim))
    lshash.generate_signature(x)
    lshash.generate_signature(y)

    # Single-argument ``print(...)`` emits the same text on Python 2 and is
    # valid syntax on Python 3 (the bare print statement is not).
    print(x.feature)
    print(y.feature)
    print("jaccard similarity: " +
          str(sim.compute_similarity(x.feature, y.feature)))
    print("approximated similarity: " +
          str(sim.approximate_similarity(x.signature, y.signature)))
def __init__(self, ss, window_size, band, row, hash):
    """Store the constructor arguments and build the backing ``lsh.Lsh``.

    Args:
        ss: Kept as ``self._ss``; not otherwise used here.
        window_size: Kept as ``self._window_size`` and passed to ``lsh.Lsh``.
        band: Kept as ``self._band`` and passed to ``lsh.Lsh``.
        row: Kept as ``self._row`` and passed to ``lsh.Lsh``.
        hash: Passed straight to ``lsh.Lsh``; not stored on the instance.
    """
    # Plain configuration values first.
    self._ss = ss
    self._window_size = window_size
    self._band = band
    self._row = row

    # The LSH engine is created last, from the same configuration.
    self._lsh = lsh.Lsh(
        window_size,
        band,
        row,
        hash,
    )