Esempio n. 1
0
class TestLshLookupTable(TestCase):
    """Unit tests for ``Lsh.lookup_table`` across different signature inputs."""

    # Shared fixtures, built once at class-definition time.
    # NOTE(review): `/` is true division on Python 3, so HdHash may receive a
    # float here -- confirm whether `//` is what was intended.
    hash = hdhash.HdHash(WINDOW_SIZE / BAND * NR_BIT_PER_WORD, 8)
    l = lsh.Lsh(WINDOW_SIZE, BAND, ROW, hash)

    def test_one_signature(self):
        """A single zero signature produces exactly one lookup entry."""
        #given
        signature = [0x0]

        #when
        lookup = self.l.lookup_table(signature)

        #then
        self.assertEqual(1, len(lookup))
        self.assertEqual(['s0b0', [-1]], lookup[0])

    def test_n_signature(self):
        """Two distinct signature values produce two lookup entries."""
        #given
        signature = [0xff, 0]

        #when
        lookup = self.l.lookup_table(signature)

        #then
        self.assertEqual(2, len(lookup))

    def test_n_band_signature(self):
        """A signature of ``9 * BAND`` values produces one entry per value."""
        #given
        nr_unique_signature = 9
        signature = list(range(nr_unique_signature)) * BAND

        #when
        lookup = self.l.lookup_table(signature)

        #then
        self.assertEqual(nr_unique_signature * BAND, len(lookup))

    def test_lookup_table_with_None(self):
        """A ``None`` signature produces an empty lookup table."""
        #given
        signature = None

        #when
        lookup = self.l.lookup_table(signature)

        #then
        self.assertEqual([], lookup)

    def test_lookup_table_with_empty(self):
        """An empty signature produces an empty lookup table."""
        #given
        signature = []

        #when
        lookup = self.l.lookup_table(signature)

        #then
        self.assertEqual([], lookup)
Esempio n. 2
0
def test_index_retrieval(feat_dim=10, sig_dim=100):
    """Index random bit vectors, then print what retrieval returns for new ones.

    Args:
        feat_dim: Passed to ``lsh.Lsh`` as its first argument and to
            ``gen_random_bit_vector`` for every generated vector.
        sig_dim: Passed to ``lsh.Lsh`` as its second argument.
    """
    sim = lsh.JaccardSimilarity()
    lshash = lsh.Lsh(feat_dim, sig_dim, sim)

    # `range` and single-argument `print(...)` replace `xrange` and the print
    # statement so the function parses on Python 3 and still behaves the same
    # on Python 2.
    # Populate the index with 100 random objects.
    for _ in range(100):
        lshash.index(lsh.LshObject(gen_random_bit_vector(feat_dim)))

    # Query with 5 fresh random objects and print every returned result.
    for _ in range(5):
        obj = lsh.LshObject(gen_random_bit_vector(feat_dim))
        results = lshash.retrieve(obj)
        print("Data retrieved for %s:" % obj)
        for result in results:
            print(result)
Esempio n. 3
0
class TestLshHashing(TestCase):
    """Unit test for ``Lsh.hashing`` on all-zero input data."""

    # Shared fixtures, built once at class-definition time.
    hash = hdhash.HdHash(WINDOW_SIZE / BAND * NR_BIT_PER_WORD, 8)
    l = lsh.Lsh(WINDOW_SIZE, BAND, ROW, hash)

    def test_hashing(self):
        """``2 * WINDOW_SIZE`` zeros hash to ``2 * BAND`` values that print as zeros."""
        # given: a run of zeros twice WINDOW_SIZE long
        zeros = [0] * WINDOW_SIZE * 2

        # when
        hashes = self.l.hashing(zeros)

        # then: one value per band for each WINDOW_SIZE-long run, and the
        # string forms concatenate to a run of '0' characters
        expected_count = BAND * 2
        rendered = "".join(map(str, hashes))
        self.assertEqual(expected_count, len(hashes))
        self.assertEqual('0' * expected_count, rendered)
Esempio n. 4
0
def test_random_projection(feat_dim=5, sig_dim=1000):
    """Print exact and signature-approximated cosine similarity of two random vectors.

    Args:
        feat_dim: Passed to ``lsh.Lsh`` as its first argument and to
            ``gen_random_vector`` for both generated vectors.
        sig_dim: Passed to ``lsh.Lsh`` as its second argument.
    """
    sim = lsh.CosineSimilarity()
    lshash = lsh.Lsh(feat_dim, sig_dim, sim)
    x = lsh.LshObject(gen_random_vector(feat_dim))
    y = lsh.LshObject(gen_random_vector(feat_dim))

    # The `.signature` attributes read below are presumably filled in by
    # these calls -- verify against lsh.Lsh.
    lshash.generate_signature(x)
    lshash.generate_signature(y)

    # Single-argument `print(...)` parses on Python 3 and prints the same text
    # as the former print statement on Python 2.
    print(x.feature)
    print(y.feature)
    print("cosine similarity: " +
          str(sim.compute_similarity(x.feature, y.feature)))
    print("approximated similarity: " +
          str(sim.approximate_similarity(x.signature, y.signature)))
Esempio n. 5
0
def test_minhash(feat_dim=5, sig_dim=1000):
    """Print exact and signature-approximated Jaccard similarity of two random bit vectors.

    Args:
        feat_dim: Passed to ``lsh.Lsh`` as its first argument and to
            ``gen_random_bit_vector`` for both generated vectors.
        sig_dim: Passed to ``lsh.Lsh`` as its second argument.
    """
    sim = lsh.JaccardSimilarity()
    lshash = lsh.Lsh(feat_dim, sig_dim, sim)
    x = lsh.LshObject(gen_random_bit_vector(feat_dim))
    y = lsh.LshObject(gen_random_bit_vector(feat_dim))

    # The `.signature` attributes read below are presumably filled in by
    # these calls -- verify against lsh.Lsh.
    lshash.generate_signature(x)
    lshash.generate_signature(y)

    # Single-argument `print(...)` parses on Python 3 and prints the same text
    # as the former print statement on Python 2.
    print(x.feature)
    print(y.feature)
    print("jaccard similarity: " +
          str(sim.compute_similarity(x.feature, y.feature)))
    print("approximated similarity: " +
          str(sim.approximate_similarity(x.signature, y.signature)))
Esempio n. 6
0
 def __init__(self, ss, window_size, band, row, hash):
     """Store the configuration and build the underlying ``lsh.Lsh`` instance.

     Args:
         ss: Kept as-is on ``self._ss``; its meaning is not visible from
             this method.
         window_size: Kept on ``self._window_size`` and forwarded to ``lsh.Lsh``.
         band: Kept on ``self._band`` and forwarded to ``lsh.Lsh``.
         row: Kept on ``self._row`` and forwarded to ``lsh.Lsh``.
         hash: Forwarded to ``lsh.Lsh`` only; not stored on the instance.
     """
     self._ss = ss
     # Remember the LSH geometry alongside the engine built from it.
     self._window_size, self._band, self._row = window_size, band, row
     self._lsh = lsh.Lsh(window_size, band, row, hash)