class TestHdHash(TestCase):
    """Unit tests for hdhash.HdHash: zeroed input, all-ones input, and an
    input longer than the configured number of input words."""

    nr_input_bits = 32 * 5
    nr_output_bits = 8
    h = hdhash.HdHash(nr_input_bits, nr_output_bits)

    def test_hashing_with_zeros(self):
        # given
        words = [0] * 5
        # when
        digest = self.h.hashing(words)
        # then
        self.assertEqual(0, digest)

    def test_hashing_with_ones(self):
        # given
        words = [0xffffffff] * 5
        # when
        digest = self.h.hashing(words)
        # then
        self.assertEqual(0xff, digest)

    def test_hashing_with_big_input(self):
        # given: twice as many words as nr_input_bits covers
        words = [0x0] * 10
        # when
        digest = self.h.hashing(words)
        # then
        self.assertEqual(0, digest)
class TestLshLookupTable(TestCase):
    """Unit tests for lsh.Lsh.lookup_table: single signature, multiple
    signatures, repeated signatures across bands, and None/empty input."""

    # int(...) keeps the bit count integral: WINDOW_SIZE / BAND is a float in
    # Python 3, and a hash width must be a whole number of bits (this matches
    # the int(window_size / band) convention used by the other test classes).
    # Renamed from `hash` to avoid shadowing the builtin.
    lsh_hash = hdhash.HdHash(int(WINDOW_SIZE / BAND) * NR_BIT_PER_WORD, 8)
    lsh_under_test = lsh.Lsh(WINDOW_SIZE, BAND, ROW, lsh_hash)

    def test_one_signature(self):
        # given
        signature = [0x0]
        # when
        lookup = self.lsh_under_test.lookup_table(signature)
        # then: one bucket, keyed 's<sig>b<band>', holding sentinel column -1
        self.assertEqual(1, len(lookup))
        self.assertEqual(['s0b0', [-1]], lookup[0])

    def test_n_signature(self):
        # given
        signature = [0xff, 0]
        # when
        lookup = self.lsh_under_test.lookup_table(signature)
        # then: two distinct signatures produce two buckets
        self.assertEqual(2, len(lookup))

    def test_n_band_signature(self):
        # given: the same run of unique signatures repeated once per band
        nr_unique_signature = 9
        signature = list(range(nr_unique_signature)) * BAND
        # when
        lookup = self.lsh_under_test.lookup_table(signature)
        # then: every (signature, band) pair yields its own bucket
        self.assertEqual(nr_unique_signature * BAND, len(lookup))

    def test_lookup_table_with_None(self):
        # given
        signature = None
        # when
        lookup = self.lsh_under_test.lookup_table(signature)
        # then: None input degrades to an empty table
        self.assertEqual([], lookup)

    def test_lookup_table_with_empty(self):
        # given  (method name fixed: was 'test_lookup_tabke_with_empty')
        signature = []
        # when
        lookup = self.lsh_under_test.lookup_table(signature)
        # then
        self.assertEqual([], lookup)
class TestLshHashing(TestCase):
    """Unit test for lsh.Lsh.hashing over a multi-window zero input."""

    # int(...) keeps the bit count integral: WINDOW_SIZE / BAND is a float in
    # Python 3 (matches the int(window_size / band) convention used by the
    # other test classes). Renamed from `hash` to avoid shadowing the builtin.
    lsh_hash = hdhash.HdHash(int(WINDOW_SIZE / BAND) * NR_BIT_PER_WORD, 8)
    lsh_under_test = lsh.Lsh(WINDOW_SIZE, BAND, ROW, lsh_hash)

    def test_hashing(self):
        # given: two full windows of zero codes
        data = [0] * WINDOW_SIZE * 2
        # when
        value = self.lsh_under_test.hashing(data)
        # then: one hash per band per window, all zero
        self.assertEqual(BAND * 2, len(value))
        self.assertEqual('0' * BAND * 2, "".join(str(x) for x in value))
class TestParallelSearch(TestCase):
    # LSH parameters: a 155-code window split into 31 bands, 8 rows per band.
    window_size = 155
    band = 31
    row = 8
    band_size = int(window_size / band)
    # Spark context/session are built once at class-definition time.
    # NOTE(review): this raises if another active SparkContext already exists
    # in the process (e.g. when another test class here has run first).
    sc = SparkContext(appName='SearchMusic')
    ss = SparkSession.builder.appName('SearchMusic').master(
        'local[*]').getOrCreate()
    # Parse query and music inputs into DataFrames.
    # query_file / music_file are module-level paths -- defined outside this view.
    query_df, music_df = music_query_parser.do(sc, ss, query_file, music_file)
    h = hdhash.HdHash(band_size * NR_BIT_PER_WORD, row)
    search = parallel_search.ParallelSearch(
        ss, query_df, music_df, window_size, band, row, h,
        hdhash.hamming_distance)

    def test_by_qid_mid(self):
        # Smoke test: match query 21 against music 42 at threshold 0.35.
        # No assertion -- only verifies the call completes without raising.
        self.search.by_qid_and_mid(21, 42, 0.35)
class TestParallelLsh(TestCase):
    """Smoke test for the LSH lookup-table stage on the Spark pipeline."""

    # LSH parameters: a 155-code window split into 31 bands, 8 rows per band.
    window_size = 155
    band = 31
    row = 8
    band_size = int(window_size / band)
    # NOTE(review): creating a second SparkContext in the same process fails;
    # these test classes each build their own -- confirm how the suite is run.
    sc = SparkContext(appName='SearchMusic')
    ss = SparkSession.builder.appName('SearchMusic').master('local[*]').getOrCreate()
    query_df, music_df = music_query_parser.do(sc, ss, query_file, music_file)
    h = hdhash.HdHash(band_size * NR_BIT_PER_WORD, row)
    search = parallel_search.ParallelSearch(ss, query_df, music_df,
                                            window_size, band, row, h,
                                            hdhash.hamming_distance)

    def test_lookup_table(self):
        # BUG FIX: the original called self.p.lookup_table(...), but no
        # attribute `p` is ever defined on this class (AttributeError at
        # runtime). `search` is the only pipeline object available here, so
        # it is presumably the intended target -- TODO confirm that
        # ParallelSearch exposes lookup_table(df, code_col, id_col).
        mdf = self.music_df
        mdf = self.search.lookup_table(mdf, 'mcode', 'mid')
        mdf.persist()
        mdf.show()
# --- Script body: build the parallel search pipeline and time its setup ---
sc, ss = spark_init()

# LSH parameters: a 155-code window split into 31 bands, 8 rows per band.
window_size = 155
band = 31
row = 8
band_size = int(window_size / band)

t1 = time.time()
# qfile / mfile are defined earlier in the script (outside this view).
query_df, music_df = music_query_parser.do(sc, ss, qfile, mfile)
# debug
print('before search')  # typo fix: message read 'befor search'
lsh_hash = hdhash.HdHash(band_size * NR_BIT_PER_WORD, row)
search = parallel_search.ParallelSearch(ss, query_df, music_df, window_size,
                                        band, row, lsh_hash,
                                        hdhash.hamming_distance)
t2 = time.time()  # t2 - t1 = parse + pipeline construction time

# Example of a single-query search, kept for reference:
# qid = 7
# searched = search.by_qid(qid, THRESHOLD)
# if not searched:
#     print('Search failed: Query[%d]' % qid)
# else:
#     for s in searched:
#         print('Search success: Query[%d] hit on %s, songid(%d), '
#               'code_idx(%d)~code_idx(%d)'
#               % (s['qid'], s['file'], s['mid'],
#                  s['col'] * 5, s['col'] * 5 + window_size))

nr_query = 30