def __init__(self, d):
    """Store the dimension and draw a random Gaussian vector of that size.

    ``self.Data`` receives ``d`` independent samples from a normal
    distribution with mean 0 and standard deviation 1. When the global
    ``DEBUG`` flag read through ``gol`` equals ``True``, the dimension and
    the sampled vector are printed (the banner labels this ``CosineLsh``).

    Args:
        d: Number of samples to draw, i.e. the length of ``self.Data``.
    """
    self.d = d

    # One standard-normal draw per dimension, in order.
    samples = []
    for _ in range(self.d):
        samples.append(random.gauss(0, 1))
    self.Data = samples

    debug_enabled = (True == gol.get_value('DEBUG'))
    if not debug_enabled:
        return

    print('CosineLsh:\td: ' + str(self.d))
    print('CosineLsh Data: ')
    print(self.Data)
def resize(self, L):
    """Set the number of hash tables in use to ``L``.

    If ``L`` is smaller than the current ``self.L``, only the first ``L``
    entries of ``self.hash_tables`` are kept. Otherwise one new entry is
    appended for every missing table. Each entry is a pair made of a list
    of ``self.k`` objects returned by ``self.__creat_ht__()`` and an empty
    bucket mapping (``defaultdict(list)``).

    Newly added tables start empty: nothing is inserted into them here.

    Args:
        L: Desired number of hash tables.
    """
    if True == gol.get_value('DEBUG'):
        print('LshWrapper resize:\tnew L: ' + str(L)
              + '\tL: ' + str(self.L) + '\tk: ' + str(self.k))

    if L < self.L:
        # Shrink: keep only the first L tables.
        self.hash_tables = self.hash_tables[:L]
    else:
        # Grow: build the missing tables first and attach them in one step,
        # so a failure while creating hashers leaves hash_tables untouched.
        new_tables = []
        for _table in range(self.L, L):
            # k hashers per table; the paired mapping holds the buckets.
            hashers = [self.__creat_ht__() for _func in range(self.k)]
            new_tables.append((hashers, defaultdict(list)))
        self.hash_tables.extend(new_tables)

    if True == gol.get_value('DEBUG'):
        print('resize wrapper hashtable: ')
        for ht, ct in self.hash_tables:
            print('ht: ' + str(type(ht)) + '\tct: ' + str(type(ct)))
            print(ht)
            print(ct)

    self.L = L
def __init__(self, r, d):
    """Store the hash parameters and draw the random offset and vector.

    When the global ``DEBUG`` flag read through ``gol`` equals ``True``,
    the parameters and the sampled vector are printed (the banner labels
    this ``L2Lsh``).

    Args:
        r: Upper bound of the range the random offset ``b`` is drawn from.
        d: Number of Gaussian samples to draw, i.e. the length of
            ``self.Data``.
    """
    # Hash family parameters.
    self.r = r
    self.d = d

    # Random offset drawn uniformly from [0, r]; random.uniform treats the
    # range as closed, so b is not guaranteed to be strictly inside (0, r).
    self.b = random.uniform(0, self.r)

    # One sample per dimension from a Gaussian with mean 0 and sigma 1.
    self.Data = [random.gauss(0, 1) for _ in range(self.d)]

    if True == gol.get_value('DEBUG'):
        print('L2Lsh:\tr: ' + str(self.r) + '\td: ' + str(self.d)
              + '\tb: ' + str(self.b))
        print('L2Lsh Data: ')
        print(self.Data)
def __init__(self, lsh_type, d, r=1.0, k=2, L=2):
    """Store the configuration and create the initial ``L`` hash tables.

    ``self.L`` starts at 0 with an empty ``self.hash_tables`` list, and
    ``self.resize(L)`` is then called to build the tables and update
    ``self.L``. When the global ``DEBUG`` flag read through ``gol`` equals
    ``True``, the resulting configuration is printed.

    Args:
        lsh_type: Stored as ``self.type``.
        d: Stored as ``self.d``.
        r: Stored as ``self.r``. Defaults to 1.0.
        k: Stored as ``self.k``. Defaults to 2.
        L: Number of hash tables requested from ``resize``. Defaults to 2.
    """
    self.type = lsh_type
    self.d = d
    self.r = r
    self.k = k

    # Start with no tables so that resize() creates all L of them.
    self.L = 0
    self.hash_tables = []
    self.resize(L)

    if True == gol.get_value('DEBUG'):
        print('LshWrapper init:\ttype: ' + str(self.type)
              + '\td: ' + str(self.d) + '\tr: ' + str(self.r)
              + '\tk: ' + str(self.k) + '\tL: ' + str(self.L))
def index(self, datas):
    """Index ``datas`` into every hash table and reset the query stats.

    For each ``(ht, ct)`` pair in ``self.hash_tables``, every item of
    ``datas`` is hashed with ``self.hash(ht, item)`` and its position in
    ``datas`` is appended to the bucket ``ct[hash_value]``.

    Each bucket mapping is emptied before it is refilled. Without this,
    calling ``index`` again would keep positions from the previous call,
    which refer to the old ``self.datas`` and would show up as duplicate
    or out-of-range entries.

    Args:
        datas: Sequence of items to index; stored as ``self.datas``.
    """
    self.datas = datas

    for ht, ct in self.hash_tables:
        # Drop positions left over from an earlier index() call.
        ct.clear()
        for ix, p in enumerate(self.datas):
            ct[self.hash(ht, p)].append(ix)

    # Reset stats.
    self.tot_touched = 0
    self.num_queries = 0

    if True == gol.get_value('DEBUG'):
        print('index wrapper hashtable: ')
        for ht, ct in self.hash_tables:
            print('ht: ' + str(type(ht)) + '\tct: ' + str(type(ct)))
            print(ht)
            print(ct)