def add(self, uuid):
    """Fold one key into the HyperLogLog registers.

    Falsy keys (``None``, the empty string, ...) are skipped and leave the
    registers untouched.
    """
    if not uuid:
        return

    # Hash the key.  If hashing raises UnicodeEncodeError, retry on an
    # ASCII-only encoding of the key with the offending characters dropped.
    try:
        digest = smhasher.murmur3_x64_64(uuid)
    except UnicodeEncodeError:
        digest = smhasher.murmur3_x64_64(uuid.encode('ascii', 'ignore'))

    # The low-order b bits of the hash select the register to update ...
    register = digest & ((1 << self.b) - 1)
    # ... and the bits above them are what gets ranked by _get_rho.
    remainder = digest >> self.b

    # Each register keeps the largest rank it has been offered so far.
    current = self.M[register]
    candidate = self._get_rho(remainder, self.bitcount_arr)
    self.M[register] = max(current, candidate)
def Hit(self, e):
    """Account for a hit on entry *e*.

    ``hits`` and ``requests`` are bumped on every call.  The entry's key is
    then hashed; only when ``hash % RLIMIT < R`` is the hit processed
    further (counted in ``Rpassed``), otherwise it is counted in
    ``Rignored`` and the method returns early.

    A processed hit updates the PDF for the entry's current activity bin,
    removes the entry from that bin, calls ``ageentries()`` if the last bin
    would reach ``binsize``, and then moves the entry to the top bin by
    setting ``e.activity = TOP_VAL``.

    The ``assert`` statements are consistency checks that only run when
    ``self.R == 1``.
    """
    self.hits += 1
    self.requests += 1

    digest = smhasher.murmur3_x64_64(e.key)
    if (digest % self.RLIMIT) >= self.R:
        self.Rignored += 1
        return
    self.Rpassed += 1

    bin_index = self.map_activity(e.activity)
    self.binsizesum += self.activecount[bin_index]
    # The PDF has to be updated before any activity count is changed.
    self.update_pdf(bin_index)

    if self.R == 1:
        assert self.activecount[bin_index] >= 1
    self.activecount[bin_index] -= 1

    # If the entry came out of the MAX_ACTIVITY bin, count it back in when
    # testing whether the last bin is full.
    returning = 1 if bin_index == self.MAX_ACTIVITY else 0
    if self.activecount[-1] + returning >= self.binsize:
        self.ageentries()

    self.activecount[self.MAX_ACTIVITY] += 1
    e.activity = self.TOP_VAL

    if self.R == 1:
        assert abs(self.hits - sum(self.pdf.values())) < 1.0e-5
def Hit(self, e):
    """Record a hit for entry *e* and promote it to the top activity bin.

    Always increments ``hits`` and ``requests``.  The hit is only processed
    when the hash of ``e.key`` satisfies ``hash % RLIMIT < R``; such hits
    increment ``Rpassed``, all others increment ``Rignored`` and return
    immediately.

    For a processed hit the method, in order: adds the size of the entry's
    current bin to ``binsizesum``, updates the PDF for that bin, decrements
    the bin's count, calls ``ageentries()`` when the last bin would reach
    ``binsize``, increments the ``MAX_ACTIVITY`` bin and sets
    ``e.activity`` to ``TOP_VAL``.
    """
    self.hits += 1
    self.requests += 1

    passes_filter = (smhasher.murmur3_x64_64(e.key) % self.RLIMIT) < self.R
    if not passes_filter:
        self.Rignored += 1
        return
    self.Rpassed += 1

    activity_bin = self.map_activity(e.activity)
    self.binsizesum += self.activecount[activity_bin]

    # Update the PDF first: it must see the counts before they change.
    self.update_pdf(activity_bin)

    # Consistency check, only performed when R == 1.
    if self.R == 1:
        assert self.activecount[activity_bin] >= 1
    self.activecount[activity_bin] -= 1

    # An entry that was already in the MAX_ACTIVITY bin is re-added to it
    # below, so include it in the fullness test.
    extra = 1 if activity_bin == self.MAX_ACTIVITY else 0
    last_bin_full = self.activecount[-1] + extra >= self.binsize
    if last_bin_full:
        self.ageentries()

    self.activecount[self.MAX_ACTIVITY] += 1
    e.activity = self.TOP_VAL

    # Consistency check, only performed when R == 1.
    if self.R == 1:
        assert abs(self.hits - sum(self.pdf.values())) < 1.0e-5
def increment(self, element: object) -> None:
    """Feed *element* into the per-bucket maxima.

    The element is stringified, UTF-8 encoded and hashed; the hash is
    reduced modulo ``hash_function_upper_bound``.  The bucket chosen by
    ``get_bucket_id`` then keeps the larger of its current value and
    ``count_leading_zeros`` of the reduced hash.
    """
    encoded = str(element).encode('utf-8')
    hashed = smhasher.murmur3_x64_64(encoded) % self.hash_function_upper_bound

    bucket = self.get_bucket_id(hashed)
    previous = self.max_hash_value_by_buckets[bucket]
    observed = self.count_leading_zeros(hashed)
    self.max_hash_value_by_buckets[bucket] = max(previous, observed)
def Evict(self, key):
    """Record the eviction of *key* in the filter at index ``self.first``.

    Only keys whose hash is a multiple of ``self.R`` are recorded; all
    other keys are ignored.  Before inserting, the filters are rotated via
    ``rotateFilters()`` if the first filter's scaled counter has reached
    ``capacity_per_filter``.  A key the first filter already reports is not
    inserted again.
    """
    digest = smhasher.murmur3_x64_64(key)
    if digest % self.R:
        return

    # Rotate before inserting so the key lands in a filter with room.
    first_is_full = self.R * self.counters[self.first] >= self.capacity_per_filter
    if first_is_full:
        self.rotateFilters()

    # self.first is read again here because rotateFilters() may change it.
    active_filter = self.cfs[self.first]
    if active_filter.check(key):
        return
    active_filter.add(key, 0)
    self.counters[self.first] += 1
def Set(self, e):
    """Place entry *e* in the top activity bin.

    The entry is only processed when the hash of ``e.key`` satisfies
    ``hash % RLIMIT < R``; in that case ``Rpassed`` is incremented,
    ``e.activity`` is set to ``TOP_VAL`` and the ``MAX_ACTIVITY`` bin count
    is incremented.  Otherwise the method returns without changing any
    state (``Rignored`` is deliberately left alone here).
    """
    digest = smhasher.murmur3_x64_64(e.key)
    if (digest % self.RLIMIT) >= self.R:
        # Skipped entries are not counted in Rignored by this method.
        return

    self.Rpassed += 1
    e.activity = self.TOP_VAL
    self.activecount[self.MAX_ACTIVITY] += 1
def _make_hashfuncs(key):
    """Return ``nbr_slices`` bit positions for *key*, each in ``[0, nbr_bits)``.

    Text keys are UTF-8 encoded; every other key goes through ``str()``.
    The hashes are chained: the first uses seed 0 and each later one is
    seeded with the previous hash value.

    ``nbr_slices``, ``nbr_bits`` and ``text_type`` are taken from the
    enclosing scope.
    """
    key = key.encode('utf-8') if isinstance(key, text_type) else str(key)

    positions = []
    seed = 0
    for _ in range(nbr_slices):
        # The hash just computed doubles as the seed of the next round.
        seed = smhasher.murmur3_x64_64(key, seed)
        positions.append(seed % nbr_bits)
    return positions
def Evict(self, e):
    """Remove entry *e* from its activity bin.

    The eviction is only processed when the hash of ``e.key`` satisfies
    ``hash % RLIMIT < R`` (counted in ``Rpassed``); otherwise it is counted
    in ``Rignored`` and nothing else changes.  For a processed eviction the
    count of the bin returned by ``map_activity(e.activity)`` is
    decremented.
    """
    digest = smhasher.murmur3_x64_64(e.key)
    if (digest % self.RLIMIT) >= self.R:
        self.Rignored += 1
        return
    self.Rpassed += 1

    bin_index = self.map_activity(e.activity)
    self.activecount[bin_index] -= 1

    # Consistency check, only performed when R == 1.
    if self.R == 1:
        assert self.activecount[bin_index] >= 0
def Miss(self, key):
    """Look *key* up in the three filters of ``self.cfs`` after a miss.

    Only keys whose hash is a multiple of ``self.R`` are processed
    (counted in ``Rpassed``); every other key is counted in ``Rignored``
    and the method returns ``None``.

    The filters are probed in order starting at index ``self.first``.
    While probing, ``[start, end)`` tracks the range covered by the filter
    currently being probed, each filter contributing
    ``R * counters[index]``.  When a filter reports the key, ``ghosthits``
    is credited according to where that range lies relative to
    ``capacity`` and the hit's weight is spread evenly over the matching
    ``pdf`` slots.  When no filter reports it, ``ghostmisses`` is
    incremented.

    Returns:
        True if a filter reported the key, False if none did, and None if
        the key was skipped.
    """
    if (smhasher.murmur3_x64_64(key) % self.R) != 0:
        self.Rignored += 1
        return
    self.Rpassed += 1

    probe_order = [(self.first + step) % 3 for step in range(3)]

    found = False
    start = 0
    end = 0
    for index in probe_order:
        span = self.R * self.counters[index]
        end += span
        if self.cfs[index].check(key):
            found = True
            break
        start += span

    if not found:
        self.ghostmisses += 1
        return found

    if end <= self.capacity:
        credit = self.R * (1.0 - FPP_RATE)
    elif start < self.capacity:
        # This is overestimating values from [capacity,...,1.5*capacity],
        # hence scale down.
        credit = self.R * (1.0 - FPP_RATE) * (end - self.capacity + 0.0) / (end - start + 0.0)
        assert credit >= 0
    else:
        # Hit at a stack distance greater than the capacity.
        credit = 0
    self.ghosthits += credit

    # Spread the hit evenly over [start, end), capped at 1.5 * capacity.
    share = self.R * (1.0 / (end - start)) * (1.0 - FPP_RATE)
    upper = min(int(1.5 * self.capacity), end)
    for distance in xrange(start, upper):
        self.pdf[distance] += share

    return found
def Miss(self, key):
    """Handle a miss on *key* by consulting the three filters in ``self.cfs``.

    Keys whose hash is not a multiple of ``self.R`` are counted in
    ``Rignored`` and skipped (the method then returns ``None``); all other
    keys are counted in ``Rpassed`` and looked up.

    Filters are checked in rotation order beginning at ``self.first``.
    ``lower`` and ``upper`` bound the range covered by the filter being
    checked, where every filter spans ``R * counters[index]``.  A reported
    key adds to ``ghosthits`` (fully, scaled down, or not at all, depending
    on how the range compares with ``capacity``) and adds an equal share to
    each ``pdf`` slot in the range, up to ``1.5 * capacity``.  A key no
    filter reports increments ``ghostmisses``.

    Returns:
        True when some filter reported the key, False when none did, None
        when the key was skipped.
    """
    digest = smhasher.murmur3_x64_64(key)
    if (digest % self.R) != 0:
        self.Rignored += 1
        return
    self.Rpassed += 1

    rotation = [(self.first + offset) % 3 for offset in (0, 1, 2)]

    lower = 0
    upper = 0
    found = False
    for slot in rotation:
        width = self.R * self.counters[slot]
        upper += width
        if self.cfs[slot].check(key):
            found = True
            break
        lower += width

    if found:
        if upper <= self.capacity:
            gained = self.R * (1.0 - FPP_RATE)
        elif lower >= self.capacity:
            # Hit at a stack distance greater than the capacity.
            gained = 0
        else:
            # This is overestimating values from [capacity,...,1.5*capacity],
            # hence scale down.
            gained = self.R * (1.0 - FPP_RATE) * (upper - self.capacity + 0.0) / (upper - lower + 0.0)
            assert gained >= 0
        self.ghosthits += gained

        # Every slot in the range receives the same share of the hit.
        per_slot = self.R * (1.0 / (upper - lower)) * (1.0 - FPP_RATE)
        for position in xrange(lower, min(int(1.5 * self.capacity), upper)):
            self.pdf[position] += per_slot
    else:
        self.ghostmisses += 1

    return found
def hashval(self, message):
    """Return the murmur3 hash of *message* divided by ``INT_64_MAX``.

    NOTE(review): on Python 2 without ``from __future__ import division``
    this is integer (floor) division when ``INT_64_MAX`` is an int --
    confirm which behaviour callers expect.
    """
    return smhasher.murmur3_x64_64(message) / INT_64_MAX
import sys
import smhasher

# Print, for each of 100 generated strings, which of 4 buckets its seeded
# murmur3 hash falls into (one bucket number per line).
random_str = "random_str_"
for num in range(0, 100):
    new_str = random_str + str(num)
    # abs() is applied before the modulo, so negative hashes are mapped by
    # their magnitude.
    val = abs(smhasher.murmur3_x64_64(new_str, 0x1234ABCD)) % 4
    # print(...) with a single argument prints the same text on Python 2
    # and Python 3; the bare print statement is a SyntaxError on Python 3.
    print(val)
def shard_from_id(id, num_shards=2):
    """Return the name of the shard that *id* hashes to.

    The id is hashed with ``smhasher.murmur3_x64_64`` and reduced modulo
    *num_shards*; the result is mapped to ``'shard1'`` ..
    ``'shard<num_shards>'``.

    Args:
        id: Key to place; passed unchanged to the hash function.
        num_shards: Number of shards to spread ids over.  Defaults to 2,
            the value that was previously hard-coded.

    Returns:
        The shard name as a string, e.g. ``'shard1'``.

    Raises:
        ValueError: If *num_shards* is smaller than 1.
    """
    # Validate up front so a bad shard count fails clearly instead of as a
    # ZeroDivisionError or a non-positive shard number.
    if num_shards < 1:
        raise ValueError("num_shards must be at least 1, got %r" % (num_shards,))

    index = smhasher.murmur3_x64_64(id) % num_shards
    # Shard names are 1-based.
    return 'shard' + str(index + 1)