def calc_onehash(shingle, seed):
    """
    Compute one seeded hash of a shingle, reduced modulo ``self.modulo``.

    ``self`` is not a parameter: it is read from the enclosing scope, so this
    function only works when defined inside a method (or anywhere else a
    ``self`` name is visible).

    :param shingle: the shingle to hash. When ``self.sh_type == 'c4'`` it is
        unpacked with ``struct`` format ``'<i'`` and must be four bytes long.
    :param seed: value converted with ``int()`` and XOR-ed into the base hash.
    :return: ``(base_hash ^ int(seed)) % self.modulo``
    :raises struct.error: if a ``'c4'`` shingle is not four bytes long.
    """
    def c4_hash(shingle):
        # Interpret the four bytes as a little-endian signed 32-bit integer.
        h = struct.unpack('<i', shingle)[0]
        # (sys.maxsize + 1) * 2 is 2**64 on a 64-bit build (2**32 on a 32-bit
        # one); taking the remainder folds negative values into the
        # non-negative range.
        return h % ((sys.maxsize + 1) * 2)

    if self.sh_type == 'c4':
        base_hash = c4_hash(shingle)
    else:
        base_hash = compute_positive_hash(shingle)

    # int() instead of long(): on Python 2 int() promotes to long by itself
    # when needed, so the value is unchanged, while long() is a NameError on
    # Python 3.
    return operator.xor(base_hash, int(seed)) % self.modulo
def calc_onehash(sh_type, shingle, seed, modulo):
    """
    Compute one seeded hash of a shingle, reduced modulo ``modulo``.

    :param sh_type: shingle type; ``'c4'`` selects the 4-byte ``struct`` based
        hash, any other value falls back to ``compute_positive_hash``.
    :param shingle: the shingle to hash. For ``'c4'`` it is unpacked with
        ``struct`` format ``'<i'`` and must be exactly four bytes long.
    :param seed: value converted with ``int()`` and XOR-ed into the base hash.
    :param modulo: modulus applied to the XOR-ed value.
    :return: ``(base_hash ^ int(seed)) % modulo``
    :raises struct.error: if a ``'c4'`` shingle is not four bytes long.
    """
    def c4_hash(shingle):
        # Interpret the four bytes as a little-endian signed 32-bit integer.
        h = struct.unpack('<i', shingle)[0]
        # (sys.maxsize + 1) * 2 is 2**64 on a 64-bit build (2**32 on a 32-bit
        # one); taking the remainder folds negative values into the
        # non-negative range.
        return h % ((sys.maxsize + 1) * 2)

    if sh_type == 'c4':
        base_hash = c4_hash(shingle)
    else:
        base_hash = compute_positive_hash(shingle)

    # int() instead of long(): on Python 2 int() promotes to long by itself
    # when needed, so the value is unchanged, while long() is a NameError on
    # Python 3.
    return operator.xor(base_hash, int(seed)) % modulo
def _calculate_hash(self, obj):
    """
    Hash an object and reduce the result modulo ``2**self.num_bands``.

    :param obj: object we are computing the hash code for.
    :return: ``compute_positive_hash(obj) % 2**self.num_bands``
    """
    # Positive integer hash of the object, produced by the shared helper.
    positive_hash = compute_positive_hash(obj)
    # Number of distinct hash codes: 2^b, where b is num_bands.
    code_space = 2**self.num_bands
    return positive_hash % code_space
def calc_onehash(shingle, seed):
    """
    Compute one seeded hash of a shingle, reduced modulo ``self.modulo``.

    ``self`` and ``max_mask`` are not parameters: both are read from the
    enclosing scope, so this function only works where those names are
    visible.

    :param shingle: the shingle to hash. When ``self.sh_type == 'c4'`` it is
        unpacked with ``struct`` format ``'<i'`` and must be four bytes long.
    :param seed: value converted with ``int()`` and XOR-ed into the base hash.
    :return: ``(base_hash ^ int(seed)) % self.modulo``
    :raises struct.error: if a ``'c4'`` shingle is not four bytes long.
    """
    def c4_hash(shingle):
        # Interpret the four bytes as a little-endian signed 32-bit integer,
        # then keep only the bits selected by max_mask.
        # NOTE(review): max_mask is presumably an all-ones mask that turns
        # negative values into non-negative ones -- confirm at its definition.
        # A memoised variant backed by shingle_cache used to live here as
        # disabled code; the hash is recomputed on every call.
        h = struct.unpack('<i', shingle)[0]
        return h & max_mask

    if self.sh_type == 'c4':
        base_hash = c4_hash(shingle)
    else:
        base_hash = compute_positive_hash(shingle)

    # int() instead of long(): on Python 2 int() promotes to long by itself
    # when needed, so the value is unchanged, while long() is a NameError on
    # Python 3.
    return operator.xor(base_hash, int(seed)) % self.modulo
def calc_onehash(shingle, seed):
    """
    Compute one seeded hash of a shingle, reduced modulo ``self.modulo``.

    ``self`` and ``settings`` are not parameters: both are read from the
    enclosing scope, so this function only works where those names are
    visible.

    :param shingle: the shingle to hash. When ``self.sh_type == 'c4'`` it is
        unpacked with ``struct`` format ``'<i'``; if that fails with
        ``struct.error`` the shingle is UTF-8 encoded and hashed by XOR-ing
        4-byte windows of the encoded form.
    :param seed: value converted with ``int()`` and XOR-ed into the base hash.
    :return: ``(base_hash ^ int(seed)) % self.modulo``
    """
    def c4_hash(shingle):
        try:
            h = struct.unpack('<i', shingle)[0]
        except struct.error:
            # We land here when the shingle has non-ascii characters in it
            # (or, more generally, whenever it is not exactly four bytes).
            # This branch relies on Python 2 string semantics
            # (str.encode('hex') and the builtin reduce).
            size = 4
            encoded = shingle.encode('utf-8')
            # NOTE(review): windows start at byte offset i, not i * size, so
            # consecutive windows overlap and trailing bytes may be ignored.
            # Left as is so that existing hash values do not change.
            int_hashes = [
                int(encoded[i:i + size].encode('hex'), 16)
                for i in range(len(encoded) // size)
            ]
            # The initial value 0 is the XOR identity, so results for
            # non-empty lists are unchanged; it only prevents the TypeError
            # reduce() raises on an empty list, which happens when the
            # encoded shingle is shorter than `size` bytes.
            h = reduce(operator.xor, int_hashes, 0)
        # Keep only the bits selected by settings.max_mask.
        return h & settings.max_mask

    if self.sh_type == 'c4':
        base_hash = c4_hash(shingle)
    else:
        base_hash = compute_positive_hash(shingle)

    # int() instead of long(): on Python 2 int() promotes to long by itself
    # when needed, so the value is unchanged.
    return operator.xor(base_hash, int(seed)) % self.modulo
def calc_onehash(shingle, seed):
    """
    Return the seeded hash of ``shingle`` modulo ``self.modulo``.

    Both ``self`` and ``settings`` come from the surrounding scope rather
    than from the parameter list.

    :param shingle: the shingle to hash. For ``self.sh_type == 'c4'`` it is
        first unpacked with ``struct`` format ``'<i'``; when that raises
        ``struct.error`` its UTF-8 encoding is hashed instead by XOR-ing
        4-byte windows. For any other type ``compute_positive_hash`` is used.
    :param seed: value converted with ``int()`` and XOR-ed into the base hash.
    :return: ``(base_hash ^ int(seed)) % self.modulo``
    """
    def c4_hash(shingle):
        try:
            h = struct.unpack('<i', shingle)[0]
        except struct.error:
            # We land here when the shingle has non-ascii characters in it,
            # i.e. when it is not exactly four bytes long. The code below
            # depends on Python 2 behaviour: str.encode('hex') and the
            # builtin reduce.
            size = 4
            encoded = shingle.encode('utf-8')
            window_count = len(encoded) // size
            # NOTE(review): each window starts at offset i (not i * size), so
            # windows overlap. Kept unchanged to preserve existing hashes.
            int_hashes = [
                int(encoded[i:i + size].encode('hex'), 16)
                for i in range(window_count)
            ]
            # Seed the fold with 0 (the XOR identity): identical results for
            # non-empty input, and no TypeError from reduce() when the
            # encoded shingle is shorter than `size` and the list is empty.
            h = reduce(operator.xor, int_hashes, 0)
        hash_val = h & settings.max_mask
        return hash_val

    base_hash = (
        c4_hash(shingle) if self.sh_type == 'c4'
        else compute_positive_hash(shingle)
    )
    # int() yields the same value as long() on Python 2 (it promotes to long
    # automatically when the value does not fit in a plain int).
    return operator.xor(base_hash, int(seed)) % self.modulo
def calc_hash(value):
    """
    Hash ``value`` by delegating to ``compute_positive_hash``.

    :param value: object to hash; passed through unchanged.
    :return: whatever ``compute_positive_hash(value)`` returns.
    """
    hashed = compute_positive_hash(value)
    return hashed