def __init__(self):
    """Build the name -> callable table of supported hashes and checksums.

    Every entry takes the data to hash as its only argument.  The entries
    built on ``hexdigest()`` or ``str(...)`` return strings; the return
    type of the ``_get_md2_hash`` / ``_get_hashlib_hash`` helpers is not
    visible here (presumably also a string -- confirm against the helpers).
    """
    # Insertion order is kept the same as the original assignments.
    self._hashes = {
        # Delegated to helper methods on this object.
        'md2': lambda data: self._get_md2_hash(data),
        'md4': lambda data: self._get_hashlib_hash('md4', data),
        'md5': lambda data: hashlib.md5(data).hexdigest(),
        'sha': lambda data: self._get_hashlib_hash('sha', data),
        # SHA-1 / SHA-2 family straight from hashlib.
        'sha1': lambda data: hashlib.sha1(data).hexdigest(),
        'sha256': lambda data: hashlib.sha256(data).hexdigest(),
        'sha224': lambda data: hashlib.sha224(data).hexdigest(),
        'sha384': lambda data: hashlib.sha384(data).hexdigest(),
        'sha512': lambda data: hashlib.sha512(data).hexdigest(),
        # SHA-3 family from the ``sha3`` module.
        'sha3_224': lambda data: sha3.sha3_224(data).hexdigest(),
        'sha3_256': lambda data: sha3.sha3_256(data).hexdigest(),
        'sha3_384': lambda data: sha3.sha3_384(data).hexdigest(),
        'sha3_512': lambda data: sha3.sha3_512(data).hexdigest(),
        # Murmur hashes: numeric results are stringified.
        'mmh2': lambda data: str(mmhash.get_hash(data)),
        'mmh2_unsigned': lambda data: str(mmhash.get_unsigned_hash(data)),
        'mmh3_32': lambda data: str(mmh3.hash(data)),
        'mmh3_64_1': lambda data: str(mmh3.hash64(data)[0]),
        'mmh3_64_2': lambda data: str(mmh3.hash64(data)[1]),
        'mmh3_128': lambda data: str(mmh3.hash128(data)),
        'ripemd160': lambda data: self._get_hashlib_hash('ripemd160', data),
        'whirlpool': lambda data: self._get_hashlib_hash('whirlpool', data),
        'blake2b': lambda data: pyblake2.blake2b(data).hexdigest(),
        'blake2s': lambda data: pyblake2.blake2s(data).hexdigest(),
        # zlib checksums: integer results are stringified.
        'crc32': lambda data: str(zlib.crc32(data)),
        'adler32': lambda data: str(zlib.adler32(data)),
    }
    self.hashes_and_checksums = self._hashes.keys()
    self.supported_hashes = HASHES
def sum(self, content):
    """Hash *content* with the algorithm selected by ``self.algo``.

    ``'BKDR'`` uses this object's private BKDR hash, ``'MRMR'`` uses
    ``mmhash.get_hash``; any other value falls back to the built-in hash
    of ``str(content)``.
    """
    if self.algo == 'BKDR':
        return self.__BKDRHash(content)
    if self.algo == 'MRMR':
        return mmhash.get_hash(content)
    # NOTE: the built-in str hash is salted per process on Python 3 unless
    # PYTHONHASHSEED is fixed, so this fallback is not stable across runs.
    fallback_text = str(content)
    return fallback_text.__hash__()
def handle_doc(self, doc, doc_id):
    """Register *doc* under *doc_id* in the link list and the word index.

    The document URL is appended to ``self.links``.  For every distinct
    word extracted from ``doc.text`` the index entry, keyed by the hash of
    the UTF-8 encoded word, is a pair ``(last_doc_id, gaps)``.  The value
    appended to ``gaps`` is the difference between *doc_id* and the
    previously stored id (0 for a word seen for the first time).
    """
    self.links.append(doc.url)
    for term in set(extract_words(doc.text)):
        key = get_hash(term.encode("UTF-8"))
        previous_id, gaps = self.index.get(key, (0, []))
        gaps.append(doc_id - previous_id)
        self.index[key] = (doc_id, gaps)
def search_word(self, word):
    """Return the set of index entries stored for *word*.

    ``self.dictionary`` maps a word hash to ``(pos, size)``.  A size of 0
    (also the default for unknown words) yields an empty set.  Otherwise
    ``size`` units are read from ``self.index`` at ``pos`` and passed
    through ``self.compression`` with ``True`` as second argument
    (presumably "decode" mode -- confirm against the codec).

    NOTE(review): *word* is hashed as given, without UTF-8 encoding here;
    verify this matches how keys were hashed when the index was built.
    """
    key = get_hash(word)
    offset, length = self.dictionary.get(key, (0, 0))
    if length == 0:
        return set()
    self.index.seek(offset)
    return set(self.compression(self.index.read(length), True))
def _by_hash(cls, key):
    """Return the first record whose ``name`` attribute equals *key*.

    Candidates are fetched via ``cls.where`` filtered on the hash column
    ``hk``; each one is then compared on the real attribute value, so a
    record that only shares the hash is skipped.  Returns ``None`` when
    nothing matches.
    """
    candidates = cls.where(**{hk: get_hash(key)})
    return next(
        (record for record in candidates if getattr(record, name) == key),
        None,
    )
def hash2(self, a):
    """Return a second bucket index for *a* in ``[0, self.weightLength)``.

    The built-in hash of ``str(a)`` is reduced modulo 2**31, rendered as a
    decimal string and fed to ``mmhash.get_hash``; that result is reduced
    modulo ``self.weightLength``.
    """
    builtin_digest = hash(str(a)) % 2**31
    murmur_digest = mmhash.get_hash(str(builtin_digest))
    return murmur_digest % self.weightLength
def hash1(self, a):
    """Return ``mmhash.get_hash(a)`` reduced modulo ``self.weightLength``."""
    murmur_digest = mmhash.get_hash(a)
    return murmur_digest % self.weightLength
def _rademacher(self, val):
    """Return 1 or -1 for *val*, decided by one bit of its mmhash value.

    The hash is shifted right by one and its parity checked: even gives 1,
    odd gives -1.
    """
    parity = (mmhash.get_hash(val) >> 1) % 2
    return 1 if parity == 0 else -1
def _hash(self, val):
    """Return ``mmhash.get_hash(val)`` reduced modulo ``self.vectorSize``."""
    murmur_digest = mmhash.get_hash(val)
    return murmur_digest % self.vectorSize
def save_hash(self, name):
    """Refresh the ``<name>_hash`` attribute when field *name* needs it.

    Nothing happens unless the record is new (``self._new_record``) or
    *name* is listed in ``self._changed``.  Otherwise the current value of
    the field is hashed with ``get_hash`` and stored on ``<name>_hash``.
    """
    if not (self._new_record or name in self._changed):
        return
    digest = get_hash(getattr(self, name))
    setattr(self, name + '_hash', digest)
def save_hash(self, name):
    """Store the hash of field *name* on ``<name>_hash`` if it is stale.

    The hash is recomputed only for a new record (``self._new_record``) or
    when *name* appears in ``self._changed``; otherwise the call is a
    no-op.
    """
    needs_refresh = self._new_record or name in self._changed
    if needs_refresh:
        current_value = getattr(self, name)
        setattr(self, name + '_hash', get_hash(current_value))
def _by_hash(cls, key):
    """Look up a record by *key*, using the hash column as a pre-filter.

    ``cls.where`` is queried with ``hk`` equal to ``get_hash(key)``.  Each
    returned row is then checked on its ``name`` attribute so that rows
    sharing only the hash are ignored.  Returns the first exact match, or
    ``None`` if there is none.
    """
    hash_filter = {hk: get_hash(key)}
    for candidate in cls.where(**hash_filter):
        if getattr(candidate, name) != key:
            continue
        return candidate
    return None