예제 #1
0
    def __init__(self):
        """Build the table of supported hash and checksum callables.

        ``self._hashes`` maps an algorithm name to a one-argument callable.
        Entries backed by hashlib, sha3 and pyblake2 return ``hexdigest()``;
        entries backed by mmhash, mmh3 and zlib return ``str()`` of the
        library's result; the remaining entries delegate to helper methods
        on this object.
        """
        self._hashes = {
            # MD family (md2/md4 go through helper methods).
            'md2': lambda x: self._get_md2_hash(x),
            'md4': lambda x: self._get_hashlib_hash('md4', x),
            'md5': lambda x: hashlib.md5(x).hexdigest(),
            # SHA-0/1/2 family.
            'sha': lambda x: self._get_hashlib_hash('sha', x),
            'sha1': lambda x: hashlib.sha1(x).hexdigest(),
            'sha256': lambda x: hashlib.sha256(x).hexdigest(),
            'sha224': lambda x: hashlib.sha224(x).hexdigest(),
            'sha384': lambda x: hashlib.sha384(x).hexdigest(),
            'sha512': lambda x: hashlib.sha512(x).hexdigest(),
            # SHA-3 family.
            'sha3_224': lambda x: sha3.sha3_224(x).hexdigest(),
            'sha3_256': lambda x: sha3.sha3_256(x).hexdigest(),
            'sha3_384': lambda x: sha3.sha3_384(x).hexdigest(),
            'sha3_512': lambda x: sha3.sha3_512(x).hexdigest(),
            # Murmur hashes, stringified.
            'mmh2': lambda x: str(mmhash.get_hash(x)),
            'mmh2_unsigned': lambda x: str(mmhash.get_unsigned_hash(x)),
            'mmh3_32': lambda x: str(mmh3.hash(x)),
            'mmh3_64_1': lambda x: str(mmh3.hash64(x)[0]),
            'mmh3_64_2': lambda x: str(mmh3.hash64(x)[1]),
            'mmh3_128': lambda x: str(mmh3.hash128(x)),
            # Algorithms resolved by name through the hashlib helper.
            'ripemd160': lambda x: self._get_hashlib_hash('ripemd160', x),
            'whirlpool': lambda x: self._get_hashlib_hash('whirlpool', x),
            # BLAKE2.
            'blake2b': lambda x: pyblake2.blake2b(x).hexdigest(),
            'blake2s': lambda x: pyblake2.blake2s(x).hexdigest(),
            # zlib checksums, stringified.
            'crc32': lambda x: str(zlib.crc32(x)),
            'adler32': lambda x: str(zlib.adler32(x)),
        }

        # Names of every callable registered above.
        self.hashes_and_checksums = self._hashes.keys()
        # HASHES is defined outside this method.
        self.supported_hashes = HASHES
예제 #2
0
 def sum(self, content):
     """Hash ``content`` with the algorithm selected by ``self.algo``.

     ``'BKDR'`` dispatches to the private BKDR helper and ``'MRMR'`` to
     ``mmhash.get_hash``; any other value falls back to the built-in hash
     of ``str(content)``.
     """
     algorithm = self.algo
     if algorithm == 'BKDR':
         return self.__BKDRHash(content)
     if algorithm == 'MRMR':
         return mmhash.get_hash(content)
     # NOTE(review): on Python 3 the built-in str hash can differ between
     # interpreter runs (hash randomization) - confirm callers never
     # persist this value.
     return hash(str(content))
예제 #3
0
 def handle_doc(self, doc, doc_id):
     """Record a document's URL and add its distinct words to the index.

     ``self.index`` is keyed by the hash of the UTF-8 encoded word and
     stores a ``(last_doc_id, gaps)`` pair. ``gaps`` accumulates the
     difference between each document id and the previously stored id for
     that word; the first gap is measured from 0.
     """
     self.links.append(doc.url)
     # De-duplicate so each word contributes at most one gap per document.
     for term in set(extract_words(doc.text)):
         key = get_hash(term.encode("UTF-8"))
         previous_id, gaps = self.index.get(key, (0, []))
         gaps.append(doc_id - previous_id)
         self.index[key] = (doc_id, gaps)
예제 #4
0
 def search_word(self, word):
     """Return the set of index entries stored for ``word``.

     ``self.dictionary`` maps a word hash to a ``(pos, size)`` pair. A
     missing word, or a stored size of 0, yields an empty set; otherwise
     ``size`` units are read from ``self.index`` at ``pos`` and passed
     through ``self.compression``.
     """
     # NOTE(review): the word is hashed as given, without UTF-8 encoding -
     # confirm this matches how the dictionary keys were produced.
     offset, length = self.dictionary.get(get_hash(word), (0, 0))
     if length == 0:
         return set()
     self.index.seek(offset)
     # Presumably the True flag selects decoding - verify against
     # self.compression.
     return set(
         self.compression(self.index.read(length), True)
     )
예제 #5
0
 def _by_hash(cls, key):
     """Return the first record whose ``name`` attribute equals ``key``.

     Candidates are pre-filtered with ``cls.where`` on the hash column
     ``hk``; the attribute is then compared against ``key`` itself so that
     records sharing a hash are not confused. Returns ``None`` when nothing
     matches. ``hk`` and ``name`` come from the enclosing scope.
     """
     candidates = cls.where(**{hk: get_hash(key)})
     return next(
         (row for row in candidates if getattr(row, name) == key),
         None,
     )
예제 #6
0
 def hash2(self, a):
     """Map ``a`` to an index modulo ``self.weightLength``.

     The built-in hash of ``str(a)`` is reduced modulo 2**31, rendered as
     text, and fed through ``mmhash.get_hash`` before the final modulo.
     """
     # NOTE(review): the built-in str hash can vary between interpreter
     # runs on Python 3 - confirm indices are not expected to be stable.
     folded = hash(str(a)) % 2**31
     mixed = mmhash.get_hash(str(folded))
     return mixed % self.weightLength
예제 #7
0
 def hash1(self, a):
     """Return ``mmhash.get_hash(a)`` reduced modulo ``self.weightLength``."""
     digest = mmhash.get_hash(a)
     return digest % self.weightLength
예제 #8
0
 def _rademacher(self, val):
     """Return 1 or -1 based on the second-lowest bit of the hash of ``val``.

     The hash comes from ``mmhash.get_hash``; a cleared bit gives 1, a set
     bit gives -1.
     """
     second_bit = (mmhash.get_hash(val) >> 1) % 2
     return 1 if second_bit == 0 else -1
예제 #9
0
 def _hash(self, val):
     """Return ``mmhash.get_hash(val)`` reduced modulo ``self.vectorSize``."""
     digest = mmhash.get_hash(val)
     return digest % self.vectorSize
예제 #10
0
def save_hash(self, name):
    """Refresh the ``<name>_hash`` attribute when ``name`` needs re-hashing.

    Nothing happens unless the record is new (``self._new_record``) or
    ``name`` is listed in ``self._changed``.
    """
    if not self._new_record and name not in self._changed:
        return
    digest = get_hash(getattr(self, name))
    setattr(self, name + '_hash', digest)
예제 #11
0
def save_hash(self, name):
    """Store the hash of attribute ``name`` under ``<name>_hash``.

    The hash is recomputed only for a new record or when ``name`` appears
    in ``self._changed``; otherwise the call is a no-op.
    """
    needs_refresh = self._new_record or name in self._changed
    if needs_refresh:
        fingerprint = get_hash(getattr(self, name))
        setattr(self, name + '_hash', fingerprint)
예제 #12
0
 def _by_hash(cls, key):
     """Look up a record by ``key`` using its hash column as a pre-filter.

     ``cls.where`` is queried on column ``hk`` with the hash of ``key``;
     the first result whose ``name`` attribute equals ``key`` is returned,
     or ``None`` if there is none. ``hk`` and ``name`` come from the
     enclosing scope.
     """
     criteria = {hk: get_hash(key)}
     for candidate in cls.where(**criteria):
         # Compare the real value: distinct keys may share a hash.
         if getattr(candidate, name) == key:
             return candidate
     return None
예제 #13
0
 def _rademacher(self, val):
     """Derive a sign (1 or -1) from the hash of ``val``.

     Bit 1 of ``mmhash.get_hash(val)`` decides the sign: set gives -1,
     cleared gives 1.
     """
     sign_bit = (mmhash.get_hash(val) >> 1) % 2
     if sign_bit:
         return -1
     return 1
예제 #14
0
 def _hash(self, val):
     """Hash ``val`` with mmhash and wrap the result by ``self.vectorSize``."""
     raw_hash = mmhash.get_hash(val)
     slot = raw_hash % self.vectorSize
     return slot
예제 #15
0
 def hash2(self, a):
     """Second hash function: an index modulo ``self.weightLength``.

     Steps: built-in hash of ``str(a)``, reduced modulo 2**31, converted to
     a string, hashed with ``mmhash.get_hash``, then reduced modulo
     ``self.weightLength``.
     """
     # NOTE(review): built-in string hashing may be randomized per process
     # on Python 3 - verify that run-to-run stability is not required.
     seed_text = str(hash(str(a)) % 2**31)
     return mmhash.get_hash(seed_text) % self.weightLength
예제 #16
0
 def hash1(self, a):
     """First hash function: mmhash of ``a`` modulo ``self.weightLength``."""
     raw_hash = mmhash.get_hash(a)
     slot = raw_hash % self.weightLength
     return slot