def test_hash128():
    assert mmh3.hash128('foo') == 168394135621993849475852668931176482145
    assert mmh3.hash128('foo', 42) == 215966891540331383248189432718888555506
    assert mmh3.hash128(
        'foo', 42, signed=False) == 215966891540331383248189432718888555506
    assert mmh3.hash128(
        'foo', 42, signed=True) == -124315475380607080215185174712879655950
def generate_table(self, item_ids):
    """
    Given a list of item IDs, generate a corresponding IBLT

    Args:
        item_ids(list): A list of IDs for items to be included in IBLT.

    Returns:
        list: An invertible bloom lookup table as a list of
            (idSum, hashSum, count) tuples.
    """
    bloom = [(0, 0, 0)] * self.m
    for item in item_ids:
        hash_values = []
        for seed in self.seed_list:
            hash_values.append(mmh3.hash128(str(item).encode(), seed))
        for hash_value in hash_values:
            index = hash_value % self.m
            id_sum = bloom[index][0] ^ item
            if bloom[index][1] == 0:
                hash_sum = mmh3.hash128(
                    str(item).encode(), self.element_hash)
            else:
                hash_sum = bloom[index][1] ^ mmh3.hash128(
                    str(item).encode(), self.element_hash)
            count = bloom[index][2] + 1
            bloom[index] = (id_sum, hash_sum, count)
    return bloom
def peel_element(self, element_id, table, alteration):
    """
    Peels a single element from a given IBLT.

    Args:
        element_id(int): The element to be peeled.
        table(list): The invertible bloom lookup table.
        alteration(int): The indicator as to which list this element was
            stored in (1 OR -1)

    Returns:
        list: An updated invertible bloom lookup table with the given
            element removed.
    """
    hash_values = []
    element_hash = mmh3.hash128(
        str(element_id).encode(), self.element_hash)
    for seed in self.seed_list:
        hash_values.append(mmh3.hash128(str(element_id).encode(), seed))
    for hash_value in hash_values:
        index = hash_value % self.m
        id_sum = table[index][0] ^ element_id
        if table[index][1] == 0:
            hash_sum = element_hash
        else:
            hash_sum = table[index][1] ^ element_hash
        count = table[index][2] - alteration
        table[index] = (id_sum, hash_sum, count)
    return table
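# A minimal round-trip sketch for the two IBLT methods above. Their owning
# class is not shown here, so a SimpleNamespace stands in for `self` with the
# attributes they use (m, seed_list, element_hash); the values are
# illustrative, not the originals.
from types import SimpleNamespace

import mmh3

iblt = SimpleNamespace(m=30, seed_list=[1, 2, 3], element_hash=99)
table = generate_table(iblt, [10, 20, 30])   # encode three item IDs
table = peel_element(iblt, 10, table, 1)     # remove one of them again
# Each item contributes len(seed_list) count increments, so after peeling one
# item the total count across all cells drops from 9 to 6.
assert sum(cell[2] for cell in table) == 2 * len(iblt.seed_list)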
def generate_table(item_ids, seed_key, table_size=_M, max_hashes=MAX_HASHES,
                   a_value=DEFAULT_A_VALUE, hash_decider=None,
                   hash_decider_length=MAX_RANDOM_HASHES,
                   seed_range=MAX_RANDOM_HASHES):
    """
    Generate the randomized hash function quantity based IBLT

    Args:
        a_value: The value for a in the ALOHA style distribution function.
        item_ids: The IDs of the items to be inserted.
        seed_key: Shared key to instantiate hash functions.
        table_size: Size of the IBLT.
        max_hashes: Upper bound for total hashes to be used.
        hash_decider(list[int]): List of random numbers for hashing iterations.
        hash_decider_length: Size of the list of random numbers determining
            the amount of times an item is added.
        seed_range: The upper bound of the values of any given seed key.

    Returns:
        tuple[list[tuple], list[int], list[int]]: An IBLT as a list of tuples,
            each element is of the form (idSum, hashSum, count).
    """
    bloom = [(0, 0, 0)] * table_size
    if hash_decider is None:
        hash_decider = IBLT.generate_hash_decider(seed_key, max_hashes,
                                                  a_value, hash_decider_length)
    seed_list = IBLT.generate_seed_list(seed_key, max_hashes, seed_range)
    for item in item_ids:
        item_hash = mmh3.hash128(str(item).encode(), seed_key)
        hash_quantity = hash_decider[item_hash % len(hash_decider)]
        hash_values = []
        # Calculate hash values for the item and derive the index for encoding
        for i in range(hash_quantity):
            hash_values.append(
                mmh3.hash128(str(item).encode(), seed_list[i]))
        for hash_value in hash_values:
            index = hash_value % table_size
            id_sum = bloom[index][0] ^ item
            if bloom[index][1] == 0:
                hash_sum = item_hash
            else:
                hash_sum = bloom[index][1] ^ item_hash
            count = bloom[index][2] + 1
            bloom[index] = (id_sum, hash_sum, count)
    return bloom, seed_list, hash_decider
def lemmatizeFile(file):
    f = open(file, 'r')
    # wnl = WordNetLemmatizer()
    wordsHashMap = {}
    words = {}
    word = ""
    while 1:
        c = f.read(1)
        if c:
            if ('a' <= c <= 'z') or ('A' <= c <= 'Z') or (c == '\''):
                word += c
            else:
                if word:
                    word = word.lower()
                    if word not in processedWords:
                        canonical = word
                        if word in exceptions:
                            if word in words:
                                words[word] += 1
                            else:
                                words[word] = 1
                            wordsHashMap[mmh3.hash128(word)] = [word, words[word]]
                        else:
                            if word not in stopWords:
                                # tag = nltk.pos_tag([word])
                                if 1:  # tag[0][1] in tags:
                                    # t = tags[tag[0][1]]
                                    canonical = word  # wnl.lemmatize(word, t)
                                    if canonical in words:
                                        words[canonical] += 1
                                    else:
                                        words[canonical] = 1
                                    wordsHashMap[mmh3.hash128(canonical)] = [canonical, words[canonical]]
                        processedWords[word] = canonical
                    else:
                        if word in words:
                            words[word] += 1
                        else:
                            words[word] = 1
                        wordsHashMap[mmh3.hash128(word)] = [word, words[word]]
                    word = ""
        else:
            break
    """
    with open(f.name + '.wordsHashMap', 'w') as outfile:
        json.dump(wordsHashMap, outfile)
    # """
    return wordsHashMap
def get_hash(i, value):
    if (i == 0):
        i = -12345
    # https://stackoverflow.com/questions/11954086/which-hash-functions-to-use-in-a-bloom-filter
    hash1 = hashlib.sha256()
    hash1.update(value.encode('utf-8'))
    return int(hash1.hexdigest(), 16) + (i * mmh3.hash128(value))
def hash_vulnerability(data: dict):
    string_hash = []
    for x in ('cve', 'id', 'cwe', 'title'):
        if x in data:
            string_hash.append(data[x])
    return mmh3.hash128("#".join(string_hash))
def _get_bucket_idxes(self, element):
    idxes = []
    # Each i is a seed for a new Universal Hash Fn
    for i in range(self._k):
        idxes.append(mmh3.hash128(element, i) % len(self._bit_array))
    return idxes
async def vid(request: Request):
    task_cache = GlobalCache()
    global wtf
    if not wtf:
        loop = asyncio.get_event_loop()
        loop.create_task(fetch_downloader())
        wtf = True
    vid_url = request.form.get('url')
    if type(vid_url) is not str:
        return redirect("/")
    vid_url = vid_url.split("&")[0]
    if vid_url.startswith("http"):
        # Drop the URL scheme so equivalent URLs hash to the same key
        # (rstrip would strip a trailing character set, not the prefix).
        vid_url = vid_url.split("://", 1)[-1]
    url_hash = hash128(vid_url)
    if url_hash not in task_cache:
        task_cache[url_hash] = Task()
        downloader_p = Process(target=download_vid,
                               args=(vid_url, download_q, url_hash))
        downloader_p.start()
    return redirect(f"./task/{url_hash}")
def generate_filter(self, items, seeds=None, m=None):
    """
    Given a number of items, generate a bloom filter.

    Args:
        items(list, dict): A list of items or a single item to be inserted
            to the filter.
        seeds(list): (Optional) A list of seed values for hashing algorithm.
        m(int): (Optional) Size of bloom filter array.

    Returns:
        bytearray: An array of binary bits representing the bloom filter.
    """
    if type(items) == dict:
        keys = items.keys()
        temp_list = []
        for key in keys:
            temp_string = str(key) + ":" + str(items[key])
            temp_list.append(temp_string)
        items = temp_list
    if seeds is None:
        seeds = self.seed_list
    if m is None:
        m = self.m
    # Resolve the defaults before sizing the array so that an explicit m
    # always matches the modulus used below.
    bloom_filter = bytearray(int(m))
    for item in items:
        for seed in seeds:
            index = mmh3.hash128(str(item).encode(), seed) % m
            bloom_filter[index] = 1
    return bloom_filter
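# A small usage sketch for generate_filter above. The owning class is not
# shown, so a SimpleNamespace supplies the assumed defaults (seed_list, m) with
# illustrative values; the membership check simply recomputes the same indexes.
from types import SimpleNamespace

import mmh3

bf_cfg = SimpleNamespace(seed_list=[11, 23, 47], m=128)
bloom = generate_filter(bf_cfg, ["alice", "bob", "carol"])

def probably_contains(item, bloom, seeds=bf_cfg.seed_list, m=bf_cfg.m):
    # All k bits set -> possibly present; any bit clear -> definitely absent.
    return all(bloom[mmh3.hash128(str(item).encode(), s) % m] for s in seeds)

assert probably_contains("alice", bloom)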
def _k_smallest_hash(self, document):
    """
    Generates a texts minhash signature using k smallest neighbours method.

    Uses a single random hash to simulate a shuffle of each texts shingles.
    Then selecting i smallest minimum hash values for j permutations.

    Faster but less stable than multi hash method.

    Args:
        document (list): List of text shingles.

    Returns:
        list: List of text signatures generated using k smallest
            neighbours method.
    """
    signature = []
    # Uses a heap to make calculating n smallest values more efficient.
    heapq.heapify(signature)
    if len(document) <= self.permutations:
        raise ValueError(
            'N permutations must not be >= n shingles for k_smallest_values method'
        )
    for shingle in document:
        if self.hash_bits == 64:
            hashed_shingle = mmh3.hash64(shingle, self._hash_seeds)[0]
        elif self.hash_bits == 32:
            hashed_shingle = mmh3.hash(shingle, self._hash_seeds)
        else:
            hashed_shingle = mmh3.hash128(shingle, self._hash_seeds)
        heapq.heappush(signature, hashed_shingle)
    return heapq.nsmallest(self.permutations, signature)
def __init__(self):
    # Define supported hashes
    hashes = dict()
    hashes['md2'] = lambda x: self._get_md2_hash(x)
    hashes['md4'] = lambda x: self._get_hashlib_hash('md4', x)
    hashes['md5'] = lambda x: hashlib.md5(x).hexdigest()
    hashes['sha'] = lambda x: self._get_hashlib_hash('sha', x)
    hashes['sha1'] = lambda x: hashlib.sha1(x).hexdigest()
    hashes['sha256'] = lambda x: hashlib.sha256(x).hexdigest()
    hashes['sha224'] = lambda x: hashlib.sha224(x).hexdigest()
    hashes['sha384'] = lambda x: hashlib.sha384(x).hexdigest()
    hashes['sha512'] = lambda x: hashlib.sha512(x).hexdigest()
    hashes['sha3_224'] = lambda x: sha3.sha3_224(x).hexdigest()
    hashes['sha3_256'] = lambda x: sha3.sha3_256(x).hexdigest()
    hashes['sha3_384'] = lambda x: sha3.sha3_384(x).hexdigest()
    hashes['sha3_512'] = lambda x: sha3.sha3_512(x).hexdigest()
    hashes['mmh2'] = lambda x: str(mmhash.get_hash(x))
    hashes['mmh2_unsigned'] = lambda x: str(mmhash.get_unsigned_hash(x))
    hashes['mmh3_32'] = lambda x: str(mmh3.hash(x))
    hashes['mmh3_64_1'] = lambda x: str(mmh3.hash64(x)[0])
    hashes['mmh3_64_2'] = lambda x: str(mmh3.hash64(x)[1])
    hashes['mmh3_128'] = lambda x: str(mmh3.hash128(x))
    hashes['ripemd160'] = lambda x: self._get_hashlib_hash('ripemd160', x)
    hashes['whirlpool'] = lambda x: self._get_hashlib_hash('whirlpool', x)
    hashes['blake2b'] = lambda x: pyblake2.blake2b(x).hexdigest()
    hashes['blake2s'] = lambda x: pyblake2.blake2s(x).hexdigest()
    hashes['crc32'] = lambda x: str(zlib.crc32(x))
    hashes['adler32'] = lambda x: str(zlib.adler32(x))
    self._hashes = hashes
    self.hashes_and_checksums = self._hashes.keys()
    self.supported_hashes = HASHES
def _multi_hash(self, document):
    """
    Generates a texts minhash signature using the multi-hash method.

    Uses i random hashes for j permutations selecting the minimum hash value
    each time to build each texts hash signature.

    Slower but more stable than k smallest hash method.

    Args:
        document (list): List of document shingles.

    Returns:
        list: List of text signatures generated using the multi-hash method.
    """
    signature = []
    for seed in np.nditer(self._hash_seeds):
        self._min_value = None
        for shingle in document:
            if self.hash_bits == 64:
                hash_value = mmh3.hash64(shingle, int(seed))[0]
            elif self.hash_bits == 32:
                hash_value = mmh3.hash(shingle, int(seed))
            else:
                hash_value = mmh3.hash128(shingle, int(seed))
            # Compare against None explicitly so a legitimate minimum of 0
            # is not overwritten.
            if self._min_value is None:
                self._min_value = hash_value
            elif self._min_value > hash_value:
                self._min_value = hash_value
        signature.append(self._min_value)
    return signature
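# A short sketch of how _multi_hash is typically used: the fraction of
# positions where two signatures agree approximates the Jaccard similarity of
# the underlying shingle sets. The owning class is not shown, so a
# SimpleNamespace carries the assumed attributes (hash_bits, _hash_seeds);
# seed values and shingles are illustrative.
from types import SimpleNamespace

import numpy as np

mh = SimpleNamespace(hash_bits=64, _hash_seeds=np.arange(1, 101), _min_value=None)
sig_a = _multi_hash(mh, ["the cat", "cat sat", "sat on", "on the", "the mat"])
sig_b = _multi_hash(mh, ["the cat", "cat sat", "sat on", "on a", "a mat"])
similarity = sum(a == b for a, b in zip(sig_a, sig_b)) / len(sig_a)
print(f"estimated Jaccard similarity: {similarity:.2f}")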
def validate():
    email_message = ''
    already_sent = None
    if os.path.exists('processed.json'):
        sent_json_file = open('processed.json')
        already_sent = json.load(sent_json_file)
        sent_json_file.close()
    with open('saved.json') as json_file:
        data = json.load(json_file)
        if not already_sent:
            already_sent = {}
        for p in data:
            try:
                with urllib.request.urlopen(p) as response:
                    html = response.read()
                    encoded = base64.b64encode(html)
                    hashed = mmh3.hash128(encoded, 42, signed=True)
                    mark = True
                    if p in already_sent:
                        if already_sent[p]:
                            mark = False
                    if mark:
                        if data[p] != hashed:
                            email_message += "- Failed Hash for: " + str(p) + '\r\n'
                            already_sent[p] = True
            except:
                email_message += "- Failed EXCEPTION for: " + str(p) + '\r\n'
    if email_message != '':
        email_error(email_message, already_sent)
def hash_to_bucket(e, B):
    i = mmh3.hash128(str(e))
    p = i / float(2**128)
    for j in range(0, B):
        if j / float(B) <= p and (j + 1) / float(B) > p:
            return j + 1
    return B
def forward(resource, identifier):
    """ Redirects request for file to direct URL.

        Requires global "paths" dictionary is active.

        resource: a given resource, like "recount2"
        identifier: relative path to file or directory

        Return value: Flask redirect response object
    """
    # Log all requests, even weird ones
    ip = str(request.headers.get('X-Forwarded-For',
                                 request.remote_addr)).split(',')[0].strip()
    print >>_LOGSTREAM, '\t'.join(
        [time.strftime('%A, %b %d, %Y at %I:%M:%S %p %Z'),
         str(mmh3.hash128(ip + 'recountsalt')), resource, identifier])
    _LOGSTREAM.flush()
    if resource == 'recount':
        # Redirect to IDIES URL in order of descending version
        for i in ['2']:  # add versions to precede 2 as they are released
            if identifier.startswith(''.join(['v', i, '/'])):
                idies_url = '/'.join(
                    ['http://idies.jhu.edu/recount/data', identifier]
                )
                idies_response = requests.head(idies_url)
                if idies_response.status_code == 200:
                    return redirect(idies_url, code=302)
        # v1 is not explicitly versioned
        idies_url = '/'.join(['http://idies.jhu.edu/recount/data', identifier])
        idies_response = requests.head(idies_url)
        if idies_response.status_code == 200:
            return redirect(idies_url, code=302)
    abort(404)
def _read(self, file_path: str) -> Iterable[Instance]:
    file_path = Path(file_path)
    files = file_path.iterdir()
    exist = 0
    non_exist = 0
    for f in filter(lambda x: x.suffix == '.json', files):
        with open(f) as jfile:
            data = json.load(jfile)
            for article_summary in data:
                url = article_summary['url']
                article_summary['summary'], _ = re.subn(
                    r"(\s?\([^)]*\)\s?)", "", article_summary['summary'])
                result = {'browser': article_summary}
                try:
                    sentences_path = file_path / "../scored_sentences/{:x}.json".format(
                        mmh3.hash128(url))
                except UnicodeError:
                    continue
                if sentences_path.exists():
                    try:
                        with open(sentences_path) as sentence_file:
                            sentences = json.load(sentence_file)
                    except (ValueError, IOError):
                        continue
                    exist += 1
                    result['sentences'] = sentences
                    for sentence, label in sentences['sentences']:
                        yield self.text_to_instance(sentence, label)
                else:
                    non_exist += 1
        print(exist, non_exist)
        if self.max_files and exist > self.max_files:
            return
def murmur3_128bit(obj):
    """
    Use murmur3_128bit for bit hash by passing this method:
    hasher=DeepHash.murmur3_128bit
    This hasher is the default hasher.
    """
    obj = obj.encode('utf-8')
    return mmh3.hash128(obj, MURMUR_SEED)
def add(self, item):
    if self.isContain(item):
        return False
    else:
        for i in range(self.numHash):
            bitIndex = long(mmh3.hash128(item, i) % self.size)
            self.bitArray[bitIndex] = 1
        return True
def simple_object(key, value):
    "Create a simple key/value object."
    return {
        "_id": mmh3.hash128(value),
        "_type": key,
        "_tool": TOOL,
        key: value,
    }
def get_file_hashes(file_path: Path) -> Iterator[int]:
    files = file_path.iterdir()
    for f in filter(lambda x: x.suffix == '.json', files):
        with open(f) as jfile:
            data = json.load(jfile)
            for article_summary in data:
                url = article_summary['url']
                yield mmh3.hash128(url)
def test_64bit():
    if sys.maxsize < (1 << 32):
        # Skip this test under 32-bit environments
        return
    a = np.zeros(2**32, dtype=np.int8)
    assert mmh3.hash(a) == -1988950868
    assert mmh3.hash64(a) == (-6319308327427928234, -8156928649350215884)
    assert mmh3.hash128(a) == 189813591698865711411311444615608766294
    assert mmh3.hash_bytes(a) == b'V\x8f}\xad\x8eNM\xa84\x07FU\x9c\xc4\xcc\x8e'
def flajolet_martin_algo(k, v, accum, seed, n):
    key = ['MM', 'OH', 'SIGH', 'UM']
    idx = key.index(k)
    hkey = mmh3.hash128(v.lower(), seed) % n
    hkey = format(hkey, 'b')
    trailing_zeros = len(hkey) - len(hkey.rstrip('0'))
    accum.add(pow(2, trailing_zeros))
    return
def run_plugin(data: dict) -> List[dict or None]:
    port = data['port']
    domain = data['domain']

    log.info(f"Starting DNS information gathering for domain "
             f"{domain}")

    output_result = "/tmp/result.json"

    if platform.system() == "Darwin":
        binary = 'testssl.sh'
    else:
        binary = 'testssl'

    command = f"{binary} --jsonfile-pretty={output_result} " \
              f"--severity MEDIUM --sneaky -U -S -p " \
              f"{domain}:{port}"

    execution_result = launch_command(command,
                                      callback=(print, log.info),
                                      file_result=output_result)

    # -------------------------------------------------------------------------
    # Finding results
    # -------------------------------------------------------------------------
    json_execution_result = json.loads(execution_result)

    results = []

    for host in json_execution_result['scanResult']:

        # ---------------------------------------------------------------------
        # Recover vulnerabilities
        # ---------------------------------------------------------------------
        for vulnerability in host['vulnerabilities']:

            # -----------------------------------------------------------------
            # Build IP data
            # -----------------------------------------------------------------
            ip = {
                '_type': 'ip',
                'ip': host['ip']
            }
            ip['_id'] = calculate_hash(ip)

            # -----------------------------------------------------------------
            # Build vulnerability data
            # -----------------------------------------------------------------
            v = {
                '_type': 'vulnerability',
                'cve': vulnerability.get('cve', ""),
                'title': vulnerability['id'],
                'description': vulnerability.get('finding', ""),
                'cwe': vulnerability.get('cwe', "")
            }
            v['_id'] = mmh3.hash128(f"{ip['_id']}#{calculate_hash(v)}")

            results.append([ip, v])

    return results
def api(self, reqtype, endpoint, data=None, headers=None, ttl=180,
        error_msg=None):
    '''(CanvasLMSTool, str, dict or str, dict, int (number of seconds to live), str) -> json

    Return a json object which is the result of a Canvas API call to endpoint,
    and cache the request for ttl seconds. Raise an Exception with error_msg
    text in case of failure.'''
    endpoint = str(endpoint)
    assert reqtype in ['get', 'post', 'put', 'delete']
    assert isinstance(endpoint, str) and endpoint.startswith('/')
    assert data is None or isinstance(data, dict) or isinstance(data, str)
    assert isinstance(headers, dict) or headers is None
    assert error_msg is None or isinstance(error_msg, str)
    token = self.get_canvas_user()['token']
    error_msg = 'Failed to access Canvas. Location: ' + endpoint if error_msg is None else error_msg
    if '?' in endpoint:
        endpoint += '&access_token=' + token
    else:
        endpoint += '?access_token=' + token
    if reqtype == 'get':
        key = str('CanvasAPICall_' + str(hash128(endpoint + str(data) + str(headers))))
        try:
            r = MC.get(key)
            if r is not None:
                return json.loads(r)
        except:
            cherrypy.log('error accessing memcache')
        cherrypy.log('Request for ' + endpoint + ' not cached. Key: ' + key)
    req = getattr(requests, reqtype)
    try:
        content = ''
        r = req(self.canvas_url + endpoint, data=data, headers=headers,
                verify=False)
        if r.status_code in [401, 403]:
            delete_all_cookies()
            raise cherrypy.HTTPRedirect(LOGOUT_URL)
        if r.status_code != 200:
            content = r.content
    except:
        raise Exception(error_msg + ' ' + str(r.status_code) + ' ' + str(content))
    j = r.json()
    if reqtype == 'get':
        try:
            cherrypy.log('setting ' + key + ' :' + str(j))
            MC.set(key, json.dumps(j), ttl)
            print MC.get(key)
        except:
            pass
    return j
def hash_all_func(data):
    if isinstance(data, str):
        data = data.encode('ascii')
    c, b = hashlittle2(data, 0, 0)
    v = mmh3.hash128(key=data, x64arch=True)
    return (c,
            (v >> 16) & 0x0000FFFFFFFFFFFF,
            int(np.int64(np.uint64(v & 0xFFFFFFFFFFFFFFFF))))
def test_hashex_murmur():
    assert proxenos.rendezvous.hashex(proxenos.rendezvous.HashMethod.MMH3_32,
                                      'secret') == mmh3.hash('secret')
    assert proxenos.rendezvous.hashex(proxenos.rendezvous.HashMethod.MMH3_64,
                                      'secret') == mmh3.hash64('secret')[0]
    assert proxenos.rendezvous.hashex(proxenos.rendezvous.HashMethod.MMH3_128,
                                      'secret') == mmh3.hash128('secret')
def __init__(self, basic_block, sim):
    self.basic_block = basic_block
    self.buff = ""
    for i in self.basic_block.bb.get_instructions():
        self.buff += dvm.clean_name_instruction(i)
        self.buff += dvm.static_operand_instruction(i)
    self.buff = self.buff.encode('UTF-8')
    self.hash = mmh3.hash128(self.buff)
def murmur_hash():
    hash_result = mmh3.hash('google')
    pprint(hash_result)

    hash64_result = mmh3.hash64('amazon')
    pprint(hash64_result)

    hash128_result = mmh3.hash128('HugeHard')
    pprint(hash128_result)
def keyword_object(_type, **kwargs):
    "Create an object with multiple keys and values."
    j = dict(**kwargs)
    j["_type"] = _type
    j["_tool"] = TOOL
    j["_id"] = mmh3.hash128("|".join(
        (key.replace("|", "||") + "|" + value.replace("|", "||")
         for key, value in kwargs.items())))
    return j
def hash_to_bucket(user_id, num_buckets):
    """Consistently hash `user_id` into buckets of length `num_buckets`.

    Approach derived from:
    https://stats.stackexchange.com/questions/26344/how-to-uniformly-project-a-hash-to-a-fixed-number-of-buckets
    """
    i = mmh3.hash128(str(user_id))
    p = i / float(2**128)
    for j in range(0, num_buckets):
        if j / float(num_buckets) <= p and (j + 1) / float(num_buckets) > p:
            return j + 1
    return num_buckets
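# A quick illustration of the bucketing behaviour described in the docstring
# above: because mmh3.hash128 spreads inputs roughly uniformly over 2**128,
# the bucket counts come out approximately equal. The user-id range below is
# just an example.
from collections import Counter

counts = Counter(hash_to_bucket(user_id, 4) for user_id in range(10000))
print(counts)  # roughly 2500 users in each of buckets 1..4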
def getSketch(dnaStr, k, seedList):
    colNames = ['%d' % (i) for i in range(len(seedList))]
    sketch = []
    for seed in seedList:
        hashvals = [mmh3.hash128(compareFwdRev(dnaStr[i:i+k]), seed)
                    for i in range(len(dnaStr)-k)]
        sketch += [np.min(hashvals)]
    sketchSeries = pd.Series(data=sketch, index=colNames)
    return sketchSeries
def check(self, item):
    '''
    Check for existence of an item in filter
    '''
    for i in range(self.hash_count):
        digest = mmh3.hash128(item, i) % self.size
        if self.bit_array[digest] == False:
            # if any bit is False then it's not present in the filter;
            # else there is a probability that it exists
            return False
    return True
def FM(stream, r):
    # r is the number of estimates needed
    salt = np.random.randint(1 << 30, size=r)
    z = [0] * r  # z[i] counts the max no. trailing zeros for ith hash fn.
    for x in stream:
        for i in range(r):
            y = mmh3.hash128(str(x) + str(salt[i]))
            itob = bin(y)[2:]  # convert integer to binary in string
            zeros = len(itob) - len(itob.rstrip('0'))  # compute the trailing zeros
            z[i] = max(z[i], zeros)
    return z
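# A sketch of turning the trailing-zero counts returned by FM above into a
# cardinality estimate. Averaging the exponents and applying the standard
# Flajolet-Martin correction factor (~0.77351) is one common way to combine
# the r estimators; the stream below is synthetic.
import numpy as np

stream = [f"user-{i % 500}" for i in range(10000)]  # 500 distinct items
z = FM(stream, r=32)
estimate = (2 ** np.mean(z)) / 0.77351
print(f"estimated distinct count: {estimate:.0f} (true value: 500)")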
def remap_items(filename, outfilename, feature_map, offer_field='offers',
                enumerate=False):
    """
    Remap items
    :param filename:
    :param outfilename:
    :return:
    """
    meta = RecordMeta(open(filename + '.meta').readline().strip().split())
    Record = make_record_cls(meta.fields())
    mapping = {}
    with open(outfilename, 'w') as outfile:
        for line in open(filename):
            splitted = line.strip().split('\t')
            rec = Record(*splitted)
            items = [offer for offer in getattr(rec, offer_field).split()
                     if offer.isdigit()]
            if not items:
                continue
            counter_id = rec.counter_id
            new_items = []
            for item in items:
                offer_hash = mmh3.hash128("%s_%s" % (counter_id, item))
                if offer_hash not in mapping:
                    index = len(mapping)
                    mapping[offer_hash] = (index, counter_id, item)
                else:
                    index = mapping[offer_hash][0]
                if enumerate:
                    new_items.append(str(index))
                else:
                    new_items.append(str(offer_hash))
            new_rec_data = dict([(f, getattr(rec, f)) for f in meta.fields()])
            new_rec_data[offer_field] = ' '.join(new_items)
            new_rec = '\t'.join([new_rec_data[field] for field in meta.fields()])
            outfile.write("%s\n" % new_rec)
    with open(feature_map, 'w') as fmap:
        for offer_hash, (i, counter_id, item) in mapping.iteritems():
            fmap.write("%s\t%s\t%s\t%s\n" % (offer_hash, i, counter_id, item))
    with open(feature_map + '.meta', 'w') as fmap:
        fmap.write("offer_hash\tmap\tcounter_id\toffer_id\n")
def return_design_matrix(self, decision_state, reward=None, weight=1,
                         critic_model=False):
    """
    Design matrix can simply return cartesian product of state and decision
    For now all categorical features
    """
    if self.model_class == 'lookup_table':
        return decision_state, reward
    else:
        state, decision_taken = decision_state
        state_namespace = " |state " + " ".join(state) + " " + "tag_" + str(
            mmh3.hash128("_".join(state)))
        decision_namespace = " |decision " + "action_" + str(decision_taken)
        input_str = state_namespace + decision_namespace + '\n'

        # Do this after cache retrieval
        if reward:
            output = str(reward) + " " + str(weight)
            fv = output + input_str
        else:
            fv = input_str
        return fv, reward
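# A small sketch of the VW-style feature line produced by the non-lookup branch
# of return_design_matrix above. The owning class is not shown, so a
# SimpleNamespace stands in for `self`; the state and action names are made up
# for illustration.
from types import SimpleNamespace

model = SimpleNamespace(model_class='vw')
fv, reward = return_design_matrix(model,
                                  (('weather_sunny', 'day_mon'), 'play'),
                                  reward=1.0)
print(fv)
# e.g. "1.0 1 |state weather_sunny day_mon tag_<hash> |decision action_play"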
def lookup(self, new_string):
    a, b = mmh3.hash64(new_string)
    if (self.bloom_array[mmh3.hash(new_string) % 1000000] == 0
            or self.bloom_array[mmh3.hash128(new_string) % 1000000] == 0
            or self.bloom_array[a % 1000000] == 0
            or self.bloom_array[b % 1000000] == 0):
        return False
    else:
        return True
def genPrimaryKey64(data):
    return "%x" % (mmh3.hash128(data) & 0xFFFFFFFFFFFFFFFF)
def add(self, new_string):
    self.bloom_array[mmh3.hash(new_string) % 1000000] = 1
    self.bloom_array[mmh3.hash128(new_string) % 1000000] = 1
    a, b = mmh3.hash64(new_string)
    self.bloom_array[a % 1000000] = 1
    self.bloom_array[b % 1000000] = 1
def hash_string(s):
    return "html2latex_{version}_{mmh3_hash}_{hmac_of_sha512_hash}".format(
        version=VERSION,
        mmh3_hash=mmh3.hash128(s),
        hmac_of_sha512_hash=hmac.new(hashlib.sha512(s).hexdigest()).hexdigest(),
    )
def test_hash_128(self):
    h = mmh3.hash128('hello')
    assert h == 121118445609844952839898260755277781762
def hashFiles(files):
    fileMap = {}
    for f in files:
        fileMap[mmh3.hash128(f)] = f
    return fileMap
def obfuscateDecimal(self, blob):
    return mmh3.hash128(blob) & 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
def _hash(self, item):
    bloom = 0
    for salt in self.salts:
        bloom |= (1L << (mmh3.hash128(salt + str(item)) % self.m))
    return bloom
def build_key(params):
    joined = ','.join(['%s=%s' % (PersonCounter.PARAM_KEYS[i], value)
                       for i, value in enumerate(params) if value])
    return 'person-count__%s' % mmh3.hash128(joined)
def sparsify(self, x):
    sparse_x = np.nonzero(x)[0]
    tag = str(mmh3.hash128("_".join('pix_' + str(i) for i in sparse_x)))
    state = " |state " + " ".join('pix_' + str(i) for i in sparse_x) + " tag_" + tag
    return state