def filter_uids(uids: List[str], train: bool) -> List[str]:
    """Pick the train or held-out share of ``uids`` using a hash of each uid.

    A uid is "held out" when its CityHash64 value is divisible by 20, i.e.
    roughly one uid in twenty (about 5%). Because membership depends only on
    the uid's hash, the same uid lands in the same share on every call.

    Args:
        uids: The uids to split.
        train: If true, keep the uids that are NOT held out (about 95%);
            otherwise keep only the held-out uids (about 5%).

    Returns:
        The selected uids, in their original order.
    """
    selected = []
    for candidate in uids:
        held_out = cityhash.CityHash64(candidate) % 20 == 0
        # Held-out uids are kept only when train is false, and vice versa.
        if held_out != train:
            selected.append(candidate)
    return selected
for i in cond]), full))).intdigest() print(f'03: {time.time() - t0}') # (4) xxh64 + json + inspect.getsource (64): t0 = time.time() for i in range(N): xxhash.xxh64( json.dumps( (attr, rel, str([inspect.getsource(i) for i in cond]), full), sort_keys=True)).intdigest() print(f'04: {time.time() - t0}') # (5) cityhash + str + inspect.getsource (64): t0 = time.time() for i in range(N): cityhash.CityHash64( str((attr, rel, str([inspect.getsource(i) for i in cond]), full))) print(f'05: {time.time() - t0}') # (6) cityhash + json + inspect.getsource (64): t0 = time.time() for i in range(N): cityhash.CityHash64( json.dumps( (attr, rel, str([inspect.getsource(i) for i in cond]), full), sort_keys=True)) print(f'06: {time.time() - t0}') # (7) murmur3 + str + inspect.getsource (32): t0 = time.time() for i in range(N): mmh3.hash(str(
def h(x):
    """Hash an arbitrary object with CityHash64.

    The object is first converted to text with ``repr`` and that text is
    hashed, so two objects hash alike exactly when their ``repr`` strings
    match. Can be used to construct features.
    """
    representation = repr(x)
    return cityhash.CityHash64(representation)
def _get_hashed_key(key):
    """Return the CityHash64 hash (int) of ``key``.

    Text and bytes keys are hashed as given; any other key is converted
    with ``str`` before hashing.
    """
    is_hashable_as_is = isinstance(key, (six.string_types, bytes))
    payload = key if is_hashable_as_is else str(key)
    return cityhash.CityHash64(payload)
def tag_embed_idx(tag: str) -> int:
    """Map ``tag`` to an embedding index by hashing it.

    The index is the tag's CityHash64 value modulo ``TAG_EMBEDDING_SIZE``,
    so distinct tags may share an index.
    """
    tag_hash = cityhash.CityHash64(tag)
    return tag_hash % TAG_EMBEDDING_SIZE
# NOTE(review): this chunk begins mid-script. The statement below completes a
# step that starts above this view (collecting entries into files_set);
# re-indent it to match its enclosing block if there is one.
files_set.add(buf)

total = 0      # number of keys generated over all entries
pkt_size = 0   # running sum of the second field of every entry

# Drain files_set, expanding each entry into numbered keys and placing every
# key into the slot chosen by its CityHash64 value.
while files_set:
    buf = files_set.pop()
    # Entries are space-separated; field 0 is used as the name and field 1
    # as an integer size.
    pkt = buf.split(" ")
    pkt_size = pkt_size + int(pkt[1])
    # Floor division keeps the bound an int on Python 3 as well
    # (range() rejects floats); on Python 2 the result is unchanged.
    for j in range(0, int(pkt[1]) // PKT_NUM + 1):
        total = total + 1
        key = "/D1/" + pkt[0] + "-" + str(j)
        addr = cityhash.CityHash64(key) % SLOT_NUM
        if addr in dram_dict:
            # files is the list of keys already stored in this slot.
            files = dram_dict[addr]
            if key not in files:
                if len(files) == FILE_NUM:
                    # Slot already holds FILE_NUM keys: count a collision
                    # and do not store the key.
                    collision_cnt = collision_cnt + 1
                else:
                    files.append(key)
        else:
            # First key hashing to this slot.
            files = [key]
            dram_dict[addr] = files