예제 #1
0
파일: dataset.py 프로젝트: d4l3k/ourgraph
def filter_uids(uids: List[str], train: bool) -> List[str]:
    """
    Select either the training share or the held-out share of uids.

    A uid belongs to the held-out bucket when its CityHash64 value is
    divisible by 20 (roughly 5% of uids for a well-spread hash). When
    train is false only those uids are returned; when train is true only
    the remaining uids (roughly 95%) are returned. The bucket depends
    solely on the uid's hash, so a given uid always lands on the same side.
    Input order is preserved.
    """
    selected = []
    for uid in uids:
        held_out = cityhash.CityHash64(uid) % 20 == 0
        # Keep the uid when its bucket disagrees with the train flag:
        # held-out uids for train=False, all others for train=True.
        if held_out != train:
            selected.append(uid)
    return selected
예제 #2
0
                             for i in cond]), full))).intdigest()
print(f'03: {time.time() - t0}')

# (4) xxh64 + json + inspect.getsource (64):
# Benchmark: time N rounds of building a key and hashing it with xxh64.
# The key is the JSON encoding of the tuple (attr, rel, <str() of the list of
# source texts of every object in cond>, full). The key is rebuilt inside the
# loop, so the getsource/str/json cost is part of the measured time.
# sort_keys=True only has an effect on dicts nested inside the tuple.
# NOTE(review): the trailing "(64)" presumably denotes a 64-bit digest — confirm.
t0 = time.time()
for i in range(N):
    xxhash.xxh64(
        json.dumps(
            (attr, rel, str([inspect.getsource(i) for i in cond]), full),
            sort_keys=True)).intdigest()
# Report the elapsed wall-clock time in seconds under label "04".
print(f'04: {time.time() - t0}')

# (5) cityhash + str + inspect.getsource (64):
# Benchmark: time N rounds of building a key and hashing it with CityHash64.
# The key is str() of the tuple (attr, rel, <str() of the list of source
# texts of every object in cond>, full). The key is rebuilt inside the loop,
# so the getsource/str cost is part of the measured time. The hash value
# itself is discarded; only the timing is used.
t0 = time.time()
for i in range(N):
    cityhash.CityHash64(
        str((attr, rel, str([inspect.getsource(i) for i in cond]), full)))
# Report the elapsed wall-clock time in seconds under label "05".
print(f'05: {time.time() - t0}')

# (6) cityhash + json + inspect.getsource (64):
# Benchmark: time N rounds of building a key and hashing it with CityHash64.
# The key is the JSON encoding of the tuple (attr, rel, <str() of the list of
# source texts of every object in cond>, full). The key is rebuilt inside the
# loop, so the getsource/str/json cost is part of the measured time.
# sort_keys=True only has an effect on dicts nested inside the tuple.
# The hash value itself is discarded; only the timing is used.
t0 = time.time()
for i in range(N):
    cityhash.CityHash64(
        json.dumps(
            (attr, rel, str([inspect.getsource(i) for i in cond]), full),
            sort_keys=True))
# Report the elapsed wall-clock time in seconds under label "06".
print(f'06: {time.time() - t0}')

# (7) murmur3 + str + inspect.getsource (32):
t0 = time.time()
for i in range(N):
    mmh3.hash(str(
예제 #3
0
def h(x):
    """
    Return the CityHash64 of an arbitrary object's repr().

    The object is first rendered with repr() and that text is hashed, so
    two objects hash alike exactly when their repr() strings match.
    Intended for building hashed features.
    """
    rendered = repr(x)
    return cityhash.CityHash64(rendered)
예제 #4
0
파일: shuffle.py 프로젝트: srkm009/datasets
def _get_hashed_key(key):
    """Return the CityHash64 value (an int) of the given key.

    Text and bytes keys are hashed as they are; any other key is converted
    with str() first and the resulting text is hashed.
    """
    already_hashable = isinstance(key, (six.string_types, bytes))
    hash_input = key if already_hashable else str(key)
    return cityhash.CityHash64(hash_input)
예제 #5
0
def tag_embed_idx(tag: str) -> int:
    """Return the embedding-table index for a tag.

    The index is the tag's CityHash64 value reduced modulo
    TAG_EMBEDDING_SIZE, so the same tag always maps to the same index and
    distinct tags may share one.
    """
    tag_hash = cityhash.CityHash64(tag)
    return tag_hash % TAG_EMBEDDING_SIZE
예제 #6
0
        files_set.add(buf)
        # files_set.append(buf)

# Place every chunk key of every record into a hashed slot table
# (dram_dict: slot address -> list of chunk keys) and count the chunks that
# cannot be stored because their slot already holds FILE_NUM keys.
#
# Each entry popped from files_set is a space-separated record: field 0 is
# used as the identifier inside the chunk key and field 1 is parsed as an
# integer size.
# NOTE(review): presumably records are "<file name> <size>" — confirm against
# the code that fills files_set.
total = 0  # number of chunk keys generated over all records
pkt_size = 0  # running sum of the size field of all records
while files_set:
    buf = files_set.pop()
    pkt = buf.split(" ")
    pkt_size = pkt_size + int(pkt[1])
    # Floor division keeps the chunk count an integer on Python 3 too; with
    # "/" range() would be handed a float there and raise TypeError.
    # NOTE(review): a size that is an exact multiple of PKT_NUM still yields
    # one extra chunk — confirm this is intended.
    for j in range(0, int(pkt[1]) // PKT_NUM + 1):
        total = total + 1
        key = "/D1/" + pkt[0] + "-" + str(j)
        # Map the chunk key onto one of SLOT_NUM slots.
        addr = cityhash.CityHash64(key) % SLOT_NUM
        # "in" replaces dict.has_key(), which no longer exists on Python 3.
        if addr in dram_dict:
            # files refers to the list stored in the dict, so appending to
            # it updates the slot in place.
            files = dram_dict[addr]
            if key not in files:
                if len(files) == FILE_NUM:
                    # Slot is full: the chunk is dropped and counted.
                    collision_cnt = collision_cnt + 1
                else:
                    files.append(key)
        else:
            # First key hashed to this slot: create its list.
            files = [key]
            dram_dict[addr] = files