def run_gpu_inference(topk=100, index_file=INDEX_FILE, ef_search=300):
    """Benchmark GPU k-NN search and report recall against ground truth.

    Loads the "test" queries and the precomputed "neighbors" ground truth
    from the HDF5 dataset, runs `CuHNSW.search_knn` on the loaded index,
    and logs the elapsed time plus the mean/std of per-query recall@topk.

    Returns:
        (elapsed_seconds, mean_recall)
    """
    print("=" * BARRIER_SIZE)
    data_path = pjoin(RES_DIR, DATA_FILE)
    index_path = pjoin(RES_DIR, index_file)
    LOGGER.info("gpu inference on %s with index %s", data_path, index_path)
    hnsw = CuHNSW(OPT)
    LOGGER.info("load model from %s by cuhnsw", index_path)
    hnsw.load_index(index_path)
    with h5py.File(data_path, "r") as h5f:
        queries = h5f["test"][:, :].astype(np.float32)
        neighbors = h5f["neighbors"][:, :topk].astype(np.int32)
    num_queries = queries.shape[0]
    # normalize queries to unit length before searching
    queries /= np.linalg.norm(queries, axis=1)[:, None]
    start = time.time()
    pred_nn, _, _ = hnsw.search_knn(queries, topk, ef_search)
    elapsed = time.time() - start
    LOGGER.info("elapsed for inferencing %d queries of top@%d (ef_search: %d): "
                "%.4e sec", num_queries, topk, ef_search, elapsed)
    # recall@topk per query: fraction of ground-truth neighbors recovered
    accs = [len(set(pred) & set(truth)) / float(topk)
            for pred, truth in zip(pred_nn, neighbors)]
    LOGGER.info("accuracy mean: %.4e, std: %.4e", np.mean(accs), np.std(accs))
    return elapsed, np.mean(accs)
def run_gpu_inference2(topk=5, index_file="cuhnsw.index", ef_search=300):
    """Spot-check GPU search: print top-k neighbors of the first 5 queries.

    For each of the first five test vectors, prints the neighbors and
    distances returned by the CuHNSW index alongside a distance recomputed
    directly from the raw "train" data, so the two can be compared by eye.

    Args:
        topk: number of neighbors to retrieve per query.
        index_file: index file name under RES_DIR.
        ef_search: HNSW search-time candidate-list size.

    Raises:
        ValueError: if DIST_TYPE is neither "l2" nor "dot".
    """
    print("=" * BARRIER_SIZE)
    data_path = pjoin(RES_DIR, DATA_FILE)
    index_path = pjoin(RES_DIR, index_file)
    LOGGER.info("gpu inference on %s with index %s", data_path, index_path)
    ch0 = CuHNSW(OPT)
    LOGGER.info("load model from %s by cuhnsw", index_path)
    ch0.load_index(index_path)
    h5f = h5py.File(data_path, "r")
    data = h5f["train"][:, :].astype(np.float32)
    queries = h5f["test"][:5, :].astype(np.float32)
    h5f.close()
    if NRZ:
        data /= np.linalg.norm(data, axis=1)[:, None]
    nns, distances, found_cnt = ch0.search_knn(queries[:5], topk, ef_search)
    for idx, (nn0, distance, cnt) in \
            enumerate(zip(nns, distances, found_cnt)):
        print("=" * BARRIER_SIZE)
        print(f"query {idx + 1}")
        print("-" * BARRIER_SIZE)
        # only the first `cnt` slots of each row hold valid results
        for _idx, (_nn, _dist) in enumerate(zip(nn0[:cnt], distance[:cnt])):
            if DIST_TYPE == "l2":
                real_dist = np.linalg.norm(data[_nn] - queries[idx])
                # lib appears to return squared L2 — sqrt to match
                # np.linalg.norm; TODO confirm against CuHNSW docs
                _dist = np.sqrt(_dist)
            elif DIST_TYPE == "dot":
                real_dist = data[_nn].dot(queries[idx])
            else:
                # previously fell through to a NameError on real_dist below;
                # fail fast with a clear message instead
                raise ValueError(f"unknown DIST_TYPE: {DIST_TYPE}")
            print(f"rank {_idx + 1}. neighbor: {_nn}, dist by lib: {_dist}, "
                  f"actual dist: {real_dist}")
def run_gpu_inference_large(topk=100, index_file=INDEX_FILE, ef_search=300,
                            num_queries=1000000, num_dims=50):
    """Stress-test GPU k-NN search throughput on random Gaussian queries.

    Generates `num_queries` random unit-normalized vectors of `num_dims`
    dimensions, runs `search_knn` against the loaded index, and logs the
    elapsed wall-clock time. Results are discarded — only timing matters.

    Args:
        topk: number of neighbors to retrieve per query.
        index_file: index file name under RES_DIR.
        ef_search: HNSW search-time candidate-list size.
        num_queries: number of synthetic query vectors to generate.
        num_dims: dimensionality of each query vector (must match the index).

    Returns:
        Elapsed search time in seconds.
    """
    print("=" * BARRIER_SIZE)
    index_path = pjoin(RES_DIR, index_file)
    data_path = pjoin(RES_DIR, DATA_FILE)
    LOGGER.info("gpu inference on %s with index %s", data_path, index_path)
    ch0 = CuHNSW(OPT)
    LOGGER.info("load model from %s by cuhnsw", index_path)
    ch0.load_index(index_path)
    queries = np.random.normal(size=(num_queries, num_dims)).astype(np.float32)
    # note: dropped the dead `num_queries = queries.shape[0]` reassignment —
    # queries is built with exactly num_queries rows from the parameter
    queries /= np.linalg.norm(queries, axis=1)[:, None]
    start = time.time()
    _, _, _ = ch0.search_knn(queries, topk, ef_search)
    el0 = time.time() - start
    LOGGER.info("elapsed for inferencing %d queries of top@%d (ef_search: %d): "
                "%.4e sec", num_queries, topk, ef_search, el0)
    return el0