class N2(BaseANN):
    def __init__(self, m):
        threads = 8
        self.name = 'N2(m={}, threads={})'.format(m, threads)
        self._m = m
        self._threads = threads
        self._index = None
        print("Init done")

    def fit(self, X):
        X = numpy.array(X)
        X = X.astype(numpy.float32)
        self._index = HnswIndex(X.shape[1], "L2")
        print("Shape", X.shape[1])
        for el in X:
            self._index.add_data(el)
        self._index.build(m=self._m, n_threads=self._threads)
        print("Fit done")

    def query(self, v, n):
        v = v.astype(numpy.float32)
        nns = self._index.search_by_vector(v, n)
        return nns

    def use_threads(self):
        return False
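# A minimal, hedged usage sketch for the wrapper above (not part of the
# original harness): fit on a random float32 dataset and run one query.
# numpy and HnswIndex are assumed imported as in the surrounding module.
import numpy

data = numpy.random.rand(1000, 20).astype(numpy.float32)
algo = N2(m=16)
algo.fit(data)
print(algo.query(data[0], 10))  # ids of the 10 nearest neighbors of row 0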
def test01_most_similar(self):
    set_log_level(1)
    model = self.load_text8_model()
    index = HnswIndex(model.L0.shape[1])
    model.normalize('item')
    for f in model.L0:
        index.add_data(f)
    index.build(n_threads=4)
    index.save('n2.bin')
    par = ParW2V(model)

    model.opt.num_workers = 1
    all_keys = model._idmanager.itemids[::][:10000]
    start_t = time.time()
    [model.most_similar(k, topk=10) for k in all_keys]
    naive_elapsed = time.time() - start_t

    par.num_workers = 4
    start_t = time.time()
    par.most_similar(all_keys, topk=10, repr=True)
    par_elapsed = time.time() - start_t

    start_t = time.time()
    par.set_hnsw_index('n2.bin', 'item')
    par.most_similar(all_keys, topk=10, repr=True)
    ann_elapsed = time.time() - start_t

    self.assertTrue(naive_elapsed > par_elapsed * 1.5 > ann_elapsed * 5.0,
                    msg=f'{naive_elapsed} > {par_elapsed} > {ann_elapsed}')
    index.unload()
    os.remove('n2.bin')
def test01_small_invalid_dimension(self):
    index = HnswIndex(30)
    this_is_abnormal = False
    try:
        index.load(self.model_fname)
        this_is_abnormal = True
    except:
        pass
    finally:
        del index
    self.assertFalse(this_is_abnormal)
def test_search_by_vector(self):
    f = 3
    i = HnswIndex(f)
    i.add_data([0, 0, 1])
    i.add_data([0, 1, 0])
    i.add_data([1, 0, 0])
    i.build(max_m0=10, m=5)
    self.assertEqual(i.search_by_vector([3, 2, 1], 3), [2, 1, 0])
    self.assertEqual(i.search_by_vector([1, 2, 3], 3), [0, 1, 2])
    self.assertEqual(i.search_by_vector([2, 0, 1], 3), [2, 0, 1])
def test_search_by_vector(self):
    f = 2
    i = HnswIndex(f, 'L2')
    i.add_data([2, 2])
    i.add_data([3, 2])
    i.add_data([3, 3])
    i.build()
    self.assertEqual(i.search_by_vector([4, 4], 3), [2, 1, 0])
    self.assertEqual(i.search_by_vector([1, 1], 3), [0, 1, 2])
    self.assertEqual(i.search_by_vector([4, 2], 3), [1, 2, 0])
def test02_small_invalid_dimension2(self):
    index = HnswIndex(80)
    this_is_abnormal = False
    try:
        v = [random.gauss(0, 1) for z in xrange(100)]
        index.add_data(v)
        this_is_abnormal = True
    except:
        pass
    finally:
        del index
    self.assertFalse(this_is_abnormal)
def test_search_by_id(self):
    f = 3
    i = HnswIndex(f)
    i.add_data([2, 1, 0])
    i.add_data([1, 2, 0])
    i.add_data([0, 0, 1])
    i.build(max_m0=10)
    self.assertEqual(i.search_by_id(0, 3), [0, 1, 2])
    self.assertEqual(i.search_by_id(1, 3), [1, 0, 2])
    self.assertTrue(i.search_by_id(2, 3) in [[2, 0, 1], [2, 1, 0]])  # could be either
def test_search_by_id(self):
    f = 2
    i = HnswIndex(f, 'L2')
    i.add_data([2, 2])
    i.add_data([3, 2])
    i.add_data([3, 3])
    i.build()
    self.assertEqual(i.search_by_id(0, 3), [0, 1, 2])
    self.assertEqual(i.search_by_id(2, 3), [2, 1, 0])
@classmethod
def setUpClass(self):
    index = HnswIndex(self.dim)
    for i in xrange(self.data_num):
        v = [random.gauss(0, 1) for z in xrange(self.dim)]
        index.add_data(v)
    index.build(n_threads=12)
    index.save(self.model_fname)
def example2():
    log.set_log_level(log.INFO)
    als_option = ALSOption().get_default_option()
    data_option = MatrixMarketOptions().get_default_option()
    data_option.input.main = '../tests/ext/ml-20m/main'
    data_option.input.iid = '../tests/ext/ml-20m/iid'
    data_option.data.path = './ml20m.h5py'
    data_option.data.use_cache = True

    als = ALS(als_option, data_opt=data_option)
    als.initialize()
    als.train()
    als.normalize('item')
    als.build_itemid_map()

    print('Make item recommendation on als.ml20m.par.top10.tsv with Parallel(Thread=4)')
    par = ParALS(als)
    par.num_workers = 4
    all_items = als._idmanager.itemids
    start_t = time.time()
    with open('als.ml20m.par.top10.tsv', 'w') as fout:
        for idx in range(0, len(all_items), 128):
            topks, _ = par.most_similar(all_items[idx:idx + 128], repr=True)
            for q, p in zip(all_items[idx:idx + 128], topks):
                fout.write('%s\t%s\n' % (q, '\t'.join(p)))
    print('took: %.3f secs' % (time.time() - start_t))

    from n2 import HnswIndex
    index = HnswIndex(als.Q.shape[1])
    for f in als.Q:
        index.add_data(f)
    index.build(n_threads=4)
    index.save('ml20m.n2.index')
    index.unload()

    print('Make item recommendation on als.ml20m.ann.top10.tsv with Ann(Thread=4)')
    par.set_hnsw_index('ml20m.n2.index', 'item')
    par.num_workers = 4
    start_t = time.time()
    with open('als.ml20m.ann.top10.tsv', 'w') as fout:
        for idx in range(0, len(all_items), 128):
            topks, _ = par.most_similar(all_items[idx:idx + 128], repr=True)
            for q, p in zip(all_items[idx:idx + 128], topks):
                fout.write('%s\t%s\n' % (q, '\t'.join(p)))
    print('took: %.3f secs' % (time.time() - start_t))
def fit(self, X):
    if self._metric == 'euclidean':
        self._n2 = HnswIndex(X.shape[1], 'L2')
    else:
        self._n2 = HnswIndex(X.shape[1])

    if os.path.exists(self._index_name):
        n2_logger.info("Loading index from file")
        self._n2.load(self._index_name, use_mmap=False)
        return

    n2_logger.debug("Create Index")
    for i, x in enumerate(X):
        self._n2.add_data(x)
    self._n2.build(m=self._m, max_m0=self._m0,
                   ef_construction=self._ef_construction,
                   n_threads=self._n_threads)
    self._n2.save(self._index_name)
class N2(BaseANN):
    def __init__(self, m, ef_construction, n_threads, ef_search, metric, batch):
        self.name = "N2_M%d_efCon%d_n_thread%s_efSearch%d%s" % (
            m, ef_construction, n_threads, ef_search, '_batch' if batch else '')
        self._m = m
        self._m0 = m * 2
        self._ef_construction = ef_construction
        self._n_threads = n_threads
        self._ef_search = ef_search
        self._index_name = os.path.join(
            CACHE_DIR, "index_n2_%s_M%d_efCon%d_n_thread%s" % (
                args.dataset, m, ef_construction, n_threads))
        self._metric = metric

    def fit(self, X):
        if self._metric == 'euclidean':
            self._n2 = HnswIndex(X.shape[1], 'L2')
        elif self._metric == 'dot':
            self._n2 = HnswIndex(X.shape[1], 'dot')
        else:
            self._n2 = HnswIndex(X.shape[1])

        if os.path.exists(self._index_name):
            n2_logger.info("Loading index from file")
            self._n2.load(self._index_name, use_mmap=False)
            return

        n2_logger.info("Create Index")
        for i, x in enumerate(X):
            self._n2.add_data(x)
        self._n2.build(m=self._m, max_m0=self._m0,
                       ef_construction=self._ef_construction,
                       n_threads=self._n_threads)
        self._n2.save(self._index_name)

    def query(self, v, n):
        return self._n2.search_by_vector(v, n, self._ef_search)

    def batch_query(self, X, n):
        self.b_res = self._n2.batch_search_by_vectors(X, n, self._ef_search, self._n_threads)

    def get_batch_results(self):
        return self.b_res

    def __str__(self):
        return self.name
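# Hedged sketch (not from the original benchmark): exercising the wrapper
# above end to end, including the threaded batch path. CACHE_DIR and
# args.dataset must exist in the harness for the index-file name to resolve;
# the random dataset is a stand-in.
import numpy as np

X = np.random.rand(5000, 32).astype(np.float32)
algo = N2(m=12, ef_construction=100, n_threads=4, ef_search=50,
          metric='euclidean', batch=True)
algo.fit(X)
print(algo.query(X[0], 10))           # single query: ids of the top 10
algo.batch_query(X[:64], 10)          # batch query across 4 threads
print(len(algo.get_batch_results()))  # 64 result lists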
def fit(self, X):
    if self._metric == 'euclidean':
        self._n2 = HnswIndex(X.shape[1], 'L2')
    else:
        self._n2 = HnswIndex(X.shape[1])

    if os.path.exists(self._index_name):
        n2_logger.info("Loading index from file")
        self._n2.load(self._index_name)
    else:
        n2_logger.info("Index file does not exist: {0}".format(self._index_name))
        n2_logger.info("Start fitting")
        for i, x in enumerate(X):
            self._n2.add_data(x.tolist())
        self._n2.build(m=self._m, max_m0=self._m0,
                       ef_construction=self._ef_construction,
                       n_threads=self._n_threads)
        self._n2.save(self._index_name)
class N2(BaseANN):
    def __init__(self, m):
        threads = 8
        self.name = 'N2(m={}, threads={})'.format(m, threads)
        self._m = m
        self._threads = threads
        self._index = None

    def fit(self, X):
        X = numpy.array(X)
        X = X.astype(numpy.float32)
        self._index = HnswIndex(X.shape[1], "L2")
        for el in X:
            self._index.add_data(el)
        self._index.build(m=self._m, n_threads=self._threads)

    def query(self, v, n):
        v = v.astype(numpy.float32)
        nns = self._index.search_by_vector(v, n)
        return nns
def build_n2(self):
    t = self.tfs_by_doc
    all_words = []
    mapper = {'from_hnsw': {}, 'from_doc_id': {}}

    # build all_words
    for doc_id in t.keys():
        for word in t[doc_id].keys():
            if word not in all_words:
                all_words.append(word)
    col_len = len(all_words)

    hnsw = HnswIndex(dimension=col_len, metric='angular')
    for h_idx, doc_id in enumerate(tqdm(list(t.keys()), desc="Build N2 Search Space")):
        assert h_idx not in mapper['from_hnsw']
        mapper['from_hnsw'][h_idx] = doc_id
        mapper['from_doc_id'][doc_id] = h_idx
        parchment = np.zeros(col_len, dtype=np.uint16)
        for word, count in t[doc_id].items():
            word_idx = all_words.index(word)
            parchment[word_idx] = count
        hnsw.add_data(parchment)
    hnsw.build(n_threads=4)
    self.n2 = {'hnsw': hnsw, 'mapper': mapper, 'all_words': all_words}
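# Hedged companion sketch: a hypothetical similar_docs method (not in the
# original class) that queries the structure built above, translating through
# the two mapper directions used in build_n2.
def similar_docs(self, doc_id, topn=10):
    h_idx = self.n2['mapper']['from_doc_id'][doc_id]
    # ask for one extra neighbor since the query document comes back first
    neighbors = self.n2['hnsw'].search_by_id(h_idx, topn + 1)
    return [self.n2['mapper']['from_hnsw'][h] for h in neighbors if h != h_idx][:topn]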
def gen_item_index(model):
    article_embedding_matrix = model.get_layer('E-Article').get_weights()[0]
    embedding_size = article_embedding_matrix.shape[1]
    index = HnswIndex(embedding_size)
    for embedding in article_embedding_matrix:
        index.add_data(embedding)
    index.build(n_threads=4)

    article_to_id = load_data('article_to_id')
    id_to_article = {v: k for k, v in article_to_id.items()}

    def most_similar(item, topn=100, threshold=0.3):
        # look up the item's index id via article_to_id; the original used
        # id_to_article here, which is keyed by ids and would never match
        if item not in article_to_id:
            return []
        output = []
        iid = article_to_id[item]
        for tiid in [e[0] for e in index.search_by_id(iid, topn * 2, include_distances=True)
                     if e[1] < threshold][1:]:
            target_item = id_to_article[tiid]
            output.append(target_item)
            if len(output) == topn:
                break
        return output

    return most_similar
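# Hedged usage sketch for the factory above: build the closure once, then
# query per article key. `model` is the trained Keras model it expects, and
# the article key shown is hypothetical.
most_similar = gen_item_index(model)
print(most_similar('some_article_key', topn=5))  # hypothetical key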
class N2(BaseANN):
    def __init__(self, m, ef_construction, n_threads, ef_search, metric):
        self._m = m
        self._m0 = m * 2
        self._ef_construction = ef_construction
        self._n_threads = n_threads
        self._ef_search = ef_search
        self._index_name = os.path.join(
            INDEX_DIR, "youtube_n2_M%d_efCon%d_n_thread%s" % (m, ef_construction, n_threads))
        self.name = "N2_M%d_efCon%d_n_thread%s_efSearch%d" % (
            m, ef_construction, n_threads, ef_search)
        self._metric = metric

        d = os.path.dirname(self._index_name)
        if not os.path.exists(d):
            os.makedirs(d)

    def fit(self, X):
        from n2 import HnswIndex
        if self._metric == 'euclidean':
            self._n2 = HnswIndex(X.shape[1], 'L2')
        else:
            self._n2 = HnswIndex(X.shape[1])

        if os.path.exists(self._index_name):
            logging.debug("Loading index from file")
            self._n2.load(self._index_name)
        else:
            logging.debug("Index file does not exist: {0}".format(self._index_name))
            logging.debug("Start fitting")
            for i, x in enumerate(X):
                self._n2.add_data(x.tolist())
            self._n2.build(m=self._m, max_m0=self._m0,
                           ef_construction=self._ef_construction,
                           n_threads=self._n_threads)
            self._n2.save(self._index_name)

    def query(self, v, n):
        return self._n2.search_by_vector(v.tolist(), n, self._ef_search)

    def __str__(self):
        return self.name
def get_user_embeddings(user_model, seens_total, user_list, batch_size=10000):
    inputs = [[], [], [], [], []]
    includes = []
    user_embeddings = {}
    for user, seens in tqdm(seens_total.items(), desc='user embedding'):
        seens = seens_total[user]
        if seens:
            includes.append(user)
            sequence_info = get_sequential_feature(user, seens['articles'], seens['ages'],
                                                   data_type='test', random_range=False,
                                                   random_sample_length=False, positive=True)
            article_sequence, magazine_sequence, author_sequence, user_feature_sequence, target_age, target = sequence_info
            search_keyword_sequence = get_search_keyword_feature(user)
            inputs[0].append(article_sequence)
            inputs[1].append(magazine_sequence)
            inputs[2].append(author_sequence)
            inputs[3].append(user_feature_sequence)
            inputs[4].append(search_keyword_sequence)
    inputs = [np.asarray(x) for x in inputs]
    predicts = user_model.predict(inputs, batch_size=batch_size)

    user_index = HnswIndex(200)
    for embedding in predicts:
        user_index.add_data(embedding)
    user_index.build(n_threads=multiprocessing.cpu_count())

    user_to_id = {user: i for i, user in enumerate(includes)}
    id_to_user = {v: k for k, v in user_to_id.items()}
    for user in user_list:
        if user in user_to_id:
            user_embeddings[user] = predicts[user_to_id[user]]
        else:
            user_embeddings[user] = None

    def most_similar(user, topn=100, threshold=0.3):
        if user not in user_to_id:
            return []
        output = []
        uid = user_to_id[user]
        for tuid in [e[0] for e in user_index.search_by_id(uid, topn * 2, include_distances=True)
                     if e[1] < threshold][1:]:
            target_user = id_to_user[tuid]
            output.append(target_user)
            if len(output) == topn:
                break
        return output

    return user_embeddings, most_similar
def find_edges(input, test, K):
    print(f"building kNN classifier ... ", end=" ")
    st_time = time.time()
    if kNN_type <= 3:
        input, test = input.todense(), test.todense()

    if kNN_type == 1:
        from sklearn.neighbors import NearestNeighbors
        tree = NearestNeighbors(n_neighbors=K + 1, algorithm='ball_tree').fit(input)
    elif kNN_type == 2:
        from scipy import spatial
        tree = spatial.KDTree(input)
    elif kNN_type == 3:
        from n2 import HnswIndex
        tree = HnswIndex(input.shape[1], distance_type)  # distance_type in ['angular', 'L2']
        for index in tqdm(range(input.shape[0])):
            tree.add_data(input[index, :])
        tree.build(n_threads=10)
    elif kNN_type == 4:
        import pysparnn.cluster_index as ci
        input_num = input.shape[0]
        tree = ci.MultiClusterIndex(input, range(input_num))
    else:
        raise NotImplementedError
    print(f"time={time.time()-st_time:.3f}s")

    print("finding indices ... ", end=" ")
    if kNN_type == 1:
        _, indices = tree.kneighbors(test)
    elif kNN_type == 2:
        _, indices = tree.query(test, k=K + 1)
    elif kNN_type == 3:
        indices = []
        for i in tqdm(range(test.shape[0])):
            indices.append(tree.search_by_vector(test[i, :], k=K + 1))
    else:
        indices = tree.search(test, k=K + 1, k_clusters=100, return_distance=False)
    print(f"time={time.time()-st_time:.3f}s")

    edge_list = []
    for index1, per in enumerate(indices):
        for index2 in per:
            index2 = int(index2)
            if index1 != index2:
                edge_list.append((index1, index2))
    print(f"done! .... time={time.time()-st_time:.3f}s")
    return edge_list
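# Hedged usage sketch for find_edges: the module-level kNN_type and
# distance_type globals select the backend, as in the function above. The
# sparse matrices here are stand-ins for the real feature matrices.
import numpy as np
import scipy.sparse as sp

kNN_type, distance_type = 3, 'L2'  # use the n2 HNSW backend
input = sp.csr_matrix(np.random.rand(200, 16))
test = sp.csr_matrix(np.random.rand(50, 16))
edges = find_edges(input, test, K=5)
print(len(edges))  # up to 50 * (K + 1) directed (test, input) pairs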
def test_large_index(self):
    # Generate pairs of random points where the pair is super close
    f = 10
    # q = [random.gauss(0, 10) for z in xrange(f)]
    i = HnswIndex(f, 'L2')
    for j in xrange(0, 10000, 2):
        p = [random.gauss(0, 1) for z in xrange(f)]
        x = [1 + pi + random.gauss(0, 1e-2) for pi in p]  # todo: should be q[i]
        y = [1 + pi + random.gauss(0, 1e-2) for pi in p]
        i.add_data(x)
        i.add_data(y)
    i.build()
    for j in xrange(0, 10000, 2):
        self.assertEqual(i.search_by_id(j, 2), [j, j + 1])
        self.assertEqual(i.search_by_id(j + 1, 2), [j + 1, j])
def kNN(matrix: np.ndarray, k: int) -> np.ndarray:
    index = HnswIndex(matrix.shape[1], 'L2')
    for sample in matrix:
        index.add_data(sample)
    index.build(m=32, max_m0=48, ef_construction=int(k * 1.1), n_threads=cpu_count())

    result = []
    for i in range(0, matrix.shape[0]):
        results = index.search_by_id(i, k, include_distances=True)
        result.append(np.mean(np.sqrt(np.array([dist for _, dist in results]))))
    return np.sort(result)
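# Hedged usage sketch: kNN above returns the sorted mean distance from each
# point to its k nearest neighbors, which can serve as an isolation score;
# the random dataset is a stand-in.
import numpy as np

points = np.random.rand(1000, 8).astype(np.float32)
scores = kNN(points, k=10)
print(scores[-10:])  # the 10 most isolated points by mean-kNN distance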
def test04_batch_search_by_ids(self):
    index = HnswIndex(self.dim)
    index.load(self.model_fname)
    T = [random.randrange(0, self.data_num) for y in xrange(100)]
    batch_res = index.batch_search_by_ids(T, 10, num_threads=12, include_distances=True)
    normal_res = [index.search_by_id(t, 10, include_distances=True) for t in T]
    self.assertEqual(batch_res, normal_res)
def test04_batch_search_by_vectors(self):
    index = HnswIndex(self.dim)
    index.load(self.model_fname)
    T = [[random.gauss(0, 1) for z in xrange(self.dim)] for y in xrange(100)]
    batch_res = index.batch_search_by_vectors(T, 10, num_threads=12, include_distances=True)
    normal_res = [index.search_by_vector(t, 10, include_distances=True) for t in T]
    self.assertEqual(batch_res, normal_res)
def test03_small_add_data_after_loading(self):
    index = HnswIndex(self.dim)
    index.load(self.model_fname)
    this_is_abnormal = False
    try:
        v = [random.gauss(0, 1) for z in xrange(self.dim)]
        index.add_data(v)
        this_is_abnormal = True
    except:
        pass
    finally:
        del index
    self.assertFalse(this_is_abnormal)
def precision(self, n, n_trees=10, n_points=10000, n_rounds=10):
    found = 0
    for r in xrange(n_rounds):
        # create random points at distance x from (1000, 0, 0, ...)
        f = 10
        i = HnswIndex(f, 'L2')
        for j in xrange(n_points):
            p = [random.gauss(0, 1) for z in xrange(f - 1)]
            norm = sum([pi ** 2 for pi in p]) ** 0.5
            x = [1000] + [pi / norm * j for pi in p]
            i.add_data(x)
        i.build()

        nns = i.search_by_vector([1000] + [0] * (f - 1), n)
        self.assertEqual(nns, sorted(nns))  # should be in order
        # The number of gaps should be equal to the last item minus n-1
        found += len([_x for _x in nns if _x < n])
    return 1.0 * found / (n * n_rounds)
from n2 import HnswIndex
import random

f = 3
t = HnswIndex(f)  # HnswIndex(f, "L2 or angular")
for i in xrange(1000):
    v = [random.gauss(0, 1) for z in xrange(f)]
    t.add_data(v)

t.build(m=5, max_m0=10, n_threads=4)
t.save('test.n2')

u = HnswIndex(f, "angular")
u.load('test.n2')

search_id = 1
k = 3
neighbor_ids = u.search_by_id(search_id, k)
print("[search_by_id]: Nearest neighborhoods of id {}: {}".format(search_id, neighbor_ids))

example_vector_query = [random.gauss(0, 1) for z in xrange(f)]
nns = u.search_by_vector(example_vector_query, k, include_distances=True)
print("[search_by_vector]: Nearest neighborhoods of vector {}: {}".format(example_vector_query, nns))
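# Hedged follow-up to the example above: the same saved index also answers
# bulk queries; batch_search_by_ids mirrors search_by_id with worker threads,
# as used elsewhere in this corpus.
batch_ids = [0, 1, 2, 3]
batch_nns = u.batch_search_by_ids(batch_ids, k, num_threads=4)
print("[batch_search_by_ids]: {}".format(batch_nns))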
def _buffalo(algo_name, database):
    repeat = 3
    options = {'als': {'num_workers': 4,
                       'compute_loss_on_training': False,
                       'd': 32,
                       'num_iters': 10},
               'bpr': {'num_workers': 4,
                       'compute_loss_on_training': False,
                       'd': 32,
                       'num_iters': 100},
               }
    opt = options[algo_name]

    # linear
    if algo_name == 'als':
        PAR = ParALS
        model = BuffaloLib().als(database, return_instance_before_train=True, **opt)
    elif algo_name == 'bpr':
        PAR = ParBPRMF
        model = BuffaloLib().bpr(database, return_instance_before_train=True, **opt)
    model.train()
    model.build_itemid_map()
    model.normalize('item')

    # parallel
    par = PAR(model)

    # ann
    index = HnswIndex(model.P.shape[1])
    for f in model.P:
        index.add_data(f)
    index.build(n_threads=4)
    index.save('bm_n2.bin')
    ann = PAR(model)
    ann.set_hnsw_index('bm_n2.bin', 'item')

    total_queries = 10000
    keys = model._idmanager.itemids[::][:total_queries]
    print('Total queries: %s' % len(keys))
    results = {}
    nn_opts = {'topk': 10}
    for p, m in [('S', model), ('P', par), ('A', ann)]:
        results[p] = {}
        opt = nn_opts.copy()
        if not isinstance(m, PAR):
            opt['iterable'] = keys
        for num_workers in [1, 2, 4]:
            if isinstance(m, PAR):
                m.num_workers = num_workers
            else:
                m.opt.num_workers = num_workers
            opt['model'] = m
            elapsed, memory_usage = _get_elapsed_time('most_similar', keys, BuffaloLib(), repeat, **opt)
            s = elapsed / len(keys)
            results[p][f'S={num_workers}'] = s
            results[p][f'E={num_workers}'] = elapsed
            results[p][f'M={num_workers}'] = memory_usage['max']
            results[p][f'A={num_workers}'] = memory_usage['avg']
            results[p][f'B={num_workers}'] = memory_usage['min']
            print(f'{p}M={num_workers} {elapsed} {memory_usage}')
    return results
def find_edges(input, test, K):
    print(f"\tbuilding kNN classifier ... ", end=" ")
    st_time = time.time()
    if kNN_type in [1, 2]:
        input, test = input.todense(), test.todense()

    if kNN_type == 1:
        from sklearn.neighbors import NearestNeighbors
        tree = NearestNeighbors(n_neighbors=K + 1, algorithm='ball_tree').fit(input)
    elif kNN_type == 2:
        from scipy import spatial
        tree = spatial.KDTree(input)
    elif kNN_type == 3:
        from n2 import HnswIndex
        tree = HnswIndex(input.shape[1], distance_type)  # distance_type in ['angular', 'L2']
        for index in tqdm(range(input.shape[0])):
            tree.add_data(input[index, :])
        tree.build(n_threads=20)
    elif kNN_type == 4:
        import pysparnn.cluster_index as ci
        input_num = input.shape[0]
        tree = ci.MultiClusterIndex(input, range(input_num))
    elif kNN_type == 5:
        import nmslib
        M, efC = 30, 100
        index_time_params = {'M': M, 'indexThreadQty': num_threads, 'efConstruction': efC, 'post': 0}
        # https://github.com/nmslib/nmslib/blob/master/manual/spaces.md
        space_names = ['l2_sparse', 'cosinesimil_sparse']
        space_name = space_names[0]
        data_type = nmslib.DataType.SPARSE_VECTOR
        tree = nmslib.init(method='hnsw', space=space_name, data_type=data_type)
        '''
        def calc_zero_rows(i):
            if input[i, :].getnnz() == 0:
                return 1
            else:
                return 0
        pool = Pool(num_threads)
        zero_row_num = sum(pool.map(calc_zero_rows, range(input.shape[0])))
        print(f"# zero rows in input = {zero_row_num}", end=" ")
        '''
        tree.addDataPointBatch(input)
        tree.createIndex(index_time_params, print_progress=True)
        # Setting query-time parameters
        efS = 100
        query_time_params = {'efSearch': efS}
        print('Setting query-time parameters', query_time_params, end=" ")
        tree.setQueryTimeParams(query_time_params)
    else:
        raise NotImplementedError
    print(f"time={time.time()-st_time:.3f}s")

    print("\tfinding indices ... ", end=" ")
    if kNN_type == 1:
        _, indices = tree.kneighbors(test)
    elif kNN_type == 2:
        _, indices = tree.query(test, k=K + 1)
    elif kNN_type == 3:
        indices = []
        for i in tqdm(range(test.shape[0])):
            indices.append(tree.search_by_vector(test[i, :], k=K + 1))
    elif kNN_type == 4:
        indices = tree.search(test, k=K + 1, k_clusters=100, return_distance=False)
    elif kNN_type == 5:
        '''
        def calc_zero_rows2(i):
            if test[i, :].getnnz() == 0:
                return 1
            else:
                return 0
        pool = Pool(num_threads)
        zero_row_num = sum(pool.map(calc_zero_rows2, range(test.shape[0])))
        print(f"# zero rows in test = {zero_row_num}")
        '''
        indices_ = tree.knnQueryBatch(test, k=K + 1, num_threads=num_threads)
        indices = [i[0] for i in indices_]
        del indices_
    else:
        raise NotImplementedError
    print(f"time={time.time()-st_time:.3f}s")

    edge_list = []
    for index1, per in enumerate(indices):
        assert len(per) == K + 1, f"index1={index1} len(per)={len(per)} != K={K}"
        for index2 in per:
            index2 = int(index2)
            if index1 != index2:
                edge_list.append((index1, index2))
    print(f"\tget edges done! .... time={time.time()-st_time:.3f}s")
    return edge_list
f.close()
fg.close()
fq.close()
ff.close()

# N2
f = open('n2_results.txt', 'a')
fg = open('n2_fg.txt', 'a')
fq = open('n2_fq.txt', 'a')
ff = open('n2_ff.txt', 'a')
M_vec = [4, 8, 12, 16, 24, 36, 48, 64, 96]
_k = [10, 20, 40, 80, 120, 200, 400, 600, 800]
for M in M_vec:
    break  # the loop body below is disabled in the original
    start_graph = time.time()
    t = HnswIndex(128)
    for i in range(len(xb)):
        t.add_data(xb[i])
    t.build(m=M, ef_construction=500)
    end_graph = time.time()
    for kk in _k:
        print("M:", M, "kk:", kk)
        start_query = time.time()
        accuracy = 0
        for i in range(len(xq)):
            ans = t.search_by_vector(xq[i], k, kk)
            for x in ans:
                if x in gt[i]:
                    accuracy += 1
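# Hedged sketch of how this loop would typically close out (the original
# snippet is truncated here): turn the hit counter into recall against the
# ground truth and record build/query timings in the files opened above.
#
#            recall = accuracy / (len(xq) * k)
#            end_query = time.time()
#            f.write(f"{M}\t{kk}\t{recall}\t{end_query - start_query}\n")
#        fg.write(f"{M}\t{end_graph - start_graph}\n")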