def test_query_iterator(self, metric=faiss.METRIC_L2):
    """Check ``range_search_max_results`` when queries arrive in batches.

    The reference is ``IndexFlat.range_search`` run on the full query
    matrix. Two scenarios are compared against it:

    1. No result cap: the batched search must reproduce the reference
       obtained with the same threshold.
    2. ``max_results`` set to half of the last entry of the reference
       ``lims`` array: the last entry of the new ``lims`` must not exceed
       that cap, and the results must match a plain range search run with
       the threshold returned by ``range_search_max_results``.

    Args:
        metric: faiss metric constant used for both the k-NN calibration
            and the flat index (defaults to ``faiss.METRIC_L2``).
    """
    dataset = datasets.SyntheticDataset(32, 0, 1000, 1000)
    xq = dataset.get_queries()
    xb = dataset.get_database()

    # Calibrate the radius: mean of the last column of a k=10 search.
    knn_distances, _ = faiss.knn(xq, xb, 10, metric=metric)
    last_column = knn_distances[:, -1]
    threshold = float(last_column.mean())
    print(threshold)

    index = faiss.IndexFlat(32, metric)
    index.add(xb)
    lims_ref, dist_ref, ids_ref = index.range_search(xq, threshold)

    def batches(matrix, batch_size):
        # Lazily produce consecutive row slices of at most batch_size rows.
        n_rows = matrix.shape[0]
        return (
            matrix[start:start + batch_size]
            for start in range(0, n_rows, batch_size)
        )

    # Scenario 1: without a cap the batched search reproduces the reference.
    _, lims_new, dist_new, ids_new = range_search_max_results(
        index,
        batches(xq, 100),
        threshold,
    )
    evaluation.test_ref_range_results(
        lims_ref, dist_ref, ids_ref,
        lims_new, dist_new, ids_new,
    )

    # Scenario 2: cap the results at half of the reference's final lims entry.
    max_res = lims_ref[-1] // 2
    capped_threshold, lims_new, dist_new, ids_new = range_search_max_results(
        index,
        batches(xq, 100),
        threshold,
        max_results=max_res,
    )
    self.assertLessEqual(lims_new[-1], max_res)

    # The capped output must equal a range search at the returned threshold.
    lims_ref, dist_ref, ids_ref = index.range_search(xq, capped_threshold)
    evaluation.test_ref_range_results(
        lims_ref, dist_ref, ids_ref,
        lims_new, dist_new, ids_new,
    )
def do_test(self, metric_type):
    """Check that ``range_search_max_results`` can recover a reference result.

    A reference range search is run on a flat index with a radius taken
    from the median of the last column of a k=10 search. The same result
    table must then be obtained by ``range_search_max_results`` when it is
    given only the reference result count (``min_results`` with
    ``clip_to_min=True``) and a starting radius of ``1e10`` for
    ``faiss.METRIC_L2`` or ``-1e10`` for any other metric.

    Args:
        metric_type: faiss metric constant used to build the flat index.
    """
    dataset = datasets.SyntheticDataset(32, 0, 1000, 200)
    queries = dataset.get_queries()

    index = faiss.IndexFlat(dataset.d, metric_type)
    index.add(dataset.get_database())

    # Pick a reasonable radius from the k=10 search distances.
    knn_distances, _ = index.search(queries, 10)
    radius0 = float(np.median(knn_distances[:, -1]))

    # Baseline: plain range search with that radius.
    lims_ref, dist_ref, ids_ref = index.range_search(queries, radius0)

    # Starting radius handed to the search; its value depends on the metric.
    if metric_type == faiss.METRIC_L2:
        init_radius = 1e10
    else:
        init_radius = -1e10

    # Knowing only the total number of results, the search should give
    # back the same result table as the baseline.
    _, lims_new, dist_new, ids_new = range_search_max_results(
        index,
        exponential_query_iterator(queries),
        init_radius,
        min_results=dist_ref.size,
        clip_to_min=True,
    )

    evaluation.test_ref_range_results(
        lims_ref, dist_ref, ids_ref,
        lims_new, dist_new, ids_new,
    )