def __init__(self, metric: str, dimension: int):
    """Configure an exact-search Elastiknn wrapper.

    Args:
        metric: Metric name. It is stored as given and also passed through
            ``dealias_metric`` before being handed to ``ElastiknnModel``.
        dimension: Vector dimension; stored on the instance and embedded in
            ``self.name``.
    """
    self.name = f"eknn-exact-metric={metric}_dimension={dimension}"
    self.metric, self.dimension = metric, dimension

    resolved_metric = dealias_metric(metric)
    self.model = ElastiknnModel("exact", resolved_metric)

    # No batch results exist until a batch query has been run.
    self.batch_res = None

    # NOTE(review): presumably blocks until Elasticsearch is ready — confirm
    # against the definition of es_wait.
    es_wait()
def test_exact_jaccard_mnist(self):
    """Exact Jaccard search returns well-formed results that match scikit-learn."""
    k = 20

    # Step 1: query with and without similarities and validate the output form.
    eknn = ElastiknnModel('exact', 'jaccard')
    eknn.fit(digits_train)
    inds_plain = eknn.kneighbors(digits_validate, k)
    inds_scored, sims = eknn.kneighbors(digits_validate, k, return_similarity=True)
    assert np.all(inds_plain == inds_scored)
    assert inds_plain.shape == (digits_validate.shape[0], k)
    assert sims.shape == inds_scored.shape

    # Step 2: build the scikit-learn reference. It deliberately asks for only
    # half as many neighbors so that recall is still 1 even when ties in
    # distance cause the two implementations to order indices differently.
    reference = NearestNeighbors(
        n_neighbors=k // 2,
        algorithm='brute',
        metric='jaccard',
        n_jobs=1,
    )
    reference.fit(digits_train)
    inds_reference = reference.kneighbors(digits_validate, return_distance=False)

    # Step 3: every reference neighbor must be present in the Elastiknn results.
    recall = self.recall(inds_reference, inds_scored)
    assert np.all(recall == 1)
def __init__(self, L: int, k: int, w: int):
    """Configure an L2 LSH Elastiknn wrapper.

    Args:
        L: Passed to the model as the ``L`` mapping parameter.
        k: Passed to the model as the ``k`` mapping parameter.
        w: Passed to the model as the ``w`` mapping parameter.
    """
    self.name_prefix = f"eknn-l2lsh-L={L}-k={k}-w={w}"
    # The full name is filled in later, based on the query arguments.
    self.name = None

    lsh_mapping = {"L": L, "k": k, "w": w}
    self.model = ElastiknnModel("lsh", "l2", mapping_params=lsh_mapping)

    self.X_max = 1.0
    self.query_params = {}
    self.batch_res = None

    # Running totals for query timing; both start at zero.
    self.sum_query_dur = 0
    self.num_queries = 0

    # NOTE(review): presumably blocks until Elasticsearch is ready — confirm
    # against the definition of es_wait.
    es_wait()
def lsh(dataset: Dataset, bands: int = 165, rows: int = 1, candidates: float = 1.5):
    """Evaluate a Jaccard LSH model on ``dataset``.

    Args:
        dataset: Dataset to evaluate; the neighbor count is read from the
            length of its first query's ``indices``.
        bands: Value for the ``bands`` mapping parameter.
        rows: Value for the ``rows`` mapping parameter.
        candidates: Multiplier applied to the neighbor count to get the
            ``candidates`` query parameter (truncated to an int).

    Returns:
        Whatever ``evaluate(dataset, model)`` returns.
    """
    n_neighbors = len(dataset.queries[0].indices)

    # The index name is suffixed with the current epoch second.
    index_name = f"{INDEX}-{int(time())}"
    mapping = {"bands": bands, "rows": rows}
    query = {"candidates": int(candidates * n_neighbors)}

    model = ElastiknnModel(
        algorithm='lsh',
        metric='jaccard',
        n_jobs=1,
        index=index_name,
        mapping_params=mapping,
        query_params=query,
    )
    return evaluate(dataset, model)
def indexed(dataset: Dataset):
    """Evaluate a ``sparse_indexed`` Jaccard model on ``dataset``.

    Returns:
        Whatever ``evaluate(dataset, model)`` returns.
    """
    # The index name is suffixed with the current epoch second.
    index_name = f"{INDEX}-{int(time())}"
    model = ElastiknnModel(
        algorithm='sparse_indexed',
        metric='jaccard',
        n_jobs=1,
        index=index_name,
    )
    return evaluate(dataset, model)
def exact(dataset: Dataset):
    """Evaluate an ``exact`` Jaccard model on ``dataset``.

    Returns:
        Whatever ``evaluate(dataset, model)`` returns.
    """
    # The index name uses a fixed "-exact" suffix, so it is the same on every call.
    index_name = f"{INDEX}-exact"
    model = ElastiknnModel(
        algorithm='exact',
        metric='jaccard',
        n_jobs=1,
        index=index_name,
    )
    return evaluate(dataset, model)