コード例 #1
0
 def __init__(self, metric: str, dimension: int):
     """Set up an exact-search Elastiknn model wrapper.

     Args:
         metric: Metric name; stored as given and also passed through
             ``dealias_metric`` before being handed to ``ElastiknnModel``.
         dimension: Stored on the instance and embedded in ``self.name``.
     """
     self.metric, self.dimension = metric, dimension
     self.name = f"eknn-exact-metric={metric}_dimension={dimension}"
     self.batch_res = None

     # The model receives the de-aliased metric, not the raw argument.
     resolved_metric = dealias_metric(metric)
     self.model = ElastiknnModel("exact", resolved_metric)

     # NOTE(review): presumably blocks until Elasticsearch is ready -- confirm
     # against the definition of es_wait.
     es_wait()
コード例 #2
0
ファイル: test_model.py プロジェクト: alexklibisz/elastiknn
    def test_exact_jaccard_mnist(self):
        """Exact Jaccard search returns well-formed results matching scikit-learn.

        Fits an exact Jaccard ``ElastiknnModel`` on ``digits_train``, checks the
        shape and consistency of ``kneighbors`` output on ``digits_validate``,
        then verifies full recall against a brute-force ``NearestNeighbors``.
        """
        k = 20
        eknn = ElastiknnModel('exact', 'jaccard')
        eknn.fit(digits_train)

        # Query once for indices only and once with similarities as well; both
        # calls must agree on the indices.
        inds_only = eknn.kneighbors(digits_validate, k)
        inds_with_sim, sims = eknn.kneighbors(
            digits_validate, k, return_similarity=True)

        expected_shape = (digits_validate.shape[0], k)
        assert np.all(inds_only == inds_with_sim)
        assert inds_only.shape == expected_shape
        assert sims.shape == inds_with_sim.shape

        # The scikit-learn reference deliberately asks for only half as many
        # neighbors, so recall is still 1 even when ties in distance cause the
        # two implementations to order indices differently.
        reference = NearestNeighbors(
            n_neighbors=k // 2,
            algorithm='brute',
            metric='jaccard',
            n_jobs=1,
        )
        reference.fit(digits_train)
        reference_inds = reference.kneighbors(
            digits_validate, return_distance=False)

        # Every query must reach a recall of exactly 1.
        recall = self.recall(reference_inds, inds_with_sim)
        assert np.all(recall == 1)
コード例 #3
0
 def __init__(self, L: int, k: int, w: int):
     """Set up an L2 LSH Elastiknn model wrapper.

     Args:
         L: Passed to the model as mapping parameter ``L``.
         k: Passed to the model as mapping parameter ``k``.
         w: Passed to the model as mapping parameter ``w``.
     """
     # The full name depends on query arguments, so only the prefix is
     # known at construction time.
     self.name_prefix = f"eknn-l2lsh-L={L}-k={k}-w={w}"
     self.name = None

     lsh_mapping = {"L": L, "k": k, "w": w}
     self.model = ElastiknnModel("lsh", "l2", mapping_params=lsh_mapping)

     self.X_max = 1.0
     self.query_params = {}
     self.batch_res = None

     # Query counters start at zero.
     self.sum_query_dur = 0
     self.num_queries = 0

     # NOTE(review): presumably blocks until Elasticsearch is ready -- confirm
     # against the definition of es_wait.
     es_wait()
コード例 #4
0
def lsh(dataset: Dataset,
        bands: int = 165,
        rows: int = 1,
        candidates: float = 1.5):
    """Evaluate a Jaccard LSH ``ElastiknnModel`` on ``dataset``.

    Args:
        dataset: Dataset whose first query's ``indices`` length is used as
            the neighbor count.
        bands: Forwarded as the ``"bands"`` mapping parameter.
        rows: Forwarded as the ``"rows"`` mapping parameter.
        candidates: Multiplier applied to the neighbor count; the product is
            truncated to an int and sent as the ``"candidates"`` query
            parameter.

    Returns:
        Whatever ``evaluate(dataset, model)`` returns.
    """
    first_query = dataset.queries[0]
    neighbor_count = len(first_query.indices)

    # The index name carries the current time in whole seconds.
    index_name = f"{INDEX}-{int(time())}"
    mapping = dict(bands=bands, rows=rows)
    query = dict(candidates=int(candidates * neighbor_count))

    model = ElastiknnModel(algorithm='lsh',
                           metric='jaccard',
                           n_jobs=1,
                           index=index_name,
                           mapping_params=mapping,
                           query_params=query)
    return evaluate(dataset, model)
コード例 #5
0
def indexed(dataset: Dataset):
    """Evaluate a sparse-indexed Jaccard ``ElastiknnModel`` on ``dataset``.

    The model uses an index name built from ``INDEX`` and the current time in
    whole seconds. Returns whatever ``evaluate(dataset, model)`` returns.
    """
    index_name = f"{INDEX}-{int(time())}"
    model = ElastiknnModel(
        algorithm='sparse_indexed',
        metric='jaccard',
        n_jobs=1,
        index=index_name,
    )
    return evaluate(dataset, model)
コード例 #6
0
ファイル: jaccard.py プロジェクト: yeus/elastiknn
def exact(dataset: Dataset):
    """Evaluate an exact Jaccard ``ElastiknnModel`` on ``dataset``.

    The model uses the fixed index name ``f"{INDEX}-exact"``. Returns whatever
    ``evaluate(dataset, model)`` returns.
    """
    model_kwargs = dict(
        algorithm='exact',
        metric='jaccard',
        n_jobs=1,
        index=f"{INDEX}-exact",
    )
    return evaluate(dataset, ElastiknnModel(**model_kwargs))