コード例 #1
0
ファイル: test_n2.py プロジェクト: RossSong/n2
    def test_search_by_vector(self):
        """Check the id ordering returned for three 2-D points under 'L2'."""
        dim = 2
        index = HnswIndex(dim, 'L2')
        # Ids are assigned in insertion order: 0, 1, 2.
        for point in ([2, 2], [3, 2], [3, 3]):
            index.add_data(point)
        index.build()

        # (query vector, expected ids) -- each expectation lists the closest
        # stored point first.
        cases = (
            ([4, 4], [2, 1, 0]),
            ([1, 1], [0, 1, 2]),
            ([4, 2], [1, 2, 0]),
        )
        for query, expected in cases:
            self.assertEqual(index.search_by_vector(query, 3), expected)
コード例 #2
0
ファイル: test_n2.py プロジェクト: RossSong/n2
    def test_search_by_vector(self):
        """Check the id ordering for three axis-aligned unit vectors.

        The index is created without a metric argument, so whatever metric
        ``HnswIndex`` defaults to is the one being exercised.
        """
        dim = 3
        index = HnswIndex(dim)
        # Ids are assigned in insertion order: 0, 1, 2.
        for unit_vector in ([0, 0, 1], [0, 1, 0], [1, 0, 0]):
            index.add_data(unit_vector)
        index.build(max_m0=10, m=5)

        # (query vector, expected ids)
        cases = (
            ([3, 2, 1], [2, 1, 0]),
            ([1, 2, 3], [0, 1, 2]),
            ([2, 0, 1], [2, 0, 1]),
        )
        for query, expected in cases:
            self.assertEqual(index.search_by_vector(query, 3), expected)
コード例 #3
0
class N2(BaseANN):
    """Benchmark adapter around an n2 ``HnswIndex`` created with "L2"."""

    def __init__(self, m):
        """Remember the build parameter ``m``; the thread count is fixed at 8."""
        n_threads = 8
        self.name = 'N2(m={}, threads={})'.format(m, n_threads)
        self._m = m
        self._threads = n_threads
        self._index = None  # populated by fit()
        print("Init done")

    def fit(self, X):
        """Create and build the index from the rows of ``X``.

        ``X`` is converted to a float32 numpy array before its rows are added
        one at a time.
        """
        data = numpy.array(X).astype(numpy.float32)
        n_dims = data.shape[1]
        self._index = HnswIndex(n_dims, "L2")
        print("Shape", n_dims)
        for row in data:
            self._index.add_data(row)
        self._index.build(m=self._m, n_threads=self._threads)
        print("Fit done")

    def query(self, v, n):
        """Return ``search_by_vector`` results for ``v`` (cast to float32) and ``n``."""
        return self._index.search_by_vector(v.astype(numpy.float32), n)

    def use_threads(self):
        """Always return ``False``."""
        return False
コード例 #4
0
ファイル: test_n2.py プロジェクト: RossSong/n2
 def test04_batch_search_by_vectors(self):
     """Batch search must return exactly what per-vector searches return."""
     index = HnswIndex(self.dim)
     index.load(self.model_fname)

     # 100 random query vectors with self.dim gaussian components each.
     queries = []
     for _row in xrange(100):
         queries.append([random.gauss(0, 1) for _col in xrange(self.dim)])

     batch_res = index.batch_search_by_vectors(
         queries, 10, num_threads=12, include_distances=True)
     normal_res = []
     for query in queries:
         normal_res.append(
             index.search_by_vector(query, 10, include_distances=True))

     self.assertEqual(batch_res, normal_res)
コード例 #5
0
class N2(BaseANN):
    """Benchmark adapter for n2's ``HnswIndex`` with an on-disk index cache.

    The cache path is derived from ``INDEX_DIR`` and the build parameters.
    ``fit`` loads that file when it exists; otherwise it builds the index
    and saves it there.
    """

    def __init__(self, m, ef_construction, n_threads, ef_search, metric):
        """Record the parameters and make sure the cache directory exists.

        Args:
            m: forwarded to ``build`` as ``m``; ``max_m0`` is set to ``2 * m``.
            ef_construction: forwarded to ``build``.
            n_threads: forwarded to ``build``.
            ef_search: passed as the third argument of ``search_by_vector``.
            metric: ``'euclidean'`` selects the ``'L2'`` index; any other
                value leaves the ``HnswIndex`` metric at its default.

        Raises:
            OSError: if the cache directory is missing and cannot be created.
        """
        self._m = m
        self._m0 = m * 2
        self._ef_construction = ef_construction
        self._n_threads = n_threads
        self._ef_search = ef_search
        self._index_name = os.path.join(
            INDEX_DIR, "youtube_n2_M%d_efCon%d_n_thread%s" %
            (m, ef_construction, n_threads))
        self.name = "N2_M%d_efCon%d_n_thread%s_efSearch%d" % (
            m, ef_construction, n_threads, ef_search)
        self._metric = metric

        index_dir = os.path.dirname(self._index_name)
        # Create first and inspect afterwards: an exists()-then-makedirs()
        # sequence can fail when another process creates the directory in
        # between the two calls.
        try:
            os.makedirs(index_dir)
        except OSError:
            if not os.path.exists(index_dir):
                raise

    def fit(self, X):
        """Load the cached index if present, otherwise build and save it.

        Args:
            X: 2-D array-like exposing ``shape``; each row must provide
                ``tolist()`` (e.g. a numpy array).
        """
        from n2 import HnswIndex

        dim = X.shape[1]
        if self._metric == 'euclidean':
            self._n2 = HnswIndex(dim, 'L2')
        else:
            self._n2 = HnswIndex(dim)

        if os.path.exists(self._index_name):
            logging.debug("Loading index from file")
            self._n2.load(self._index_name)
            return

        logging.debug("Index file is not exist: {0}".format(
            self._index_name))
        logging.debug("Start fitting")
        for x in X:
            self._n2.add_data(x.tolist())
        self._n2.build(m=self._m,
                       max_m0=self._m0,
                       ef_construction=self._ef_construction,
                       n_threads=self._n_threads)
        self._n2.save(self._index_name)

    def query(self, v, n):
        """Return ``search_by_vector`` results for ``v`` using ``ef_search``."""
        return self._n2.search_by_vector(v.tolist(), n, self._ef_search)

    def __str__(self):
        """Return the descriptive name assembled in ``__init__``."""
        return self.name
コード例 #6
0
ファイル: benchmark_script.py プロジェクト: vx0918/n2
class N2(BaseANN):
    """Benchmark adapter for n2's ``HnswIndex`` with single and batch queries.

    The built index is cached under ``CACHE_DIR``; the file name includes
    ``args.dataset`` and the build parameters.
    """

    def __init__(self, m, ef_construction, n_threads, ef_search, metric, batch):
        """Store the build/search parameters and derive the name and cache path."""
        batch_suffix = '_batch' if batch else ''
        self.name = "N2_M%d_efCon%d_n_thread%s_efSearch%d%s" % (
            m, ef_construction, n_threads, ef_search, batch_suffix)
        self._m = m
        self._m0 = m * 2
        self._ef_construction = ef_construction
        self._n_threads = n_threads
        self._ef_search = ef_search
        index_file = "index_n2_%s_M%d_efCon%d_n_thread%s" % (
            args.dataset, m, ef_construction, n_threads)
        self._index_name = os.path.join(CACHE_DIR, index_file)
        self._metric = metric

    def fit(self, X):
        """Load the cached index when available; otherwise build and save it."""
        # 'euclidean' -> 'L2', 'dot' -> 'dot', anything else -> library default.
        if self._metric == 'euclidean':
            index = HnswIndex(X.shape[1], 'L2')
        elif self._metric == 'dot':
            index = HnswIndex(X.shape[1], 'dot')
        else:
            index = HnswIndex(X.shape[1])
        self._n2 = index

        if os.path.exists(self._index_name):
            n2_logger.info("Loading index from file")
            index.load(self._index_name, use_mmap=False)
        else:
            n2_logger.info("Create Index")
            for row in X:
                index.add_data(row)
            index.build(
                m=self._m,
                max_m0=self._m0,
                ef_construction=self._ef_construction,
                n_threads=self._n_threads,
            )
            index.save(self._index_name)

    def query(self, v, n):
        """Return ``search_by_vector`` results for ``v`` using ``ef_search``."""
        result = self._n2.search_by_vector(v, n, self._ef_search)
        return result

    def batch_query(self, X, n):
        """Run a batch search over ``X`` and keep the result in ``self.b_res``."""
        self.b_res = self._n2.batch_search_by_vectors(
            X, n, self._ef_search, self._n_threads)

    def get_batch_results(self):
        """Return whatever the last ``batch_query`` call stored."""
        return self.b_res

    def __str__(self):
        """Return the descriptive name assembled in ``__init__``."""
        return self.name
コード例 #7
0
ファイル: test_n2.py プロジェクト: RossSong/n2
    def precision(self, n, n_trees=10, n_points=10000, n_rounds=10):
        """Return the fraction of returned ids that are below ``n``.

        Every round indexes ``n_points`` 10-dimensional points. Point ``j``
        has first coordinate 1000 and its other nine coordinates form a
        random gaussian direction rescaled to length ``j``. The index is then
        queried with ``[1000, 0, ..., 0]`` for ``n`` results, and ids smaller
        than ``n`` are counted. ``n_trees`` is accepted but not used here.
        """
        dim = 10
        hits = 0
        for _round in xrange(n_rounds):
            index = HnswIndex(dim, 'L2')
            for radius in xrange(n_points):
                direction = [random.gauss(0, 1) for _axis in xrange(dim - 1)]
                length = sum(c ** 2 for c in direction) ** 0.5
                index.add_data(
                    [1000] + [c / length * radius for c in direction])

            index.build()

            query = [1000] + [0] * (dim - 1)
            result = index.search_by_vector(query, n)
            # The returned ids are required to be in ascending order.
            self.assertEqual(result, sorted(result))
            hits += sum(1 for found_id in result if found_id < n)

        return 1.0 * hits / (n * n_rounds)
コード例 #8
0
class N2(BaseANN):
    """Benchmark adapter around an n2 ``HnswIndex`` created with "L2"."""

    def __init__(self, m):
        """Remember the build parameter ``m``; the thread count is fixed at 8."""
        n_threads = 8
        self.name = 'N2(m={}, threads={})'.format(m, n_threads)
        self._m = m
        self._threads = n_threads
        self._index = None  # populated by fit()

    def fit(self, X):
        """Create and build the index from the rows of ``X`` as float32."""
        data = numpy.array(X).astype(numpy.float32)
        self._index = HnswIndex(data.shape[1], "L2")
        for row in data:
            self._index.add_data(row)
        self._index.build(m=self._m, n_threads=self._threads)

    def query(self, v, n):
        """Return ``search_by_vector`` results for ``v`` (cast to float32) and ``n``."""
        return self._index.search_by_vector(v.astype(numpy.float32), n)
コード例 #9
0
ファイル: example_angular.py プロジェクト: oddconcepts/n2o
from n2 import HnswIndex
import random

# Build an index over 1000 random f-dimensional vectors and save it to disk.
f = 3
t = HnswIndex(f)  # no metric argument: the library default is used
for _ in xrange(1000):
    t.add_data([random.gauss(0, 1) for _component in xrange(f)])
t.build(m=5, max_m0=10, n_threads=4)
t.save('test.n2')

# Load the saved file into a second index object created with "angular".
u = HnswIndex(f, "angular")
u.load('test.n2')

# Look up neighbours of an item that is already stored in the index.
search_id = 1
k = 3
neighbor_ids = u.search_by_id(search_id, k)
id_report = "[search_by_id]: Nearest neighborhoods of id {}: {}".format(
    search_id, neighbor_ids)
print(id_report)

# Look up neighbours of a fresh random vector, including distances.
example_vector_query = [random.gauss(0, 1) for _component in xrange(f)]
nns = u.search_by_vector(example_vector_query, k, include_distances=True)
vector_report = (
    "[search_by_vector]: Nearest neighborhoods of vector {}: {}".format(
        example_vector_query, nns))
print(vector_report)