예제 #1
0
 def setUp(self):
     """Create the sparse-vector index used by the tests.

     Deletes ``small_world_rand.index`` (a path relative to the current
     working directory) when it exists as a regular file, then stores on
     ``self.index`` the object returned by ``nmslib.init`` for the
     ``cosinesimil_sparse`` space with the ``small_world_rand`` method,
     ``DataType.SPARSE_VECTOR`` and ``DistType.FLOAT``.
     """
     method_name = 'small_world_rand'

     # Presumably a leftover from an earlier run; remove it so the test
     # starts without an on-disk index file.
     stale_index = method_name + '.index'
     if os.path.isfile(stale_index):
         os.remove(stale_index)

     self.index = nmslib.init(
         'cosinesimil_sparse',  # space type
         [],                    # space parameters (none)
         method_name,
         nmslib.DataType.SPARSE_VECTOR,
         nmslib.DistType.FLOAT)
예제 #2
0
 def setUp(self):
     """Create the string-object index used by the tests.

     Deletes ``small_world_rand.index`` (a path relative to the current
     working directory) when it exists as a regular file, then stores on
     ``self.index`` the object returned by ``nmslib.init`` for the
     ``normleven`` space with the ``small_world_rand`` method,
     ``DataType.OBJECT_AS_STRING`` and ``DistType.FLOAT``.
     """
     method_name = 'small_world_rand'

     # Presumably a leftover from an earlier run; remove it so the test
     # starts without an on-disk index file.
     stale_index = method_name + '.index'
     if os.path.isfile(stale_index):
         os.remove(stale_index)

     self.index = nmslib.init(
         'normleven',  # space type
         [],           # space parameters (none)
         method_name,
         nmslib.DataType.OBJECT_AS_STRING,
         nmslib.DistType.FLOAT)
    def build(self, index_file: Path, dim: int):
        """Build an nmslib ``sw-graph`` cosine index from a float32 file.

        Args:
            index_file: File that is memory-mapped as flat ``float32``
                values and then viewed as rows of ``dim`` values each.
            dim: Number of values per row.

        Sets ``self.dim``, ``self.index_file`` and ``self.index``. Each
        row is registered under its row number as an ``int32`` id.
        """
        self.dim = dim
        self.index_file = index_file

        # The file is opened in 'r+' mode and reshaped to (n_rows, dim).
        vectors = np.memmap(
            index_file, dtype=np.float32, mode='r+').reshape(-1, dim)

        self.index = nmslib.init(space='cosinesimil', method='sw-graph')

        row_ids = np.arange(len(vectors), dtype=np.int32)
        nmslib.addDataPointBatch(self.index, row_ids, vectors)

        self.index.createIndex({}, print_progress=True)
예제 #4
0
    def testDenseCosine(self):
        """Check approximate cosine k-NN on dense data against exact results.

        Indexes 1000 seeded random 10-dimensional float32 vectors with the
        ``sw-graph`` method in the ``cosinesimil`` space, queries the 10
        nearest neighbours of an all-ones vector, and requires the value
        returned by ``get_hitrate`` (presumably the number of exact
        neighbours that were found -- confirm against the helper) to be at
        least 8, both for a single query and for the first result of a
        batch query.
        """
        np.random.seed(23)  # fixed seed keeps the generated data repeatable
        data = np.random.randn(1000, 10).astype(np.float32)

        index = nmsbind.init(method='sw-graph', space='cosinesimil')
        index.addDataPointBatch(data)
        index.createIndex()

        row = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1.])

        # assertGreaterEqual reports the actual hit rate on failure, unlike
        # assertTrue(rate >= 8), which only says "False is not true".
        ids, _ = index.knnQuery(row, k=10)
        self.assertGreaterEqual(
            get_hitrate(get_exact_cosine(row, data), ids), 8)

        # Only the ids of the first batch entry are checked.
        results = index.knnQueryBatch([row, row], k=10)
        self.assertGreaterEqual(
            get_hitrate(get_exact_cosine(row, data), results[0][0]), 8)
예제 #5
0
    def testSparse(self):
        """Exercise a sparse-vector index: insert, query, length, lookup.

        Adds four sparse points (lists of ``(position, value)`` pairs) under
        ids 0..3, builds the index, and checks that querying with the same
        vector as point 0 returns id 0 first at distance 0. Also checks
        ``len(index)`` and item access by id.
        """
        index = nmsbind.init(method='small_world_rand',
                             space='cosinesimil_sparse',
                             data_type=nmsbind.DataType.SPARSE_VECTOR)

        sparse_points = [
            [(1, 2.), (2, 3.)],
            [(0, 1.), (1, 2.)],
            [(2, 3.), (3, 3.)],
            [(3, 1.)],
        ]
        for point_id, point in enumerate(sparse_points):
            index.addDataPoint(point_id, point)

        index.createIndex()

        query = [(1, 2.), (2, 3.)]  # same contents as point 0
        ids, distances = index.knnQuery(query)
        self.assertEqual(ids[0], 0)
        # NOTE(review): exact float equality on a distance -- confirm this
        # is intentional rather than an approximate comparison.
        self.assertEqual(distances[0], 0)

        self.assertEqual(len(index), 4)
        self.assertEqual(index[3], [(3, 1.0)])
예제 #6
0
    def testStringLeven(self):
        """Exercise a string index in the ``leven`` space with INT distances.

        Indexes every permutation of the letters a, t, c, g in one batch,
        appends "atat" and "gaga" one at a time, builds the index, and then
        checks that each distance reported by ``knnQuery`` for the first
        string equals ``getDistance(0, id)``. Also checks ``len(index)`` and
        item access by position.
        """
        index = nmsbind.init(space='leven',
                             dtype=nmsbind.DistType.INT,
                             data_type=nmsbind.DataType.OBJECT_AS_STRING,
                             method='small_world_rand')

        strings = list(map(''.join, itertools.permutations('atcg')))
        index.addDataPointBatch(strings)

        # Each extra string gets the next free id, i.e. the current length.
        for extra in ("atat", "gaga"):
            index.addDataPoint(len(index), extra)
        index.createIndex()

        neighbour_ids, neighbour_dists = index.knnQuery(strings[0])
        for neighbour_id, reported in zip(neighbour_ids, neighbour_dists):
            self.assertEqual(index.getDistance(0, neighbour_id), reported)

        self.assertEqual(len(index), len(strings) + 2)
        self.assertEqual(index[0], strings[0])
        self.assertEqual(index[len(index) - 2], 'atat')