def setUp(self):
    """Prepare a fresh sparse-vector index before each test.

    Deletes a leftover ``small_world_rand.index`` file (path relative to
    the current working directory) if one exists, then stores a newly
    initialised index on ``self.index``.
    """
    method = 'small_world_rand'

    # Remove any index file left behind under the same name.
    stale_index = method + '.index'
    if os.path.isfile(stale_index):
        os.remove(stale_index)

    self.index = nmslib.init(
        'cosinesimil_sparse',
        [],  # no extra space parameters
        method,
        nmslib.DataType.SPARSE_VECTOR,
        nmslib.DistType.FLOAT,
    )
def setUp(self):
    """Prepare a fresh string index before each test.

    Deletes a leftover ``small_world_rand.index`` file (path relative to
    the current working directory) if one exists, then stores a newly
    initialised index on ``self.index``. The index uses the ``normleven``
    space over string objects with float distances.
    """
    method = 'small_world_rand'

    # Remove any index file left behind under the same name.
    stale_index = method + '.index'
    if os.path.isfile(stale_index):
        os.remove(stale_index)

    self.index = nmslib.init(
        'normleven',
        [],  # no extra space parameters
        method,
        nmslib.DataType.OBJECT_AS_STRING,
        nmslib.DistType.FLOAT,
    )
def build(self, index_file: Path, dim: int):
    """Build a cosine-similarity ``sw-graph`` index from a vector file.

    Args:
        index_file: File that is memory-mapped as a flat ``float32`` array
            and then viewed as rows of ``dim`` values each.
        dim: Number of components per row.

    Stores ``dim``, ``index_file`` and the created index on ``self``.
    Each row is registered under its row number as an ``int32`` id.
    """
    self.dim = dim
    self.index_file = index_file

    # Map the file instead of loading it, then view it as (n_rows, dim).
    flat = np.memmap(index_file, dtype=np.float32, mode='r+')
    vectors = flat.reshape((-1, dim))
    row_ids = np.arange(vectors.shape[0], dtype=np.int32)

    self.index = nmslib.init(space='cosinesimil', method='sw-graph')
    nmslib.addDataPointBatch(self.index, row_ids, vectors)
    self.index.createIndex({}, print_progress=True)
def testDenseCosine(self):
    """Check k-NN quality on dense vectors with the cosine space.

    Indexes 1000 seeded random 10-dimensional ``float32`` vectors with the
    ``sw-graph`` method, queries the 10 nearest neighbours of an all-ones
    vector, and requires ``get_hitrate`` against ``get_exact_cosine`` to be
    at least 8. This is checked for the single query and for the first
    result of a batch query.
    """
    np.random.seed(23)  # fixed seed keeps the data set reproducible
    data = np.random.randn(1000, 10).astype(np.float32)

    index = nmsbind.init(method='sw-graph', space='cosinesimil')
    index.addDataPointBatch(data)
    index.createIndex()

    row = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1.])

    # Only the ids are checked; the returned distances are not needed.
    ids, _ = index.knnQuery(row, k=10)
    # assertGreaterEqual reports the actual hit rate on failure, unlike
    # assertTrue(x >= 8), which only reports "False is not true".
    self.assertGreaterEqual(
        get_hitrate(get_exact_cosine(row, data), ids), 8)

    results = index.knnQueryBatch([row, row], k=10)
    # results[0][0] holds the ids returned for the first query of the batch.
    self.assertGreaterEqual(
        get_hitrate(get_exact_cosine(row, data), results[0][0]), 8)
def testSparse(self):
    """Check indexing and querying of sparse vectors with the cosine space.

    Adds four sparse points given as ``(dimension, value)`` pairs, queries
    with a vector identical to point 0, and expects point 0 to come back
    first at (approximately) zero distance. Also checks ``len(index)`` and
    item access on the index.
    """
    index = nmsbind.init(
        method='small_world_rand',
        space='cosinesimil_sparse',
        data_type=nmsbind.DataType.SPARSE_VECTOR,
    )

    index.addDataPoint(0, [(1, 2.), (2, 3.)])
    index.addDataPoint(1, [(0, 1.), (1, 2.)])
    index.addDataPoint(2, [(2, 3.), (3, 3.)])
    index.addDataPoint(3, [(3, 1.)])
    index.createIndex()

    ids, distances = index.knnQuery([(1, 2.), (2, 3.)])
    self.assertEqual(ids[0], 0)
    # The distance is a computed floating-point value, so compare with a
    # small tolerance rather than demanding a bit-exact zero.
    self.assertAlmostEqual(distances[0], 0, delta=1e-5)

    self.assertEqual(len(index), 4)
    self.assertEqual(index[3], [(3, 1.0)])
def testStringLeven(self):
    """Check string indexing with the integer-valued ``leven`` space.

    Indexes every permutation of the letters a/t/c/g plus two extra
    strings, then verifies that each distance returned by ``knnQuery`` for
    the first string equals ``index.getDistance(0, id)``. Also checks the
    index length and item access.
    """
    index = nmsbind.init(
        space='leven',
        dtype=nmsbind.DistType.INT,
        data_type=nmsbind.DataType.OBJECT_AS_STRING,
        method='small_world_rand',
    )

    letters = ['a', 't', 'c', 'g']
    strings = [''.join(perm) for perm in itertools.permutations(letters)]
    index.addDataPointBatch(strings)

    # Each extra string takes the next free id, i.e. the current size.
    for extra in ("atat", "gaga"):
        index.addDataPoint(len(index), extra)
    index.createIndex()

    neighbour_ids, reported = index.knnQuery(strings[0])
    for neighbour, distance in zip(neighbour_ids, reported):
        self.assertEqual(index.getDistance(0, neighbour), distance)

    self.assertEqual(len(index), len(strings) + 2)
    self.assertEqual(index[0], strings[0])
    second_to_last = len(index) - 2
    self.assertEqual(index[second_to_last], 'atat')