def do_read_callback(self, bsz):
    """Round-trip a flat L2 index through a Python read callback.

    A random float32 dataset is added to an ``IndexFlatL2``, the index is
    written to a temporary file, and then read back through
    ``faiss.PyCallbackIOReader`` wrapping the file's ``read`` method.

    Args:
        bsz: when strictly positive, the callback reader is additionally
            wrapped in a ``faiss.BufferedIOReader`` built with this value.
    """
    dim, count = 32, 1000
    vectors = np.random.uniform(size=(count, dim)).astype('float32')
    original = faiss.IndexFlatL2(dim)
    original.add(vectors)

    # mkstemp hands back an open descriptor; only the path is needed here.
    handle, fname = tempfile.mkstemp()
    os.close(handle)
    try:
        faiss.write_index(original, fname)

        with open(fname, 'rb') as stream:
            reader = faiss.PyCallbackIOReader(stream.read, 1234)
            if bsz > 0:
                reader = faiss.BufferedIOReader(reader, bsz)
            restored = faiss.read_index(reader)

        # The reloaded index must match in dimension and stored vectors.
        self.assertEqual(original.d, restored.d)
        expected_xb = faiss.vector_to_array(original.xb)
        actual_xb = faiss.vector_to_array(restored.xb)
        np.testing.assert_array_equal(expected_xb, actual_xb)

        # This is not a callable function: should raise an exception
        reader = faiss.PyCallbackIOReader("blabla")
        with self.assertRaises(Exception):
            faiss.read_index(reader)
    finally:
        if os.path.exists(fname):
            os.unlink(fname)
def load_faiss_index(path_to_faiss="models/lex_similar_sentences.index"):
    """Load and deserialize the Faiss index.

    The serialized index is fetched over HTTPS from a hard-coded S3 URL and
    streamed into ``faiss.read_index`` through a ``PyCallbackIOReader`` that
    pulls bytes from the HTTP response.

    Args:
        path_to_faiss: accepted for backward compatibility; this function
            does not read it (the index always comes from the remote URL).

    Returns:
        The index object produced by ``faiss.read_index``.
    """
    url = (
        "https://podcast-search-scify.s3.amazonaws.com/"
        "lex_similar_sentences_distil.index"
    )
    # Use the response as a context manager so the HTTP connection is
    # released even if deserialization fails part-way through.
    with urllib.request.urlopen(url) as data:
        reader = faiss.PyCallbackIOReader(data.read)
        index = faiss.read_index(reader)
    return index
def load_faiss_index(self):
    """Populate ``self.faiss_index`` with one shard per partition.

    A fresh ``faiss.IndexShards`` is created for ``self.item_embedding_size``
    and, for every rank in ``range(self.get_partition_count())``, the file
    ``part_<count>_<rank>.dat`` under ``<model_in_path>faiss/item_index/``
    (after passing through ``use_s3``) is read via a callback reader and
    added as a shard. The resulting ``ntotal`` is printed at the end.
    """
    from .url_utils import use_s3

    shard_dir = use_s3('%sfaiss/item_index/' % self.model_in_path)
    self.faiss_index = faiss.IndexShards(self.item_embedding_size, True, False)
    num_parts = self.get_partition_count()

    for rank in range(num_parts):
        shard_path = '%spart_%d_%d.dat' % (shard_dir, num_parts, rank)
        stream = _mindalpha.InputStream(shard_path)
        # Let faiss pull the serialized shard through the stream's read().
        reader = faiss.PyCallbackIOReader(stream.read)
        shard = faiss.read_index(reader)
        self.faiss_index.add_shard(shard)

    print('faiss index ntotal: %d' % self.faiss_index.ntotal)
def test_buf_read(self):
    """Check that a buffered callback reader returns the file's exact bytes.

    Twenty random float64 values are dumped to a temporary file, then read
    back into a zeroed array through a ``faiss.BufferedIOReader`` wrapping a
    ``faiss.PyCallbackIOReader`` over the file's ``read`` method.
    """
    x = np.random.uniform(size=20)

    # mkstemp returns an already-open OS-level descriptor; close it right
    # away so it is not leaked (only the path is used below).
    fd, fname = tempfile.mkstemp()
    os.close(fd)
    try:
        x.tofile(fname)

        # Keep the file open only while the reader is in use, so it is
        # closed before the temporary file is unlinked.
        with open(fname, 'rb') as f:
            reader = faiss.PyCallbackIOReader(f.read, 1234)
            bsz = 123
            reader = faiss.BufferedIOReader(reader, bsz)

            y = np.zeros_like(x)
            print('nbytes=', y.nbytes)
            # Fill y's buffer in place: one item of y.nbytes bytes.
            reader(faiss.swig_ptr(y), y.nbytes, 1)

        np.testing.assert_array_equal(x, y)
    finally:
        if os.path.exists(fname):
            os.unlink(fname)
def index_from_pipe():
    """Deserialize a faiss index from the descriptor ``rf``.

    ``rf`` is not defined in this function; it is looked up from the
    enclosing scope each time the read callback runs.

    Returns:
        The index object produced by ``faiss.read_index``.
    """
    def read_chunk(size):
        # Called by faiss with the number of bytes it wants next.
        return os.read(rf, size)

    pipe_reader = faiss.PyCallbackIOReader(read_chunk)
    return faiss.read_index(pipe_reader)