Beispiel #1
0
def index_generator(db_file_path: str):
    """Yield one ``Document`` per entry read from ``db_file_path``.

    The file is loaded with ``fvecs_read``. Each yielded document carries the
    entry as its ``content`` and the entry's zero-based position in the file
    under ``tags['id']``.

    Args:
        db_file_path: Path handed to ``fvecs_read``.

    Yields:
        The populated ``Document`` for each entry, in file order.
    """
    for position, vector in enumerate(fvecs_read(db_file_path)):
        with Document() as doc:
            doc.tags['id'] = position
            doc.content = vector
        # The document is handed out only after its ``with`` block has exited.
        yield doc
Beispiel #2
0
def evaluate_generator(db_file_path: str, groundtruth_path: str):
    """Yield ``(doc, groundtruth)`` pairs for evaluation.

    Entries from ``fvecs_read(db_file_path)`` are paired positionally with
    entries from ``ivecs_read(groundtruth_path)``. Iteration stops at the
    shorter of the two, as with any ``zip``.

    Args:
        db_file_path: Path handed to ``fvecs_read``.
        groundtruth_path: Path handed to ``ivecs_read``.

    Yields:
        A tuple ``(doc, groundtruth)``. ``doc.content`` holds the entry from
        the first file. ``groundtruth.matches`` holds one ``Document`` per
        index in the paired ground-truth entry, with that index stored as a
        plain ``int`` under ``tags['id']``.
    """
    pairs = zip(fvecs_read(db_file_path), ivecs_read(groundtruth_path))

    for query_vector, expected_ids in pairs:
        with Document() as doc:
            doc.content = query_vector

        with Document() as groundtruth:
            for expected_id in expected_ids:
                with Document() as match:
                    # ``.item()`` unwraps the array scalar before the int cast.
                    match.tags['id'] = int(expected_id.item())
                groundtruth.matches.add(match)

        yield doc, groundtruth
Beispiel #3
0
def read_data(db_file_path: str, batch_size: int, max_docs: int = None):
    """Yield ``(keys, vectors)`` batches from the vectors stored in a file.

    The file is loaded with ``fvecs_read``. Each batch pairs a slice of rows
    with a column vector of those rows' indices.

    Args:
        db_file_path: Path handed to ``fvecs_read``.
        batch_size: Number of rows per batch. ``-1`` is treated as ``1``.
            Ignored when ``max_docs`` is given.
        max_docs: When not ``None``, a single batch covering at most the
            first ``max_docs`` rows is yielded instead of batching the whole
            file.

    Yields:
        Tuples ``(keys, batch)`` where ``keys`` is
        ``np.arange(start, end)`` reshaped to ``(end - start, 1)`` and
        ``batch`` is ``vectors[start:end]``. The last batch is shorter than
        ``batch_size`` when the row count is not a multiple of it.
    """
    vectors = fvecs_read(db_file_path)
    num_vectors = vectors.shape[0]

    if max_docs is not None:
        # A document cap replaces batching altogether: one batch, clamped to
        # the number of rows actually available by the ``min`` below.
        batch_size = max_docs
        num_batches = 1
    else:
        batch_size = 1 if batch_size == -1 else batch_size
        # Ceiling division in pure integer arithmetic. Flooring here would
        # silently drop the trailing partial batch (and yield nothing at all
        # when batch_size exceeds the number of rows).
        num_batches = -(-num_vectors // batch_size)

    for i in range(1, num_batches + 1):
        start_batch = (i - 1) * batch_size
        end_batch = min(i * batch_size, num_vectors)
        keys = np.arange(start_batch,
                         end_batch).reshape(end_batch - start_batch, 1)
        yield keys, vectors[start_batch:end_batch]
Beispiel #4
0
def read_data(db_file_path: str):
    """Return whatever ``fvecs_read`` loads from ``db_file_path``.

    Args:
        db_file_path: Path handed to ``fvecs_read``.

    Returns:
        The unmodified result of ``fvecs_read(db_file_path)``.
    """
    vectors = fvecs_read(db_file_path)
    return vectors
__copyright__ = "Copyright (c) 2020 Jina AI Limited. All rights reserved."
__license__ = "Apache-2.0"

import gzip
import os
from read_vectors_files import fvecs_read

# Destination of the gzip-compressed training data written below.
train_filepath = 'workspace/train.tgz'

# Publish the dataset directory through the environment, then build the path
# of the training vectors file from that same variable.
os.environ['TMP_DATA_DIR'] = '/tmp/jina/faiss/siftsmall'
train_fvecs_path = os.path.join(
    os.environ['TMP_DATA_DIR'],
    'siftsmall_learn.fvecs',
)

train_data = fvecs_read(train_fvecs_path)

# Write the raw bytes of the loaded vectors using the fastest (lowest) gzip
# compression level.
with gzip.open(train_filepath, mode='wb', compresslevel=1) as gz_file:
    gz_file.write(train_data.tobytes())