Exemple #1
0
def test_input_files(patterns, recursive, size, sampling_rate, read_mode):
    """Validate the output of ``_input_files`` with ``Client.check_input``.

    All five arguments are forwarded unchanged, as keyword arguments, to
    ``_input_files``. The result is then checked with
    ``data_type=DataInputType.CONTENT``; the test passes when no exception
    propagates.

    NOTE(review): the arguments are presumably supplied by a pytest
    parametrization or fixtures outside this view -- confirm.
    """
    file_options = dict(
        patterns=patterns,
        recursive=recursive,
        size=size,
        sampling_rate=sampling_rate,
        read_mode=read_mode,
    )
    file_input = _input_files(**file_options)
    Client.check_input(file_input, data_type=DataInputType.CONTENT)
def test_incremental_indexing_sequential_indexers_content_hash(
        random_workspace, restful):
    """Index overlapping batches twice and check both indexers' sizes.

    A two-step Flow (``uniq_vectorindexer_content_hash.yml`` followed by
    ``uniq_docindexer_content_hash.yml``) is entered twice: first to index
    the first 10 documents, then to index the full list. Afterwards the two
    indexers are loaded from ``random_workspace`` and their type and
    ``_size`` are asserted.
    """
    doc_count = 20
    docs, _unused_count = get_duplicate_docs(num_docs=doc_count,
                                             same_content=False)
    # Original rationale: "because the content is % 2".
    # NOTE(review): the expected count is hard-coded here instead of using
    # the second value returned by get_duplicate_docs -- confirm intent.
    expected_unique = 10

    vec_yaml = os.path.join(cur_dir, 'uniq_vectorindexer_content_hash.yml')
    doc_yaml = os.path.join(cur_dir, 'uniq_docindexer_content_hash.yml')
    flow = Flow(restful=restful).add(uses=vec_yaml).add(uses=doc_yaml)

    # Both batches are validated before the Flow is started.
    Client.check_input(docs[:10])
    Client.check_input(docs)

    # First session: only the first ten documents.
    with flow:
        flow.index(docs[:10])

    # Second session on the same Flow object: the complete list.
    with flow:
        flow.index(docs)

    vec_path = random_workspace / 'inc_vecindexer' / 'vec_idx.bin'
    with BaseExecutor.load(vec_path) as vec_indexer:
        assert isinstance(vec_indexer, NumpyIndexer)
        assert vec_indexer._size == expected_unique

    doc_path = random_workspace / 'inc_docindexer' / 'doc_idx.bin'
    with BaseExecutor.load(doc_path) as kv_indexer:
        assert isinstance(kv_indexer, BinaryPbIndexer)
        assert kv_indexer._size == expected_unique
def test_incremental_indexing_sequential_indexers(random_workspace, restful):
    """Index overlapping batches twice and check both indexers' sizes.

    A two-step Flow (``uniq_vectorindexer.yml`` followed by
    ``uniq_docindexer.yml``) is entered twice: first to index the first 10
    documents, then to index the full list. Afterwards the two indexers are
    loaded from ``random_workspace`` and their ``_size`` is compared with the
    unique-document count returned by ``get_duplicate_docs``.
    """
    doc_count = 20
    docs, expected_unique = get_duplicate_docs(num_docs=doc_count)

    vec_yaml = os.path.join(cur_dir, 'uniq_vectorindexer.yml')
    doc_yaml = os.path.join(cur_dir, 'uniq_docindexer.yml')
    flow = Flow(restful=restful).add(uses=vec_yaml).add(uses=doc_yaml)

    # Both batches are validated before the Flow is started.
    Client.check_input(docs[:10])
    Client.check_input(docs)

    # First session: only the first ten documents.
    with flow:
        flow.index(docs[:10])

    # Second session on the same Flow object: the complete list.
    with flow:
        flow.index(docs)

    print(f' random_workspace {random_workspace}')

    vec_path = (
        random_workspace / 'inc_vecindexer' / 'vec_idx-0' / 'vec_idx.bin'
    )
    with BaseExecutor.load(vec_path) as vec_indexer:
        assert isinstance(vec_indexer, NumpyIndexer)
        assert vec_indexer._size == expected_unique

    doc_path = (
        random_workspace / 'inc_docindexer' / 'doc_idx-0' / 'doc_idx.bin'
    )
    with BaseExecutor.load(doc_path) as kv_indexer:
        assert isinstance(kv_indexer, BinaryPbIndexer)
        assert kv_indexer._size == expected_unique
Exemple #4
0
def test_input_files(patterns, recursive, size, sampling_rate, read_mode):
    """Validate the output of ``from_files`` with ``Client.check_input``.

    All five arguments are forwarded unchanged, as keyword arguments, to
    ``from_files``; the test passes when no exception propagates.

    NOTE(review): the arguments are presumably supplied by a pytest
    parametrization or fixtures outside this view -- confirm.
    """
    file_options = dict(
        patterns=patterns,
        recursive=recursive,
        size=size,
        sampling_rate=sampling_rate,
        read_mode=read_mode,
    )
    file_input = from_files(**file_options)
    Client.check_input(file_input)
Exemple #5
0
def test_querylang_request():
    """Pass a dict-built ``QueryLang`` as ``queryset`` to ``check_input``.

    The query language is described by a plain dict naming ``SliceQL`` with
    ``start=1``, ``end=4`` and ``priority=1``; the input is
    ``random_docs(10)``. The test passes when no exception propagates.
    """
    slice_spec = {
        'name': 'SliceQL',
        'parameters': {'start': 1, 'end': 4},
        'priority': 1,
    }
    query_set = QueryLang(slice_spec)
    Client.check_input(random_docs(10), queryset=query_set)
Exemple #6
0
def test_check_input_fail(inputs):
    """Expect ``Client.check_input(inputs)`` to raise ``BadClientInput``.

    NOTE(review): ``inputs`` is presumably provided by a pytest
    parametrization outside this view -- confirm.
    """
    expect_rejection = pytest.raises(BadClientInput)
    with expect_rejection:
        Client.check_input(inputs)
Exemple #7
0
def test_check_input_success(inputs):
    """Call ``Client.check_input(inputs)``; pass when nothing is raised.

    NOTE(review): ``inputs`` is presumably provided by a pytest
    parametrization outside this view -- confirm.
    """
    validate = Client.check_input
    validate(inputs)
Exemple #8
0
def test_querylang_request():
    """Pass a ``SliceQL``-built ``QueryLang`` as ``queryset`` to ``check_input``.

    The query language wraps ``SliceQL(start=1, end=4, priority=1)``; the
    input is ``random_docs(10)``. The test passes when no exception
    propagates.
    """
    slice_driver = SliceQL(start=1, end=4, priority=1)
    query_set = QueryLang(slice_driver)
    Client.check_input(random_docs(10), queryset=query_set)
Exemple #9
0
def test_check_input_fail(inputs):
    """Expect ``check_input`` on a ``Client`` instance to raise ``BadClientInput``.

    The client is constructed with ``host='localhost'`` and
    ``port_jinad=12345`` before the expectation block, so only the
    ``check_input`` call is covered by ``pytest.raises``.
    """
    endpoint = {'host': 'localhost', 'port_jinad': 12345}
    jinad_client = Client(**endpoint)
    with pytest.raises(BadClientInput):
        jinad_client.check_input(inputs)
Exemple #10
0
def test_check_input_success(inputs):
    """Call ``check_input`` on a ``Client`` instance; pass when nothing is raised.

    The client is constructed with ``host='localhost'`` and
    ``port_jinad=12345``.
    """
    endpoint = {'host': 'localhost', 'port_jinad': 12345}
    jinad_client = Client(**endpoint)
    jinad_client.check_input(inputs)
Exemple #11
0
def test_input_numpy(array):
    """Validate the output of ``_input_ndarray(array)`` with ``Client.check_input``.

    The test passes when no exception propagates.
    """
    array_input = _input_ndarray(array)
    Client.check_input(array_input)
Exemple #12
0
def test_input_files_with_invalid_read_mode():
    """Expect ``BadClientInput`` for ``_input_files`` with ``read_mode='invalid'``.

    Both the ``_input_files`` call and the ``Client.check_input`` call sit
    inside the ``pytest.raises`` block, so the exception may come from either.
    """
    bad_options = {'patterns': '*.*', 'read_mode': 'invalid'}
    with pytest.raises(BadClientInput):
        bad_input = _input_files(**bad_options)
        Client.check_input(bad_input)
Exemple #13
0
def test_input_numpy(array):
    """Validate ``DocumentArray.from_ndarray(array)`` with ``Client.check_input``.

    The test passes when no exception propagates.
    """
    array_docs = DocumentArray.from_ndarray(array)
    Client.check_input(array_docs)