def test_input_files(patterns, recursive, size, sampling_rate, read_mode):
    """File-based input produced by ``_input_files`` must pass client validation as CONTENT."""
    file_input = _input_files(
        patterns=patterns,
        recursive=recursive,
        size=size,
        sampling_rate=sampling_rate,
        read_mode=read_mode,
    )
    Client.check_input(file_input, data_type=DataInputType.CONTENT)
def test_incremental_indexing_sequential_indexers_content_hash(
    random_workspace, restful
):
    """Index overlapping batches through content-hash dedup indexers.

    Only unique documents (by content hash) should end up stored in both the
    vector and the doc indexer.
    """
    total_docs = 20
    docs, _ = get_duplicate_docs(num_docs=total_docs, same_content=False)
    # because the content is % 2 — half of the docs share a content hash
    expected_uniques = 10

    flow = (
        Flow(restful=restful)
        .add(uses=os.path.join(cur_dir, 'uniq_vectorindexer_content_hash.yml'))
        .add(uses=os.path.join(cur_dir, 'uniq_docindexer_content_hash.yml'))
    )

    # sanity-check the inputs before feeding them through the flow
    Client.check_input(docs[:10])
    Client.check_input(docs)

    # first pass indexes a subset, second pass re-sends everything
    with flow:
        flow.index(docs[:10])
    with flow:
        flow.index(docs)

    vec_path = random_workspace / 'inc_vecindexer' / 'vec_idx.bin'
    with BaseExecutor.load(vec_path) as vector_indexer:
        assert isinstance(vector_indexer, NumpyIndexer)
        assert vector_indexer._size == expected_uniques

    doc_path = random_workspace / 'inc_docindexer' / 'doc_idx.bin'
    with BaseExecutor.load(doc_path) as doc_indexer:
        assert isinstance(doc_indexer, BinaryPbIndexer)
        assert doc_indexer._size == expected_uniques
def test_incremental_indexing_sequential_indexers(random_workspace, restful):
    """Index overlapping batches through dedup indexers and verify uniqueness.

    After indexing a subset and then the full (duplicated) set, both the
    vector indexer and the doc indexer must contain only the unique docs.
    """
    total_docs = 20
    duplicate_docs, num_uniq_docs = get_duplicate_docs(num_docs=total_docs)

    f = (
        Flow(restful=restful)
        .add(uses=os.path.join(cur_dir, 'uniq_vectorindexer.yml'))
        .add(uses=os.path.join(cur_dir, 'uniq_docindexer.yml'))
    )

    # sanity-check the inputs before feeding them through the flow
    Client.check_input(duplicate_docs[:10])
    Client.check_input(duplicate_docs)

    # first pass indexes a subset, second pass re-sends everything
    with f:
        f.index(duplicate_docs[:10])
    with f:
        f.index(duplicate_docs)

    # NOTE: removed a leftover debug print of random_workspace here
    with BaseExecutor.load(
        random_workspace / 'inc_vecindexer' / 'vec_idx-0' / 'vec_idx.bin'
    ) as vector_indexer:
        assert isinstance(vector_indexer, NumpyIndexer)
        assert vector_indexer._size == num_uniq_docs

    with BaseExecutor.load(
        random_workspace / 'inc_docindexer' / 'doc_idx-0' / 'doc_idx.bin'
    ) as doc_indexer:
        assert isinstance(doc_indexer, BinaryPbIndexer)
        assert doc_indexer._size == num_uniq_docs
def test_input_files(patterns, recursive, size, sampling_rate, read_mode):
    """Input generated by ``from_files`` must pass the client's validation."""
    generated = from_files(
        patterns=patterns,
        recursive=recursive,
        size=size,
        sampling_rate=sampling_rate,
        read_mode=read_mode,
    )
    Client.check_input(generated)
def test_querylang_request():
    """A dict-defined SliceQL queryset must be accepted by input validation."""
    slice_ql = QueryLang(
        {
            'name': 'SliceQL',
            'parameters': {'start': 1, 'end': 4},
            'priority': 1,
        }
    )
    Client.check_input(random_docs(10), queryset=slice_ql)
def test_check_input_fail(inputs):
    """Malformed inputs must be rejected with ``BadClientInput``."""
    with pytest.raises(BadClientInput):
        Client.check_input(inputs)
def test_check_input_success(inputs):
    """Well-formed inputs must pass validation without raising."""
    Client.check_input(inputs)
def test_querylang_request():
    """A protobuf-built SliceQL queryset must be accepted by input validation."""
    slice_ql = QueryLang(SliceQL(start=1, end=4, priority=1))
    Client.check_input(random_docs(10), queryset=slice_ql)
def test_check_input_fail(inputs):
    """A configured client instance must reject malformed inputs with ``BadClientInput``."""
    jinad_client = Client(host='localhost', port_jinad=12345)
    with pytest.raises(BadClientInput):
        jinad_client.check_input(inputs)
def test_check_input_success(inputs):
    """A configured client instance must accept well-formed inputs without raising."""
    jinad_client = Client(host='localhost', port_jinad=12345)
    jinad_client.check_input(inputs)
def test_input_numpy(array):
    """ndarray input wrapped by ``_input_ndarray`` must pass client validation."""
    ndarray_input = _input_ndarray(array)
    Client.check_input(ndarray_input)
def test_input_files_with_invalid_read_mode():
    """An unsupported ``read_mode`` must surface as ``BadClientInput``."""
    bad_input = _input_files(patterns='*.*', read_mode='invalid')
    with pytest.raises(BadClientInput):
        Client.check_input(bad_input)
def test_input_numpy(array):
    """A ``DocumentArray`` built from an ndarray must pass client validation."""
    doc_array = DocumentArray.from_ndarray(array)
    Client.check_input(doc_array)