def test_exec_fn_annotation():
    """Check the annotation flags ``EncodeDriver`` exposes after attaching.

    The executor method ``foo`` annotates ``a`` with the string
    ``'np.ndarray'``, ``b`` with the ``np.ndarray`` class and ``c`` with
    ``float``; its return annotation is the string ``'np.ndarray'``.
    After ``attach`` the driver is expected to flag the return value as an
    ndarray and the three named arguments as ``[True, True, False]``.
    """

    class MyExecutor(BaseEncoder):
        # ``c`` was previously annotated ``np.float``. That name was only an
        # alias of the builtin ``float``; it was deprecated in NumPy 1.20 and
        # removed in 1.24, where merely evaluating it raises AttributeError
        # while this class body is being defined. The builtin is the exact
        # same object on old NumPy releases and keeps working on new ones.
        def foo(self, a: 'np.ndarray', b: np.ndarray, c: float, *args, **kwargs) -> 'np.ndarray':
            pass

    executor = MyExecutor()
    driver = EncodeDriver(method='foo', strict_method_args=False)
    driver.attach(executor, runtime=None)

    assert driver._exec_fn_return_is_ndarray
    assert driver._exec_fn_required_keys_is_ndarray == [True, True, False]
def test_encode_driver_batching_with_chunks(request_batch_size, driver_batch_size, num_chunks, num_chunks_chunks, tmpdir):
    """Run a search Flow through an ``EncodeDriver`` that traverses 'r', 'c' and 'cc'.

    1315 documents are sent in requests of ``request_batch_size``. Every
    response must contain either a full request batch or the remainder
    batch, and every document, chunk and chunk-of-chunk in it must carry a
    non-``None`` embedding. Any error callback fails the test.
    """
    num_docs = 1315
    num_requests = int(num_docs / request_batch_size)
    # Size of the final, possibly partial, request.
    num_docs_last_req_batch = num_docs % (num_requests * request_batch_size)

    def has_embedding(item):
        return NdArray(item.embedding).value is not None

    def validate_response(resp):
        received = len(resp.search.docs)
        assert received == request_batch_size or received == num_docs_last_req_batch
        for root in resp.search.docs:
            assert has_embedding(root)
            for child in root.chunks:
                assert has_embedding(child)
                for grandchild in child.chunks:
                    assert has_embedding(grandchild)

    def fail_if_error(resp):
        assert False

    # Roots + chunks + chunks-of-chunks, counted per request and in total.
    per_request_count = (
        request_batch_size
        + request_batch_size * num_chunks
        + request_batch_size * num_chunks * num_chunks_chunks
    )
    overall_count = (
        num_docs
        + num_docs * num_chunks
        + num_docs * num_chunks * num_chunks_chunks
    )
    encoder = MockEncoder(
        driver_batch_size=driver_batch_size,
        num_docs_in_same_request=per_request_count,
        total_num_docs=overall_count,
    )

    driver = EncodeDriver(batch_size=driver_batch_size, traversal_paths=('r', 'c', 'cc'))
    # Replace whatever drivers the encoder holds with the one under test.
    encoder._drivers.clear()
    encoder._drivers['SearchRequest'] = [driver]

    executor_yml_file = os.path.join(tmpdir, 'executor.yml')
    encoder.save_config(executor_yml_file)

    flow = Flow().add(uses=executor_yml_file)
    with flow as f:
        f.search(
            input_fn=document_generator(num_docs, num_chunks, num_chunks_chunks),
            batch_size=request_batch_size,
            on_done=validate_response,
            on_error=fail_if_error,
        )
def test_extract_bad_fields_no_strict_args(mocker):
    """With ``strict_method_args=False`` the driver must not call ``encode``.

    The executor's ``encode`` takes a single argument named ``hello``. The
    driver is applied to ten random documents; the call is expected to
    return normally and the spy inside ``encode`` must never fire.
    """
    call_spy = mocker.Mock()

    class MyExecutor(BaseEncoder):
        def encode(self, hello):
            call_spy()

    executor = MyExecutor()
    driver = EncodeDriver(strict_method_args=False)
    driver.attach(executor, runtime=None)

    doc_array = DocumentArray(list(random_docs(10)))
    driver._apply_all(doc_array)

    call_spy.assert_not_called()
def test_exec_fn_arbitrary_name(mocker):
    """Drive an executor method selected via ``EncodeDriver(method='foo')``.

    ``foo`` asserts it receives ``id`` as a list whose first element is a
    string; the test then checks that ``foo`` was actually invoked.
    """
    call_spy = mocker.Mock()

    class MyExecutor(BaseEncoder):
        # NOTE(review): the parameter name ``id`` presumably selects the
        # Document field the driver passes in, so it must stay as is even
        # though it shadows the builtin.
        def foo(self, id):
            assert isinstance(id[0], str)
            assert isinstance(id, list)
            call_spy()

    executor = MyExecutor()
    driver = EncodeDriver(method='foo')
    driver.attach(executor, runtime=None)

    doc_array = DocumentArray(list(random_docs(10)))
    driver._apply_all(doc_array)

    call_spy.assert_called()
def test_extract_multi_fields(mocker):
    """Check that ``encode`` can receive two arguments at once.

    ``encode`` takes ``id`` and ``embedding`` and asserts both arrive as
    lists, holding strings and ``np.ndarray`` objects respectively. The
    test also verifies that ``encode`` was invoked.
    """
    call_spy = mocker.Mock()

    class MyExecutor(BaseEncoder):
        def encode(self, id, embedding):
            # Record the call first so it counts even if an assertion fails.
            call_spy()
            assert isinstance(id, list)
            assert isinstance(embedding, list)
            assert isinstance(id[0], str)
            assert isinstance(embedding[0], np.ndarray)

    executor = MyExecutor()
    driver = EncodeDriver()
    driver.attach(executor, runtime=None)

    doc_array = DocumentArray(list(random_docs(10)))
    driver._apply_all(doc_array)

    call_spy.assert_called()
def test_exec_fn_return_doc(mocker):
    """Check that ``Document`` objects returned by ``encode`` reach the input docs.

    ``encode`` returns one ``Document(mime_type='image/png')`` per received
    id. After the driver runs, every document in the array must report
    that mime type.
    """
    call_spy = mocker.Mock()

    class MyExecutor(BaseEncoder):
        def encode(self, id):
            call_spy()
            # A single Document instance repeated once per input id.
            return [Document(mime_type='image/png')] * len(id)

    executor = MyExecutor()
    driver = EncodeDriver()
    driver.attach(executor, runtime=None)

    doc_array = DocumentArray(list(random_docs(10)))
    driver._apply_all(doc_array)

    call_spy.assert_called()
    for updated in doc_array:
        assert updated.mime_type == 'image/png'
def test_exec_fn_return_dict(mocker):
    """Check that dicts returned by ``encode`` reach the input docs.

    ``encode`` returns one ``{'id': 'hello'}`` dict per received id. After
    the driver runs, every document in the array must have ``id == 'hello'``.
    """
    encode_mock = mocker.Mock()

    class MyExecutor(BaseEncoder):
        def encode(self, id):
            encode_mock()
            # A single dict instance repeated once per input id.
            return [{'id': 'hello'}] * len(id)

    executor = MyExecutor()
    driver = EncodeDriver()
    driver.attach(executor, runtime=None)

    docs = list(random_docs(10))
    # Use DocumentArray like every sibling test in this module; this was the
    # only test still building its container with ``DocumentSet``.
    ds = DocumentArray(docs)
    driver._apply_all(ds)

    encode_mock.assert_called()
    for d in ds:
        assert d.id == 'hello'
def test_extract_bad_fields(mocker):
    """Check that invalid ``encode`` argument names raise ``AttributeError``.

    Three executors are tried in turn, whose ``encode`` takes ``data``,
    ``hello`` and ``mimeType`` respectively. Each must make
    ``_apply_all`` raise ``AttributeError`` with its own message, and
    ``encode`` itself must never be called.
    """
    call_spy = mocker.Mock()
    doc_array = DocumentArray(list(random_docs(10)))

    def expect_rejection(executor_cls, message_pattern):
        # Fresh executor and default (strict) driver for every scenario.
        executor = executor_cls()
        driver = EncodeDriver()
        driver.attach(executor, runtime=None)
        with pytest.raises(AttributeError, match=message_pattern):
            driver._apply_all(doc_array)
        call_spy.assert_not_called()

    class MyExecutor(BaseEncoder):
        def encode(self, data):
            call_spy()

    expect_rejection(MyExecutor, 'is now deprecated and not a valid argument')

    class MyExecutor(BaseEncoder):
        def encode(self, hello):
            call_spy()

    expect_rejection(MyExecutor, 'are invalid Document attributes')

    class MyExecutor(BaseEncoder):
        def encode(self, mimeType):
            call_spy()

    expect_rejection(MyExecutor, 'you give them in CamelCase')