Exemple #1
0
def test_exec_fn_annotation():
    """Check the ndarray flags ``EncodeDriver`` derives from method annotations.

    ``foo`` annotates ``a`` with the string ``'np.ndarray'``, ``b`` with the
    ``np.ndarray`` type itself and ``c`` with a non-array type; its return
    annotation is the string ``'np.ndarray'``.  After attaching, the driver is
    expected to flag the return value, ``a`` and ``b`` as ndarrays, but not
    ``c``.
    """
    class MyExecutor(BaseEncoder):
        # ``np.float`` was merely an alias of the builtin ``float``; it was
        # deprecated in NumPy 1.20 and removed in 1.24, where evaluating the
        # annotation raises AttributeError.  The builtin is the exact
        # equivalent and works on every NumPy version.
        def foo(self, a: 'np.ndarray', b: np.ndarray, c: float, *args,
                **kwargs) -> 'np.ndarray':
            pass

    # Named ``executor`` rather than ``exec`` to avoid shadowing the builtin.
    executor = MyExecutor()
    driver = EncodeDriver(method='foo', strict_method_args=False)

    driver.attach(executor, runtime=None)

    assert driver._exec_fn_return_is_ndarray
    assert driver._exec_fn_required_keys_is_ndarray == [True, True, False]
def test_encode_driver_batching_with_chunks(request_batch_size,
                                            driver_batch_size, num_chunks,
                                            num_chunks_chunks, tmpdir):
    """Run a search Flow through a batching ``EncodeDriver`` and check embeddings.

    A ``MockEncoder`` is wired to a single ``EncodeDriver`` for
    ``SearchRequest``, saved to a YAML file under ``tmpdir`` and loaded in a
    ``Flow``.  Every response must carry either a full request batch or the
    trailing partial batch, and every document, chunk and chunk-of-chunk in
    it must have an embedding value.

    :param request_batch_size: number of documents sent per search request
    :param driver_batch_size: ``batch_size`` handed to the driver and encoder
    :param num_chunks: chunks generated per document
    :param num_chunks_chunks: chunks generated per chunk
    :param tmpdir: directory in which the executor YAML is written
    """
    num_docs = 1315
    # Size of the trailing, partially filled request.  This equals the former
    # ``num_docs % (num_requests * request_batch_size)`` whenever at least one
    # full request fits, but does not divide by zero when
    # ``request_batch_size`` exceeds ``num_docs``.
    num_docs_last_req_batch = num_docs % request_batch_size

    def validate_response(resp):
        # A response holds either a full batch or the trailing remainder.
        received = len(resp.search.docs)
        assert received in (request_batch_size, num_docs_last_req_batch)
        for doc in resp.search.docs:
            assert NdArray(doc.embedding).value is not None
            for chunk in doc.chunks:
                assert NdArray(chunk.embedding).value is not None
                for chunk_chunk in chunk.chunks:
                    assert NdArray(chunk_chunk.embedding).value is not None

    def fail_if_error(resp):
        # Raised explicitly (instead of ``assert False``) so the failure
        # carries a message and survives ``python -O``.
        raise AssertionError('search request reported an error')

    # Both totals count the documents themselves, their chunks and the
    # chunks of those chunks.
    docs_per_request = (request_batch_size +
                        request_batch_size * num_chunks +
                        request_batch_size * num_chunks * num_chunks_chunks)
    total_docs = (num_docs + num_docs * num_chunks +
                  num_docs * num_chunks * num_chunks_chunks)

    encoder = MockEncoder(driver_batch_size=driver_batch_size,
                          num_docs_in_same_request=docs_per_request,
                          total_num_docs=total_docs)

    # NOTE(review): 'r', 'c', 'cc' presumably select root documents, chunks
    # and chunks of chunks -- confirm against the driver documentation.
    driver = EncodeDriver(batch_size=driver_batch_size,
                          traversal_paths=('r', 'c', 'cc'))

    # Replace whatever drivers the encoder has with the one under test.
    encoder._drivers.clear()
    encoder._drivers['SearchRequest'] = [driver]

    executor_yml_file = os.path.join(tmpdir, 'executor.yml')
    encoder.save_config(executor_yml_file)

    with Flow().add(uses=executor_yml_file) as f:
        f.search(input_fn=document_generator(num_docs, num_chunks,
                                             num_chunks_chunks),
                 batch_size=request_batch_size,
                 on_done=validate_response,
                 on_error=fail_if_error)
Exemple #3
0
def test_extract_bad_fields_no_strict_args(mocker):
    """Check that ``strict_method_args=False`` skips ``encode`` without raising.

    The executor's ``encode`` takes a single ``hello`` argument.  Applying the
    non-strict driver to ten random documents must complete without an
    exception and must never invoke ``encode``.
    """
    call_probe = mocker.Mock()

    class MyExecutor(BaseEncoder):
        def encode(self, hello):
            # Records the call; the test expects this never to happen.
            call_probe()

    executor = MyExecutor()
    driver = EncodeDriver(strict_method_args=False)
    driver.attach(executor, runtime=None)

    doc_array = DocumentArray(list(random_docs(10)))
    driver._apply_all(doc_array)

    call_probe.assert_not_called()
Exemple #4
0
def test_exec_fn_arbitrary_name(mocker):
    """Check that ``EncodeDriver(method='foo')`` invokes the executor's ``foo``.

    ``foo`` asserts that its ``id`` argument is a list whose first element is
    a string, then records the call on a mock.  The test passes when the mock
    was called after applying the driver to ten random documents.
    """
    call_probe = mocker.Mock()

    class MyExecutor(BaseEncoder):
        # The parameter keeps the name ``id`` even though it shadows the
        # builtin -- presumably the driver matches it against the Document
        # attribute of the same name (confirm against the driver).
        def foo(self, id):
            assert isinstance(id[0], str)
            assert isinstance(id, list)
            call_probe()

    executor = MyExecutor()
    driver = EncodeDriver(method='foo')
    driver.attach(executor, runtime=None)

    doc_array = DocumentArray(list(random_docs(10)))
    driver._apply_all(doc_array)

    call_probe.assert_called()
Exemple #5
0
def test_extract_multi_fields(mocker):
    """Check that ``encode`` can request several arguments at once.

    ``encode`` takes ``id`` and ``embedding``; it records the call and then
    asserts both arguments are lists, with a ``str`` as the first id and an
    ``np.ndarray`` as the first embedding.  The test passes when the mock was
    called after applying a default driver to ten random documents.
    """
    call_probe = mocker.Mock()

    class MyExecutor(BaseEncoder):
        def encode(self, id, embedding):
            # Record first, so the call is registered before any check runs.
            call_probe()
            assert isinstance(id, list)
            assert isinstance(embedding, list)
            assert isinstance(id[0], str)
            assert isinstance(embedding[0], np.ndarray)

    executor = MyExecutor()
    driver = EncodeDriver()
    driver.attach(executor, runtime=None)

    doc_array = DocumentArray(list(random_docs(10)))
    driver._apply_all(doc_array)

    call_probe.assert_called()
Exemple #6
0
def test_exec_fn_return_doc(mocker):
    """Check that ``Document`` objects returned by ``encode`` update the inputs.

    ``encode`` returns one ``Document(mime_type='image/png')`` per received
    id.  After applying the driver, every document in the array must report
    ``mime_type == 'image/png'``.
    """
    call_probe = mocker.Mock()

    class MyExecutor(BaseEncoder):
        def encode(self, id):
            call_probe()
            # The same Document instance is repeated once per input id.
            return [Document(mime_type='image/png')] * len(id)

    executor = MyExecutor()
    driver = EncodeDriver()
    driver.attach(executor, runtime=None)

    doc_array = DocumentArray(list(random_docs(10)))
    driver._apply_all(doc_array)

    call_probe.assert_called()
    assert all(doc.mime_type == 'image/png' for doc in doc_array)
def test_exec_fn_return_dict(mocker):
    """Check that dicts returned by ``encode`` update the input documents.

    ``encode`` returns one ``{'id': 'hello'}`` dict per received id.  After
    applying the driver, every document in the array must have ``id`` equal
    to ``'hello'``.
    """
    encode_mock = mocker.Mock()

    class MyExecutor(BaseEncoder):
        def encode(self, id):
            encode_mock()
            # The same dict instance is repeated once per input id.
            return [{'id': 'hello'}] * len(id)

    # Named ``executor`` rather than ``exec`` to avoid shadowing the builtin.
    executor = MyExecutor()
    driver = EncodeDriver()
    driver.attach(executor, runtime=None)

    # ``DocumentArray`` is the container every other test in this file uses;
    # this test previously used ``DocumentSet``.
    doc_array = DocumentArray(list(random_docs(10)))

    driver._apply_all(doc_array)
    encode_mock.assert_called()

    for doc in doc_array:
        assert doc.id == 'hello'
Exemple #8
0
def test_extract_bad_fields(mocker):
    """Check the ``AttributeError`` raised for unusable ``encode`` arguments.

    Three executors are tried against the same ten random documents with a
    default ``EncodeDriver``; each must raise ``AttributeError`` with a
    specific message and must never reach the body of ``encode``:

    * ``data``     -> 'is now deprecated and not a valid argument'
    * ``hello``    -> 'are invalid Document attributes'
    * ``mimeType`` -> 'you give them in CamelCase'
    """
    call_probe = mocker.Mock()

    def attach_fresh_driver(executor_cls):
        # Instantiate the executor and bind it to a new default driver.
        executor = executor_cls()
        driver = EncodeDriver()
        driver.attach(executor, runtime=None)
        return driver

    # --- scenario 1: argument named ``data`` ---
    class MyExecutor(BaseEncoder):
        def encode(self, data):
            call_probe()

    driver = attach_fresh_driver(MyExecutor)
    doc_array = DocumentArray(list(random_docs(10)))

    with pytest.raises(AttributeError,
                       match='is now deprecated and not a valid argument'):
        driver._apply_all(doc_array)
    call_probe.assert_not_called()

    # --- scenario 2: argument named ``hello`` ---
    class MyExecutor(BaseEncoder):
        def encode(self, hello):
            call_probe()

    driver = attach_fresh_driver(MyExecutor)

    with pytest.raises(AttributeError,
                       match='are invalid Document attributes'):
        driver._apply_all(doc_array)
    call_probe.assert_not_called()

    # --- scenario 3: camelCase argument ``mimeType`` ---
    class MyExecutor(BaseEncoder):
        def encode(self, mimeType):
            call_probe()

    driver = attach_fresh_driver(MyExecutor)

    with pytest.raises(AttributeError, match='you give them in CamelCase'):
        driver._apply_all(doc_array)
    call_probe.assert_not_called()