def add_default_kwargs(kwargs: Dict):
    """Inject the top-k query-language defaults into ``kwargs`` in place.

    When ``kwargs['top_k']`` is present and not ``None``, two ``QueryLang``
    entries are added to ``kwargs['queryset']``: a ``SliceQL`` with
    ``end=top_k`` and a ``VectorSearchDriver`` with ``top_k=top_k``, both at
    priority 1. Otherwise ``kwargs`` is left untouched.

    An existing ``queryset`` is handled as follows:

    * missing or ``None``: replaced by the two default entries;
    * a tuple: replaced by a new list holding its items plus the defaults;
    * anything exposing ``extend`` (e.g. a list): extended in place;
    * any other single object: wrapped into a new list with the defaults.

    :param kwargs: keyword arguments to update; modified in place.
    """
    # TODO: refactor it into load from config file
    top_k = kwargs.get('top_k')
    if top_k is None:
        return

    # associate all VectorSearchDriver and SliceQL driver to use top_k
    from jina import QueryLang

    topk_ql = [
        QueryLang(
            {
                'name': 'SliceQL',
                'priority': 1,
                'parameters': {'end': top_k},
            }
        ),
        QueryLang(
            {
                'name': 'VectorSearchDriver',
                'priority': 1,
                'parameters': {'top_k': top_k},
            }
        ),
    ]

    existing = kwargs.get('queryset')
    if existing is None:
        # covers both "key absent" and an explicit ``queryset=None``
        kwargs['queryset'] = topk_ql
    elif isinstance(existing, tuple):
        # tuples are immutable, so build a fresh list instead of extending
        kwargs['queryset'] = [*existing, *topk_ql]
    elif hasattr(existing, 'extend'):
        # same in-place behaviour as before for lists and list-like objects
        existing.extend(topk_ql)
    else:
        # a single query object was passed instead of a sequence
        kwargs['queryset'] = [existing, *topk_ql]
def test_topk_override(config, mocker):
    """Check that a ``top_k`` supplied through ``queryset`` sets the match count.

    Indexes 100 random documents with the flow loaded from ``flow.yml``,
    then searches with 3 query documents while passing a
    ``VectorSearchDriver`` query that sets ``top_k`` to 11. The response must
    contain 3 documents with 11 matches each.

    :param config: pytest fixture; not referenced in the body.
    :param mocker: pytest-mock fixture used to build the ``on_done`` callback.
    """
    n_queries = 3
    overridden_top_k = 11

    def validate(resp):
        results = resp.search.docs
        assert len(results) == n_queries
        assert all(len(result.matches) == overridden_top_k for result in results)

    # Making queryset
    override_spec = {
        'name': 'VectorSearchDriver',
        'parameters': {'top_k': overridden_top_k},
        'priority': 1,
    }
    top_k_queryset = QueryLang(override_spec)

    with Flow.load_config('flow.yml') as index_flow:
        index_flow.index(inputs=random_docs(100))

    on_done = mocker.Mock()
    with Flow.load_config('flow.yml') as search_flow:
        search_flow.search(
            inputs=random_docs(n_queries),
            on_done=on_done,
            queryset=[top_k_queryset],
        )

    on_done.assert_called_once()
    validate_callback(on_done, validate)
def test_request_extend_queryset():
    """Check that ``Request.queryset`` accepts several kinds of query objects.

    Three equivalent ``SliceQL`` queries (``start=3``, ``end=4``) are built in
    different forms: a ``SliceQL`` instance, a ``QueryLang`` wrapping a
    ``SliceQL``, and a raw ``jina_pb2.QueryLangProto``. They are added to a
    request both in one ``extend`` call and one by one via ``append``; in each
    case the stored entries must carry priorities 0, 1, 2 in order and the
    same ``start``/``end`` parameters. Extending with a non-iterable (``1``)
    must raise ``TypeError``.
    """
    # q1: no explicit priority; the loop below expects it to read back as 0
    q1 = SliceQL(start=3, end=4)
    # q2: explicit priority 1, wrapped in a QueryLang
    q2 = QueryLang(SliceQL(start=3, end=4, priority=1))
    # q3: built directly on the protobuf message with priority 2
    q3 = jina_pb2.QueryLangProto()
    q3.name = 'SliceQL'
    q3.parameters['start'] = 3
    q3.parameters['end'] = 4
    q3.priority = 2

    # bulk insertion through extend()
    r = Request()
    r.queryset.extend([q1, q2, q3])
    assert isinstance(r.queryset, Sequence)
    for idx, q in enumerate(r.queryset):
        # position in the queryset must match the priority of each entry
        assert q.priority == idx
        assert q.parameters['start'] == 3
        assert q.parameters['end'] == 4

    # q1 and q2 refer to the same
    # NOTE(review): this counts the distinct id() values of the objects
    # yielded while iterating the queryset. The result presumably depends on
    # how the queryset creates the objects it yields; confirm before
    # restructuring this expression.
    assert len({id(q) for q in r.queryset}) == 2

    # extending a second request from an existing queryset
    r2 = Request()
    r2.queryset.extend(r.queryset)
    assert len({id(q) for q in r2.queryset}) == 2

    # one-by-one insertion through append()
    r = Request()
    r.queryset.append(q1)
    r.queryset.append(q2)
    r.queryset.append(q3)
    for idx, q in enumerate(r.queryset):
        assert q.priority == idx
        assert q.parameters['start'] == 3
        assert q.parameters['end'] == 4

    # a non-iterable argument must be rejected
    with pytest.raises(TypeError):
        r.queryset.extend(1)
def test_queryset_with_struct(random_workspace, mocker):
    """Check that a ``FilterQL`` passed via ``queryset`` replaces the flow's lookups.

    Four documents are created with alternating tags ``label1``/``label2``.
    The flow's own ``FilterQL`` accepts both labels, so a plain ``index`` call
    must return all four documents. A second ``index`` call passes a
    ``FilterQL`` query restricted to ``label2`` and must return half of them.

    :param random_workspace: pytest fixture; not referenced in the body.
    :param mocker: pytest-mock fixture used to build the ``on_done`` callbacks.
    """
    total_docs = 4

    def make_doc(doc_id):
        # even ids get 'label1', odd ids get 'label2'
        proto = jina_pb2.DocumentProto()
        proto.text = f'I am doc{doc_id}'
        NdArray(proto.embedding).value = np.array([doc_id])
        proto.tags['label'] = f'label{doc_id % 2 + 1}'
        return proto

    docs = [make_doc(doc_id) for doc_id in range(total_docs)]

    accept_both_labels = '- !FilterQL | {lookups: {tags__label__in: [label1, label2]}, traversal_paths: [r]}'
    flow = Flow().add(uses=accept_both_labels)

    def validate_all_docs(resp):
        assert len(resp.docs) == total_docs

    def validate_label2_docs(resp):
        assert len(resp.docs) == total_docs / 2

    unfiltered_done = mocker.Mock()
    filtered_done = mocker.Mock()

    with flow:
        # keep all the docs
        flow.index(docs, on_done=unfiltered_done)

        # keep only the docs with label2
        label2_only = {
            'name': 'FilterQL',
            'priority': 1,
            'parameters': {
                'lookups': {'tags__label': 'label2'},
                'traversal_paths': ['r'],
            },
        }
        qs = QueryLang(label2_only)
        flow.index(docs, queryset=qs, on_done=filtered_done)

    unfiltered_done.assert_called_once()
    validate_callback(unfiltered_done, validate_all_docs)

    filtered_done.assert_called_once()
    validate_callback(filtered_done, validate_label2_docs)
def test_request_extend_queryset():
    """Check that ``Request.extend_queryset`` accepts several kinds of query objects.

    Three equivalent ``SliceQL`` queries (``start=3``, ``end=4``) are built as
    a ``SliceQL`` instance, a ``QueryLang`` wrapping a ``SliceQL``, and a raw
    ``jina_pb2.QueryLangProto``. They are added as one list and then one at a
    time; both ways the stored entries must carry priorities 0, 1, 2 in order.
    Passing ``1`` must raise ``TypeError``.
    """

    def assert_slices_in_priority_order(request):
        for position, query in enumerate(request.queryset):
            assert query.priority == position
            assert query.parameters['start'] == 3
            assert query.parameters['end'] == 4

    from_driver = SliceQL(start=3, end=4)
    from_wrapper = QueryLang(SliceQL(start=3, end=4, priority=1))
    from_proto = jina_pb2.QueryLangProto()
    from_proto.name = 'SliceQL'
    from_proto.parameters['start'] = 3
    from_proto.parameters['end'] = 4
    from_proto.priority = 2

    # all three in a single call
    r = Request()
    r.extend_queryset([from_driver, from_wrapper, from_proto])
    assert_slices_in_priority_order(r)

    # one query per call
    r = Request()
    for single_query in (from_driver, from_wrapper, from_proto):
        r.extend_queryset(single_query)
    assert_slices_in_priority_order(r)

    # an unsupported argument type must be rejected
    with pytest.raises(TypeError):
        r.extend_queryset(1)
def test_queryset_with_struct(random_workspace):
    """Check that a ``FilterQL`` passed via ``queryset`` replaces the flow's lookups.

    Four documents are created with alternating tags ``label1``/``label2``.
    The flow's own ``FilterQL`` accepts both labels, so a plain ``index`` call
    must yield all four documents. A second ``index`` call passes a
    ``FilterQL`` restricted to ``label2`` and must yield half of them.

    :param random_workspace: pytest fixture; not referenced in the body.
    """
    total_docs = 4

    def make_doc(doc_id):
        # even ids get 'label1', odd ids get 'label2'
        proto = jina_pb2.DocumentProto()
        proto.text = f'I am doc{doc_id}'
        NdArray(proto.embedding).value = np.array([doc_id])
        proto.tags['label'] = f'label{doc_id % 2 + 1}'
        return proto

    docs = [make_doc(doc_id) for doc_id in range(total_docs)]

    accept_both_labels = '- !FilterQL | {lookups: {tags__label__in: [label1, label2]}, traversal_paths: [r]}'
    flow = Flow().add(uses=accept_both_labels)

    def validate_all_docs(resp):
        assert len(resp.docs) == total_docs

    def validate_label2_docs(resp):
        assert len(resp.docs) == total_docs / 2

    with flow:
        # keep all the docs
        flow.index(docs, output_fn=validate_all_docs, callback_on='body')

        # keep only the docs with label2
        label2_filter = FilterQL(
            priority=1,
            lookups={'tags__label': 'label2'},
            traversal_paths=['r'],
        )
        qs = QueryLang(label2_filter)
        flow.index(
            docs,
            queryset=qs,
            output_fn=validate_label2_docs,
            callback_on='body',
        )
def test_topk_override(config):
    """Run a search whose ``top_k`` is overridden through ``queryset``.

    The override value is read from the ``JINA_TOPK_OVERRIDE`` environment
    variable and the number of query documents from ``JINA_NDOCS``. 100 random
    documents are indexed with the flow from ``flow.yml``; the search results
    are handed to ``validate_override_results``.

    :param config: pytest fixture; not referenced in the body
        (presumably it sets the environment variables; confirm).
    """
    # Making queryset
    overridden_top_k = int(os.environ['JINA_TOPK_OVERRIDE'])
    override_driver = VectorSearchDriver(top_k=overridden_top_k, priority=1)
    top_k_queryset = QueryLang(override_driver)

    with Flow.load_config('flow.yml') as index_flow:
        index_flow.index(input_fn=random_docs(100))

    with Flow.load_config('flow.yml') as search_flow:
        n_queries = int(os.environ['JINA_NDOCS'])
        search_flow.search(
            input_fn=random_docs(n_queries),
            output_fn=validate_override_results,
            queryset=[top_k_queryset],
        )
def test_topk_override(config, mocker):
    """Check that a ``top_k`` supplied through ``queryset`` sets the match count.

    Indexes 100 random documents with the flow loaded from ``flow.yml``,
    then searches with 3 query documents while passing a
    ``VectorSearchDriver`` whose ``top_k`` is 11. The ``on_done`` callback must
    run exactly once and see 3 documents with 11 matches each.

    :param config: pytest fixture; not referenced in the body.
    :param mocker: pytest-mock fixture used to count callback invocations.
    """
    n_queries = 3
    overridden_top_k = 11
    call_tracker = mocker.Mock()

    def validate(resp):
        # record the invocation first so the call count is kept even if an
        # assertion below fails
        call_tracker()
        results = resp.search.docs
        assert len(results) == n_queries
        assert all(len(result.matches) == overridden_top_k for result in results)

    # Making queryset
    override_driver = VectorSearchDriver(top_k=overridden_top_k, priority=1)
    top_k_queryset = QueryLang(override_driver)

    with Flow.load_config('flow.yml') as index_flow:
        index_flow.index(input_fn=random_docs(100))

    with Flow.load_config('flow.yml') as search_flow:
        search_flow.search(
            input_fn=random_docs(n_queries),
            on_done=validate,
            queryset=[top_k_queryset],
        )

    call_tracker.assert_called_once()
import numpy as np import pytest from jina import Document, Request, QueryLang, NdArray from jina.types.score import NamedScore from jina.types.arrays import ChunkArray from jina.types.arrays.match import MatchArray @pytest.mark.parametrize( 'obj', [ Document(), Request(), QueryLang(), NamedScore(), NdArray(), MatchArray([Document()], Document()), ChunkArray([Document()], Document()), ], ) def test_builtin_str_repr_no_content(obj): print(obj) print(f'{obj!r}') @pytest.mark.parametrize( 'obj', [ Document(content='123', chunks=[Document(content='abc')]), QueryLang({
def queryset(self):
    """Return a one-element list holding the top-k query.

    The single ``QueryLang`` is named ``'MockVectorSearchDriverWithQS'``, has
    priority 1 and carries the parameter ``top_k = 4``.
    """
    top_k_query = QueryLang()
    top_k_query.priority = 1
    top_k_query.name = 'MockVectorSearchDriverWithQS'
    top_k_query.parameters['top_k'] = 4
    return [top_k_query]