def test_queryset_with_struct(random_workspace):
    """Run the same documents through a FilterQL Flow with and without a request queryset.

    Four documents are built; even ids are tagged ``label1`` and odd ids
    ``label2``. The Flow's own FilterQL accepts both labels, so the first
    request is expected to return every document. The second request carries
    a queryset that looks up ``label2`` only, and is expected to return half
    of the documents.

    :param random_workspace: fixture requested by the test; not referenced in the body
    """
    total_docs = 4

    def _make_doc(doc_id):
        # One document per id: text, a 1-element embedding and an alternating label tag.
        doc = jina_pb2.Document()
        doc.text = f'I am doc{doc_id}'
        doc.embedding.CopyFrom(array2pb(np.array([doc_id])))
        doc.tags['label'] = f'label{doc_id%2 + 1}'
        return doc

    def _label2_queryset():
        # Request-level FilterQL parameters restricted to label2.
        query = jina_pb2.QueryLang(name='FilterQL', priority=1)
        query.parameters['lookups'] = {'tags__label': 'label2'}
        query.parameters['traversal_paths'] = ['r']
        return query

    def validate_all_docs(resp):
        assert len(resp.docs) == total_docs

    def validate_label2_docs(resp):
        assert len(resp.docs) == total_docs / 2

    docs = [_make_doc(doc_id) for doc_id in range(total_docs)]

    flow = Flow().add(
        uses='- !FilterQL | {lookups: {tags__label__in: [label1, label2]}, traversal_paths: [r]}'
    )

    with flow:
        # No queryset: the Flow's own filter keeps every document.
        flow.index(docs, output_fn=validate_all_docs, callback_on_body=True)

        # With queryset: only the label2 documents are expected back.
        qs = _label2_queryset()
        flow.index(docs,
                   queryset=qs,
                   output_fn=validate_label2_docs,
                   callback_on_body=True)
def test_read_from_req():
    """Index ten random documents three times through a SliceQL Flow.

    The Flow's SliceQL is configured with ``start: 0, end: 5``; the request
    queryset uses ``start = 1, end = 4``. Expected document counts:

    * no queryset -> 5
    * queryset with priority 1 -> 3
    * queryset with priority -1 -> 5 (priority is no larger than the
      driver's default)
    """

    def _expect_doc_count(expected):
        # Build a callback asserting the number of docs seen in the request.
        def _check(req):
            assert len(req.docs) == expected

        return _check

    expect_driver_slice = _expect_doc_count(5)
    expect_request_slice = _expect_doc_count(3)

    flow = Flow(callback_on_body=True).add(uses='- !SliceQL | {start: 0, end: 5}')

    slice_query = jina_pb2.QueryLang(name='SliceQL', priority=1)
    slice_query.parameters['start'] = 1
    slice_query.parameters['end'] = 4

    # Run 1: no queryset attached to the request.
    with flow:
        flow.index(random_docs(10), output_fn=expect_driver_slice)

    # Run 2: queryset attached with priority 1.
    with flow:
        flow.index(random_docs(10), queryset=slice_query, output_fn=expect_request_slice)

    # Run 3: same queryset, but its priority is no larger than the driver's default.
    slice_query.priority = -1
    with flow:
        flow.index(random_docs(10), queryset=slice_query, output_fn=expect_driver_slice)
def test_topk_override(config):
    """Index 100 random documents, then search with a ``top_k`` queryset.

    The queryset targets ``VectorSearchDriver`` with priority 1 and takes its
    ``top_k`` value from the ``JINA_TOPK_OVERRIDE`` environment variable. The
    number of query documents comes from ``JINA_NDOCS``. Results are checked
    by ``validate_override_results``.

    :param config: fixture requested by the test; not referenced in the body
        (presumably it provides the environment variables read here -- verify
        against the fixture definition)
    """
    # Build the request-level queryset.
    top_k_queryset = jina_pb2.QueryLang(name='VectorSearchDriver', priority=1)
    top_k_queryset.parameters['top_k'] = os.environ['JINA_TOPK_OVERRIDE']

    # Indexing pass.
    with Flow().load_config('flow.yml') as index_flow:
        index_flow.index(input_fn=random_docs(100))

    # Search pass with the queryset attached to the request.
    with Flow().load_config('flow.yml') as search_flow:
        query_docs = random_docs(int(os.environ['JINA_NDOCS']))
        search_flow.search(
            input_fn=query_docs,
            output_fn=validate_override_results,
            queryset=[top_k_queryset],
        )
def queryset(self):
    """Return a one-element list with a ``top_k`` queryset.

    The single ``QueryLang`` entry is named ``SimpleVectorSearchDriver``, has
    priority 1 and carries ``top_k = 4`` in its parameters.
    """
    top_k_query = jina_pb2.QueryLang(name='SimpleVectorSearchDriver', priority=1)
    top_k_query.parameters['top_k'] = 4
    return [top_k_query]