def test_collect_training_data_point(self):
    """
    Collect a training data point when the recall query returns hits.

    ``self.app.query`` is replaced by a mock that yields the recall result
    first and the additional-documents result second. The test then checks
    that exactly two queries were issued with the expected arguments and
    that the returned rows match the expected records (label 1 for the
    relevant document, label 0 for the additional ones).
    """
    self.app.query = Mock(
        side_effect=[
            VespaResult(self.raw_vespa_result_recall),
            VespaResult(self.raw_vespa_result_additional),
        ]
    )
    model = QueryModel(rank_profile=RankProfile(list_features=True))

    collected = self.app.collect_training_data_point(
        query="this is a query",
        query_id="123",
        relevant_id="abc",
        id_field="vespa_id_field",
        query_model=model,
        number_additional_docs=2,
        fields=["rankfeatures", "title"],
        timeout="15s",
    )

    # First call recalls the relevant document, second fetches extra hits.
    recall_call = call(
        query="this is a query",
        query_model=model,
        recall=("vespa_id_field", ["abc"]),
        timeout="15s",
    )
    additional_call = call(
        query="this is a query",
        query_model=model,
        hits=2,
        timeout="15s",
    )
    self.assertEqual(self.app.query.call_count, 2)
    self.app.query.assert_has_calls([recall_call, additional_call])

    # (document_id, label, a, b, title) for each expected record.
    expected_rows = [
        ("abc", 1, 1, 2, "this is a title"),
        ("def", 0, 3, 4, "this is a title 2"),
        ("ghi", 0, 5, 6, "this is a title 3"),
    ]
    expected_data = [
        {
            "document_id": document_id,
            "query_id": "123",
            "label": label,
            "a": feature_a,
            "b": feature_b,
            "title": title,
        }
        for document_id, label, feature_a, feature_b, title in expected_rows
    ]
    self.assertEqual(collected, expected_data)
def query(
    self,
    body: Optional[Dict] = None,
    query: Optional[str] = None,
    query_model: Optional[Query] = None,
    debug_request: bool = False,
    recall: Optional[Tuple] = None,
    **kwargs
) -> VespaResult:
    """
    Send a query request to the Vespa application.

    Pass either a ready-made 'body' holding every request parameter, or a
    'query' string together with a 'query_model' from which the body is
    built. 'recall' and the extra keyword arguments are merged only into a
    body built here; a caller-supplied 'body' is sent as given.

    :param body: Dict containing all the request parameters.
    :param query: Query string. Required when 'body' is not given.
    :param query_model: Query model used to create the request body.
        Required when 'body' is not given.
    :param debug_request: When True, do not send the request; return a
        VespaResult with an empty result and the request body attached.
    :param recall: Tuple of size 2 where the first element is the name of
        the field to use to recall and the second element is a list of the
        values to be recalled.
    :param kwargs: Additional parameters to be sent along the request.
    :return: A VespaResult wrapping the JSON response of the application,
        or, if debug_request is True, one carrying the request body.
    :raises AssertionError: If 'body' is None and 'query' or 'query_model'
        is missing.
    """
    if body is None:
        assert query is not None, "No 'query' specified."
        assert query_model is not None, "No 'query_model' specified."
        body = query_model.create_body(query=query)

        if recall is not None:
            # Build "+(field:v1 field:v2 ...)" from the recall tuple.
            recall_field = recall[0]
            recall_terms = " ".join(
                "{}:{}".format(recall_field, str(value)) for value in recall[1]
            )
            body.update({"recall": "+(" + recall_terms + ")"})

        # Extra parameters are applied last so they can override the above.
        body.update(kwargs)

    if debug_request:
        return VespaResult(vespa_result={}, request_body=body)

    response = post(self.search_end_point, json=body)
    return VespaResult(vespa_result=response.json())
def test_collect_training_data_point_0_recall_hits(self):
    """
    Collect a training data point when the recall query returns no hits.

    The recall fixture is replaced by a result whose ``totalCount`` is 0
    and that carries no hits. The test checks that only the recall query
    is issued and that an empty list is returned.
    """
    coverage_info = {
        "coverage": 100,
        "documents": 62529,
        "full": True,
        "nodes": 2,
        "results": 1,
        "resultsFull": 1,
    }
    self.raw_vespa_result_recall = {
        "root": {
            "id": "toplevel",
            "relevance": 1.0,
            "fields": {"totalCount": 0},
            "coverage": coverage_info,
        }
    }
    # The second result is queued on the mock but expected to stay unused.
    queued_results = [
        VespaResult(self.raw_vespa_result_recall),
        VespaResult(self.raw_vespa_result_additional),
    ]
    self.app.query = Mock(side_effect=queued_results)
    model = QueryModel(rank_profile=RankProfile(list_features=True))

    collected = self.app.collect_training_data_point(
        query="this is a query",
        query_id="123",
        relevant_id="abc",
        id_field="vespa_id_field",
        query_model=model,
        number_additional_docs=2,
        fields=["rankfeatures"],
        timeout="15s",
    )

    self.assertEqual(self.app.query.call_count, 1)
    recall_call = call(
        query="this is a query",
        query_model=model,
        recall=("vespa_id_field", ["abc"]),
        timeout="15s",
    )
    self.app.query.assert_has_calls([recall_call])

    self.assertEqual(collected, [])
def test_hits(self):
    """
    Check the ``hits`` attribute of VespaResult.

    The empty-hits fixture must give an empty list; the regular fixture
    must give a list holding the single expected hit.
    """
    without_hits = VespaResult(vespa_result=self.raw_vespa_result_empty_hits)
    self.assertEqual(without_hits.hits, [])

    expected_hit = {
        "id": "id:covid-19:doc::40215",
        "relevance": 30.368213170494712,
        "source": "content",
        "fields": {
            "sddocname": "doc",
            "body_text": "this is a body",
            "title": "this is a title",
        },
    }
    with_hits = VespaResult(vespa_result=self.raw_vespa_result)
    self.assertEqual(with_hits.hits, [expected_hit])
def test_json(self):
    """Check that ``json`` of a VespaResult equals the raw result it was built from."""
    raw_payload = self.raw_vespa_result
    wrapped = VespaResult(vespa_result=raw_payload)
    self.assertDictEqual(wrapped.json, raw_payload)