Example #1
0
    def test_collect_training_data_point(self):
        """Check the queries issued and the rows returned for one data point.

        ``self.app.query`` is replaced by a mock that yields the recall
        fixture on its first call and the additional-documents fixture on
        its second. The test verifies that exactly those two calls are made
        (recall first, then ``hits=2``) and that the relevant document is
        labelled 1 while the additional documents are labelled 0.
        """
        recall_result = VespaResult(self.raw_vespa_result_recall)
        additional_result = VespaResult(self.raw_vespa_result_additional)
        self.app.query = Mock(side_effect=[recall_result, additional_result])

        query_model = QueryModel(rank_profile=RankProfile(list_features=True))
        query_text = "this is a query"
        query_id = "123"

        data = self.app.collect_training_data_point(
            query=query_text,
            query_id=query_id,
            relevant_id="abc",
            id_field="vespa_id_field",
            query_model=query_model,
            number_additional_docs=2,
            fields=["rankfeatures", "title"],
            timeout="15s",
        )

        # Keyword arguments common to both expected calls to ``query``.
        shared_kwargs = {
            "query": query_text,
            "query_model": query_model,
            "timeout": "15s",
        }
        expected_calls = [
            call(recall=("vespa_id_field", ["abc"]), **shared_kwargs),
            call(hits=2, **shared_kwargs),
        ]
        self.assertEqual(self.app.query.call_count, 2)
        self.app.query.assert_has_calls(expected_calls)

        # (document_id, label, a, b, title) for every expected row.
        expected_rows = [
            ("abc", 1, 1, 2, "this is a title"),
            ("def", 0, 3, 4, "this is a title 2"),
            ("ghi", 0, 5, 6, "this is a title 3"),
        ]
        expected_data = [
            {
                "document_id": document_id,
                "query_id": query_id,
                "label": label,
                "a": feature_a,
                "b": feature_b,
                "title": title,
            }
            for document_id, label, feature_a, feature_b, title in expected_rows
        ]
        self.assertEqual(data, expected_data)
Example #2
0
    def query(
        self,
        body: Optional[Dict] = None,
        query: Optional[str] = None,
        query_model: Optional[Query] = None,
        debug_request: bool = False,
        recall: Optional[Tuple] = None,
        **kwargs
    ) -> VespaResult:
        """
        Send a query request to the Vespa application.

        Pass either a ready-made 'body', or both 'query' and 'query_model' so that the body can be
        built here. 'recall' and the extra keyword arguments are only applied when the body is built
        here; they are not merged into a caller-supplied 'body'.

        :param body: Dict containing all the request parameters.
        :param query: Query string
        :param query_model: Query model used to create the request body from 'query'.
        :param debug_request: If True, do not send the request; return a result that carries the
            request body instead.
        :param recall: Tuple of size 2 where the first element is the name of the field to use to recall and the
            second element is a list of the values to be recalled.
        :param kwargs: Additional parameters to be sent along the request.
        :return: A VespaResult. When debug_request is True it wraps an empty Vespa result together with
            the request body; otherwise it wraps the JSON response of the search end point.
        """

        if body is None:
            assert query is not None, "No 'query' specified."
            assert query_model is not None, "No 'query_model' specified."
            body = query_model.create_body(query=query)

            if recall is not None:
                # Render the recall restriction as "+(field:v1 field:v2 ...)".
                recall_values = recall[1]
                recall_terms = " ".join(
                    "{}:{}".format(recall[0], str(value)) for value in recall_values
                )
                recall_clause = "+(" + recall_terms + ")"
                body.update({"recall": recall_clause})

            # Extra request parameters are applied last.
            body.update(kwargs)

        if debug_request:
            return VespaResult(vespa_result={}, request_body=body)

        response = post(self.search_end_point, json=body)
        return VespaResult(vespa_result=response.json())
Example #3
0
    def test_collect_training_data_point_0_recall_hits(self):
        """Check that no rows are returned when the recall query has no hits.

        The recall fixture on the test instance is overridden with a response
        whose ``totalCount`` is 0. The test verifies that only the recall
        query is issued and that the collected data is an empty list.
        """
        coverage_info = {
            "coverage": 100,
            "documents": 62529,
            "full": True,
            "nodes": 2,
            "results": 1,
            "resultsFull": 1,
        }
        root_without_hits = {
            "id": "toplevel",
            "relevance": 1.0,
            "fields": {"totalCount": 0},
            "coverage": coverage_info,
        }
        self.raw_vespa_result_recall = {"root": root_without_hits}

        # A second result is queued, but the test expects it not to be consumed.
        queued_results = [
            VespaResult(self.raw_vespa_result_recall),
            VespaResult(self.raw_vespa_result_additional),
        ]
        self.app.query = Mock(side_effect=queued_results)

        query_model = QueryModel(rank_profile=RankProfile(list_features=True))
        query_text = "this is a query"
        data = self.app.collect_training_data_point(
            query=query_text,
            query_id="123",
            relevant_id="abc",
            id_field="vespa_id_field",
            query_model=query_model,
            number_additional_docs=2,
            fields=["rankfeatures"],
            timeout="15s",
        )

        recall_call = call(
            query=query_text,
            query_model=query_model,
            recall=("vespa_id_field", ["abc"]),
            timeout="15s",
        )
        self.assertEqual(self.app.query.call_count, 1)
        self.app.query.assert_has_calls([recall_call])
        self.assertEqual(data, [])
Example #4
0
 def test_hits(self):
     """Check ``VespaResult.hits`` for the empty-hits and regular fixtures.

     The empty-hits fixture must yield an empty list; the regular fixture
     must yield a list containing the single expected hit.
     """
     result_without_hits = VespaResult(
         vespa_result=self.raw_vespa_result_empty_hits
     )
     self.assertEqual(result_without_hits.hits, [])

     expected_hit = {
         "id": "id:covid-19:doc::40215",
         "relevance": 30.368213170494712,
         "source": "content",
         "fields": {
             "sddocname": "doc",
             "body_text": "this is a body",
             "title": "this is a title",
         },
     }
     result_with_hit = VespaResult(vespa_result=self.raw_vespa_result)
     self.assertEqual(result_with_hit.hits, [expected_hit])
Example #5
0
 def test_json(self):
     """Check that ``VespaResult.json`` equals the raw result it was built from."""
     wrapped = VespaResult(vespa_result=self.raw_vespa_result)
     actual_json = wrapped.json
     self.assertDictEqual(actual_json, self.raw_vespa_result)