Example #1
0
class TestVespaEvaluate(unittest.TestCase):
    """Unit tests for ``Vespa.evaluate_query`` and ``Vespa.evaluate``.

    Each test replaces the method that the code under test delegates to
    (``query`` or ``evaluate_query``) with a ``Mock`` and checks how it is
    called and how its return values are combined.
    """

    def setUp(self) -> None:
        """Create the app object and the shared fixtures."""
        self.app = Vespa(url="http://localhost", port=8080)

        # One labelled query with two relevant documents.
        self.labelled_data = [
            {
                "query_id": 0,
                "query": "Intrauterine virus infections and congenital heart disease",
                "relevant_docs": [
                    {"id": "def", "score": 1},
                    {"id": "abc", "score": 1},
                ],
            },
        ]

        # Canned result tree with two child hits.  It is not referenced by
        # the tests in this class as shown here; kept as a shared fixture.
        self.query_results = {
            "root": {
                "id": "toplevel",
                "relevance": 1.0,
                "fields": {"totalCount": 1083},
                "coverage": {
                    "coverage": 100,
                    "documents": 62529,
                    "full": True,
                    "nodes": 2,
                    "results": 1,
                    "resultsFull": 1,
                },
                "children": [
                    {
                        "id": "id:covid-19:doc::40216",
                        "relevance": 10,
                        "source": "content",
                        "fields": {
                            "vespa_id_field": "ghi",
                            "sddocname": "doc",
                            "body_text": "this is a body 2",
                            "title": "this is a title 2",
                            "rankfeatures": {"a": 3, "b": 4},
                        },
                    },
                    {
                        "id": "id:covid-19:doc::40217",
                        "relevance": 8,
                        "source": "content",
                        "fields": {
                            "vespa_id_field": "def",
                            "sddocname": "doc",
                            "body_text": "this is a body 3",
                            "title": "this is a title 3",
                            "rankfeatures": {"a": 5, "b": 6},
                        },
                    },
                ],
            }
        }

    def test_evaluate_query(self):
        """``evaluate_query`` queries once and merges every metric's output."""
        self.app.query = Mock(return_value={})
        eval_metric = Mock()
        eval_metric.evaluate_query = Mock(return_value={"metric": 1})
        eval_metric2 = Mock()
        eval_metric2.evaluate_query = Mock(return_value={"metric_2": 2})
        query_model = Query()
        relevant_docs = self.labelled_data[0]["relevant_docs"]

        evaluation = self.app.evaluate_query(
            eval_metrics=[eval_metric, eval_metric2],
            query_model=query_model,
            query_id="0",
            query="this is a test",
            id_field="vespa_id_field",
            relevant_docs=relevant_docs,
            default_score=0,
            hits=10,
        )

        # Extra keyword arguments (``hits``) are expected to reach ``query``.
        self.app.query.assert_called_once_with(
            query="this is a test", query_model=query_model, hits=10
        )
        # Both metrics must be evaluated exactly once, each receiving the
        # (mocked) query result, the relevant docs, the id field and the
        # default score.
        for metric in (eval_metric, eval_metric2):
            metric.evaluate_query.assert_called_once_with(
                {}, relevant_docs, "vespa_id_field", 0
            )
        self.assertDictEqual(
            evaluation,
            {"query_id": "0", "metric": 1, "metric_2": 2},
        )

    def test_evaluate(self):
        """``evaluate`` returns a DataFrame built from per-query records."""
        # One side-effect entry per labelled query in the fixture.
        self.app.evaluate_query = Mock(
            side_effect=[{"query_id": "0", "metric": 1}]
        )

        evaluation = self.app.evaluate(
            labelled_data=self.labelled_data,
            eval_metrics=[Mock()],
            query_model=Mock(),
            id_field="mock",
            default_score=0,
        )

        expected = DataFrame.from_records([{"query_id": "0", "metric": 1}])
        assert_frame_equal(evaluation, expected)
Example #2
0
    def test_workflow(self):
        """Run the query / collect-training-data / evaluate workflow.

        The test talks to the application at ``https://api.cord19.vespa.ai``
        without mocking, so it presumably needs network access to pass.
        """
        #
        # Connect to a running Vespa Application
        #
        app = Vespa(url="https://api.cord19.vespa.ai")
        #
        # Define a query model
        #
        match_phase = Union(
            WeakAnd(hits=10),
            ANN(
                doc_vector="title_embedding",
                query_vector="title_vector",
                hits=10,
                label="title",
            ),
        )
        rank_profile = Ranking(name="bm25", list_features=True)
        query_model = QueryModel(
            name="ANN_bm25",
            query_properties=[
                QueryRankingFeature(
                    name="title_vector",
                    # The argument is ignored: a random 768-element vector
                    # is produced on every call.
                    mapping=lambda x: [random() for _ in range(768)],
                )
            ],
            match_phase=match_phase,
            rank_profile=rank_profile,
        )
        #
        # Query Vespa app
        #
        query_result = app.query(
            query="Is remdesivir an effective treatment for COVID-19?",
            query_model=query_model,
        )
        self.assertGreater(query_result.number_documents_retrieved, 0)
        self.assertEqual(len(query_result.hits), 10)
        #
        # Define labelled data
        #
        query_0 = "Intrauterine virus infections and congenital heart disease"
        query_1 = "Clinical and immunologic studies in identical twins discordant for systemic lupus erythematosus"
        labeled_data = [
            {
                "query_id": 0,
                "query": query_0,
                "relevant_docs": [
                    {"id": 0, "score": 1},
                    {"id": 3, "score": 1},
                ],
            },
            {
                "query_id": 1,
                "query": query_1,
                "relevant_docs": [
                    {"id": 1, "score": 1},
                    {"id": 5, "score": 1},
                ],
            },
        ]
        # equivalent data in df format (one row per query/document pair)
        labeled_data_df = DataFrame(
            data={
                "qid": [0, 0, 1, 1],
                "query": [query_0] * 2 + [query_1] * 2,
                "doc_id": [0, 3, 1, 5],
                "relevance": [1, 1, 1, 1],
            }
        )

        #
        # Collect training data
        #
        training_data_batch = app.collect_training_data(
            labeled_data=labeled_data,
            id_field="id",
            query_model=query_model,
            number_additional_docs=2,
            fields=["rankfeatures"],
        )
        self.assertGreater(training_data_batch.shape[0], 0)
        # ``<=`` on sets is the subset test: all three bookkeeping columns
        # must be present among the returned columns.
        self.assertLessEqual(
            {"document_id", "query_id", "label"},
            set(training_data_batch.columns),
        )
        #
        # Evaluate a query model
        #
        eval_metrics = [MatchRatio(), Recall(at=10), ReciprocalRank(at=10)]
        evaluation = app.evaluate(
            labeled_data=labeled_data,
            eval_metrics=eval_metrics,
            query_model=query_model,
            id_field="id",
        )
        self.assertEqual(evaluation.shape, (9, 1))

        #
        # AssertionError - two models with the same name
        #
        with self.assertRaises(AssertionError):
            app.evaluate(
                labeled_data=labeled_data,
                eval_metrics=eval_metrics,
                query_model=[QueryModel(), QueryModel(), query_model],
                id_field="id",
            )

        # Two distinct models: one result column per model.
        evaluation = app.evaluate(
            labeled_data=labeled_data,
            eval_metrics=eval_metrics,
            query_model=[QueryModel(), query_model],
            id_field="id",
        )
        self.assertEqual(evaluation.shape, (9, 2))

        # Same evaluation driven by the DataFrame form of the labelled data.
        evaluation = app.evaluate(
            labeled_data=labeled_data_df,
            eval_metrics=eval_metrics,
            query_model=query_model,
            id_field="id",
            detailed_metrics=True,
        )
        self.assertEqual(evaluation.shape, (15, 1))

        evaluation = app.evaluate(
            labeled_data=labeled_data_df,
            eval_metrics=eval_metrics,
            query_model=query_model,
            id_field="id",
            detailed_metrics=True,
            per_query=True,
        )
        self.assertEqual(evaluation.shape, (2, 7))