Example #1
0
    def test_parse_as_dataframe(self):
        my_agg = Aggs(sample.EXPECTED_AGG_QUERY, mapping=MAPPING)
        df = Aggregations(
            data=sample.ES_AGG_RESPONSE,
            aggs=my_agg,
            index=None,
            client=None,
            query=None,
        ).serialize_as_dataframe()
        self.assertIsInstance(df, pd.DataFrame)
        self.assertEqual(set(df.index.names),
                         {"classification_type", "global_metrics.field.name"})
        self.assertEqual(set(df.columns),
                         {"avg_f1_micro", "avg_nb_classes", "doc_count"})
        self.assertEqual(
            df.index.to_list(),
            [
                ("multilabel", "ispracticecompatible"),
                ("multilabel", "gpc"),
                ("multilabel", "preservationmethods"),
                ("multiclass", "kind"),
                ("multiclass", "gpc"),
            ],
        )

        self.assertEqual(
            df.to_dict(orient="rows"),
            [
                {
                    "avg_f1_micro": 0.72,
                    "avg_nb_classes": 18.71,
                    "doc_count": 128
                },
                {
                    "avg_f1_micro": 0.95,
                    "avg_nb_classes": 183.21,
                    "doc_count": 119
                },
                {
                    "avg_f1_micro": 0.8,
                    "avg_nb_classes": 9.97,
                    "doc_count": 76
                },
                {
                    "avg_f1_micro": 0.89,
                    "avg_nb_classes": 206.5,
                    "doc_count": 370
                },
                {
                    "avg_f1_micro": 0.93,
                    "avg_nb_classes": 211.12,
                    "doc_count": 198
                },
            ],
        )
Example #2
0
    def test_parse_as_dataframe(self):
        my_agg = Aggs(sample.EXPECTED_AGG_QUERY, mappings=MAPPINGS)
        df = Aggregations(data=sample.ES_AGG_RESPONSE,
                          _search=Search().aggs(my_agg)).to_dataframe(
                              grouped_by="global_metrics.field.name")
        self.assertIsInstance(df, pd.DataFrame)
        self.assertEqual(set(df.index.names),
                         {"classification_type", "global_metrics.field.name"})
        self.assertEqual(set(df.columns),
                         {"avg_f1_micro", "avg_nb_classes", "doc_count"})

        self.assertEqual(
            df.to_dict(orient="index"),
            {
                ("multiclass", "gpc"): {
                    "avg_f1_micro": 0.93,
                    "avg_nb_classes": 211.12,
                    "doc_count": 198,
                },
                ("multiclass", "kind"): {
                    "avg_f1_micro": 0.89,
                    "avg_nb_classes": 206.5,
                    "doc_count": 370,
                },
                ("multilabel", "ispracticecompatible"): {
                    "avg_f1_micro": 0.72,
                    "avg_nb_classes": 18.71,
                    "doc_count": 128,
                },
                ("multilabel", "preservationmethods"): {
                    "avg_f1_micro": 0.8,
                    "avg_nb_classes": 9.97,
                    "doc_count": 76,
                },
            },
        )