def test_parse_as_dataframe(self):
    """Check that an aggregation response serializes to the expected DataFrame.

    Builds an ``Aggregations`` wrapper around the sample response and the
    sample aggregation query, serializes it with ``serialize_as_dataframe``
    and asserts on the result's type, index level names, column names,
    index entries (in order) and row contents (in order).
    """
    my_agg = Aggs(sample.EXPECTED_AGG_QUERY, mapping=MAPPING)
    df = Aggregations(
        data=sample.ES_AGG_RESPONSE,
        aggs=my_agg,
        index=None,
        client=None,
        query=None,
    ).serialize_as_dataframe()

    self.assertIsInstance(df, pd.DataFrame)
    self.assertEqual(
        set(df.index.names),
        {"classification_type", "global_metrics.field.name"},
    )
    self.assertEqual(
        set(df.columns),
        {"avg_f1_micro", "avg_nb_classes", "doc_count"},
    )
    self.assertEqual(
        df.index.to_list(),
        [
            ("multilabel", "ispracticecompatible"),
            ("multilabel", "gpc"),
            ("multilabel", "preservationmethods"),
            ("multiclass", "kind"),
            ("multiclass", "gpc"),
        ],
    )
    # "records" is the canonical orient name. The previous "rows" spelling
    # only worked through pandas' prefix matching of orient names, which was
    # deprecated and later removed (it then raises ValueError).
    self.assertEqual(
        df.to_dict(orient="records"),
        [
            {"avg_f1_micro": 0.72, "avg_nb_classes": 18.71, "doc_count": 128},
            {"avg_f1_micro": 0.95, "avg_nb_classes": 183.21, "doc_count": 119},
            {"avg_f1_micro": 0.8, "avg_nb_classes": 9.97, "doc_count": 76},
            {"avg_f1_micro": 0.89, "avg_nb_classes": 206.5, "doc_count": 370},
            {"avg_f1_micro": 0.93, "avg_nb_classes": 211.12, "doc_count": 198},
        ],
    )
def test_parse_as_dataframe(self):
    """Verify the DataFrame produced from the sample aggregation response.

    The response is wrapped in ``Aggregations`` together with a ``Search``
    carrying the sample aggregation query, then converted with
    ``to_dataframe`` grouped by ``"global_metrics.field.name"``. The test
    asserts on the result's type, index level names, column names and the
    per-index-entry metric values.
    """
    metric_columns = ("avg_f1_micro", "avg_nb_classes", "doc_count")
    # Expected metric values per (classification_type, field name) index
    # entry, listed in the same order as ``metric_columns``.
    expected_values = {
        ("multiclass", "gpc"): (0.93, 211.12, 198),
        ("multiclass", "kind"): (0.89, 206.5, 370),
        ("multilabel", "ispracticecompatible"): (0.72, 18.71, 128),
        ("multilabel", "preservationmethods"): (0.8, 9.97, 76),
    }
    expected_by_index = {
        index_key: dict(zip(metric_columns, values))
        for index_key, values in expected_values.items()
    }

    my_agg = Aggs(sample.EXPECTED_AGG_QUERY, mappings=MAPPINGS)
    aggregations = Aggregations(
        data=sample.ES_AGG_RESPONSE,
        _search=Search().aggs(my_agg),
    )
    frame = aggregations.to_dataframe(grouped_by="global_metrics.field.name")

    self.assertIsInstance(frame, pd.DataFrame)
    self.assertEqual(
        set(frame.index.names),
        {"classification_type", "global_metrics.field.name"},
    )
    self.assertEqual(set(frame.columns), set(metric_columns))
    self.assertEqual(frame.to_dict(orient="index"), expected_by_index)