Ejemplo n.º 1
0
 def test_aggs(self):
     """Insert aggregation nodes through ``Aggs.aggs``.

     Two scenarios are exercised: adding a node to an empty ``Aggs``, and
     adding a node below an existing one when a mapping is supplied with
     ``nested_autocorrect=True``. In the second scenario the serialized
     result is expected to contain an intermediate ``nested`` clause.
     """
     # Scenario 1: node inserted into an empty aggregation.
     terms_node = Terms(name="some_name", field="some_field", size=10)
     from_empty = Aggs().aggs(terms_node, insert_below=None)
     expected_from_empty = {
         "some_name": {"terms": {"field": "some_field", "size": 10}}
     }
     self.assertEqual(from_empty.to_dict(), expected_from_empty)

     # Scenario 2: node inserted below a parent; the expected output wraps
     # it in a nested clause on the "local_metrics" path.
     parent_only = Aggs(
         {"term_workflow": {"terms": {"field": "workflow", "size": 5}}},
         mapping=MAPPING,
         nested_autocorrect=True,
     )
     avg_node = Avg(
         name="min_local_f1", field="local_metrics.performance.test.f1_score"
     )
     with_child = parent_only.aggs(avg_node, insert_below="term_workflow")

     expected_avg = {
         "min_local_f1": {
             "avg": {"field": "local_metrics.performance.test.f1_score"}
         }
     }
     expected_with_child = {
         "term_workflow": {
             "terms": {"field": "workflow", "size": 5},
             "aggs": {
                 "nested_below_term_workflow": {
                     "nested": {"path": "local_metrics"},
                     "aggs": expected_avg,
                 }
             },
         }
     }
     self.assertEqual(with_child.to_dict(), expected_with_child)
Ejemplo n.º 2
0
    def test_groupby_at_root(self):
        """Check where ``groupby`` places a second grouping aggregation.

        With ``at_root=True`` the second aggregation is expected to wrap the
        first; without it (the default) it is expected below the first.
        """
        # Explicit at_root: "two" ends up above "one".
        one_only = Aggs().groupby("one", "terms", field="terms_one")
        rooted = one_only.groupby("two", "terms", field="terms_two", at_root=True)
        expected_rooted = {
            "two": {
                "aggs": {"one": {"terms": {"field": "terms_one"}}},
                "terms": {"field": "terms_two"},
            },
        }
        self.assertEqual(rooted.to_dict(), expected_rooted)

        # Default behaviour: "two" is placed below "one".
        one_again = Aggs().groupby("one", "terms", field="terms_one")
        stacked = one_again.groupby("two", "terms", field="terms_two")
        expected_stacked = {
            "one": {
                "aggs": {"two": {"terms": {"field": "terms_two"}}},
                "terms": {"field": "terms_one"},
            },
        }
        self.assertEqual(stacked.to_dict(), expected_stacked)
Ejemplo n.º 3
0
 def test_deserialize_nodes_with_subaggs(self):
     """Build the same two-level aggregation from several declaration styles.

     The sub-aggregation is supplied as part of a plain dict, as a single
     node, as a list of nodes, as a dict passed to a node, and as a node
     placed inside a dict. Every variant must serialize to the same dict.
     """

     def decade_node():
         # A fresh node per variant, so no instance is shared between trees.
         return DateHistogram(
             name="movie_decade", field="year", fixed_interval="3650d"
         )

     def decade_clause():
         # A fresh dict per use, for the same reason.
         return {
             "movie_decade": {
                 "date_histogram": {"field": "year", "fixed_interval": "3650d"}
             }
         }

     expected = {
         "genres": {
             "terms": {"field": "genres", "size": 3},
             "aggs": decade_clause(),
         }
     }

     variants = (
         # whole tree declared as a dict
         Aggs(expected),
         # sub-aggregation passed as a single node
         Aggs(Terms("genres", field="genres", size=3, aggs=decade_node())),
         # sub-aggregation passed as a list of nodes
         Aggs(Terms("genres", field="genres", size=3, aggs=[decade_node()])),
         # sub-aggregation passed as a dict
         Aggs(Terms("genres", field="genres", size=3, aggs=decade_clause())),
         # dict declaration whose "aggs" value is a node
         Aggs(
             {
                 "genres": {
                     "terms": {"field": "genres", "size": 3},
                     "aggs": decade_node(),
                 }
             }
         ),
     )
     for variant in variants:
         self.assertEqual(variant.to_dict(), expected)
Ejemplo n.º 4
0
    def test_interpret_agg_string(self):
        """Pass a bare field name to ``aggs`` and check the generated clause.

        In every scenario the string is expected to produce a ``terms``
        aggregation named after the field and targeting that field.
        """
        # Bare field name on an empty aggregation.
        plain = Aggs().aggs("some_field", insert_below=None)
        expected_plain = {"some_field": {"terms": {"field": "some_field"}}}
        self.assertEqual(plain.to_dict(), expected_plain)

        # Extra keyword argument (explicit size) is carried into the clause.
        sized = Aggs().aggs("some_field", insert_below=None, size=10)
        expected_sized = {
            "some_field": {"terms": {"field": "some_field", "size": 10}}
        }
        self.assertEqual(sized.to_dict(), expected_sized)

        # Inserted below an existing parent aggregation.
        parent = Aggs(
            {"root_agg_name": {"terms": {"field": "some_field", "size": 5}}}
        )
        with_child = parent.aggs("child_field", insert_below="root_agg_name")
        expected_with_child = {
            "root_agg_name": {
                "terms": {"field": "some_field", "size": 5},
                "aggs": {"child_field": {"terms": {"field": "child_field"}}},
            }
        }
        self.assertEqual(with_child.to_dict(), expected_with_child)

        # Field under the "local_metrics" nested path: with a mapping and
        # nested_autocorrect, a nested clause is expected around the child.
        workflow = Aggs(
            {"term_workflow": {"terms": {"field": "workflow", "size": 5}}},
            mapping=MAPPING,
            nested_autocorrect=True,
        )
        with_nested = workflow.aggs(
            "local_metrics.field_class.name", insert_below="term_workflow"
        )
        expected_nested_child = {
            "local_metrics.field_class.name": {
                "terms": {"field": "local_metrics.field_class.name"}
            }
        }
        expected_with_nested = {
            "term_workflow": {
                "terms": {"field": "workflow", "size": 5},
                "aggs": {
                    "nested_below_term_workflow": {
                        "nested": {"path": "local_metrics"},
                        "aggs": expected_nested_child,
                    }
                },
            }
        }
        self.assertEqual(with_nested.to_dict(), expected_with_nested)
Ejemplo n.º 5
0
    def test_deepest_linear_agg(self):
        """Check ``deepest_linear_bucket_agg`` on two hierarchies.

        First hierarchy:

        week
        └── nested_below_week
            └── local_metrics.field_class.name   <----- expected result
                └── min_f1_score

        Second hierarchy: ``week`` is declared with two child aggregations
        (a terms and a filter), and ``week`` itself is the expected result.
        """
        # Single chain of aggregations ending with a min aggregation.
        min_score = Min(
            name="min_f1_score",
            field="local_metrics.performance.test.f1_score",
        )
        field_terms = Terms(
            name="local_metrics.field_class.name",
            field="local_metrics.field_class.name",
            size=10,
            aggs=[min_score],
        )
        linear_hierarchy = DateHistogram(
            name="week", field="date", interval="1w", aggs=[field_terms]
        )
        linear_agg = Aggs(linear_hierarchy, mapping=MAPPING, nested_autocorrect=True)
        self.assertEqual(
            linear_agg.deepest_linear_bucket_agg, "local_metrics.field_class.name"
        )

        # "week" has two children here, so it is the expected result.
        sibling_terms = Terms(
            name="local_metrics.field_class.name",
            field="local_metrics.field_class.name",
            size=10,
        )
        sibling_filter = Filter(
            name="f1_score_above_threshold",
            filter={
                "range": {
                    "local_metrics.performance.test.f1_score": {"gte": 0.5}
                }
            },
        )
        branching_hierarchy = DateHistogram(
            name="week",
            field="date",
            interval="1w",
            aggs=[sibling_terms, sibling_filter],
        )
        branching_agg = Aggs(
            branching_hierarchy, mapping=MAPPING, nested_autocorrect=True
        )
        self.assertEqual(branching_agg.deepest_linear_bucket_agg, "week")
Ejemplo n.º 6
0
    def test_validate_aggs_parent_id(self):
        """Check ``_validate_aggs_parent_id`` on branching, linear and empty trees.

        The sample aggregation starts with this shape:

        <Aggregation>
        classification_type
        └── global_metrics.field.name
            ├── avg_f1_micro
            └── avg_nb_classes
        """
        my_agg = Aggs(sample.EXPECTED_AGG_QUERY, mapping=MAPPING)

        # No parent id on the two-leaf tree: reported as ambiguous.
        with self.assertRaises(ValueError) as e:
            my_agg._validate_aggs_parent_id(pid=None)
        self.assertEqual(
            e.exception.args,
            (
                "Declaration is ambiguous, you must declare the node id under which these "
                "aggregations should be placed.",
            ),
        )

        # A non-bucket node cannot be used as parent.
        with self.assertRaises(ValueError) as e:
            my_agg._validate_aggs_parent_id("avg_f1_micro")
        self.assertEqual(
            e.exception.args, ("Node id <avg_f1_micro> is not a bucket aggregation.",)
        )

        # An existing bucket node is returned unchanged.
        self.assertEqual(
            my_agg._validate_aggs_parent_id("global_metrics.field.name"),
            "global_metrics.field.name",
        )

        # An unknown node id raises a dedicated error.
        with self.assertRaises(NotFoundNodeError) as e:
            my_agg._validate_aggs_parent_id("non-existing-node")
        self.assertEqual(
            e.exception.args, ("Node id <non-existing-node> doesn't exist in tree",)
        )

        # linear agg: once both leaves are dropped the tree becomes
        #
        #   <Aggregation>
        #   classification_type
        #   └── global_metrics.field.name
        #
        # and a missing parent id resolves to "global_metrics.field.name".
        my_agg.drop_node("avg_f1_micro")
        my_agg.drop_node("avg_nb_classes")
        self.assertEqual(
            my_agg._validate_aggs_parent_id(None), "global_metrics.field.name"
        )

        # empty agg: nothing to attach to, so None is returned.
        agg = Aggs()
        self.assertIsNone(agg._validate_aggs_parent_id(None))
Ejemplo n.º 7
0
    def test_grouping_agg(self):
        """Resolve the grouping aggregation of an ``Aggregations`` response.

        Covers the default (no name given), an unknown name, a name that is
        rejected with ``ValueError`` ("avg_f1_micro"), and a valid name.
        """
        aggs_query = Aggs(sample.EXPECTED_AGG_QUERY, mapping=MAPPING)
        response = Aggregations(
            data=sample.ES_AGG_RESPONSE,
            aggs=aggs_query,
            index=None,
            client=None,
            query=None,
        )

        # No name given: "global_metrics.field.name" is selected.
        default_grouping = response._grouping_agg()
        self.assertEqual(default_grouping.identifier, "global_metrics.field.name")

        # Unknown aggregation name.
        with self.assertRaises(ValueError):
            response._grouping_agg("yolo")

        # Existing aggregation that cannot be used for grouping.
        with self.assertRaises(ValueError):
            response._grouping_agg("avg_f1_micro")

        # Existing, valid aggregation name.
        explicit_grouping = response._grouping_agg("global_metrics.field.name")
        self.assertEqual(explicit_grouping.identifier, "global_metrics.field.name")
Ejemplo n.º 8
0
 def test_parse_as_tree(self, *_):
     """Convert the sample response to a tree and compare its text rendering."""
     aggs_query = Aggs(sample.EXPECTED_AGG_QUERY, mapping=MAPPING)
     aggregations = Aggregations(
         data=sample.ES_AGG_RESPONSE,
         aggs=aggs_query,
         index=None,
         client=None,
         query=None,
     )
     tree = aggregations.to_tree()

     self.assertIsInstance(tree, AggsResponseTree)
     # __str__ is called directly, exactly as the reference rendering expects.
     rendered = tree.__str__()
     self.assertEqual(rendered, sample.EXPECTED_RESPONSE_TREE_REPR)
Ejemplo n.º 9
0
 def test_normalize_buckets(self):
     """Compare the normalized form of the sample response with the fixture.

     Both sides go through ``ordered`` before the comparison.
     """
     aggs_query = Aggs(sample.EXPECTED_AGG_QUERY, mapping=MAPPING)
     aggregations = Aggregations(
         data=sample.ES_AGG_RESPONSE,
         aggs=aggs_query,
         index=None,
         client=None,
         query=None,
     )
     normalized = aggregations.to_normalized()

     self.assertEqual(
         ordered(normalized), ordered(sample.EXPECTED_NORMALIZED_RESPONSE)
     )
Ejemplo n.º 10
0
    def test_parse_as_dataframe(self):
        """Convert the sample response to a DataFrame and check its content.

        Verifies the result type, the index level names, the column names
        and every row, keyed by its (classification_type, field name) pair.
        """
        aggs_query = Aggs(sample.EXPECTED_AGG_QUERY, mapping=MAPPING)
        aggregations = Aggregations(
            data=sample.ES_AGG_RESPONSE,
            aggs=aggs_query,
            index=None,
            client=None,
            query=None,
        )
        df = aggregations.to_dataframe()
        self.assertIsInstance(df, pd.DataFrame)

        # Structure: names are compared as sets, so their order is not checked.
        expected_index_names = {"classification_type", "global_metrics.field.name"}
        self.assertEqual(set(df.index.names), expected_index_names)
        expected_columns = {"avg_f1_micro", "avg_nb_classes", "doc_count"}
        self.assertEqual(set(df.columns), expected_columns)

        # Content, one entry per index tuple.
        expected_rows = {
            ("multiclass", "gpc"): {
                "doc_count": 198,
                "avg_f1_micro": 0.93,
                "avg_nb_classes": 211.12,
            },
            ("multiclass", "kind"): {
                "doc_count": 370,
                "avg_f1_micro": 0.89,
                "avg_nb_classes": 206.5,
            },
            ("multilabel", "gpc"): {
                "doc_count": 119,
                "avg_f1_micro": 0.95,
                "avg_nb_classes": 183.21,
            },
            ("multilabel", "ispracticecompatible"): {
                "doc_count": 128,
                "avg_f1_micro": 0.72,
                "avg_nb_classes": 18.71,
            },
            ("multilabel", "preservationmethods"): {
                "doc_count": 76,
                "avg_f1_micro": 0.8,
                "avg_nb_classes": 9.97,
            },
        }
        self.assertEqual(df.to_dict(orient="index"), expected_rows)
Ejemplo n.º 11
0
    def test_agg_insert_below(self):
        """Insert one node, then a list of nodes, below an existing node.

        Both ``aggs`` calls are made on the same starting tree (A with
        children B and C), and each expected result contains only the nodes
        added by that call.
        """

        def leaf(name):
            # Terms node whose name and field are identical.
            return Terms(name, field=name)

        def clause(name):
            # Expected serialized form of ``leaf(name)``.
            return {"terms": {"field": name}}

        a1 = Aggs(Terms("A", field="A", aggs=[leaf("B"), leaf("C")]))
        expected_initial = {
            "A": {
                "terms": {"field": "A"},
                "aggs": {"B": clause("B"), "C": clause("C")},
            }
        }
        self.assertEqual(a1.to_dict(), expected_initial)

        # Single node inserted below A.
        with_d = a1.aggs(leaf("D"), insert_below="A")
        expected_with_d = {
            "A": {
                "terms": {"field": "A"},
                "aggs": {"B": clause("B"), "C": clause("C"), "D": clause("D")},
            }
        }
        self.assertEqual(with_d.to_dict(), expected_with_d)

        # List of nodes inserted below A, again starting from a1.
        with_d_and_e = a1.aggs([leaf("D"), leaf("E")], insert_below="A")
        expected_with_d_and_e = {
            "A": {
                "terms": {"field": "A"},
                "aggs": {
                    "B": clause("B"),
                    "C": clause("C"),
                    "D": clause("D"),
                    "E": clause("E"),
                },
            }
        }
        self.assertEqual(with_d_and_e.to_dict(), expected_with_d_and_e)
Ejemplo n.º 12
0
    def test_response_tree(self, uuid_mock):
        """Parse the sample response into a tree and inspect one bucket.

        ``uuid_mock`` is given successive integers as side effect so that
        values produced through it are deterministic.
        """
        uuid_mock.side_effect = range(1000)
        aggs_query = Aggs(sample.EXPECTED_AGG_QUERY, mapping=MAPPING)
        tree = AggsResponseTree(aggs=aggs_query, index=None).parse(
            sample.ES_AGG_RESPONSE
        )

        self.assertEqual(tree.__str__(), sample.EXPECTED_RESPONSE_TREE_REPR)
        self.assertEqual(len(tree.list()), 18)

        # First bucket at the field-name level whose key is "gpc".
        gpc_buckets = (
            bucket
            for bucket in tree.list()
            if bucket.level == "global_metrics.field.name" and bucket.key == "gpc"
        )
        first_gpc_bucket = next(gpc_buckets)

        # bucket_properties lists the bucket's own level/key first, then its
        # parent's.
        expected_properties = OrderedDict(
            [
                ("global_metrics.field.name", "gpc"),
                ("classification_type", "multilabel"),
            ]
        )
        self.assertEqual(
            tree.bucket_properties(first_gpc_bucket), expected_properties
        )
Ejemplo n.º 13
0
    def test_parse_as_tabular_multiple_roots(self):
        """Tabular parsing when two aggregations sit at the root.

        With ``index_orient=True`` and ``expand_sep=" || "`` the result is
        expected to have no index names and a single row keyed by the empty
        tuple, with bucket doc counts under "<agg name> || <bucket key>".
        """
        root_aggs = Aggs(
            {
                "classification_type": {"terms": {"field": "classification_type"}},
                "avg_f1_score": {
                    "avg": {"field": "global_metrics.performance.test.micro.f1_score"}
                },
            }
        )
        terms_buckets = [
            {"key": "multiclass", "doc_count": 439},
            {"key": "multilabel", "doc_count": 433},
        ]
        raw_response = {
            "classification_type": {
                "doc_count_error_upper_bound": 0,
                "sum_other_doc_count": 0,
                "buckets": terms_buckets,
            },
            "avg_f1_score": {"value": 0.815},
        }

        aggregations = Aggregations(
            data=raw_response, aggs=root_aggs, index=None, client=None, query=None,
        )
        index_names, index_values = aggregations.to_tabular(
            index_orient=True, expand_sep=" || "
        )

        self.assertEqual(index_names, [])
        expected_single_row = {
            "classification_type || multiclass": 439,
            "classification_type || multilabel": 433,
            "avg_f1_score": 0.815,
        }
        self.assertEqual(index_values, {(): expected_single_row})
Ejemplo n.º 14
0
    def test_applied_nested_path_at_node(self):
        """Check that the correct nested path is detected at each node level.

        Tree under test:

        week
        └── nested_below_week
            └── local_metrics.field_class.name
                └── min_f1_score
        """
        node_hierarchy = DateHistogram(
            name="week",
            field="date",
            interval="1w",
            aggs=[
                Terms(
                    name="local_metrics.field_class.name",
                    field="local_metrics.field_class.name",
                    size=10,
                    aggs=[
                        Min(
                            name="min_f1_score",
                            field="local_metrics.performance.test.f1_score",
                        )
                    ],
                )
            ],
        )
        agg = Aggs(node_hierarchy, mapping=MAPPING, nested_autocorrect=True)

        # The root node is expected to have no nested path applied.
        self.assertIsNone(agg.applied_nested_path_at_node("week"))

        # From the nested clause downwards, "local_metrics" is expected.
        for nid in (
            "nested_below_week",
            "local_metrics.field_class.name",
            "min_f1_score",
        ):
            self.assertEqual(agg.applied_nested_path_at_node(nid), "local_metrics")
Ejemplo n.º 15
0
    def test_add_node_with_mapping(self):
        """Add nodes step by step to an ``Aggs`` built with a mapping.

        Steps, in order: a regular node; two rejected insertions (unknown
        field, incompatible field type) that must leave the node count
        unchanged; a field under the ``local_metrics`` nested path, for which
        a ``nested`` clause is expected; a second such field, expected under
        the same ``nested`` clause; and finally a root-level field inserted
        below the nested clause, for which a ``reverse_nested`` clause is
        expected.
        """

        def under_workflow(nested_children):
            # Expected serialization: "workflow" terms wrapping a nested
            # clause that holds ``nested_children``.
            return {
                "workflow": {
                    "terms": {"field": "workflow"},
                    "aggs": {
                        "nested_below_workflow": {
                            "nested": {"path": "local_metrics"},
                            "aggs": nested_children,
                        }
                    },
                }
            }

        f1_clause = {"avg": {"field": "local_metrics.performance.test.f1_score"}}
        precision_clause = {
            "avg": {"field": "local_metrics.performance.test.precision"}
        }
        reverse_nested_clause = {
            "reverse_nested": {},
            "aggs": {"language_terms": {"terms": {"field": "language"}}},
        }

        with_mapping = Aggs(mapping=MAPPING, nested_autocorrect=True)
        self.assertEqual(len(with_mapping.list()), 0)

        # Regular node.
        with_mapping = with_mapping.aggs(Terms("workflow", field="workflow"))
        self.assertEqual(
            with_mapping.to_dict(), {"workflow": {"terms": {"field": "workflow"}}}
        )

        # Field absent from the mapping: rejected, tree unchanged.
        with self.assertRaises(AbsentMappingFieldError):
            with_mapping.aggs(
                Terms("imaginary_agg", field="imaginary_field"),
                insert_below="workflow",
            )
        self.assertEqual(len(with_mapping.list()), 1)

        # Aggregation not compatible with the field: rejected, tree unchanged.
        with self.assertRaises(InvalidOperationMappingFieldError):
            with_mapping.aggs(
                Avg("average_of_string", field="classification_type"),
                insert_below="workflow",
            )
        self.assertEqual(len(with_mapping.list()), 1)

        # Field under a nested path: a nested clause is expected around it.
        with_mapping = with_mapping.aggs(
            Avg("local_f1_score", field="local_metrics.performance.test.f1_score"),
            insert_below="workflow",
        )
        self.assertEqual(
            with_mapping.to_dict(), under_workflow({"local_f1_score": f1_clause})
        )
        self.assertIn("nested_below_workflow", with_mapping)
        nested_node = with_mapping.get("nested_below_workflow")
        self.assertEqual(nested_node.KEY, "nested")
        self.assertEqual(nested_node.path, "local_metrics")

        # Second field on the same nested path: the existing nested clause
        # is expected to be reused as parent.
        with_mapping = with_mapping.aggs(
            Avg("local_precision", field="local_metrics.performance.test.precision"),
            insert_below="workflow",
        )
        self.assertEqual(
            with_mapping.to_dict(),
            under_workflow(
                {"local_f1_score": f1_clause, "local_precision": precision_clause}
            ),
        )
        self.assertEqual(len(with_mapping.list()), 4)

        # Root-level field inserted below the nested clause: a reverse_nested
        # clause is expected around it.
        with_mapping = with_mapping.aggs(
            Terms("language_terms", field="language"),
            insert_below="nested_below_workflow",
        )
        self.assertEqual(len(with_mapping.list()), 6)
        self.assertEqual(
            with_mapping.to_dict(),
            under_workflow(
                {
                    "local_f1_score": f1_clause,
                    "local_precision": precision_clause,
                    "reverse_nested_below_nested_below_workflow": reverse_nested_clause,
                }
            ),
        )
Ejemplo n.º 16
0
    def test_insert_tree_without_mapping(self):
        """Paste one ``Aggs`` tree below a node of another, without mapping.

        The pasted tree declares its ``nested`` clause explicitly; after
        ``insert_tree`` the receiving tree must contain all three nodes and
        serialize with the pasted tree under "week".
        """

        def week_histogram():
            # Fresh dict on every call: one copy is handed to Aggs, another
            # is used in the expected output.
            return {
                "field": "date",
                "format": "yyyy-MM-dd",
                "interval": "1w",
            }

        def nested_subtree():
            # Fresh dict on every call, for the same reason.
            return {
                "nested_below_week": {
                    "nested": {"path": "local_metrics"},
                    "aggs": {
                        "local_metrics.field_class.name": {
                            "terms": {
                                "field": "local_metrics.field_class.name",
                                "size": 10,
                            }
                        }
                    },
                }
            }

        # with explicit nested
        initial_agg_1 = Aggs({"week": {"date_histogram": week_histogram()}},)
        initial_ids = {n.identifier for n in initial_agg_1.list()}
        self.assertEqual(initial_ids, {"week"})

        pasted_agg_1 = Aggs(nested_subtree())
        self.assertEqual(
            to_id_set(pasted_agg_1.list()),
            {"nested_below_week", "local_metrics.field_class.name"},
        )

        initial_agg_1.insert_tree(pasted_agg_1, "week")

        # The receiving tree was modified in place.
        self.assertEqual(
            to_id_set(initial_agg_1.list()),
            {"week", "nested_below_week", "local_metrics.field_class.name"},
        )
        expected_merged = {
            "week": {
                "date_histogram": week_histogram(),
                "aggs": nested_subtree(),
            }
        }
        self.assertEqual(initial_agg_1.to_dict(), expected_merged)
Ejemplo n.º 17
0
    def test_parse_as_tabular(self):
        """Tabular parsing of the sample response, in both orientations.

        With ``index_orient=True`` rows are expected in a dict keyed by the
        (classification_type, field name) tuple; with ``index_orient=False``
        they are expected as a list of flat dicts that also carry the two
        grouping values. The index names are the same in both cases.
        """
        # with single agg at root
        my_agg = Aggs(sample.EXPECTED_AGG_QUERY, mapping=MAPPING)

        def tabular(index_orient):
            # A new Aggregations wrapper is built for every conversion.
            return Aggregations(
                data=sample.ES_AGG_RESPONSE,
                aggs=my_agg,
                index=None,
                client=None,
                query=None,
            ).to_tabular(index_orient=index_orient)

        expected_names = ["classification_type", "global_metrics.field.name"]
        # (classification_type, field name, avg_f1_micro, avg_nb_classes,
        # doc_count), in the order expected for the list orientation.
        rows = [
            ("multilabel", "ispracticecompatible", 0.72, 18.71, 128),
            ("multilabel", "gpc", 0.95, 183.21, 119),
            ("multilabel", "preservationmethods", 0.8, 9.97, 76),
            ("multiclass", "kind", 0.89, 206.5, 370),
            ("multiclass", "gpc", 0.93, 211.12, 198),
        ]

        # index_orient = True
        index_names, index_values = tabular(True)
        self.assertEqual(index_names, expected_names)
        expected_by_index = {
            (classification, field_name): {
                "avg_f1_micro": f1_micro,
                "avg_nb_classes": nb_classes,
                "doc_count": doc_count,
            }
            for classification, field_name, f1_micro, nb_classes, doc_count in rows
        }
        self.assertEqual(index_values, expected_by_index)

        # index_orient = False
        index_names, index_values = tabular(False)
        self.assertEqual(index_names, expected_names)
        expected_records = [
            {
                "avg_f1_micro": f1_micro,
                "avg_nb_classes": nb_classes,
                "classification_type": classification,
                "doc_count": doc_count,
                "global_metrics.field.name": field_name,
            }
            for classification, field_name, f1_micro, nb_classes, doc_count in rows
        ]
        self.assertEqual(index_values, expected_records)
Ejemplo n.º 18
0
    def test_init_from_node_hierarchy(self):
        """Build ``Aggs`` from a hierarchy of node instances.

        First with the sample hierarchy, then with a hierarchy whose fields
        sit under the ``local_metrics`` nested path, for which a ``nested``
        clause is expected in the serialized output.
        """
        agg = Aggs(sample.get_node_hierarchy(), mapping=MAPPING)
        self.assertEqual(agg.to_dict(), sample.EXPECTED_AGG_QUERY)

        # with nested
        min_score = Min(
            name="min_f1_score",
            field="local_metrics.performance.test.f1_score",
        )
        field_terms = Terms(
            name="local_metrics.field_class.name",
            field="local_metrics.field_class.name",
            size=10,
            aggs=[min_score],
        )
        node_hierarchy = DateHistogram(
            name="week", field="date", interval="1w", aggs=[field_terms]
        )
        agg = Aggs(node_hierarchy, mapping=MAPPING, nested_autocorrect=True)

        expected_terms = {
            "terms": {
                "field": "local_metrics.field_class.name",
                "size": 10,
            },
            "aggs": {
                "min_f1_score": {
                    "min": {"field": "local_metrics.performance.test.f1_score"}
                }
            },
        }
        expected = {
            "week": {
                "date_histogram": {"field": "date", "interval": "1w"},
                "aggs": {
                    "nested_below_week": {
                        "nested": {"path": "local_metrics"},
                        "aggs": {"local_metrics.field_class.name": expected_terms},
                    }
                },
            }
        }
        # Serialized and compared twice, as before; the repeated call at
        # least confirms that to_dict() gives the same result when called
        # again.
        for _ in range(2):
            self.assertEqual(agg.to_dict(), expected)
Ejemplo n.º 19
0
 def test_groupby_args_syntax(self):
     # groupby is called with a positional name and type plus a body keyword
     # argument; the serialized tree must hold exactly that terms aggregation.
     expected = {"some_name": {"terms": {"field": "some_field"}}}
     grouped = Aggs().groupby("some_name", "terms", field="some_field")
     self.assertEqual(grouped.to_dict(), expected)
Ejemplo n.º 20
0
    def test_groupby_insert_above(self):
        # Exercises groupby(..., insert_above=<name>): the grouping node(s) are
        # expected to take the place of the named node, which ends up below them.

        def terms_dict(field, aggs=None):
            # Expected serialization of a terms aggregation on `field`,
            # optionally carrying child aggregations.
            serialized = {"terms": {"field": field}}
            if aggs is not None:
                serialized["aggs"] = aggs
            return serialized

        base = Aggs(
            Terms("A", field="A", aggs=[Terms("B", field="B"), Terms("C", field="C")])
        )
        self.assertEqual(
            base.to_dict(),
            {"A": terms_dict("A", {"B": terms_dict("B"), "C": terms_dict("C")})},
        )

        # Every call below starts again from `base`; the expected trees assume
        # that groupby leaves `base` itself unchanged.

        # single grouping node inserted above B
        single_above_b = base.groupby(Terms("D", field="D"), insert_above="B")
        self.assertEqual(
            single_above_b.to_dict(),
            {
                "A": terms_dict(
                    "A",
                    {
                        "C": terms_dict("C"),
                        "D": terms_dict("D", {"B": terms_dict("B")}),
                    },
                )
            },
        )

        # D then E chained above B: the list syntax and the nested `aggs=`
        # syntax are both expected to serialize to this same tree
        chain_above_b = {
            "A": terms_dict(
                "A",
                {
                    "C": terms_dict("C"),
                    "D": terms_dict(
                        "D", {"E": terms_dict("E", {"B": terms_dict("B")})}
                    ),
                },
            )
        }
        from_list = base.groupby(
            [Terms("D", field="D"), Terms("E", field="E")], insert_above="B"
        )
        self.assertEqual(from_list.to_dict(), chain_above_b)

        from_nested = base.groupby(
            Terms("D", field="D", aggs=Terms("E", field="E")), insert_above="B"
        )
        self.assertEqual(from_nested.to_dict(), chain_above_b)

        # above root: the whole original tree ends up below D > E
        above_root = base.groupby(
            Terms("D", field="D", aggs=Terms("E", field="E")), insert_above="A"
        )
        self.assertEqual(
            above_root.to_dict(),
            {
                "D": terms_dict(
                    "D",
                    {
                        "E": terms_dict(
                            "E",
                            {
                                "A": terms_dict(
                                    "A",
                                    {"B": terms_dict("B"), "C": terms_dict("C")},
                                )
                            },
                        )
                    },
                )
            },
        )
Ejemplo n.º 21
0
    def test_client_bound_response(self, uuid_mock):
        # NOTE(review): uuid_mock is presumably injected by a patch decorator
        # that is not visible in this excerpt - confirm against the full file.

        def term_clause(field, value):
            # Expected shape of a single term filter on `field`.
            return {"term": {field: {"value": value}}}

        # successive calls of the mock return 0, 1, 2, ...
        uuid_mock.side_effect = range(1000)
        # client restricted to a `search` attribute only
        search_client = Mock(spec=["search"])

        aggs_query = Aggs(sample.EXPECTED_AGG_QUERY, mapping=MAPPING)
        parsed_tree = AggsResponseTree(aggs=aggs_query, index=None).parse(
            sample.ES_AGG_RESPONSE
        )

        response = IResponse(
            client=search_client,
            tree=parsed_tree,
            index_name="some_index",
            depth=1,
            query={"term": {"some_field": 1}},
        )

        # ensure that navigation to attributes works with autocompletion
        # (dir is used in ipython)
        for attr_name in (
            "classification_type_multiclass",
            "classification_type_multilabel",
        ):
            self.assertIn(attr_name, dir(response))

        # first level of navigation: still an IResponse, tied to the root tree
        multilabel = response.classification_type_multilabel
        self.assertIsInstance(multilabel, IResponse)
        self.assertIs(multilabel._initial_tree, response._tree)

        # second level of navigation
        self.assertIn("global_metrics_field_name_gpc", dir(multilabel))
        gpc = multilabel.global_metrics_field_name_gpc
        self.assertIsInstance(gpc, IResponse)
        self.assertIs(gpc._initial_tree, response._tree)

        # test filter query used to list documents belonging to bucket: one
        # clause per traversed bucket, plus the query given at construction
        expected_filter = {
            "bool": {
                "must": [
                    term_clause("global_metrics.field.name", "gpc"),
                    term_clause("classification_type", "multilabel"),
                    term_clause("some_field", 1),
                ]
            }
        }
        self.assertTrue(equal_queries(gpc.get_bucket_filter(), expected_filter))
Ejemplo n.º 22
0
    def test_paste_tree_with_mapping(self):
        # Checks insert_tree() on an Aggs built with a mapping, in two scenarios:
        #   1. the pasted tree already contains the nested aggregation;
        #   2. the pasted tree does not, and the receiving tree is built with
        #      nested_autocorrect=True.
        # Both scenarios are expected to end up with the same node ids and the
        # same serialized query.

        # with explicit nested
        initial_agg_1 = Aggs(
            {
                "week": {
                    "date_histogram": {
                        "field": "date",
                        "format": "yyyy-MM-dd",
                        "interval": "1w",
                    }
                }
            },
            mapping=MAPPING,
        )
        # before insertion, the receiving tree holds only the "week" node
        self.assertEqual(to_id_set(initial_agg_1.list()), {"week"})
        # tree to paste: a nested aggregation on "local_metrics" wrapping a terms
        # aggregation (built without any mapping)
        pasted_agg_1 = Aggs(
            {
                "nested_below_week": {
                    "nested": {"path": "local_metrics"},
                    "aggs": {
                        "local_metrics.field_class.name": {
                            "terms": {
                                "field": "local_metrics.field_class.name",
                                "size": 10,
                            }
                        }
                    },
                }
            }
        )
        self.assertEqual(
            to_id_set(pasted_agg_1.list()),
            {"nested_below_week", "local_metrics.field_class.name"},
        )

        # insert_tree works in place: the assertions below inspect initial_agg_1
        # itself, and expect the pasted nodes under "week"
        initial_agg_1.insert_tree(pasted_agg_1, "week")
        self.assertEqual(
            to_id_set(initial_agg_1.list()),
            {"week", "nested_below_week", "local_metrics.field_class.name"},
        )
        self.assertEqual(
            initial_agg_1.to_dict(),
            {
                "week": {
                    "date_histogram": {
                        "field": "date",
                        "format": "yyyy-MM-dd",
                        "interval": "1w",
                    },
                    "aggs": {
                        "nested_below_week": {
                            "nested": {"path": "local_metrics"},
                            "aggs": {
                                "local_metrics.field_class.name": {
                                    "terms": {
                                        "field": "local_metrics.field_class.name",
                                        "size": 10,
                                    }
                                }
                            },
                        }
                    },
                }
            },
        )

        # without explicit nested
        # same receiving tree as above, this time with nested_autocorrect enabled
        initial_agg_2 = Aggs(
            {
                "week": {
                    "date_histogram": {
                        "field": "date",
                        "format": "yyyy-MM-dd",
                        "interval": "1w",
                    }
                }
            },
            mapping=MAPPING,
            nested_autocorrect=True,
        )
        self.assertEqual(to_id_set(initial_agg_2.list()), {"week"})
        # tree to paste: only the terms aggregation, with no nested wrapper
        pasted_agg_2 = Aggs(
            {
                "local_metrics.field_class.name": {
                    "terms": {"field": "local_metrics.field_class.name", "size": 10}
                }
            }
        )
        self.assertEqual(
            to_id_set(pasted_agg_2.list()), {"local_metrics.field_class.name"}
        )

        # after insertion a "nested_below_week" node is expected even though the
        # pasted tree did not declare one
        initial_agg_2.insert_tree(pasted_agg_2, "week")
        self.assertEqual(
            to_id_set(initial_agg_2.list()),
            {"week", "nested_below_week", "local_metrics.field_class.name"},
        )
        # serialized result is expected to match the explicit-nested scenario
        self.assertEqual(
            initial_agg_2.to_dict(),
            {
                "week": {
                    "date_histogram": {
                        "field": "date",
                        "format": "yyyy-MM-dd",
                        "interval": "1w",
                    },
                    "aggs": {
                        "nested_below_week": {
                            "nested": {"path": "local_metrics"},
                            "aggs": {
                                "local_metrics.field_class.name": {
                                    "terms": {
                                        "field": "local_metrics.field_class.name",
                                        "size": 10,
                                    }
                                }
                            },
                        }
                    },
                }
            },
        )