コード例 #1
0
    def update_from_dict(self, d: Dict) -> "Search":
        """
        Apply options from a serialized body to the current instance. Modifies
        the object in-place. Used mostly by ``from_dict``.

        :arg d: raw search body. The dict itself is not modified: the top level
            is copied, and the ``highlight`` and ``suggest`` sections are copied
            before being reshaped.
        :return: the current instance, so calls can be chained.

        The ``query``, ``post_filter``, ``aggs``/``aggregations``, ``sort``,
        ``_source``, ``highlight``, ``suggest`` and ``script_fields`` keys are
        stored on their dedicated attributes; every remaining key is merged
        into ``self._params``.
        """
        d = d.copy()
        if "query" in d:
            self._query = Query(d.pop("query"))
        if "post_filter" in d:
            self._post_filter = Query(d.pop("post_filter"))

        # Both spellings are popped so that neither leaks into ``_params``;
        # ``aggs`` wins when both are present.
        aggs = d.pop("aggs", d.pop("aggregations", {}))
        if aggs:
            self._aggs = Aggs(aggs)
        if "sort" in d:
            self._sort = d.pop("sort")
        if "_source" in d:
            self._source = d.pop("_source")
        if "highlight" in d:
            high = d.pop("highlight").copy()
            self._highlight = high.pop("fields")
            self._highlight_opts = high
        if "suggest" in d:
            # ``d.copy()`` above is shallow: copy the suggest section and each
            # named entry so that spreading the global ``text`` into the
            # entries never mutates the dict handed in by the caller.
            suggest = {
                name: dict(entry) if isinstance(entry, dict) else entry
                for name, entry in d.pop("suggest").items()
            }
            if "text" in suggest:
                text = suggest.pop("text")
                for s in suggest.values():
                    s.setdefault("text", text)
            self._suggest = suggest
        if "script_fields" in d:
            self._script_fields = d.pop("script_fields")
        self._params.update(d)
        return self
コード例 #2
0
    def test_parse_as_tabular(self):
        """Tabular serialization of a response with a single aggregation at root."""
        expected_rows = [
            (
                ("multilabel", "ispracticecompatible"),
                {
                    "avg_f1_micro": 0.72,
                    "avg_nb_classes": 18.71,
                    "doc_count": 128
                },
            ),
            (
                ("multilabel", "gpc"),
                {
                    "avg_f1_micro": 0.95,
                    "avg_nb_classes": 183.21,
                    "doc_count": 119
                },
            ),
            (
                ("multilabel", "preservationmethods"),
                {
                    "avg_f1_micro": 0.8,
                    "avg_nb_classes": 9.97,
                    "doc_count": 76
                },
            ),
            (
                ("multiclass", "kind"),
                {
                    "avg_f1_micro": 0.89,
                    "avg_nb_classes": 206.5,
                    "doc_count": 370
                },
            ),
            (
                ("multiclass", "gpc"),
                {
                    "avg_f1_micro": 0.93,
                    "avg_nb_classes": 211.12,
                    "doc_count": 198
                },
            ),
        ]

        declared_aggs = Aggs(sample.EXPECTED_AGG_QUERY, mapping=MAPPING)
        parsed = Aggregations(
            data=sample.ES_AGG_RESPONSE,
            aggs=declared_aggs,
            index=None,
            client=None,
            query=None,
        )
        index_names, index_values = parsed.serialize_as_tabular(
            row_as_tuple=True)

        # One index level per grouping aggregation, rows as (key, values) pairs.
        self.assertEqual(index_names,
                         ["classification_type", "global_metrics.field.name"])
        self.assertEqual(index_values, expected_rows)
コード例 #3
0
def get_wrapper_declared_agg():
    """Build the sample aggregation with the chained ``groupby``/``aggs`` API.

    Groups by ``classification_type`` and ``global_metrics.field.name``, then
    adds two ``Avg`` metric aggregations.
    """
    group_fields = ["classification_type", "global_metrics.field.name"]
    metrics = [
        Avg("avg_nb_classes", field="global_metrics.dataset.nb_classes"),
        Avg(
            "avg_f1_micro",
            field="global_metrics.performance.test.micro.f1_score",
        ),
    ]
    return Aggs(mapping=MAPPING).groupby(group_fields).aggs(metrics)
コード例 #4
0
    def test_parse_as_tabular_multiple_roots(self):
        """Tabular serialization when several aggregations sit at the root."""
        terms_clause = {"terms": {"field": "classification_type"}}
        avg_clause = {
            "avg": {
                "field": "global_metrics.performance.test.micro.f1_score"
            }
        }
        declared_aggs = Aggs({
            "classification_type": terms_clause,
            "avg_f1_score": avg_clause,
        })

        raw_response = {
            "classification_type": {
                "doc_count_error_upper_bound": 0,
                "sum_other_doc_count": 0,
                "buckets": [
                    {"key": "multiclass", "doc_count": 439},
                    {"key": "multilabel", "doc_count": 433},
                ],
            },
            "avg_f1_score": {"value": 0.815},
        }

        parsed = Aggregations(
            data=raw_response,
            aggs=declared_aggs,
            index=None,
            client=None,
            query=None,
        )
        index_names, index_values = parsed.serialize_as_tabular(
            row_as_tuple=True, expand_sep=" || ")

        # No grouping level is expected: a single row keyed by the empty tuple,
        # with bucket keys expanded into column names using the separator.
        expected_row = {
            "avg_f1_score": 0.815,
            "classification_type || multiclass": 439,
            "classification_type || multilabel": 433,
        }
        self.assertEqual(index_names, [])
        self.assertEqual(index_values, [((), expected_row)])
コード例 #5
0
    def test_parse_as_dataframe(self):
        """Dataframe serialization: index names, columns, row order and values."""
        my_agg = Aggs(sample.EXPECTED_AGG_QUERY, mapping=MAPPING)
        df = Aggregations(
            data=sample.ES_AGG_RESPONSE,
            aggs=my_agg,
            index=None,
            client=None,
            query=None,
        ).serialize_as_dataframe()
        self.assertIsInstance(df, pd.DataFrame)
        self.assertEqual(set(df.index.names),
                         {"classification_type", "global_metrics.field.name"})
        self.assertEqual(set(df.columns),
                         {"avg_f1_micro", "avg_nb_classes", "doc_count"})
        self.assertEqual(
            df.index.to_list(),
            [
                ("multilabel", "ispracticecompatible"),
                ("multilabel", "gpc"),
                ("multilabel", "preservationmethods"),
                ("multiclass", "kind"),
                ("multiclass", "gpc"),
            ],
        )

        # "records" is the documented orient for a list of per-row dicts;
        # the former "rows" spelling only worked through a deprecated
        # abbreviation and is rejected by recent pandas versions.
        self.assertEqual(
            df.to_dict(orient="records"),
            [
                {
                    "avg_f1_micro": 0.72,
                    "avg_nb_classes": 18.71,
                    "doc_count": 128
                },
                {
                    "avg_f1_micro": 0.95,
                    "avg_nb_classes": 183.21,
                    "doc_count": 119
                },
                {
                    "avg_f1_micro": 0.8,
                    "avg_nb_classes": 9.97,
                    "doc_count": 76
                },
                {
                    "avg_f1_micro": 0.89,
                    "avg_nb_classes": 206.5,
                    "doc_count": 370
                },
                {
                    "avg_f1_micro": 0.93,
                    "avg_nb_classes": 211.12,
                    "doc_count": 198
                },
            ],
        )
コード例 #6
0
 def test_normalize_buckets(self):
     """Normalized serialization must match the stored sample, order aside."""
     declared_aggs = Aggs(sample.EXPECTED_AGG_QUERY, mapping=MAPPING)
     parsed = Aggregations(
         data=sample.ES_AGG_RESPONSE,
         aggs=declared_aggs,
         index=None,
         client=None,
         query=None,
     )
     normalized = parsed.serialize_as_normalized()
     # Both sides go through ``ordered`` before being compared.
     actual = ordered(normalized)
     expected = ordered(sample.EXPECTED_NORMALIZED_RESPONSE)
     self.assertEqual(actual, expected)
コード例 #7
0
 def test_parse_as_tree(self, *_):
     """Tree serialization returns an ``AggsResponseTree`` with the sample repr."""
     declared_aggs = Aggs(sample.EXPECTED_AGG_QUERY, mapping=MAPPING)
     parsed = Aggregations(
         data=sample.ES_AGG_RESPONSE,
         aggs=declared_aggs,
         index=None,
         client=None,
         query=None,
     )
     tree = parsed.serialize_as_tree()
     self.assertIsInstance(tree, AggsResponseTree)
     rendered = tree.__str__()
     self.assertEqual(rendered, sample.EXPECTED_RESPONSE_TREE_REPR)
コード例 #8
0
ファイル: search.py プロジェクト: leonardbinet/pandagg
    def __init__(self, using=None, index=None, mapping=None):
        """
        Build a search request to elasticsearch.

        :arg using: `Elasticsearch` instance to use
        :arg index: limit the search to index
        :arg mapping: mapping used for query validation

        Every parameter given (or left out) here can be overridden later on
        through the matching method (`using`, `index` and `mapping`).
        """
        # The same ``Mapping`` instance is handed to the query, post-filter
        # and aggregation builders.
        mapping = Mapping(mapping)
        self._mapping = mapping
        self._query = Query(mapping=mapping)
        self._post_filter = Query(mapping=mapping)
        self._aggs = Aggs(mapping=mapping)

        # Remaining request sections all start empty.
        self._sort = []
        self._source = None
        self._highlight, self._highlight_opts = {}, {}
        self._suggest = {}
        self._script_fields = {}

        super(Search, self).__init__(using=using, index=index)
コード例 #9
0
ファイル: test_response.py プロジェクト: alkemics/pandagg
    def test_grouping_agg(self):
        """Exercise ``_grouping_agg`` with no name, unknown, metric and bucket names."""
        declared_aggs = Aggs(sample.EXPECTED_AGG_QUERY, mappings=MAPPINGS)
        search = Search().aggs(declared_aggs)
        agg_response = Aggregations(data=sample.ES_AGG_RESPONSE,
                                    _search=search)

        # No name given: first returned element is None.
        default_grouping = agg_response._grouping_agg()
        self.assertIsNone(default_grouping[0])

        # Unknown aggregation name.
        with self.assertRaises(KeyError):
            agg_response._grouping_agg("yolo")

        # Existing aggregation that is not a bucket aggregation.
        with self.assertRaises(ValueError):
            agg_response._grouping_agg("avg_f1_micro")

        # Existing bucket aggregation: its name is returned first.
        bucket_name = "global_metrics.field.name"
        named_grouping = agg_response._grouping_agg(bucket_name)
        self.assertEqual(named_grouping[0], "global_metrics.field.name")
コード例 #10
0
    def test_response_tree(self, uuid_mock):
        """Parse the sample response into a tree and inspect one of its buckets."""
        # Deterministic identifiers so the tree representation is stable.
        uuid_mock.side_effect = range(1000)
        declared_aggs = Aggs(sample.EXPECTED_AGG_QUERY, mapping=MAPPING)
        tree_builder = AggsResponseTree(aggs=declared_aggs, index=None)
        response_tree = tree_builder.parse(sample.ES_AGG_RESPONSE)

        self.assertEqual(response_tree.__str__(),
                         sample.EXPECTED_RESPONSE_TREE_REPR)
        self.assertEqual(len(response_tree.list()), 18)

        def is_gpc_field_bucket(node):
            return (node.level == "global_metrics.field.name"
                    and node.key == "gpc")

        # First matching bucket in listing order.
        multilabel_gpc_bucket = next(
            node for node in response_tree.list()
            if is_gpc_field_bucket(node))

        # Bucket properties list the bucket's own level/key, then its parents'.
        expected_properties = OrderedDict([
            ("global_metrics.field.name", "gpc"),
            ("classification_type", "multilabel"),
        ])
        self.assertEqual(
            response_tree.bucket_properties(multilabel_gpc_bucket),
            expected_properties,
        )
コード例 #11
0
    def __init__(
        self,
        using: Optional[Elasticsearch] = None,
        index: Optional[Union[str, Tuple[str, ...], List[str]]] = None,
        mappings: Optional[Union[MappingsDict, Mappings]] = None,
        nested_autocorrect: bool = False,
        repr_auto_execute: bool = False,
        document_class: Optional[DocumentMeta] = None,
    ) -> None:
        """
        Search request to elasticsearch.

        :arg using: `Elasticsearch` instance to use
        :arg index: limit the search to index
        :arg mappings: mappings used for query validation
        :arg nested_autocorrect: in case of missing nested clause, will insert it automatically
        :arg repr_auto_execute: execute query and display results as dataframe, requires client to be provided
        :arg document_class: optional document class, stored on the instance as
            ``_document_class`` (how it is used is not visible from this method)

        All the parameters supplied (or omitted) at creation type can be later
        overridden by methods (`using`, `index` and `mappings` respectively).
        """

        self._sort: List[Union[str, Dict[str, Any]]] = []
        self._source: Any = None
        self._highlight: Dict[str, Any] = {}
        self._highlight_opts: Dict[str, Any] = {}
        self._suggest: Dict[str, Any] = {}
        self._script_fields: Dict[str, Any] = {}
        # The same normalized mappings object is handed to the aggregation,
        # query and post-filter builders.
        mappings = _mappings(mappings)
        self._mappings: Optional[Mappings] = mappings
        self._aggs: Aggs = Aggs(mappings=mappings,
                                nested_autocorrect=nested_autocorrect)
        self._query: Query = Query(mappings=mappings,
                                   nested_autocorrect=nested_autocorrect)
        self._post_filter: Query = Query(mappings=mappings,
                                         nested_autocorrect=nested_autocorrect)
        self._repr_auto_execute: bool = repr_auto_execute
        self._document_class: Optional[DocumentMeta] = document_class
        super(Search, self).__init__(using=using, index=index)
コード例 #12
0
ファイル: test_response.py プロジェクト: alkemics/pandagg
    def test_parse_as_dataframe(self):
        """Dataframe output grouped by ``global_metrics.field.name``."""
        expected_by_index = {
            ("multiclass", "gpc"): {
                "avg_f1_micro": 0.93,
                "avg_nb_classes": 211.12,
                "doc_count": 198,
            },
            ("multiclass", "kind"): {
                "avg_f1_micro": 0.89,
                "avg_nb_classes": 206.5,
                "doc_count": 370,
            },
            ("multilabel", "ispracticecompatible"): {
                "avg_f1_micro": 0.72,
                "avg_nb_classes": 18.71,
                "doc_count": 128,
            },
            ("multilabel", "preservationmethods"): {
                "avg_f1_micro": 0.8,
                "avg_nb_classes": 9.97,
                "doc_count": 76,
            },
        }

        declared_aggs = Aggs(sample.EXPECTED_AGG_QUERY, mappings=MAPPINGS)
        search = Search().aggs(declared_aggs)
        parsed = Aggregations(data=sample.ES_AGG_RESPONSE, _search=search)
        df = parsed.to_dataframe(grouped_by="global_metrics.field.name")

        self.assertIsInstance(df, pd.DataFrame)
        self.assertEqual(set(df.index.names),
                         {"classification_type", "global_metrics.field.name"})
        self.assertEqual(set(df.columns),
                         {"avg_f1_micro", "avg_nb_classes", "doc_count"})

        # Row values keyed by their (classification_type, field name) index.
        self.assertEqual(df.to_dict(orient="index"), expected_by_index)
コード例 #13
0
    def test_client_bound_response(self, uuid_mock):
        """Navigate an ``IResponse`` by attribute and check the bucket filter."""
        uuid_mock.side_effect = range(1000)
        client_mock = Mock(spec=["search"])

        declared_aggs = Aggs(sample.EXPECTED_AGG_QUERY, mapping=MAPPING)
        tree_builder = AggsResponseTree(aggs=declared_aggs, index=None)
        response_tree = tree_builder.parse(sample.ES_AGG_RESPONSE)

        initial_query = {"term": {"some_field": 1}}
        response = IResponse(
            client=client_mock,
            tree=response_tree,
            index_name="some_index",
            depth=1,
            query=initial_query,
        )

        # Attribute navigation must be discoverable through ``dir`` (which is
        # what ipython relies on for autocompletion).
        root_attributes = dir(response)
        self.assertIn("classification_type_multiclass", root_attributes)
        self.assertIn("classification_type_multilabel", root_attributes)

        multilabel = response.classification_type_multilabel
        self.assertIsInstance(multilabel, IResponse)
        self.assertIs(multilabel._initial_tree, response._tree)

        self.assertIn("global_metrics_field_name_gpc", dir(multilabel))
        gpc = multilabel.global_metrics_field_name_gpc
        self.assertIsInstance(gpc, IResponse)
        self.assertIs(gpc._initial_tree, response._tree)

        # Filter query used to list the documents belonging to the bucket: the
        # bucket's own term, its parent's term and the initial query.
        expected_filter = {
            "bool": {
                "must": [
                    {
                        "term": {
                            "global_metrics.field.name": {
                                "value": "gpc"
                            }
                        }
                    },
                    {
                        "term": {
                            "classification_type": {
                                "value": "multilabel"
                            }
                        }
                    },
                    {
                        "term": {
                            "some_field": {
                                "value": 1
                            }
                        }
                    },
                ]
            }
        }
        bucket_filter = gpc.get_bucket_filter()
        self.assertTrue(equal_queries(bucket_filter, expected_filter))
コード例 #14
0
ファイル: test_response.py プロジェクト: alkemics/pandagg
    def test_parse_as_tabular(self):
        """``to_tabular`` with a single root aggregation, in both orientations."""
        declared_aggs = Aggs(sample.EXPECTED_AGG_QUERY, mappings=MAPPINGS)
        expected_names = ["classification_type", "global_metrics.field.name"]

        def parsed_response():
            # A fresh search and response wrapper for every call.
            return Aggregations(data=sample.ES_AGG_RESPONSE,
                                _search=Search().aggs(declared_aggs))

        # index_orient=True: rows keyed by their index tuple.
        index_names, index_values = parsed_response().to_tabular(
            index_orient=True, grouped_by="global_metrics.field.name")

        expected_by_index = {
            ("multilabel", "ispracticecompatible"): {
                "avg_f1_micro": 0.72,
                "avg_nb_classes": 18.71,
                "doc_count": 128,
            },
            ("multilabel", "preservationmethods"): {
                "avg_f1_micro": 0.8,
                "avg_nb_classes": 9.97,
                "doc_count": 76,
            },
            ("multiclass", "kind"): {
                "avg_f1_micro": 0.89,
                "avg_nb_classes": 206.5,
                "doc_count": 370,
            },
            ("multiclass", "gpc"): {
                "avg_f1_micro": 0.93,
                "avg_nb_classes": 211.12,
                "doc_count": 198,
            },
        }
        self.assertEqual(index_names, expected_names)
        self.assertEqual(index_values, expected_by_index)

        # index_orient=False: flat records carrying the grouping keys.
        index_names, index_values = parsed_response().to_tabular(
            index_orient=False, grouped_by="global_metrics.field.name")

        expected_records = [
            {
                "avg_f1_micro": 0.72,
                "avg_nb_classes": 18.71,
                "classification_type": "multilabel",
                "doc_count": 128,
                "global_metrics.field.name": "ispracticecompatible",
            },
            {
                "avg_f1_micro": 0.8,
                "avg_nb_classes": 9.97,
                "classification_type": "multilabel",
                "doc_count": 76,
                "global_metrics.field.name": "preservationmethods",
            },
            {
                "avg_f1_micro": 0.89,
                "avg_nb_classes": 206.5,
                "classification_type": "multiclass",
                "doc_count": 370,
                "global_metrics.field.name": "kind",
            },
            {
                "avg_f1_micro": 0.93,
                "avg_nb_classes": 211.12,
                "classification_type": "multiclass",
                "doc_count": 198,
                "global_metrics.field.name": "gpc",
            },
        ]
        self.assertEqual(index_names, expected_names)
        self.assertEqual(index_values, expected_records)
コード例 #15
0
ファイル: test_response.py プロジェクト: alkemics/pandagg
    def test_parse_as_tabular_multiple_roots(self):
        """``to_tabular`` when several aggregations sit at the root."""
        declared_aggs = Aggs({
            "classification_type": {
                "terms": {"field": "classification_type"}
            },
            "avg_f1_score": {
                "avg": {
                    "field": "global_metrics.performance.test.micro.f1_score"
                }
            },
        })

        raw_response = {
            "classification_type": {
                "doc_count_error_upper_bound": 0,
                "sum_other_doc_count": 0,
                "buckets": [
                    {"key": "multiclass", "doc_count": 439},
                    {"key": "multilabel", "doc_count": 433},
                ],
            },
            "avg_f1_score": {"value": 0.815},
        }

        def parsed_response():
            # A fresh search and response wrapper for every call.
            return Aggregations(data=raw_response,
                                _search=Search().aggs(declared_aggs))

        # Without ``grouped_by``: a single row keyed by the empty tuple, bucket
        # keys expanded into column names using the separator.
        index_names, index_values = parsed_response().to_tabular(
            index_orient=True, expand_sep=" || ")
        expected_single_row = {
            (): {
                "avg_f1_score": 0.815,
                "classification_type || multiclass": 439,
                "classification_type || multilabel": 433,
            }
        }
        self.assertEqual(index_names, [])
        self.assertEqual(index_values, expected_single_row)

        # With an explicit ``grouped_by``: one row per bucket of that level.
        index_names, index_values = parsed_response().to_tabular(
            grouped_by="classification_type")
        expected_per_bucket = {
            ("multiclass", ): {"doc_count": 439},
            ("multilabel", ): {"doc_count": 433},
        }
        self.assertEqual(index_names, ["classification_type"])
        self.assertEqual(index_values, expected_per_bucket)
コード例 #16
0
ファイル: search.py プロジェクト: leonardbinet/pandagg
class Search(Request):
    def __init__(self, using=None, index=None, mapping=None):
        """
        Build a search request to elasticsearch.

        :arg using: `Elasticsearch` instance to use
        :arg index: limit the search to index
        :arg mapping: mapping used for query validation

        Every parameter given (or left out) here can be overridden later on
        through the matching method (`using`, `index` and `mapping`).
        """
        # The same ``Mapping`` instance is handed to the query, post-filter
        # and aggregation builders.
        mapping = Mapping(mapping)
        self._mapping = mapping
        self._query = Query(mapping=mapping)
        self._post_filter = Query(mapping=mapping)
        self._aggs = Aggs(mapping=mapping)

        # Remaining request sections all start empty.
        self._sort = []
        self._source = None
        self._highlight, self._highlight_opts = {}, {}
        self._suggest = {}
        self._script_fields = {}

        super(Search, self).__init__(using=using, index=index)

    def query(self, *args, **kwargs):
        # Apply the clause to a clone's query and hand the clone back.
        clone = self._clone()
        clone._query = clone._query.query(*args, **kwargs)
        return clone

    query.__doc__ = Query.query.__doc__

    def filter(self, *args, **kwargs):
        # Apply the filter clause to a clone's query and hand the clone back.
        clone = self._clone()
        clone._query = clone._query.filter(*args, **kwargs)
        return clone

    filter.__doc__ = Query.filter.__doc__

    def must_not(self, *args, **kwargs):
        # Apply the must_not clause to a clone's query and hand the clone back.
        clone = self._clone()
        clone._query = clone._query.must_not(*args, **kwargs)
        return clone

    must_not.__doc__ = Query.must_not.__doc__

    def should(self, *args, **kwargs):
        # Apply the should clause to a clone's query and hand the clone back.
        clone = self._clone()
        clone._query = clone._query.should(*args, **kwargs)
        return clone

    should.__doc__ = Query.should.__doc__

    def must(self, *args, **kwargs):
        # Apply the must clause to a clone's query and hand the clone back.
        clone = self._clone()
        clone._query = clone._query.must(*args, **kwargs)
        return clone

    must.__doc__ = Query.must.__doc__

    def aggs(self, *args, **kwargs):
        # Add the aggregations to a clone's aggs and hand the clone back.
        clone = self._clone()
        clone._aggs = clone._aggs.aggs(*args, **kwargs)
        return clone

    aggs.__doc__ = Aggs.aggs.__doc__

    def groupby(self, *args, **kwargs):
        # Apply the grouping to a clone's aggs and hand the clone back.
        clone = self._clone()
        clone._aggs = clone._aggs.groupby(*args, **kwargs)
        return clone

    groupby.__doc__ = Aggs.groupby.__doc__

    def __iter__(self):
        """
        Execute the search and iterate over what ``execute`` returns (the hits).
        """
        response = self.execute()
        return iter(response)

    def __getitem__(self, n):
        """
        Support slicing the `Search` instance for pagination.

        Slicing equates to the from/size parameters. E.g.::

            s = Search().query(...)[0:25]

        is equivalent to::

            s = Search().query(...).params(from=0, size=25)

        :arg n: a slice, or a single non-negative index (same as ``[n:n+1]``).
            The slice step is not taken into account.
        :return: a clone of the search with ``from`` and ``size`` set.
        :raises ValueError: on a negative slice bound or a negative index.

        A slice whose stop is lower than its start (e.g. ``[10:5]``) yields
        ``size=0`` rather than a negative size.
        """
        s = self._clone()

        if isinstance(n, slice):
            # If negative slicing, abort.
            if n.start and n.start < 0 or n.stop and n.stop < 0:
                raise ValueError("Search does not support negative slicing.")
            start = n.start or 0
            s._params["from"] = start
            if n.stop is None:
                # Elasticsearch won't get all results so we default to
                # size: 10 if stop not given.
                s._params["size"] = 10
            else:
                # Clamp at 0: an empty slice must not produce a negative size.
                s._params["size"] = max(0, n.stop - start)
            return s

        # This is an index lookup, equivalent to slicing by [n:n+1].
        # If negative index, abort.
        if n < 0:
            raise ValueError("Search does not support negative indexing.")
        s._params["from"] = n
        s._params["size"] = 1
        return s

    def size(self, size):
        """Return a clone with the ``size`` parameter set. Equivalent to::

            s = Search().params(size=size)

        """
        clone = self._clone()
        clone._params.update({"size": size})
        return clone

    @classmethod
    def from_dict(cls, d):
        """
        Build a new `Search` instance out of a raw dict holding the search
        body. Handy when migrating from raw dictionaries.

        Example::

            s = Search.from_dict({
                "query": {
                    "bool": {
                        "must": [...]
                    }
                },
                "aggs": {...}
            })
            s = s.filter('term', published=True)
        """
        # Start from a default instance, then load the body into it.
        search = cls()
        search.update_from_dict(d)
        return search

    def _clone(self):
        """
        Return a clone of the current search request. The plain containers are
        shallow-copied and the query/aggs/mapping objects are duplicated via
        their own ``clone`` method. Used internally by most state modifying
        APIs.
        """
        clone = self.__class__(using=self._using,
                               index=self._index,
                               mapping=self._mapping)
        clone._params = self._params.copy()
        clone._sort = self._sort[:]
        clone._source = (None if self._source is None else copy.copy(
            self._source))

        # Dict-valued sections: shallow copy.
        for attr in ("_highlight", "_highlight_opts", "_suggest",
                     "_script_fields"):
            setattr(clone, attr, getattr(self, attr).copy())

        # Objects exposing ``clone()``.
        for attr in ("_aggs", "_query", "_post_filter", "_mapping"):
            setattr(clone, attr, getattr(self, attr).clone())
        return clone

    def update_from_dict(self, d):
        """
        Apply options from a serialized body to the current instance. Modifies
        the object in-place. Used mostly by ``from_dict``.

        :arg d: raw search body. The dict itself is not modified: the top level
            is copied, and the ``highlight`` and ``suggest`` sections are copied
            before being reshaped.
        :return: the current instance, so calls can be chained.

        The ``query``, ``post_filter``, ``aggs``/``aggregations``, ``sort``,
        ``_source``, ``highlight``, ``suggest`` and ``script_fields`` keys are
        stored on their dedicated attributes; every remaining key is merged
        into ``self._params``.
        """
        d = d.copy()
        if "query" in d:
            self._query = Query(d.pop("query"))
        if "post_filter" in d:
            self._post_filter = Query(d.pop("post_filter"))

        # Both spellings are popped so that neither leaks into ``_params``;
        # ``aggs`` wins when both are present.
        aggs = d.pop("aggs", d.pop("aggregations", {}))
        if aggs:
            self._aggs = Aggs(aggs)
        if "sort" in d:
            self._sort = d.pop("sort")
        if "_source" in d:
            self._source = d.pop("_source")
        if "highlight" in d:
            high = d.pop("highlight").copy()
            self._highlight = high.pop("fields")
            self._highlight_opts = high
        if "suggest" in d:
            # ``d.copy()`` above is shallow: copy the suggest section and each
            # named entry so that spreading the global ``text`` into the
            # entries never mutates the dict handed in by the caller.
            suggest = dict(
                (name, dict(entry) if isinstance(entry, dict) else entry)
                for name, entry in d.pop("suggest").items())
            if "text" in suggest:
                text = suggest.pop("text")
                for s in suggest.values():
                    s.setdefault("text", text)
            self._suggest = suggest
        if "script_fields" in d:
            self._script_fields = d.pop("script_fields")
        self._params.update(d)
        return self

    def script_fields(self, **kwargs):
        """
        Declare script fields to be computed on hits. See
        https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-script-fields.html
        for more details.

        A value given as a plain string is wrapped as ``{"script": value}``;
        any other value is used as provided.

        Example::

            s = Search()
            s = s.script_fields(times_two="doc['field'].value * 2")
            s = s.script_fields(
                times_three={
                    'script': {
                        'inline': "doc['field'].value * params.n",
                        'params': {'n': 3}
                    }
                }
            )

        """
        clone = self._clone()
        normalized = dict(
            (name,
             {"script": definition}
             if isinstance(definition, string_types) else definition)
            for name, definition in kwargs.items())
        clone._script_fields.update(normalized)
        return clone

    def source(self, fields=None, **kwargs):
        """
        Selectively control how the _source field is returned.

        :arg fields: wildcard string, array of wildcards, or dictionary of includes and excludes

        With ``fields`` left to None the whole document is returned for each
        hit. When ``fields`` is a dictionary holding 'includes' and/or
        'excludes' keys, fields are included or excluded accordingly.

        Passing the same named parameter on a later call overrides the value
        set previously; passing None for it removes it.

        Example::

            s = Search()
            s = s.source(includes=['obj1.*'], excludes=["*.description"])

            s = Search()
            s = s.source(includes=['obj1.*']).source(excludes=["*.description"])

        """
        clone = self._clone()

        if fields and kwargs:
            raise ValueError(
                "You cannot specify fields and kwargs at the same time.")

        # An explicit ``fields`` value replaces whatever was configured.
        if fields is not None:
            clone._source = fields
            return clone

        # Named parameters need a dict to be merged into.
        if kwargs and not isinstance(clone._source, dict):
            clone._source = {}

        for option, setting in kwargs.items():
            if setting is not None:
                clone._source[option] = setting
            else:
                # None means "forget this option"; absent keys are ignored.
                clone._source.pop(option, None)

        return clone

    def sort(self, *keys):
        """
        Set the sorting information of the search request. Calling it without
        arguments removes every sort requirement; otherwise the given keys
        replace the existing ones. Acceptable arguments are::

            'some.field'
            '-some.other.field'
            {'different.field': {'any': 'dict'}}

        so for example::

            s = Search().sort(
                'category',
                '-title',
                {"price" : {"order" : "asc", "mode" : "avg"}}
            )

        will sort by ``category``, ``title`` (in descending order) and
        ``price`` in ascending order using the ``avg`` mode.

        The API returns a copy of the Search object and can thus be chained.
        """
        clone = self._clone()
        normalized = []
        for key in keys:
            descending = isinstance(key, string_types) and key.startswith("-")
            if descending:
                # "-field" is shorthand for a descending sort on "field".
                field = key[1:]
                if field == "_score":
                    raise ValueError("Sorting by `-_score` is not allowed.")
                key = {field: {"order": "desc"}}
            normalized.append(key)
        clone._sort = normalized
        return clone

    def highlight_options(self, **kwargs):
        """
        Merge the given keyword arguments into the global highlighting options
        of a clone of this request. For example::

            s = Search()
            s = s.highlight_options(order='score')
        """
        clone = self._clone()
        for option, value in kwargs.items():
            clone._highlight_opts[option] = value
        return clone

    def highlight(self, *fields, **kwargs):
        """
        Request highlighting of some fields. The keyword arguments are used as
        parameters for every field listed in ``fields``. Example::

            Search().highlight('title', 'body', fragment_size=50)

        will produce the equivalent of::

            {
                "highlight": {
                    "fields": {
                        "body": {"fragment_size": 50},
                        "title": {"fragment_size": 50}
                    }
                }
            }

        If you want to have different options for different fields you can call ``highlight`` twice::

            Search().highlight('title', fragment_size=50).highlight('body', fragment_size=100)

        which will produce::

            {
                "highlight": {
                    "fields": {
                        "body": {"fragment_size": 100},
                        "title": {"fragment_size": 50}
                    }
                }
            }

        """
        clone = self._clone()
        # Every listed field points at the same options dict.
        clone._highlight.update(dict.fromkeys(fields, kwargs))
        return clone

    def suggest(self, name, text, **kwargs):
        """
        Return a copy of the search with one more suggestion request.

        :arg name: name of the suggestion
        :arg text: text to suggest on

        Every keyword argument is merged into the suggestion body next to
        ``text``. For example::

            s = Search()
            s = s.suggest('suggestion-1', 'Elasticsearch', term={'field': 'body'})
        """
        clone = self._clone()
        entry = {"text": text}
        entry.update(kwargs)
        clone._suggest[name] = entry
        return clone

    def to_dict(self, count=False, **kwargs):
        """
        Build the dictionary that is sent as the body of the request.

        :arg count: when true, only the query is serialized (no post filter,
            aggregations, sort, params, source filtering, highlight, suggest
            or script fields), which is what a count request needs.

        Any additional keyword argument is merged into the result last, so it
        overrides a key of the same name produced by the search itself.
        """
        body = {}

        if self._query:
            body["query"] = self._query.to_dict()

        # count request doesn't care for sorting and other things
        if count:
            body.update(kwargs)
            return body

        if self._post_filter:
            body["post_filter"] = self._post_filter.to_dict()

        if self._aggs:
            body["aggs"] = self._aggs.to_dict()

        if self._sort:
            body["sort"] = self._sort

        body.update(self._params)

        if self._source not in (None, {}):
            body["_source"] = self._source

        if self._highlight:
            highlight = {"fields": self._highlight}
            highlight.update(self._highlight_opts)
            body["highlight"] = highlight

        if self._suggest:
            body["suggest"] = self._suggest

        if self._script_fields:
            body["script_fields"] = self._script_fields

        body.update(kwargs)
        return body

    def count(self):
        """
        Ask the cluster how many documents match the query and return that
        number only (the ``"count"`` entry of the response).
        """
        client = get_connection(self._using)

        body = self.to_dict(count=True)
        response = client.count(index=self._index, body=body)
        return response["count"]

    def execute(self):
        """
        Run the search against the configured connection and wrap the raw
        result in a ``Response`` that keeps a reference to this search.
        """
        client = get_connection(self._using)
        raw = client.search(index=self._index, body=self.to_dict())
        return Response(raw, search=self)

    def scan(self):
        """
        Turn the search into a scan search and return a generator that
        iterates over all the documents matching the query.

        The work is delegated to the ``scan`` helper from ``elasticsearch-py``,
        which receives the connection, the serialized body (``to_dict()``) and
        the index of this search -
        https://elasticsearch-py.readthedocs.io/en/master/helpers.html#elasticsearch.helpers.scan

        """
        client = get_connection(self._using)

        hits = scan(client, query=self.to_dict(), index=self._index)
        for hit in hits:
            yield hit

    def delete(self):
        """
        Delete the documents matching the search by sending its serialized
        body to ``delete_by_query()`` and return that call's result.
        """
        client = get_connection(self._using)
        body = self.to_dict()
        return client.delete_by_query(index=self._index, body=body)

    def __repr__(self):
        # Show the request body as indented JSON.
        body = self.to_dict()
        return json.dumps(body, indent=2)
コード例 #17
0
class Search(DSLMixin, Request):

    _type_name = "search"

    def __init__(
        self,
        using: Optional[Elasticsearch] = None,
        index: Optional[Union[str, Tuple[str], List[str]]] = None,
        mappings: Optional[Union[MappingsDict, Mappings]] = None,
        nested_autocorrect: bool = False,
        repr_auto_execute: bool = False,
        document_class: DocumentMeta = None,
    ) -> None:
        """
        Search request to elasticsearch.

        :arg using: `Elasticsearch` instance to use
        :arg index: limit the search to index
        :arg mappings: mappings used for query validation
        :arg nested_autocorrect: in case of missing nested clause, will insert it automatically
        :arg repr_auto_execute: execute query and display results as dataframe, requires client to be provided
        :arg document_class: stored on the instance and handed to the ``Hit``
            objects built by ``scan``

        All the parameters supplied (or omitted) at creation time can be later
        overridden by methods (`using`, `index` and `mappings` respectively).
        """
        # Query, post filter and aggregations all start empty and share the
        # same normalized mappings / autocorrect settings.
        mappings = _mappings(mappings)
        dsl_options = dict(mappings=mappings,
                           nested_autocorrect=nested_autocorrect)
        self._mappings: Optional[Mappings] = mappings
        self._query: Query = Query(**dsl_options)
        self._post_filter: Query = Query(**dsl_options)
        self._aggs: Aggs = Aggs(**dsl_options)

        # Remaining request sections, empty until the matching method is used.
        self._sort: List[Union[str, Dict[str, Any]]] = []
        self._source: Any = None
        self._highlight: Dict[str, Any] = {}
        self._highlight_opts: Dict[str, Any] = {}
        self._suggest: Dict[str, Any] = {}
        self._script_fields: Dict[str, Any] = {}

        self._repr_auto_execute: bool = repr_auto_execute
        self._document_class: Optional[DocumentMeta] = document_class
        super(Search, self).__init__(using=using, index=index)

    def query(
        self,
        type_or_query: TypeOrQuery,
        insert_below: Optional[QueryName] = None,
        on: Optional[QueryName] = None,
        mode: InsertionModes = ADD,
        compound_param: str = None,
        **body: Any
    ) -> "Search":
        # Work on a clone so the current search is left untouched; the actual
        # insertion is delegated to the clone's ``Query``.
        clone = self._clone()
        updated_query = clone._query.query(
            type_or_query,
            insert_below=insert_below,
            on=on,
            mode=mode,
            compound_param=compound_param,
            **body
        )
        clone._query = updated_query
        return clone

    query.__doc__ = Query.query.__doc__

    def bool(
        self,
        must: Optional[SingleOrMultipleQueryClause] = None,
        should: Optional[SingleOrMultipleQueryClause] = None,
        must_not: Optional[SingleOrMultipleQueryClause] = None,
        filter: Optional[SingleOrMultipleQueryClause] = None,
        insert_below: Optional[QueryName] = None,
        on: Optional[QueryName] = None,
        mode: InsertionModes = ADD,
        **body: Any
    ) -> "Search":
        # Work on a clone so the current search is left untouched; the bool
        # clause is built and inserted by the clone's ``Query``.
        clone = self._clone()
        updated_query = clone._query.bool(
            must=must,
            should=should,
            filter=filter,
            must_not=must_not,
            insert_below=insert_below,
            on=on,
            mode=mode,
            **body
        )
        clone._query = updated_query
        return clone

    bool.__doc__ = Query.bool.__doc__

    def filter(
        self,
        type_or_query: TypeOrQuery,
        insert_below: Optional[QueryName] = None,
        on: Optional[QueryName] = None,
        mode: InsertionModes = ADD,
        bool_body: ClauseBody = None,
        **body: Any
    ) -> "Search":
        # Work on a clone so the current search is left untouched; the filter
        # clause is inserted by the clone's ``Query``.
        clone = self._clone()
        updated_query = clone._query.filter(
            type_or_query,
            insert_below=insert_below,
            on=on,
            mode=mode,
            bool_body=bool_body,
            **body
        )
        clone._query = updated_query
        return clone

    filter.__doc__ = Query.filter.__doc__

    def must_not(
        self,
        type_or_query: TypeOrQuery,
        insert_below: Optional[QueryName] = None,
        on: Optional[QueryName] = None,
        mode: InsertionModes = ADD,
        bool_body: ClauseBody = None,
        **body: Any
    ) -> "Search":
        # Work on a clone so the current search is left untouched; the
        # must_not clause is inserted by the clone's ``Query``.
        clone = self._clone()
        updated_query = clone._query.must_not(
            type_or_query,
            insert_below=insert_below,
            on=on,
            mode=mode,
            bool_body=bool_body,
            **body
        )
        clone._query = updated_query
        return clone

    must_not.__doc__ = Query.must_not.__doc__

    def should(
        self,
        type_or_query: TypeOrQuery,
        insert_below: Optional[QueryName] = None,
        on: Optional[QueryName] = None,
        mode: InsertionModes = ADD,
        bool_body: ClauseBody = None,
        **body: Any
    ) -> "Search":
        # Work on a clone so the current search is left untouched; the should
        # clause is inserted by the clone's ``Query``.
        clone = self._clone()
        updated_query = clone._query.should(
            type_or_query,
            insert_below=insert_below,
            on=on,
            mode=mode,
            bool_body=bool_body,
            **body
        )
        clone._query = updated_query
        return clone

    should.__doc__ = Query.should.__doc__

    def must(
        self,
        type_or_query: TypeOrQuery,
        insert_below: Optional[QueryName] = None,
        on: Optional[QueryName] = None,
        mode: InsertionModes = ADD,
        bool_body: ClauseBody = None,
        **body: Any
    ) -> "Search":
        # Work on a clone so the current search is left untouched; the must
        # clause is inserted by the clone's ``Query``.
        clone = self._clone()
        updated_query = clone._query.must(
            type_or_query,
            insert_below=insert_below,
            on=on,
            mode=mode,
            bool_body=bool_body,
            **body
        )
        clone._query = updated_query
        return clone

    must.__doc__ = Query.must.__doc__

    def exclude(
        self,
        type_or_query: TypeOrQuery,
        insert_below: Optional[QueryName] = None,
        on: Optional[QueryName] = None,
        mode: InsertionModes = ADD,
        bool_body: ClauseBody = None,
        **body: Any
    ) -> "Search":
        """Must not wrapped in filter context.

        The clause described by ``type_or_query`` / ``body`` is placed in the
        ``must_not`` of a new ``Bool`` query, and that ``Bool`` is inserted as
        a filter clause. Returns a copy of the search.
        """
        clone = self._clone()
        negated = Bool(must_not=Query._q(type_or_query=type_or_query, **body))
        clone._query = clone._query.filter(
            negated,
            insert_below=insert_below,
            on=on,
            mode=mode,
            bool_body=bool_body,
        )
        return clone

    def post_filter(
        self,
        type_or_query: TypeOrQuery,
        insert_below: Optional[QueryName] = None,
        on: Optional[QueryName] = None,
        mode: InsertionModes = ADD,
        compound_param: str = None,
        **body: Any
    ) -> "Search":
        """
        Insert a clause in the post filter of the search.

        Takes the same arguments as ``query`` but applies them to the
        ``post_filter`` section of the request instead of the ``query``
        section. Returns a copy of the search.
        """
        clone = self._clone()
        updated_filter = clone._post_filter.query(
            type_or_query=type_or_query,
            insert_below=insert_below,
            on=on,
            mode=mode,
            compound_param=compound_param,
            **body
        )
        clone._post_filter = updated_filter
        return clone

    def agg(
        self,
        name: AggName,
        type_or_agg: Optional[TypeOrAgg] = None,
        insert_below: Optional[AggName] = None,
        at_root: bool_ = False,
        **body: Any
    ) -> "Search":
        # Work on a clone so the current search is left untouched; the
        # aggregation is inserted by the clone's ``Aggs``.
        clone = self._clone()
        updated_aggs = clone._aggs.agg(
            name,
            type_or_agg=type_or_agg,
            insert_below=insert_below,
            at_root=at_root,
            **body
        )
        clone._aggs = updated_aggs
        return clone

    agg.__doc__ = Aggs.agg.__doc__

    def aggs(
        self,
        aggs: Union[AggsDictOrNode, "Aggs"],
        insert_below: Optional[AggName] = None,
        at_root: bool_ = False,
    ) -> "Search":
        # Work on a clone so the current search is left untouched; the
        # aggregations are inserted by the clone's ``Aggs``.
        clone = self._clone()
        updated_aggs = clone._aggs.aggs(
            aggs,
            insert_below=insert_below,
            at_root=at_root,
        )
        clone._aggs = updated_aggs
        return clone

    aggs.__doc__ = Aggs.aggs.__doc__

    def groupby(
        self,
        name: AggName,
        type_or_agg: Optional[TypeOrAgg] = None,
        insert_below: Optional[AggName] = None,
        at_root: bool_ = False,
        **body: Any
    ) -> "Search":
        # Work on a clone so the current search is left untouched; the
        # grouping aggregation is inserted by the clone's ``Aggs``.
        clone = self._clone()
        updated_aggs = clone._aggs.groupby(
            name,
            type_or_agg=type_or_agg,
            insert_below=insert_below,
            at_root=at_root,
            **body
        )
        clone._aggs = updated_aggs
        return clone

    groupby.__doc__ = Aggs.groupby.__doc__

    def __iter__(self) -> Iterator[Hit]:
        """
        Execute the search and iterate over the resulting response, i.e. over
        its hits (``pandagg.response.Hit``).
        """
        response = self.execute()
        return iter(response)

    def __getitem__(self, n: Union[slice, List, int]) -> "Search":
        """
        Support slicing the `Search` instance for pagination.

        Slicing equates to the from/size parameters. E.g.::

            s = Search().query(...)[0:25]

        returns a copy of the search whose ``from`` parameter is 0 and whose
        ``size`` parameter is 25.

        :arg n: one of

            - a slice: ``start`` becomes ``from`` (0 when omitted) and
              ``stop - start`` becomes ``size`` (10 when ``stop`` is omitted,
              0 when ``stop`` is before ``start``); the slice step is ignored,
            - a list: shortcut for ``source(includes=n)``,
            - an integer: equivalent to the slice ``[n:n+1]``.

        :raises ValueError: on a negative index or negative slice bound.
        """
        s = self._clone()

        if isinstance(n, slice):
            start = n.start or 0
            # If negative slicing, abort.
            if start < 0 or (n.stop is not None and n.stop < 0):
                raise ValueError("Search does not support negative slicing.")
            s._params["from"] = start
            if n.stop is None:
                # Elasticsearch won't get all results so we default to
                # size: 10 if stop not given.
                s._params["size"] = 10
            else:
                # Clamp to 0: an empty slice such as [10:5] must request no
                # hit rather than a negative size.
                s._params["size"] = max(0, n.stop - start)
            return s
        if isinstance(n, list):
            return s.source(includes=n)
        # This is an index lookup, equivalent to slicing by [n:n+1].
        # If negative index, abort.
        if n < 0:
            raise ValueError("Search does not support negative indexing.")
        s._params["from"] = n
        s._params["size"] = 1
        return s

    def size(self, size: int) -> "Search":
        """
        Return a copy of the search whose ``size`` parameter is ``size``.

        Equivalent to::

            s = Search().params(size=size)

        """
        clone = self._clone()
        clone._params.update({"size": size})
        return clone

    @classmethod
    def from_dict(cls, d: Dict) -> "Search":
        """
        Build a new instance of the class from a raw search body.

        The instance is created with default arguments and then filled in by
        ``update_from_dict``. Useful when migrating from raw dictionaries.

        Example::

            s = Search.from_dict({
                "query": {
                    "bool": {
                        "must": [...]
                    }
                },
                "aggs": {...}
            })
            s = s.filter('term', published=True)
        """
        instance = cls()
        instance.update_from_dict(d)
        return instance

    def _clone(self) -> "Search":
        """
        Build a new ``Search`` carrying the same state as this one.

        Containers (params, sort, source, highlight, suggest, script fields)
        are copied one level deep; query, post filter, aggregations and
        mappings are duplicated through their own ``clone`` method. Used
        internally by most state modifying APIs.
        """
        clone = Search(using=self._using,
                       index=self._index,
                       mappings=self._mappings)

        # One-level copies of the plain containers.
        clone._params = self._params.copy()
        clone._sort = self._sort[:]
        source = self._source
        clone._source = None if source is None else copy.copy(source)
        clone._highlight = self._highlight.copy()
        clone._highlight_opts = self._highlight_opts.copy()
        clone._suggest = self._suggest.copy()
        clone._script_fields = self._script_fields.copy()

        # DSL objects know how to duplicate themselves.
        clone._aggs = self._aggs.clone()
        clone._query = self._query.clone()
        clone._post_filter = self._post_filter.clone()
        mappings = self._mappings
        clone._mappings = mappings.clone() if mappings is not None else None

        # Options that are not constructor arguments of the call above.
        clone._repr_auto_execute = self._repr_auto_execute
        clone._document_class = self._document_class
        return clone

    def update_from_dict(self, d: Dict) -> "Search":
        """
        Apply options from a serialized body to the current instance. Modifies
        the object in-place. Used mostly by ``from_dict``.

        Recognized keys (``query``, ``post_filter``, ``aggs`` /
        ``aggregations``, ``sort``, ``_source``, ``highlight``, ``suggest``,
        ``script_fields``) replace the matching section of the search; every
        other key is merged into the request parameters.

        A ``text`` entry directly under ``suggest`` is treated as a default
        and copied into each suggestion that does not define its own text.

        :arg d: serialized search body; it is not modified
        :returns: the current instance
        """
        d = d.copy()
        if "query" in d:
            self._query = Query(d.pop("query"))
        if "post_filter" in d:
            self._post_filter = Query(d.pop("post_filter"))

        # Both spellings are removed from ``d``; "aggs" takes precedence when
        # the two are present.
        aggs = d.pop("aggs", d.pop("aggregations", {}))
        if aggs:
            self._aggs = Aggs(aggs)
        if "sort" in d:
            self._sort = d.pop("sort")
        if "_source" in d:
            self._source = d.pop("_source")
        if "highlight" in d:
            high = d.pop("highlight").copy()
            self._highlight = high.pop("fields")
            self._highlight_opts = high
        if "suggest" in d:
            # ``d.copy()`` above is shallow, so the suggest section is still
            # the caller's object: copy it (and any entry we complete) before
            # editing, otherwise the input body would be altered.
            suggest = dict(d.pop("suggest"))
            if "text" in suggest:
                text = suggest.pop("text")
                for name, options in list(suggest.items()):
                    options = dict(options)
                    options.setdefault("text", text)
                    suggest[name] = options
            self._suggest = suggest
        if "script_fields" in d:
            self._script_fields = d.pop("script_fields")
        self._params.update(d)
        return self

    def script_fields(self, **kwargs: Any) -> "Search":
        """
        Return a copy of the search with additional script fields, i.e. fields
        calculated on hits. See
        https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-script-fields.html
        for more details.

        Each keyword is the name of a field. A string value is wrapped as
        ``{"script": value}``; any other value is stored as given.

        Example::

            s = Search()
            s = s.script_fields(times_two="doc['field'].value * 2")
            s = s.script_fields(
                times_three={
                    'script': {
                        'inline': "doc['field'].value * params.n",
                        'params': {'n': 3}
                    }
                }
            )

        """
        clone = self._clone()
        definitions = {
            name: {"script": value} if isinstance(value, str) else value
            for name, value in kwargs.items()
        }
        clone._script_fields.update(definitions)
        return clone

    def source(self,
               fields: Union[str, List[str], Dict[str, Any]] = None,
               **kwargs: Any) -> "Search":
        """
        Selectively control how the _source field is returned.

        :arg fields: wildcard string, array of wildcards, or dictionary of includes and excludes

        When ``fields`` is given (anything but None) it replaces the current
        source setting as is. Otherwise each keyword argument (typically
        ``includes`` / ``excludes``) is stored under its name in the source
        dictionary, replacing a previous value of the same name; a keyword
        set to None removes that name instead.

        :raises ValueError: if both ``fields`` and keyword arguments are given.

        Example::

            s = Search()
            s = s.source(includes=['obj1.*'], excludes=["*.description"])

            s = Search()
            s = s.source(includes=['obj1.*']).source(excludes=["*.description"])

        """
        clone = self._clone()

        if fields and kwargs:
            raise ValueError(
                "You cannot specify fields and kwargs at the same time.")

        if fields is not None:
            clone._source = fields
            return clone

        # Keyword updates need a dictionary to land in.
        if kwargs and not isinstance(clone._source, dict):
            clone._source = {}

        for key, value in kwargs.items():
            if value is None:
                clone._source.pop(key, None)
            else:
                clone._source[key] = value

        return clone

    def sort(self, *keys: Union[str, Dict[str, Any]]) -> "Search":
        """
        Return a copy of the search whose sort criteria are replaced by ``keys``.

        Calling it without arguments clears every sort requirement. Each key
        may be one of::

            'some.field'
            '-some.other.field'
            {'different.field': {'any': 'dict'}}

        A string starting with ``-`` is expanded to a descending sort on the
        field that follows the dash; any other value is stored untouched. For
        example::

            s = Search().sort(
                'category',
                '-title',
                {"price" : {"order" : "asc", "mode" : "avg"}}
            )

        sorts by ``category``, ``title`` (in descending order) and ``price``
        in ascending order using the ``avg`` mode.

        :raises ValueError: if ``'-_score'`` is requested.
        """
        clone = self._clone()
        criteria = []
        for key in keys:
            descending = isinstance(key, str) and key.startswith("-")
            if not descending:
                criteria.append(key)
                continue
            field = key[1:]
            if field == "_score":
                raise ValueError("Sorting by `-_score` is not allowed.")
            criteria.append({field: {"order": "desc"}})
        clone._sort = criteria
        return clone

    def highlight_options(self, **kwargs: Any) -> "Search":
        """
        Return a copy of the search with the global highlighting options
        extended (or overridden) by the given keyword arguments. For example::

            s = Search()
            s = s.highlight_options(order='score')
        """
        clone = self._clone()
        options = clone._highlight_opts
        options.update(kwargs)
        return clone

    def highlight(self, *fields: str, **kwargs: Any) -> "Search":
        """
        Request highlighting of some fields. All keyword arguments passed in will be
        used as parameters for all the fields in the ``fields`` parameter. Example::

            Search().highlight('title', 'body', fragment_size=50)

        will produce the equivalent of::

            {
                "highlight": {
                    "fields": {
                        "body": {"fragment_size": 50},
                        "title": {"fragment_size": 50}
                    }
                }
            }

        If you want to have different options for different fields you can call ``highlight`` twice::

            Search().highlight('title', fragment_size=50).highlight('body', fragment_size=100)

        which will produce::

            {
                "highlight": {
                    "fields": {
                        "body": {"fragment_size": 100},
                        "title": {"fragment_size": 50}
                    }
                }
            }

        :arg fields: names of the fields to highlight
        :returns: a copy of the search; the current instance is not modified
        """
        clone = self._clone()
        for field in fields:
            # Give every field its own options dict: sharing the single
            # ``kwargs`` object would make a later edit of one field's options
            # silently change all the fields registered in the same call.
            clone._highlight[field] = dict(kwargs)
        return clone

    def suggest(self, name: str, text: str, **kwargs: Any) -> "Search":
        """
        Return a copy of the search with one more suggestion request.

        :arg name: name of the suggestion
        :arg text: text to suggest on

        Every keyword argument is merged into the suggestion body next to
        ``text``. For example::

            s = Search()
            s = s.suggest('suggestion-1', 'Elasticsearch', term={'field': 'body'})
        """
        clone = self._clone()
        entry = {"text": text}
        entry.update(kwargs)
        clone._suggest[name] = entry
        return clone

    def to_dict(self, count: bool_ = False, **kwargs: Any) -> SearchDict:
        """
        Build the dictionary that is sent as the body of the request.

        :arg count: when true, only the query is serialized (no post filter,
            aggregations, sort, params, source filtering, highlight, suggest
            or script fields), which is what a count request needs.

        A query or post filter that serializes to an empty value is left out.
        Any additional keyword argument is merged into the result last, so it
        overrides a key of the same name produced by the search itself.
        """
        body: SearchDict = {}

        if self._query:
            serialized_query = self._query.to_dict()
            if serialized_query:
                body["query"] = serialized_query

        # count request doesn't care for sorting and other things
        if count:
            # TODO: check if those kwargs are really useful
            body.update(kwargs)  # type: ignore
            return body

        if self._post_filter:
            serialized_filter = self._post_filter.to_dict()
            if serialized_filter:
                body["post_filter"] = serialized_filter

        if self._aggs:
            body["aggs"] = self._aggs.to_dict()

        if self._sort:
            body["sort"] = self._sort

        # query params are not typed in search dict
        body.update(self._params)  # type: ignore

        if self._source not in (None, {}):
            body["_source"] = self._source

        if self._highlight:
            highlight_section: Dict[str, Any] = {"fields": self._highlight}
            highlight_section.update(self._highlight_opts)
            body["highlight"] = highlight_section

        if self._suggest:
            body["suggest"] = self._suggest

        if self._script_fields:
            body["script_fields"] = self._script_fields

        # TODO: check if those kwargs are really useful
        body.update(kwargs)  # type: ignore
        return body

    def count(self) -> int:
        """
        Ask the cluster how many documents match the query and return that
        number only (the ``"count"`` entry of the response).
        """
        client = self._get_connection()

        body = self.to_dict(count=True)
        response = client.count(index=self._index, body=body)
        return response["count"]

    def execute(self) -> SearchResponse:
        """
        Run the search against the configured connection and wrap the raw
        result in a ``SearchResponse`` that keeps a reference to this search.
        """
        client = self._get_connection()
        payload = client.search(index=self._index, body=self.to_dict())
        return SearchResponse(data=payload, _search=self)  # type: ignore

    def scan_composite_agg(self, size: int) -> Iterator[BucketDict]:
        """Iterate over the whole aggregation composed buckets, yields buckets.

        The aggregations of a hit-less copy of the search (``size(0)``) are
        converted with ``as_composite`` and the search is executed page after
        page, each request resuming from the ``after_key`` of the previous
        response. Iteration stops on the first page that is not full (its
        number of buckets differs from ``size``) or that carries no
        ``after_key``.

        :arg size: number of buckets requested per page
        """
        s: Search = self._clone().size(0)
        s._aggs = s._aggs.as_composite(size=size)
        a_name, _ = s._aggs.get_composition_supporting_agg()

        while True:
            r: SearchResponse = s.execute()
            agg_clause_response = r.aggregations.data[a_name]
            buckets: List[BucketDict] = agg_clause_response[  # type: ignore
                "buckets"]
            # Every page is yielded, the very first one included.
            for bucket in buckets:
                yield bucket
            # ``after_key`` is looked up only after checking it is present,
            # so a response without it ends the scan instead of raising.
            if len(buckets) != size or "after_key" not in agg_clause_response:
                return
            after_key: AfterKey = agg_clause_response[  # type: ignore
                "after_key"]
            s._aggs = s._aggs.as_composite(size=size, after=after_key)

    def scan_composite_agg_at_once(self, size: int) -> Aggregations:
        """Collect every bucket produced by ``scan_composite_agg`` and return
        them all at once in an ``Aggregations`` instance, shaped as if a single
        query had returned them.

        :arg size: number of buckets requested per page while scanning
        """
        collected = list(self.scan_composite_agg(size=size))

        # Rebuild the composite version of the search: it provides both the
        # name of the aggregation holding the buckets and the search attached
        # to the returned ``Aggregations``.
        composite_search: Search = self._clone().size(0)
        composite_search._aggs = composite_search._aggs.as_composite(size=size)
        agg_name: AggName
        agg_name, _ = composite_search._aggs.get_composition_supporting_agg()  # type: ignore

        # artificially merge all buckets as if they were returned in a single query
        merged = {agg_name: {"buckets": collected}}
        return Aggregations(_search=composite_search, data=merged)

    def scan(self) -> Iterator[Hit]:
        """
        Turn the search into a scan search and return a generator that
        iterates over all the documents matching the query, each wrapped in a
        ``Hit`` built with the document class of this search.

        The work is delegated to the ``scan`` helper from ``elasticsearch-py``,
        which receives the connection, the serialized body (``to_dict()``) and
        the index of this search -
        https://elasticsearch-py.readthedocs.io/en/master/helpers.html#elasticsearch.helpers.scan

        """
        client = self._get_connection()
        raw_hits = scan(client, query=self.to_dict(), index=self._index)
        for raw_hit in raw_hits:
            yield Hit(raw_hit, _document_class=self._document_class)

    def delete(self) -> DeleteByQueryResponse:
        """
        Delete the documents matching the search by sending its serialized
        body to ``delete_by_query()`` and return that call's result.
        """
        client = self._get_connection()
        return client.delete_by_query(  # type: ignore
            index=self._index,
            body=self.to_dict(),
        )

    def __eq__(self, other: Any) -> bool_:
        # Two searches are equal when they target the same index and
        # serialize to the same request body.
        if not isinstance(other, Search):
            return False
        if not (other._index == self._index):
            return False
        return other.to_dict() == self.to_dict()

    def _auto_execution_df_result(self) -> pd.DataFrame:
        # Execute the search and return its result as a dataframe: the
        # aggregations when the search defines some, the hits otherwise.
        try:
            import pandas as pd  # noqa
        except ImportError:
            raise ImportError("repr_auto_execute requires pandas dependency")
        if not self._aggs.to_dict():
            response = self.execute()
            return response.hits.to_dataframe()
        # hits are not necessary to display aggregation results
        response = self.size(0).execute()
        return response.aggregations.to_dataframe()

    def __repr__(self) -> str:
        # inspired by https://github.com/elastic/eland/blob/master/eland/dataframe.py#L471 idea to execute search at
        # __repr__ to have more interactive experience
        if self._repr_auto_execute:
            return self._auto_execution_df_result().__repr__()
        # Default: show the request body as indented JSON.
        return json.dumps(self.to_dict(), indent=2)

    def _repr_html_(self) -> Optional[str]:
        # An HTML representation is only produced when auto-execution is
        # enabled: it is the HTML rendering of the executed search's dataframe.
        if self._repr_auto_execute:
            return self._auto_execution_df_result()._repr_html_()
        return None
コード例 #18
0
ファイル: test_response.py プロジェクト: alkemics/pandagg
 def test_normalize_buckets(self):
     # The sample aggregation response, once normalized, must match the
     # expected normalized structure (compared through ``ordered``).
     sample_aggs = Aggs(sample.EXPECTED_AGG_QUERY, mappings=MAPPINGS)
     aggregations = Aggregations(
         data=sample.ES_AGG_RESPONSE,
         _search=Search().aggs(sample_aggs),
     )
     normalized = aggregations.to_normalized()
     self.assertEqual(
         ordered(normalized),
         ordered(sample.EXPECTED_NORMALIZED_RESPONSE),
     )