Example #1
0
    def test_nesteds_applied_at_field(self):
        """Check ``nested_at_field`` and ``list_nesteds_at_field`` on several paths.

        ``classification_type``, ``date`` and ``global_metrics`` are expected
        to yield ``None`` / ``[]``, whereas ``local_metrics`` and a field
        located below it are expected to yield ``"local_metrics"`` /
        ``["local_metrics"]``.
        """
        tree = Mapping(MAPPING)

        # (field path, expected nested_at_field, expected list_nesteds_at_field)
        expectations = (
            ("classification_type", None, []),
            ("date", None, []),
            ("global_metrics", None, []),
            ("local_metrics", "local_metrics", ["local_metrics"]),
            (
                "local_metrics.dataset.support_test",
                "local_metrics",
                ["local_metrics"],
            ),
        )
        for field_path, nested_path, nested_paths in expectations:
            self.assertEqual(tree.nested_at_field(field_path), nested_path)
            self.assertEqual(tree.list_nesteds_at_field(field_path),
                             nested_paths)
Example #2
0
 def test_node_path(self):
     """Check that a node fetched by dotted path maps back to that path."""
     dotted_path = "local_metrics.dataset.support_test"
     tree = Mapping(MAPPING)

     # Look the node up using the dotted path syntax.
     field_node = tree.get(dotted_path)
     self.assertIsInstance(field_node, Field)
     self.assertEqual(field_node.name, "support_test")

     # The node identifier must resolve back to the same dotted path.
     resolved_path = tree.node_path(field_node.identifier)
     self.assertEqual(resolved_path, dotted_path)
Example #3
0
 def __init__(self, *args, **kwargs):
     """Wrap a ``Mapping`` built from the remaining arguments.

     The ``client``, ``index``, ``root_path``, ``depth`` and ``initial_tree``
     keyword arguments are consumed here; every other positional and
     keyword argument is forwarded to ``Mapping``.
     """
     self._client = kwargs.pop("client", None)
     self._index = kwargs.pop("index", None)
     # Options meant for the parent class; popped so that they never reach
     # the ``Mapping`` constructor.
     parent_options = {
         "root_path": kwargs.pop("root_path", None),
         "depth": kwargs.pop("depth", 1),
         "initial_tree": kwargs.pop("initial_tree", None),
     }
     super(IMapping, self).__init__(tree=Mapping(*args, **kwargs),
                                    **parent_options)
     # If a leaf has been reached, add aggregation capabilities based on
     # the mapping type that was reached.
     self._set_agg_property_if_required()
Example #4
0
    def test_mapping_aggregations(self):
        mapping_tree = Mapping(MAPPING)
        # check that leaves are expanded, based on 'field_name' attribute of nodes
        mapping = IMapping(mapping_tree, depth=1)
        for field_name in (
                "classification_type",
                "date",
                "global_metrics",
                "id",
                "language",
                "local_metrics",
                "workflow",
        ):
            self.assertTrue(hasattr(mapping, field_name))

        dataset = mapping.global_metrics.dataset
        self.assertEqual(
            dataset.__repr__(),
            """<Mapping subpart: global_metrics.dataset>
dataset                                                      {Object}
├── nb_classes                                                Integer
└── support_train                                             Integer
""",
        )
        # capture print statement
        captured_output = StringIO()
        sys.stdout = captured_output
        # what triggers print
        dataset()
        # restore stout
        sys.stdout = sys.__stdout__
        self.assertEqual(
            captured_output.getvalue(),
            """{
  "dynamic": false,
  "properties": {
    "nb_classes": {
      "type": "integer"
    },
    "support_train": {
      "type": "integer"
    }
  }
}
""",
        )
Example #5
0
    def test_mapping_aggregations(self):
        """Check top-level attribute access and the result of calling a subpart.

        Every listed top-level field must be reachable as an attribute, and
        calling the ``workflow`` subpart must return a ``Field`` instance.
        """
        interactive = IMapping(Mapping(MAPPING), depth=1)

        top_level_fields = (
            "classification_type",
            "date",
            "global_metrics",
            "id",
            "language",
            "local_metrics",
            "workflow",
        )
        for name in top_level_fields:
            self.assertTrue(hasattr(interactive, name))

        # Calling a tree is expected to return its root node.
        workflow_subpart = interactive.workflow
        root_node = workflow_subpart()
        self.assertTrue(isinstance(root_node, Field))
Example #6
0
    def test_mapping_type_of_field(self):
        """Check ``mapping_type_of_field`` on unknown and known field paths.

        An unknown path must raise ``AbsentMappingFieldError``; known paths
        must return the type name asserted below.
        """
        mapping_tree = Mapping(MAPPING)
        # The lookup itself must raise: there is no return value to compare,
        # so the call is not wrapped in another assertion.
        with self.assertRaises(AbsentMappingFieldError):
            mapping_tree.mapping_type_of_field("yolo")

        self.assertEqual(mapping_tree.mapping_type_of_field("global_metrics"),
                         "object")
        self.assertEqual(mapping_tree.mapping_type_of_field("local_metrics"),
                         "nested")
        self.assertEqual(
            mapping_tree.mapping_type_of_field(
                "global_metrics.field.name.raw"),
            "keyword",
        )
        self.assertEqual(
            mapping_tree.mapping_type_of_field(
                "local_metrics.dataset.support_test"),
            "integer",
        )
Example #7
0
 def test_resolve_path_to_id(self, uuid_mock):
     """Check ``resolve_path_to_id`` on an unknown path and on known paths.

     ``uuid_mock`` is configured to yield 0, 1, 2, ... on successive calls;
     the expected identifiers below end with such numbers.
     """
     uuid_mock.side_effect = range(100)
     tree = Mapping(MAPPING)

     # (path, expected result); the first path does not exist in the mapping
     # and is expected to come back unchanged.
     cases = (
         ("global_metrics.non_existing_field",
          "global_metrics.non_existing_field"),
         ("classification_type", "classification_type0"),
         ("local_metrics.dataset.support_test", "support_test23"),
     )
     for path, expected_id in cases:
         self.assertEqual(tree.resolve_path_to_id(path), expected_id)
Example #8
0
    def __init__(self, using=None, index=None, mapping=None):
        """
        Build a search request to elasticsearch.

        :arg using: `Elasticsearch` instance to use
        :arg index: limit the search to index
        :arg mapping: mapping used for query validation

        Every parameter supplied (or omitted) at creation time can be
        overridden later by the corresponding method (`using`, `index` and
        `mapping` respectively).
        """

        # Request options all start out empty.
        self._sort = []
        self._source = None
        self._highlight, self._highlight_opts = {}, {}
        self._suggest, self._script_fields = {}, {}

        # A single Mapping instance is shared by the aggregations, the query
        # and the post filter.
        shared_mapping = Mapping(mapping)
        self._mapping = shared_mapping
        self._aggs = Aggs(mapping=shared_mapping)
        self._query = Query(mapping=shared_mapping)
        self._post_filter = Query(mapping=shared_mapping)

        super(Search, self).__init__(using=using, index=index)
Example #9
0
    def test_imapping_init(self):
        """Check that ``IMapping`` can be built from a dict, a tree or nodes.

        The three instances are expected to serialize back to the same
        mapping dict and to keep the client and index they were given.
        """

        mapping_dict = {
            "dynamic": False,
            "properties": {
                "classification_type": {
                    "type": "keyword",
                    "fields": {
                        "raw": {
                            "type": "text"
                        }
                    },
                },
                "local_metrics": {
                    "type": "nested",
                    "dynamic": False,
                    "properties": {
                        "dataset": {
                            "dynamic": False,
                            "properties": {
                                "support_test": {
                                    "type": "integer"
                                },
                                "support_train": {
                                    "type": "integer"
                                },
                            },
                        }
                    },
                },
            },
        }

        mapping_tree = Mapping(mapping_dict)
        client_mock = Mock(spec=["search"])
        index_name = "classification_report_index_name"

        # from dict
        im1 = IMapping(mapping_dict, client=client_mock, index=index_name)
        # from tree
        im2 = IMapping(mapping_tree, client=client_mock, index=index_name)

        # from nodes: the fields are given as a list rather than a set, so
        # that they are declared in a deterministic order and do not have to
        # be hashable (same form as the nested ``properties`` below).
        im3 = IMapping(
            properties=[
                Keyword("classification_type", fields=[Text("raw")]),
                Nested(
                    "local_metrics",
                    dynamic=False,
                    properties=[
                        Object(
                            "dataset",
                            dynamic=False,
                            properties=[
                                Integer("support_test"),
                                Integer("support_train"),
                            ],
                        )
                    ],
                ),
            ],
            dynamic=False,
            client=client_mock,
            index=index_name,
        )
        for i, m in enumerate((im1, im2, im3)):
            self.assertEqual(m._tree.serialize(), mapping_dict,
                             "failed at m%d" % (i + 1))
            self.assertEqual(m._index, index_name)
            self.assertIs(m._client, client_mock)
Example #10
0
    def test_client_bound(self):
        """Check that a client-bound leaf can run a terms aggregation.

        The ``workflow`` field of a client-bound ``IMapping`` is expected to
        expose an ``a`` attribute that is an instance of the ``keyword``
        field class. Calling ``a.terms`` must trigger exactly one search on
        the bound client, with the body asserted below, and return the
        buckets of the mocked response.
        """
        index_name = "classification_report_index_name"

        # Canned elasticsearch response returned by the mocked client.
        canned_aggregations = {
            "terms_agg": {
                "buckets": [
                    {"doc_count": 25, "key": 1},
                    {"doc_count": 50, "key": 2},
                ],
                "doc_count_error_upper_bound": 0,
                "sum_other_doc_count": 4,
            }
        }
        canned_response = {
            "_shards": {"failed": 0, "successful": 135, "total": 135},
            "aggregations": canned_aggregations,
            "hits": {"hits": [], "max_score": 0.0, "total": 300},
            "timed_out": False,
            "took": 30,
        }
        es_client = Mock(spec=["search"])
        es_client.search = Mock(return_value=canned_response)

        bound_mapping = IMapping(Mapping(MAPPING),
                                 client=es_client,
                                 index=index_name)

        workflow_leaf = bound_mapping.workflow
        self.assertTrue(hasattr(workflow_leaf, "a"))
        # workflow is mapped as a keyword
        self.assertIsInstance(workflow_leaf.a,
                              field_classes_per_name["keyword"])

        buckets = workflow_leaf.a.terms(
            size=20,
            raw_output=True,
            query={"term": {"classification_type": "multiclass"}},
        )
        expected_buckets = [
            (1, {"doc_count": 25, "key": 1}),
            (2, {"doc_count": 50, "key": 2}),
        ]
        self.assertEqual(buckets, expected_buckets)

        # Exactly one search, carrying the aggregation and the query filter.
        expected_body = {
            "aggs": {
                "terms_agg": {
                    "terms": {"field": "workflow", "size": 20}
                }
            },
            "size": 0,
            "query": {"term": {"classification_type": "multiclass"}},
        }
        es_client.search.assert_called_once()
        es_client.search.assert_called_with(body=expected_body,
                                            index=index_name)
Example #11
0
 def __init__(self, *args, **kwargs):
     """Store the mapping, then fill the instance from the other arguments.

     The ``mapping`` keyword argument (``None`` when absent) is wrapped in a
     ``Mapping``; ``_fill`` is only called when positional or keyword
     arguments remain.
     """
     raw_mapping = kwargs.pop("mapping", None)
     self.mapping = Mapping(raw_mapping)
     super(Aggs, self).__init__()
     if not args and not kwargs:
         return
     self._fill(*args, **kwargs)
Example #12
0
def _person_properties(id_field, full_name_path):
    """Build the field list shared by the ``roles`` and ``directors`` nesteds.

    ``id_field`` is the name of the keyword identifier field, and
    ``full_name_path`` the ``copy_to`` target of the first and last names.
    """
    return [
        Keyword("role"),
        Keyword(id_field),
        Keyword("gender"),
        Text("first_name", copy_to=full_name_path, fields=Keyword("raw")),
        Text("last_name", copy_to=full_name_path, fields=Keyword("raw")),
        Text("full_name"),
    ]


# Movie mapping, declared with field nodes and serialized to a plain dict.
mapping = Mapping(properties=[
    Keyword("movie_id"),
    Text("name", fields=Keyword("raw")),
    Date("year"),
    Float("rank"),
    Keyword("genres"),
    Nested(
        "roles",
        properties=_person_properties("actor_id", "roles.full_name"),
    ),
    Nested(
        "directors",
        properties=_person_properties("director_id", "directors.full_name"),
    ),
    Integer("nb_directors"),
    Integer("nb_roles"),
]).to_dict()
Example #13
0
    def test_quick_agg_nested(self):
        """Check that an aggregation on a nested leaf is wrapped in a nested clause.

        The ``local_metrics.dataset.support_train`` leaf of a client-bound
        ``IMapping`` is expected to expose an ``a`` attribute that is an
        instance of the ``integer`` field class. Calling ``a.avg`` must
        trigger exactly one search whose body nests the aggregation under
        the ``local_metrics`` path, and return the mocked value.
        """
        index_name = "classification_report_index_name"

        # Canned elasticsearch response returned by the mocked client.
        canned_response = {
            "_shards": {"failed": 0, "successful": 135, "total": 135},
            "aggregations": {
                "local_metrics": {
                    "avg_agg": {"value": 23},
                },
            },
            "hits": {"hits": [], "max_score": 0.0, "total": 300},
            "timed_out": False,
            "took": 30,
        }
        es_client = Mock(spec=["search"])
        es_client.search = Mock(return_value=canned_response)

        bound_mapping = IMapping(Mapping(MAPPING),
                                 client=es_client,
                                 index=index_name)

        support_train_leaf = bound_mapping.local_metrics.dataset.support_train
        self.assertTrue(hasattr(support_train_leaf, "a"))
        self.assertIsInstance(support_train_leaf.a,
                              field_classes_per_name["integer"])

        result = support_train_leaf.a.avg(
            size=20,
            raw_output=True,
            query={"term": {"classification_type": "multiclass"}},
        )
        self.assertEqual(result, [(None, {"value": 23})])

        # Exactly one search, with the avg aggregation nested under the
        # ``local_metrics`` path.
        nested_avg = {
            "nested": {"path": "local_metrics"},
            "aggs": {
                "avg_agg": {
                    "avg": {
                        "field": "local_metrics.dataset.support_train",
                        "size": 20,
                    }
                }
            },
        }
        expected_body = {
            "aggs": {"local_metrics": nested_avg},
            "size": 0,
            "query": {"term": {"classification_type": "multiclass"}},
        }
        es_client.search.assert_called_once()
        es_client.search.assert_called_with(body=expected_body,
                                            index=index_name)
Example #14
0
    def test_deserialization(self):
        mapping_dict = {
            "dynamic": False,
            "properties": {
                "classification_type": {
                    "type": "keyword",
                    "fields": {
                        "raw": {
                            "type": "text"
                        }
                    },
                },
                "local_metrics": {
                    "type": "nested",
                    "dynamic": False,
                    "properties": {
                        "dataset": {
                            "dynamic": False,
                            "properties": {
                                "support_test": {
                                    "type": "integer"
                                },
                                "support_train": {
                                    "type": "integer"
                                },
                            },
                        }
                    },
                },
            },
        }

        m1 = Mapping(mapping_dict)

        m2 = Mapping(
            dynamic=False,
            properties={
                Keyword("classification_type", fields=[Text("raw")]),
                Nested(
                    "local_metrics",
                    dynamic=False,
                    properties=[
                        Object(
                            "dataset",
                            dynamic=False,
                            properties=[
                                Integer("support_test"),
                                Integer("support_train"),
                            ],
                        )
                    ],
                ),
            },
        )

        expected_repr = """<Mapping>
_                                                              
├── classification_type                                       Keyword
│   └── raw                                                 ~ Text
└── local_metrics                                            [Nested]
    └── dataset                                              {Object}
        ├── support_test                                      Integer
        └── support_train                                     Integer
"""
        for i, m in enumerate((
                m1,
                m2,
        )):
            self.assertEqual(m.__repr__(), expected_repr,
                             "failed at m%d" % (i + 1))
            self.assertEqual(m.serialize(), mapping_dict,
                             "failed at m%d" % (i + 1))
Example #15
0
    def test_parse_tree_from_dict(self):
        """Check the string rendering of a ``Mapping`` built from ``MAPPING``."""
        rendered = Mapping(MAPPING).__str__()

        self.assertEqual(rendered, EXPECTED_MAPPING_TREE_REPR)
Example #16
0
 def __init__(self, *args, **kwargs):
     """Store the mapping and options, then fill from the other arguments.

     The ``mapping`` keyword argument (``None`` when absent) is wrapped in a
     ``Mapping`` and ``nested_autocorrect`` (``False`` when absent) is kept
     as is; ``_fill`` is only called when positional or keyword arguments
     remain.
     """
     raw_mapping = kwargs.pop("mapping", None)
     self.mapping = Mapping(raw_mapping)
     self.nested_autocorrect = kwargs.pop("nested_autocorrect", False)
     super(Aggs, self).__init__()
     if not args and not kwargs:
         return
     self._fill(*args, **kwargs)