def test_nesteds_applied_at_field(self):
    """Fields outside any nested report no nested ancestry; nested fields
    report their nested path(s).
    """
    tree = Mapping(MAPPING)

    # Plain (non-nested) fields: no nested ancestor, empty nested list.
    for field_path in ("classification_type", "date", "global_metrics"):
        self.assertEqual(tree.nested_at_field(field_path), None)
        self.assertEqual(tree.list_nesteds_at_field(field_path), [])

    # The nested field itself is its own nested root.
    self.assertEqual(tree.nested_at_field("local_metrics"), "local_metrics")
    self.assertEqual(tree.list_nesteds_at_field("local_metrics"), ["local_metrics"])

    # A leaf below a nested field inherits that nested root.
    deep_path = "local_metrics.dataset.support_test"
    self.assertEqual(tree.nested_at_field(deep_path), "local_metrics")
    self.assertEqual(tree.list_nesteds_at_field(deep_path), ["local_metrics"])
def test_node_path(self):
    """A node fetched by dotted-path syntax maps back to that same path."""
    tree = Mapping(MAPPING)

    # Retrieve a node using the dotted-path syntax.
    node = tree.get("local_metrics.dataset.support_test")
    self.assertIsInstance(node, Field)
    self.assertEqual(node.name, "support_test")

    # node_path must round-trip the identifier back to the dotted path.
    self.assertEqual(
        tree.node_path(node.identifier),
        "local_metrics.dataset.support_test",
    )
def __init__(self, *args, **kwargs):
    """Build an interactive mapping wrapper.

    Keyword arguments consumed here (popped before building the tree):
    :param client: Elasticsearch client used to execute aggregations (optional).
    :param index: name of the index this mapping belongs to (optional).
    :param root_path: dotted path of the subpart this instance exposes.
    :param depth: expansion depth of child attributes, defaults to 1.
    :param initial_tree: full tree this instance is a subpart of.

    Remaining args/kwargs are forwarded to the ``Mapping`` constructor.
    """
    self._client = kwargs.pop("client", None)
    self._index = kwargs.pop("index", None)
    root_path = kwargs.pop("root_path", None)
    depth = kwargs.pop("depth", 1)
    initial_tree = kwargs.pop("initial_tree", None)

    underlying_tree = Mapping(*args, **kwargs)
    super(IMapping, self).__init__(
        tree=underlying_tree,
        root_path=root_path,
        depth=depth,
        initial_tree=initial_tree,
    )
    # If we reached a leaf, add aggregation capabilities based on the
    # reached mapping type.
    self._set_agg_property_if_required()
def test_mapping_aggregations(self):
    """Leaves are expanded as attributes; calling a subpart prints its body."""
    tree = Mapping(MAPPING)
    # Leaves are expanded based on the 'field_name' attribute of nodes.
    imapping = IMapping(tree, depth=1)
    for attr_name in (
        "classification_type",
        "date",
        "global_metrics",
        "id",
        "language",
        "local_metrics",
        "workflow",
    ):
        self.assertTrue(hasattr(imapping, attr_name))

    dataset = imapping.global_metrics.dataset
    self.assertEqual(
        dataset.__repr__(),
        """<Mapping subpart: global_metrics.dataset>
dataset {Object}
├── nb_classes Integer
└── support_train Integer
""",
    )

    # Capture the print statement triggered by calling the subpart.
    captured_output = StringIO()
    sys.stdout = captured_output
    dataset()
    # Restore stdout.
    sys.stdout = sys.__stdout__
    self.assertEqual(
        captured_output.getvalue(),
        """{
  "dynamic": false,
  "properties": {
    "nb_classes": {
      "type": "integer"
    },
    "support_train": {
      "type": "integer"
    }
  }
}
""",
    )
def test_mapping_aggregations(self):
    """Calling an expanded tree returns its root node."""
    # NOTE(review): this method shares its name with another
    # `test_mapping_aggregations` in this file; if both live in the same
    # TestCase, only the later definition is collected — confirm and rename.
    tree = Mapping(MAPPING)
    # Leaves are expanded based on the 'field_name' attribute of nodes.
    imapping = IMapping(tree, depth=1)
    for attr_name in (
        "classification_type",
        "date",
        "global_metrics",
        "id",
        "language",
        "local_metrics",
        "workflow",
    ):
        self.assertTrue(hasattr(imapping, attr_name))

    workflow = imapping.workflow
    # Calling a tree returns its root node.
    workflow_node = workflow()
    self.assertTrue(isinstance(workflow_node, Field))
def test_mapping_type_of_field(self):
    """mapping_type_of_field returns the ES type, raising on unknown fields."""
    tree = Mapping(MAPPING)

    # Unknown field raises.
    with self.assertRaises(AbsentMappingFieldError):
        self.assertEqual(tree.mapping_type_of_field("yolo"), False)

    self.assertEqual(tree.mapping_type_of_field("global_metrics"), "object")
    self.assertEqual(tree.mapping_type_of_field("local_metrics"), "nested")
    self.assertEqual(
        tree.mapping_type_of_field("global_metrics.field.name.raw"),
        "keyword",
    )
    self.assertEqual(
        tree.mapping_type_of_field("local_metrics.dataset.support_test"),
        "integer",
    )
def test_resolve_path_to_id(self, uuid_mock):
    """Known paths resolve to node identifiers; unknown paths pass through."""
    # Deterministic identifiers: mocked uuid yields 0, 1, 2, ...
    uuid_mock.side_effect = range(100)
    tree = Mapping(MAPPING)

    # Non-existing field: path is returned unchanged.
    self.assertEqual(
        tree.resolve_path_to_id("global_metrics.non_existing_field"),
        "global_metrics.non_existing_field",
    )

    # Existing fields resolve to "<name><uuid counter>".
    self.assertEqual(
        tree.resolve_path_to_id("classification_type"),
        "classification_type0",
    )
    self.assertEqual(
        tree.resolve_path_to_id("local_metrics.dataset.support_test"),
        "support_test23",
    )
def __init__(self, using=None, index=None, mapping=None):
    """Search request to elasticsearch.

    :arg using: `Elasticsearch` instance to use
    :arg index: limit the search to index
    :arg mapping: mapping used for query validation

    All the parameters supplied (or omitted) at creation time can be later
    overridden by methods (`using`, `index` and `mapping` respectively).
    """
    self._sort = []
    self._source = None
    self._highlight = {}
    self._highlight_opts = {}
    self._suggest = {}
    self._script_fields = {}

    # Share one parsed mapping across all query-building components so
    # they validate against the same schema.
    parsed_mapping = Mapping(mapping)
    self._mapping = parsed_mapping
    self._aggs = Aggs(mapping=parsed_mapping)
    self._query = Query(mapping=parsed_mapping)
    self._post_filter = Query(mapping=parsed_mapping)

    super(Search, self).__init__(using=using, index=index)
def test_imapping_init(self):
    """IMapping builds identically from a dict, a Mapping tree, or field nodes."""
    mapping_dict = {
        "dynamic": False,
        "properties": {
            "classification_type": {
                "type": "keyword",
                "fields": {"raw": {"type": "text"}},
            },
            "local_metrics": {
                "type": "nested",
                "dynamic": False,
                "properties": {
                    "dataset": {
                        "dynamic": False,
                        "properties": {
                            "support_test": {"type": "integer"},
                            "support_train": {"type": "integer"},
                        },
                    }
                },
            },
        },
    }
    mapping_tree = Mapping(mapping_dict)
    client_mock = Mock(spec=["search"])
    index_name = "classification_report_index_name"

    # from dict
    im1 = IMapping(mapping_dict, client=client_mock, index=index_name)
    # from tree
    im2 = IMapping(mapping_tree, client=client_mock, index=index_name)
    # from nodes
    im3 = IMapping(
        properties={
            Keyword("classification_type", fields=[Text("raw")]),
            Nested(
                "local_metrics",
                dynamic=False,
                properties=[
                    Object(
                        "dataset",
                        dynamic=False,
                        properties=[
                            Integer("support_test"),
                            Integer("support_train"),
                        ],
                    )
                ],
            ),
        },
        dynamic=False,
        client=client_mock,
        index=index_name,
    )

    # All three construction paths must produce the same serialized mapping
    # and keep the client/index bindings.
    for i, built in enumerate((im1, im2, im3)):
        self.assertEqual(
            built._tree.serialize(), mapping_dict, "failed at m%d" % (i + 1)
        )
        self.assertEqual(built._index, index_name)
        self.assertIs(built._client, client_mock)
def test_client_bound(self):
    """Check that when reaching leaves (fields without children) leaves have the
    "a" attribute that can generate aggregations on that field type.
    """
    client_mock = Mock(spec=["search"])
    es_response_mock = {
        "_shards": {"failed": 0, "successful": 135, "total": 135},
        "aggregations": {
            "terms_agg": {
                "buckets": [
                    {"doc_count": 25, "key": 1},
                    {"doc_count": 50, "key": 2},
                ],
                "doc_count_error_upper_bound": 0,
                "sum_other_doc_count": 4,
            }
        },
        "hits": {"hits": [], "max_score": 0.0, "total": 300},
        "timed_out": False,
        "took": 30,
    }
    client_mock.search = Mock(return_value=es_response_mock)

    tree = Mapping(MAPPING)
    client_bound_mapping = IMapping(
        tree,
        client=client_mock,
        index="classification_report_index_name",
    )

    workflow_field = client_bound_mapping.workflow
    self.assertTrue(hasattr(workflow_field, "a"))
    # workflow type is String
    self.assertIsInstance(workflow_field.a, field_classes_per_name["keyword"])

    response = workflow_field.a.terms(
        size=20,
        raw_output=True,
        query={"term": {"classification_type": "multiclass"}},
    )
    self.assertEqual(
        response,
        [
            (1, {"doc_count": 25, "key": 1}),
            (2, {"doc_count": 50, "key": 2}),
        ],
    )

    # Exactly one search call, with the aggregation and query composed.
    client_mock.search.assert_called_once()
    client_mock.search.assert_called_with(
        body={
            "aggs": {"terms_agg": {"terms": {"field": "workflow", "size": 20}}},
            "size": 0,
            "query": {"term": {"classification_type": "multiclass"}},
        },
        index="classification_report_index_name",
    )
def __init__(self, *args, **kwargs):
    """Build an aggregation tree.

    :param mapping: mapping used for field validation (optional, popped
        from kwargs; ``None`` is accepted).

    Remaining args/kwargs, if any, describe the aggregation body.
    """
    raw_mapping = kwargs.pop("mapping", None)
    self.mapping = Mapping(raw_mapping)
    super(Aggs, self).__init__()
    if args or kwargs:
        self._fill(*args, **kwargs)
# Movie index mapping, serialized to a plain dict.
mapping = Mapping(
    properties=[
        Keyword("movie_id"),
        Text("name", fields=Keyword("raw")),
        Date("year"),
        Float("rank"),
        Keyword("genres"),
        # Actors: first/last names are copied into a searchable full_name.
        Nested(
            "roles",
            properties=[
                Keyword("role"),
                Keyword("actor_id"),
                Keyword("gender"),
                Text("first_name", copy_to="roles.full_name", fields=Keyword("raw")),
                Text("last_name", copy_to="roles.full_name", fields=Keyword("raw")),
                Text("full_name"),
            ],
        ),
        # Directors: same shape as roles.
        Nested(
            "directors",
            properties=[
                Keyword("role"),
                Keyword("director_id"),
                Keyword("gender"),
                Text("first_name", copy_to="directors.full_name", fields=Keyword("raw")),
                Text("last_name", copy_to="directors.full_name", fields=Keyword("raw")),
                Text("full_name"),
            ],
        ),
        Integer("nb_directors"),
        Integer("nb_roles"),
    ]
).to_dict()
def test_quick_agg_nested(self):
    """Check that when reaching leaves (fields without children) leaves have the
    "a" attribute that can generate aggregations on that field type, applying
    nested if necessary.
    """
    client_mock = Mock(spec=["search"])
    es_response_mock = {
        "_shards": {"failed": 0, "successful": 135, "total": 135},
        "aggregations": {
            "local_metrics": {
                "avg_agg": {"value": 23},
            },
        },
        "hits": {"hits": [], "max_score": 0.0, "total": 300},
        "timed_out": False,
        "took": 30,
    }
    client_mock.search = Mock(return_value=es_response_mock)

    tree = Mapping(MAPPING)
    client_bound_mapping = IMapping(
        tree,
        client=client_mock,
        index="classification_report_index_name",
    )

    local_train_support = client_bound_mapping.local_metrics.dataset.support_train
    self.assertTrue(hasattr(local_train_support, "a"))
    self.assertIsInstance(local_train_support.a, field_classes_per_name["integer"])

    response = local_train_support.a.avg(
        size=20,
        raw_output=True,
        query={"term": {"classification_type": "multiclass"}},
    )
    self.assertEqual(
        response,
        [
            (None, {"value": 23}),
        ],
    )

    # The generated body must wrap the avg agg in a nested clause on the
    # 'local_metrics' path.
    client_mock.search.assert_called_once()
    client_mock.search.assert_called_with(
        body={
            "aggs": {
                "local_metrics": {
                    "nested": {"path": "local_metrics"},
                    "aggs": {
                        "avg_agg": {
                            "avg": {
                                "field": "local_metrics.dataset.support_train",
                                "size": 20,
                            }
                        }
                    },
                }
            },
            "size": 0,
            "query": {"term": {"classification_type": "multiclass"}},
        },
        index="classification_report_index_name",
    )
def test_deserialization(self):
    """Mapping built from a dict and from node objects are equivalent."""
    mapping_dict = {
        "dynamic": False,
        "properties": {
            "classification_type": {
                "type": "keyword",
                "fields": {"raw": {"type": "text"}},
            },
            "local_metrics": {
                "type": "nested",
                "dynamic": False,
                "properties": {
                    "dataset": {
                        "dynamic": False,
                        "properties": {
                            "support_test": {"type": "integer"},
                            "support_train": {"type": "integer"},
                        },
                    }
                },
            },
        },
    }

    # from dict
    m1 = Mapping(mapping_dict)
    # from node objects
    m2 = Mapping(
        dynamic=False,
        properties={
            Keyword("classification_type", fields=[Text("raw")]),
            Nested(
                "local_metrics",
                dynamic=False,
                properties=[
                    Object(
                        "dataset",
                        dynamic=False,
                        properties=[
                            Integer("support_test"),
                            Integer("support_train"),
                        ],
                    )
                ],
            ),
        },
    )

    expected_repr = """<Mapping>
_
├── classification_type Keyword
│   └── raw ~ Text
└── local_metrics [Nested]
    └── dataset {Object}
        ├── support_test Integer
        └── support_train Integer
"""
    # Both construction paths must agree on repr and serialization.
    for i, built in enumerate((m1, m2)):
        self.assertEqual(built.__repr__(), expected_repr, "failed at m%d" % (i + 1))
        self.assertEqual(built.serialize(), mapping_dict, "failed at m%d" % (i + 1))
def test_parse_tree_from_dict(self):
    """A Mapping parsed from a dict renders the expected tree representation."""
    tree = Mapping(MAPPING)
    self.assertEqual(tree.__str__(), EXPECTED_MAPPING_TREE_REPR)
def __init__(self, *args, **kwargs):
    """Build an aggregation tree.

    :param mapping: mapping used for field validation (optional, popped
        from kwargs; ``None`` is accepted).
    :param nested_autocorrect: when True, automatically wrap aggregations
        in the required nested clauses (popped from kwargs, default False).

    Remaining args/kwargs, if any, describe the aggregation body.
    """
    raw_mapping = kwargs.pop("mapping", None)
    self.mapping = Mapping(raw_mapping)
    self.nested_autocorrect = kwargs.pop("nested_autocorrect", False)
    super(Aggs, self).__init__()
    if args or kwargs:
        self._fill(*args, **kwargs)