# Tests for elasticsearch_dsl's Mapping. Tests that take the ``write_client``
# fixture are integration tests and assume a running Elasticsearch node; the
# fixture itself is expected to come from the suite's conftest.
import json

from pytest import raises

from elasticsearch_dsl import Keyword, Nested, Text, analysis, exceptions, mapping
def test_mapping_update_is_recursive():
    m1 = mapping.Mapping()
    m1.field("title", "text")
    m1.field("author", "object")
    m1.field("author", "object", properties={"name": {"type": "text"}})
    m1.meta("_all", enabled=False)
    m1.meta("dynamic", False)

    m2 = mapping.Mapping()
    m2.field("published_from", "date")
    m2.field("author", "object", properties={"email": {"type": "text"}})
    m2.field("title", "text")
    m2.field("lang", "keyword")
    m2.meta("_analyzer", path="lang")

    m1.update(m2, update_only=True)

    assert {
        "_all": {"enabled": False},
        "_analyzer": {"path": "lang"},
        "dynamic": False,
        "properties": {
            "published_from": {"type": "date"},
            "title": {"type": "text"},
            "lang": {"type": "keyword"},
            "author": {
                "type": "object",
                "properties": {
                    "name": {"type": "text"},
                    "email": {"type": "text"},
                },
            },
        },
    } == m1.to_dict()
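
# A minimal companion sketch, not part of the original suite: with
# update_only=True a scalar field that already exists on the target mapping
# is assumed to keep its definition instead of being overwritten. This just
# makes explicit what the merge in the test above relies on.
def test_update_only_keeps_existing_field_definitions():
    m1 = mapping.Mapping()
    m1.field("title", "text", analyzer="snowball")
    m2 = mapping.Mapping()
    m2.field("title", "keyword")

    m1.update(m2, update_only=True)

    assert {
        "properties": {"title": {"type": "text", "analyzer": "snowball"}}
    } == m1.to_dict()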
def test_unchanged_mapping_is_not_updated(write_client):
    m = mapping.Mapping()
    m.field(
        "name",
        "text",
        analyzer=analysis.analyzer(
            "my_analyzer",
            tokenizer="standard",
            filter=[
                analysis.token_filter(
                    "simple_edge", type="edgeNGram", min_gram=2, max_gram=3
                )
            ],
        ),
    )
    m.save("test-mapping", using=write_client)

    # this should not trigger an error since the mapping didn't change
    m.save("test-mapping", using=write_client)

    # change the mapping just a little bit
    m.field(
        "name",
        "text",
        analyzer=analysis.analyzer(
            "my_analyzer",
            tokenizer="standard",
            filter=[
                analysis.token_filter(
                    "simple_edge",
                    type="edgeNGram",
                    min_gram=2,
                    max_gram=4,  # changed from 3 to 4
                )
            ],
        ),
    )
    with raises(exceptions.IllegalOperation):
        m.save("test-mapping", using=write_client)
def test_mapping_saved_into_es_when_index_already_exists_with_analysis(write_client):
    m = mapping.Mapping()
    analyzer = analysis.analyzer("my_analyzer", tokenizer="keyword")
    m.field("name", "text", analyzer=analyzer)

    new_analysis = analyzer.get_analysis_definition()
    new_analysis["analyzer"]["other_analyzer"] = {
        "type": "custom",
        "tokenizer": "whitespace",
    }
    write_client.indices.create(
        index="test-mapping", body={"settings": {"analysis": new_analysis}}
    )

    m.field("title", "text", analyzer=analyzer)
    m.save("test-mapping", using=write_client)

    assert {
        "test-mapping": {
            "mappings": {
                "properties": {
                    "name": {"type": "text", "analyzer": "my_analyzer"},
                    "title": {"type": "text", "analyzer": "my_analyzer"},
                }
            }
        }
    } == write_client.indices.get_mapping(index="test-mapping")
def test_properties_can_iterate_over_all_the_fields():
    m = mapping.Mapping()
    m.field("f1", "text", test_attr="f1", fields={"f2": Keyword(test_attr="f2")})
    m.field("f3", Nested(test_attr="f3", properties={"f4": Text(test_attr="f4")}))

    assert {"f1", "f2", "f3", "f4"} == {
        f.test_attr for f in m.properties._collect_fields()
    }
def test_even_non_custom_analyzers_can_have_params():
    a1 = analysis.analyzer("whitespace", type="pattern", pattern=r"\\s+")
    m = mapping.Mapping()
    m.field("title", "text", analyzer=a1)

    assert {
        "analyzer": {"whitespace": {"type": "pattern", "pattern": r"\\s+"}}
    } == m._collect_analysis()
def test_resolve_nested():
    m = mapping.Mapping()
    m.field("n1", "nested", properties={"n2": Nested(properties={"k1": Keyword()})})
    m.field("k2", "keyword")

    nested, field = m.resolve_nested("n1.n2.k1")
    assert nested == ["n1", "n1.n2"]
    assert isinstance(field, Keyword)

    nested, field = m.resolve_nested("k2")
    assert nested == []
    assert isinstance(field, Keyword)
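
# A small companion sketch, not part of the original suite: a path that
# crosses a single nested layer should report just that one layer. This only
# exercises behavior already asserted above, one level shallower.
def test_resolve_nested_single_level():
    m = mapping.Mapping()
    m.field("n1", "nested", properties={"k1": Keyword()})

    nested, field = m.resolve_nested("n1.k1")
    assert nested == ["n1"]
    assert isinstance(field, Keyword)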
def test_mapping_can_has_fields():
    m = mapping.Mapping()
    m.field("name", "text").field("tags", "keyword")

    assert {
        "properties": {"name": {"type": "text"}, "tags": {"type": "keyword"}}
    } == m.to_dict()
def test_mapping_saved_into_es(write_client):
    m = mapping.Mapping()
    m.field(
        "name", "text", analyzer=analysis.analyzer("my_analyzer", tokenizer="keyword")
    )
    m.field("tags", "keyword")
    m.save("test-mapping", using=write_client)

    assert {
        "test-mapping": {
            "mappings": {
                "properties": {
                    "name": {"type": "text", "analyzer": "my_analyzer"},
                    "tags": {"type": "keyword"},
                }
            }
        }
    } == write_client.indices.get_mapping(index="test-mapping")
def test_mapping_saved_into_es_when_index_already_exists_closed(write_client):
    m = mapping.Mapping()
    m.field(
        "name", "text", analyzer=analysis.analyzer("my_analyzer", tokenizer="keyword")
    )
    write_client.indices.create(index="test-mapping")

    # analysis settings cannot be changed while the index is open
    with raises(exceptions.IllegalOperation):
        m.save("test-mapping", using=write_client)

    # once the index is closed, the mapping and its analysis can be saved
    write_client.cluster.health(index="test-mapping", wait_for_status="yellow")
    write_client.indices.close(index="test-mapping")
    m.save("test-mapping", using=write_client)

    assert {
        "test-mapping": {
            "mappings": {
                "properties": {"name": {"type": "text", "analyzer": "my_analyzer"}}
            }
        }
    } == write_client.indices.get_mapping(index="test-mapping")
def test_mapping_can_collect_multiple_analyzers():
    a1 = analysis.analyzer(
        "my_analyzer1",
        tokenizer="keyword",
        filter=[
            "lowercase",
            analysis.token_filter("my_filter1", "stop", stopwords=["a", "b"]),
        ],
    )
    a2 = analysis.analyzer(
        "my_analyzer2",
        tokenizer=analysis.tokenizer("trigram", "nGram", min_gram=3, max_gram=3),
        filter=[analysis.token_filter("my_filter2", "stop", stopwords=["c", "d"])],
    )
    m = mapping.Mapping()
    m.field("title", "text", analyzer=a1, search_analyzer=a2)
    m.field(
        "text",
        "text",
        analyzer=a1,
        fields={
            "english": Text(analyzer=a1),
            "unknown": Keyword(analyzer=a1, search_analyzer=a2),
        },
    )

    assert {
        "analyzer": {
            "my_analyzer1": {
                "filter": ["lowercase", "my_filter1"],
                "tokenizer": "keyword",
                "type": "custom",
            },
            "my_analyzer2": {
                "filter": ["my_filter2"],
                "tokenizer": "trigram",
                "type": "custom",
            },
        },
        "filter": {
            "my_filter1": {"stopwords": ["a", "b"], "type": "stop"},
            "my_filter2": {"stopwords": ["c", "d"], "type": "stop"},
        },
        "tokenizer": {"trigram": {"max_gram": 3, "min_gram": 3, "type": "nGram"}},
    } == m._collect_analysis()
def test_mapping_can_collect_all_analyzers_and_normalizers():
    a1 = analysis.analyzer(
        "my_analyzer1",
        tokenizer="keyword",
        filter=[
            "lowercase",
            analysis.token_filter("my_filter1", "stop", stopwords=["a", "b"]),
        ],
    )
    a2 = analysis.analyzer("english")
    a3 = analysis.analyzer("unknown_custom")
    a4 = analysis.analyzer(
        "my_analyzer2",
        tokenizer=analysis.tokenizer("trigram", "nGram", min_gram=3, max_gram=3),
        filter=[analysis.token_filter("my_filter2", "stop", stopwords=["c", "d"])],
    )
    a5 = analysis.analyzer("my_analyzer3", tokenizer="keyword")
    n1 = analysis.normalizer("my_normalizer1", filter=["lowercase"])
    n2 = analysis.normalizer(
        "my_normalizer2",
        filter=[
            "my_filter1",
            "my_filter2",
            analysis.token_filter("my_filter3", "stop", stopwords=["e", "f"]),
        ],
    )
    n3 = analysis.normalizer("unknown_custom")

    m = mapping.Mapping()
    m.field(
        "title",
        "text",
        analyzer=a1,
        fields={"english": Text(analyzer=a2), "unknown": Keyword(search_analyzer=a3)},
    )
    m.field("comments", Nested(properties={"author": Text(analyzer=a4)}))
    m.field("normalized_title", "keyword", normalizer=n1)
    m.field("normalized_comment", "keyword", normalizer=n2)
    m.field("unknown", "keyword", normalizer=n3)
    m.meta("_all", analyzer=a5)

    assert {
        "analyzer": {
            "my_analyzer1": {
                "filter": ["lowercase", "my_filter1"],
                "tokenizer": "keyword",
                "type": "custom",
            },
            "my_analyzer2": {
                "filter": ["my_filter2"],
                "tokenizer": "trigram",
                "type": "custom",
            },
            "my_analyzer3": {"tokenizer": "keyword", "type": "custom"},
        },
        "normalizer": {
            "my_normalizer1": {"filter": ["lowercase"], "type": "custom"},
            "my_normalizer2": {
                "filter": ["my_filter1", "my_filter2", "my_filter3"],
                "type": "custom",
            },
        },
        "filter": {
            "my_filter1": {"stopwords": ["a", "b"], "type": "stop"},
            "my_filter2": {"stopwords": ["c", "d"], "type": "stop"},
            "my_filter3": {"stopwords": ["e", "f"], "type": "stop"},
        },
        "tokenizer": {"trigram": {"max_gram": 3, "min_gram": 3, "type": "nGram"}},
    } == m._collect_analysis()

    assert json.loads(json.dumps(m.to_dict())) == m.to_dict()
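
# A hedged integration sketch, not in the original suite: Mapping.save() is
# expected to push the dict from _collect_analysis() into the index settings,
# so the analyzer definition should be readable back under settings.analysis.
# The index name and the write_client fixture are assumptions borrowed from
# the integration tests above.
def test_collected_analysis_ends_up_in_index_settings(write_client):
    m = mapping.Mapping()
    m.field(
        "name", "text", analyzer=analysis.analyzer("my_analyzer", tokenizer="keyword")
    )
    m.save("test-analysis-settings", using=write_client)

    settings = write_client.indices.get_settings(index="test-analysis-settings")
    assert settings["test-analysis-settings"]["settings"]["index"]["analysis"][
        "analyzer"
    ]["my_analyzer"] == {"type": "custom", "tokenizer": "keyword"}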
def test_resolve_field_can_resolve_multifields():
    m = mapping.Mapping()
    m.field("title", "text", fields={"keyword": Keyword()})

    assert isinstance(m.resolve_field("title.keyword"), Keyword)
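
# A hedged follow-up sketch, not in the original suite: resolve_field is
# assumed to walk nested/inner-object properties the same way it walks
# multifields, since resolve_nested above already resolves such paths.
def test_resolve_field_can_resolve_nested_properties():
    m = mapping.Mapping()
    m.field("comments", Nested(properties={"author": Text()}))

    assert isinstance(m.resolve_field("comments.author"), Text)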