def build_mapping(model_class, mapping=None, doc_type=None, fields=None, exclude=None, field_factory=None, extra=None):
    """Define Elasticsearch fields for the fields of a Django model.

    Unless an existing ``mapping`` is supplied, a new
    ``elasticsearch_dsl.Mapping`` object is created for ``model_class``.

    :param model_class: The Django model class to build a mapping for
    :param mapping: An ``elasticsearch_dsl.Mapping`` or
        ``elasticsearch_dsl.InnerObject`` instance to define fields on
    :param doc_type: The doc_type to use, if no mapping is specified
    :param fields: A list of Django model field names to include
    :param exclude: A list of Django model field names to exclude
    :param field_factory: A function that takes a Django model field
        instance, and returns a ``elasticsearch_dsl.Field``
    :param extra: A dictionary (field_name -> ``elasticsearch_dsl.Field``)
        of extra fields to include in the mapping
    """
    if mapping is None:
        if doc_type is None:
            # Default the doc_type to the lowercased model name.
            doc_type = model_class.__name__.lower()
        mapping = dsl.Mapping(doc_type)
    factory = document_field if field_factory is None else field_factory
    for model_field in model_class._meta.get_fields():
        if fields and model_field.name not in fields:
            continue
        if exclude and model_field.name in exclude:
            continue
        es_field = factory(model_field)
        # A factory may return None to signal "do not map this field".
        if es_field is not None:
            mapping.field(model_field.name, es_field)
    for name, es_field in (extra or {}).items():
        mapping.field(name, es_field)
    return mapping
def add_mappings(self, index_name, fields, queries_doctype, chart_doctype):
    """Add document mappings to the index.

    This creates two mappings: one for the queries we'll be indexing to
    percolate against, and a second for preprocessing the chart documents
    we want to percolate. This would be where any special options should
    be applied to use different highlighters, special analyzers, etc.
    """
    # NOTE: We have to use this lower-level client method because the
    # elasticsearch_dsl library doesn't yet support the percolator
    # field type.
    self.conn.indices.put_mapping(
        doc_type=queries_doctype,
        body={'properties': {'query': {'type': 'percolator'}}},
        index=index_name,
    )
    # Map each chart field as text; a special analyzer is attached only
    # for the sentinel index name below — TODO confirm whether this
    # "NOT_NOW" branch is still intended to be reachable.
    use_enc_analyzer = index_name == "enc_dates_NOT_NOW"
    chart_mapping = edsl.Mapping(chart_doctype)
    for field_name in fields:
        spec = {"type": 'text'}
        if use_enc_analyzer:
            spec['analyzer'] = 'enc_analyzer'
        chart_mapping.field(field_name, spec)
    chart_mapping.save(index_name)
def setup_mappings(twitter_index: str, es_host: str = None):
    """Run through the initial setup of the elasticsearch index used to store tweets."""
    if es_host is None:
        # Without a host there is nothing to configure; bail out early.
        LOG.warning('No Elasticsearch connection setup')
        return
    create_es_connection(es_host)
    tweet_mapping = es.Mapping('doc')
    for field_name, field_spec in aggregate_data_schema(PluginBase).items():
        tweet_mapping.field(field_name, field_spec)
    tweet_index = get_singleton_instance(es.Index, twitter_index)
    LOG.info('Storing tweets in %s', twitter_index)
    # Raise the field cap well above the default; the aggregated tweet
    # schema is wide. Single shard, no replicas.
    tweet_index.settings(**{
        "index.mapping.total_fields.limit": 5000,
        "number_of_shards": 1,
        "number_of_replicas": 0,
    })
    tweet_index.mapping(tweet_mapping)
    LOG.info('Checking if Index %s exists and creating if not', twitter_index)
    if tweet_index.exists():
        LOG.info('Index exists, ensuring its up to date.')
        tweet_index.save()
    else:
        LOG.info('Creating new index.')
        tweet_index.create()
def gendocu(stream, estype='document'):
    """Create the mapping for type document in Elasticsearch.

    The resulting mapping dict is pretty-printed to *stream* rather than
    saved to an ES cluster.

    :param stream: Writable stream the mapping dict is printed to
    :param estype: Name of ES type (defaults to 'document')
    """
    m = dsl.Mapping(estype)
    # Reject any document fields that are not explicitly mapped below.
    m.properties.dynamic = 'strict'
    m = gencontext(m)
    m = m.field('@id', 'string', index='not_analyzed')
    m = m.field('@type', 'string', index='no')
    m = m.field('dc:contributor', 'string', index='analyzed',
                analyzer='autocomplete')
    # Shared object shape for the issued/modified date literals.
    date_obj = dsl.Object()
    date_obj = date_obj.property('@type', 'string')
    date_obj = date_obj.property('@value', 'date', format='dateOptionalTime')
    m = m.field('dct:issued', date_obj)
    m = m.field('dct:modified', date_obj)
    m = m.field('foaf:primaryTopic',
                dsl.Object().property('@id', 'string', index='not_analyzed'))
    pprint(m.to_dict(), stream=stream)
def copy_mapping(es_mapping, extra=None):
    """Build a ``dsl.Mapping`` from a mapping description dict.

    :param es_mapping: Dict with a 'doc_type' key and a 'properties'
        dict of field name -> {'type': ...} entries
    :param extra: Optional dict (field_name -> field) merged into the result
    :return: The populated ``dsl.Mapping``
    """
    mapping = dsl.Mapping(es_mapping['doc_type'])
    for name, prop in es_mapping['properties'].items():
        es_field = doc_field(prop['type'])
        # doc_field returns None for property types it cannot translate.
        if es_field is not None:
            mapping.field(name, es_field)
    for name, es_field in (extra or {}).items():
        mapping.field(name, es_field)
    return mapping
def create_indices(scanner):
    """Recreate one index per scanner regex, replacing any existing index."""
    for regex in scanner.regexes:
        regex_id = regex.id.lower()
        index_name = f'{INDEX_PREFIX}-{regex_id}'.lower()
        index = es_dsl.Index(index_name)
        # Drop any stale index so each run starts from a clean slate.
        if index.exists():
            index.delete()
        index.create()
        mapping = es_dsl.Mapping()
        add_field_mappings(regex_id, regex, mapping)
        mapping.save(index_name)
def add_mappings(conn, index_name, queries_doctype, chart_doctype):
    """Add document mappings to the index.

    This creates two mappings: one for the queries we'll be indexing to
    percolate against, and a second for preprocessing the chart documents
    we want to percolate. This would be where any special options should
    be applied to use different highlighters, special analyzers, etc.
    """
    # NOTE: We have to use this lower-level client method because the
    # elasticsearch_dsl library doesn't yet support the percolator
    # field type.
    conn.indices.put_mapping(
        doc_type=queries_doctype,
        body={'properties': {'query': {'type': 'percolator'}}},
        index=index_name,
    )
    # Map the charts that will be percolated, so we know how to
    # preprocess them before searching.
    chart_mapping = edsl.Mapping(chart_doctype)
    chart_mapping.field('DOC', 'text')
    chart_mapping.save(index_name)
def genbibres(stream, estype='bibliographicResource'):
    """
    Creates the mapping for type bibliographicResource in Elasticsearch

    :param stream: Stream the resulting mapping dict is pretty-printed to
    :param estype: Name of ES type (defaults to 'bibliographicResource')
    """
    m = dsl.Mapping(estype)
    # Reject any document fields that are not explicitly mapped below.
    m.properties.dynamic = 'strict'
    # Add the shared context mapping first, then the per-field mappings.
    m = gencontext(m)
    m = m.field('@id', 'string', index='not_analyzed')
    m = m.field('@type', 'string', index='no')
    m = m.field('bibo:edition', 'string', index='analyzed')
    m = m.field('bibo:isbn10', 'string', index='not_analyzed')
    m = m.field('bibo:isbn13', 'string', index='not_analyzed')
    m = m.field('bibo:issn', 'string', index='not_analyzed')
    m = m.field('dbp:originalLanguage',
                dsl.Object().property('@id', 'string', index='not_analyzed'))
    # Contributors are a nested type so each person's sub-fields are
    # matched together rather than cross-combined at query time.
    contrib = dsl.Nested()
    contrib = contrib.property('@id', dsl.String(index='no'))
    contrib = contrib.property('@type', dsl.String(index='no'))
    contrib = contrib.property('dbp:birthYear', dsl.String(index='not_analyzed'))
    contrib = contrib.property('dbp:deathYear', dsl.String(index='not_analyzed'))
    contrib = contrib.property('foaf:firstName', dsl.String(index='analyzed'))
    contrib = contrib.property('foaf:lastName', dsl.String(index='analyzed'))
    contrib = contrib.property('foaf:name', dsl.String(index='analyzed'))
    contrib = contrib.property('rdfs:label', dsl.String(index='analyzed'))
    contrib = contrib.property('skos:note', dsl.String(index='analyzed'))
    m = m.field('dc:contributor', contrib)
    m = m.field('dc:format', 'string', index='analyzed')
    # The 'folded' sub-field gives a diacritics-folded view of title-like
    # fields (analyzer 'text_folded' is defined in the index settings —
    # presumably; not visible from here).
    m = m.field('dct:alternative', 'string', index='analyzed',
                fields={'folded': dsl.String(analyzer='text_folded')})
    m = m.field('dct:bibliographicCitation', 'string', index='analyzed',
                analyzer='standard')
    m = m.field('dct:hasPart', 'string', index='analyzed')
    m = m.field('dct:isPartOf',
                dsl.Object().property('@id', 'string', index='not_analyzed'))
    m = m.field('dct:issued', 'string', index='analyzed')
    m = m.field('dct:language',
                dsl.Object().property('@id', 'string', index='not_analyzed'))
    m = m.field('dct:subject',
                dsl.Object().property('@id', 'string', index='not_analyzed'))
    m = m.field('dct:title', 'string', index='analyzed',
                fields={'folded': dsl.String(analyzer='text_folded')})
    m = m.field('rdau:contentType',
                dsl.Object().property('@id', 'string', index='not_analyzed'))
    m = m.field('rdau:dissertationOrThesisInformation', 'string', index='analyzed')
    m = m.field('rdau:mediaType',
                dsl.Object().property('@id', 'string', index='not_analyzed'))
    m = m.field('rdau:modeOfIssuance',
                dsl.Object().property('@id', 'string', index='not_analyzed'))
    m = m.field('rdau:noteOnResource', 'string', index='not_analyzed')
    m = m.field('rdau:placeOfPublication',
                dsl.Object().property('@id', 'string', index='not_analyzed'))
    m = m.field('rdau:publicationStatement', 'string', index='analyzed')
    m = m.field(
        'rdfs:isDefinedBy',
        dsl.Object().property('@id', 'string', index='analyzed',
                              analyzer='extr_id'))
    # The mapping dict is emitted to the stream, not saved to ES directly.
    pprint(m.to_dict(), stream=stream)