Example #1
File: mapping.py Project: VinACE/FMI
import elasticsearch_dsl as dsl


def build_mapping(model_class, mapping=None, doc_type=None, fields=None, exclude=None, field_factory=None, extra=None):
    """
    Defines Elasticsearch fields for Django model fields. By default, this function creates a new
    ``elasticsearch_dsl.Mapping`` object with fields corresponding to those of ``model_class``.

    :param model_class: The Django model class to build a mapping for
    :param mapping: An ``elasticsearch_dsl.Mapping`` or ``elasticsearch_dsl.InnerObject`` instance to define fields on
    :param doc_type: The doc_type to use, if no mapping is specified
    :param fields: A list of Django model field names to include
    :param exclude: A list of Django model field names to exclude
    :param field_factory: A function that takes a Django model field instance and returns an ``elasticsearch_dsl.Field``
    :param extra: A dictionary (field_name -> ``elasticsearch_dsl.Field``) of extra fields to include in the mapping
    """
    if mapping is None:
        if doc_type is None:
            doc_type = model_class.__name__.lower()
        mapping = dsl.Mapping(doc_type)
    if field_factory is None:
        field_factory = document_field  # default factory defined elsewhere in this module
    for f in model_class._meta.get_fields():
        if fields and f.name not in fields:
            continue
        if exclude and f.name in exclude:
            continue
        field = field_factory(f)
        if field is not None:
            mapping.field(f.name, field)
    if extra:
        for name, field in extra.items():
            mapping.field(name, field)
    return mapping
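
A minimal usage sketch follows; the Article model, its field names, and the index name are illustrative assumptions, not taken from the project above:

# Usage sketch (model, field names, and index name are hypothetical)
from myapp.models import Article

article_mapping = build_mapping(
    Article,
    exclude=['internal_notes'],           # hypothetical field to skip
    extra={'suggest': dsl.Completion()},  # add a completion field on top
)
article_mapping.save('articles')  # persist the mapping to the 'articles' index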
Example #2
    def add_mappings(self, index_name, fields, queries_doctype, chart_doctype):
        """ Add document mappings to the index.  This creates two mappings -
        one for the queries we'll be indexing to percolate against, and a second
        for preprocessing the chart documents we want to percolate.
        This would be where any special options should be applied to use different
        highlighters, special analyzers, etc. """
        # Add two mappings to the index.  One for the queries we'll index,
        # and one for the documents that will be percolated.
        # query_mapping = edsl.Mapping(queries_doctype)
        # query_mapping.field('query', 'percolator')
        # query_mapping.field('type', type='keyword')
        # query_mapping.field('code', type='keyword')
        # query_mapping.save(index_name)

        # NOTE: We have to use this lower-level method because the
        # elasticsearch_dsl library doesn't yet support the percolator field type

        percolator_mapping = {'properties': {'query': {'type': 'percolator'}}}

        self.conn.indices.put_mapping(
            doc_type=queries_doctype,
            body=percolator_mapping,
            index=index_name,
        )
        chart_mapping = edsl.Mapping(chart_doctype)

        for field in fields:
            if index_name == "enc_dates_NOT_NOW":
                chart_mapping.field(field, {
                    "type": 'text',
                    'analyzer': 'enc_analyzer'
                })
            else:
                chart_mapping.field(field, {"type": 'text'})
        chart_mapping.save(index_name)
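
For context, a sketch of how these two mappings are typically used with the percolate query introduced in Elasticsearch 5.x; conn stands for the same client as self.conn above, the field name and document values are illustrative assumptions, and on 5.x a document_type key may also be required in the percolate clause:

# Register a query to percolate against (values are hypothetical).
conn.index(
    index=index_name,
    doc_type=queries_doctype,
    body={'query': {'match': {'some_field': 'chest pain'}}},
)

# Percolate a chart document: returns the stored queries that match it.
hits = conn.search(index=index_name, body={
    'query': {
        'percolate': {
            'field': 'query',
            'document': {'some_field': 'patient reports chest pain'},
        }
    }
})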
Example #3
def setup_mappings(twitter_index: str, es_host: str = None):
    """Run through the initial setup of the elasticsearch index used to store tweets."""
    if es_host is None:
        LOG.warning('No Elasticsearch connection setup')
        return

    create_es_connection(es_host)
    mapping_dict = aggregate_data_schema(PluginBase)
    tweet_mapping = es.Mapping('doc')
    for key, value in mapping_dict.items():
        tweet_mapping.field(key, value)

    tweet_index = get_singleton_instance(es.Index, twitter_index)
    LOG.info('Storing tweets in %s', twitter_index)
    tweet_index.settings(
        **{
            "index.mapping.total_fields.limit": 5000,
            "number_of_shards": 1,
            "number_of_replicas": 0,
        })
    #  tweet_index.document(Tweet)
    tweet_index.mapping(tweet_mapping)
    LOG.info('Checking if Index %s exists and creating if not', twitter_index)
    if not tweet_index.exists():
        LOG.info('Creating new index.')
        tweet_index.create()
    else:
        LOG.info("Index exists, ensuring it's up to date.")
        tweet_index.save()
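
aggregate_data_schema is project-specific and not shown here; for the field loop above to work, it presumably returns a dict mapping field names to elasticsearch_dsl field objects (es is the elasticsearch_dsl alias used above), along these lines:

# Assumed return shape of aggregate_data_schema (illustrative values only)
mapping_dict = {
    'full_text': es.Text(),
    'created_at': es.Date(),
    'user_screen_name': es.Keyword(),
}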
Example #4
def gendocu(stream, estype='document'):
    """
    Creates the mapping for type document in Elasticsearch
    :param estype: Name of ES type (defaults to 'document')
    """
    m = dsl.Mapping(estype)
    # Reject documents containing fields not defined in this mapping
    m.properties.dynamic = 'strict'
    # Add the shared context fields (gencontext is defined elsewhere in this module)
    m = gencontext(m)
    m = m.field('@id', 'string', index='not_analyzed')
    m = m.field('@type', 'string', index='no')
    m = m.field('dc:contributor',
                'string',
                index='analyzed',
                analyzer='autocomplete')
    access = dsl.Object()
    access = access.property('@type', 'string')
    access = access.property('@value', 'date', format='dateOptionalTime')
    m = m.field('dct:issued', access)
    m = m.field('dct:modified', access)
    m = m.field('foaf:primaryTopic',
                dsl.Object().property('@id', 'string', index='not_analyzed'))
    # Pretty-print the mapping dict to the given stream (nothing is saved to ES here)
    pprint(m.to_dict(), stream=stream)
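
A minimal invocation, writing the generated mapping to stdout:

import sys

gendocu(sys.stdout)                   # default type 'document'
gendocu(sys.stdout, estype='report')  # hypothetical custom type name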
Example #5
import elasticsearch_dsl as dsl


def copy_mapping(es_mapping, extra=None):
    doc_type = es_mapping['doc_type']
    mapping = dsl.Mapping(doc_type)
    for f, prop in es_mapping['properties'].items():
        field = doc_field(prop['type'])  # doc_field (project helper) maps a type name to a field instance
        if field is not None:
            mapping.field(f, field)
    if extra:
        for name, field in extra.items():
            mapping.field(name, field)
    return mapping
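
A usage sketch, assuming es_mapping is a plain dict of the shape the function reads (a doc_type key plus a properties dict of type names); all values are illustrative:

es_mapping = {
    'doc_type': 'book',
    'properties': {
        'title': {'type': 'text'},
        'isbn': {'type': 'keyword'},
    },
}
book_mapping = copy_mapping(es_mapping, extra={'pages': dsl.Integer()})
book_mapping.save('books')  # persist to a hypothetical 'books' index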
Example #6
import elasticsearch_dsl as es_dsl


def create_indices(scanner):
    for regex in scanner.regexes:
        id_ = regex.id.lower()

        index_name = f'{INDEX_PREFIX}-{id_}'.lower()
        index = es_dsl.Index(index_name)
        if index.exists():
            index.delete()
        index.create()

        mapping = es_dsl.Mapping()
        add_field_mappings(id_, regex, mapping)
        mapping.save(index_name)
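
add_field_mappings is not shown in this example; a minimal sketch consistent with the call above might look like this (the field names are assumptions):

def add_field_mappings(id_, regex, mapping):
    # Hypothetical fields: the matched text and the id of the rule that hit.
    mapping.field('match', es_dsl.Keyword())
    mapping.field('rule_id', es_dsl.Keyword())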
Example #7
def add_mappings(conn, index_name, queries_doctype,
                 chart_doctype):
    """ Add document mappings to the index.  This creates two mappings -
    one for the queries we'll be indexing to percolate against, and a second
    for preprocessing the chart documents we want to percolate.
    This would be where any special options should be applied to use different
    highlighters, special analyzers, etc. """
    # Add two mappings to the index.  One for the queries we'll index,
    # and one for the documents that will be percolated.
    # query_mapping = edsl.Mapping(queries_doctype)
    # query_mapping.field('query', 'percolator')
    # query_mapping.field('type', type='keyword')
    # query_mapping.field('code', type='keyword')
    # query_mapping.save(index_name)

    # NOTE: We have to use this lower-level method because the
    # elasticsearch_dsl library doesn't yet support the percolator field type
    percolator_mapping = {
        'properties': {
            'query': {
                'type': 'percolator'
            }
        }
    }
    conn.indices.put_mapping(
        doc_type=queries_doctype,
        body=percolator_mapping,
        index=index_name,
    )

    # Add the mapping for charts that will be percolated, so we know
    # how to preprocess them before searching
    chart_mapping = edsl.Mapping(chart_doctype)
    # chart_mapping.field('chart_id', 'keyword')
    chart_mapping.field('DOC', 'text')
    chart_mapping.save(index_name)
Example #8
def genbibres(stream, estype='bibliographicResource'):
    """
    Creates the mapping for type bibliographicResource in Elasticsearch
    :param estype: Name of ES type (defaults to 'bibliographicResource')
    """
    m = dsl.Mapping(estype)
    # Reject documents containing fields not defined in this mapping
    m.properties.dynamic = 'strict'
    # Add the shared context fields (gencontext is defined elsewhere in this module)
    m = gencontext(m)
    m = m.field('@id', 'string', index='not_analyzed')
    m = m.field('@type', 'string', index='no')
    m = m.field('bibo:edition', 'string', index='analyzed')
    m = m.field('bibo:isbn10', 'string', index='not_analyzed')
    m = m.field('bibo:isbn13', 'string', index='not_analyzed')
    m = m.field('bibo:issn', 'string', index='not_analyzed')
    m = m.field('dbp:originalLanguage',
                dsl.Object().property('@id', 'string', index='not_analyzed'))
    contrib = dsl.Nested()
    contrib = contrib.property('@id', dsl.String(index='no'))
    contrib = contrib.property('@type', dsl.String(index='no'))
    contrib = contrib.property('dbp:birthYear',
                               dsl.String(index='not_analyzed'))
    contrib = contrib.property('dbp:deathYear',
                               dsl.String(index='not_analyzed'))
    contrib = contrib.property('foaf:firstName', dsl.String(index='analyzed'))
    contrib = contrib.property('foaf:lastName', dsl.String(index='analyzed'))
    contrib = contrib.property('foaf:name', dsl.String(index='analyzed'))
    contrib = contrib.property('rdfs:label', dsl.String(index='analyzed'))
    contrib = contrib.property('skos:note', dsl.String(index='analyzed'))
    m = m.field('dc:contributor', contrib)
    m = m.field('dc:format', 'string', index='analyzed')
    m = m.field('dct:alternative',
                'string',
                index='analyzed',
                fields={'folded': dsl.String(analyzer='text_folded')})
    m = m.field('dct:bibliographicCitation',
                'string',
                index='analyzed',
                analyzer='standard')
    m = m.field('dct:hasPart', 'string', index='analyzed')
    m = m.field('dct:isPartOf',
                dsl.Object().property('@id', 'string', index='not_analyzed'))
    m = m.field('dct:issued', 'string', index='analyzed')
    m = m.field('dct:language',
                dsl.Object().property('@id', 'string', index='not_analyzed'))
    m = m.field('dct:subject',
                dsl.Object().property('@id', 'string', index='not_analyzed'))
    m = m.field('dct:title',
                'string',
                index='analyzed',
                fields={'folded': dsl.String(analyzer='text_folded')})
    m = m.field('rdau:contentType',
                dsl.Object().property('@id', 'string', index='not_analyzed'))
    m = m.field('rdau:dissertationOrThesisInformation',
                'string',
                index='analyzed')
    m = m.field('rdau:mediaType',
                dsl.Object().property('@id', 'string', index='not_analyzed'))
    m = m.field('rdau:modeOfIssuance',
                dsl.Object().property('@id', 'string', index='not_analyzed'))
    m = m.field('rdau:noteOnResource', 'string', index='not_analyzed')
    m = m.field('rdau:placeOfPublication',
                dsl.Object().property('@id', 'string', index='not_analyzed'))
    m = m.field('rdau:publicationStatement', 'string', index='analyzed')
    m = m.field(
        'rdfs:isDefinedBy',
        dsl.Object().property('@id',
                              'string',
                              index='analyzed',
                              analyzer='extr_id'))
    # Pretty-print the mapping dict to the given stream (nothing is saved to ES here)
    pprint(m.to_dict(), stream=stream)