Esempio n. 1
0
def configure_schema(schema, version):
    """Build the index mapping for ``schema`` and pass it to ``configure_index``.

    Args:
        schema: Schema object. Its ``properties`` mapping is iterated here,
            and the object is also handed to ``model.get`` and
            ``get_shard_weight``.
        version: Forwarded to ``schema_index`` together with the schema.

    Returns:
        The result of ``configure_index(index, mapping, settings)``.
    """
    # One mapping entry per schema property, so that each property can be
    # searched according to its type. Every property is additionally copied
    # into the shared "text" field.
    property_fields = {}
    # Numeric sub-object: seeded with the date group, then extended with
    # every property whose type is listed in NUMERIC_TYPES.
    numeric_fields = {registry.date.group: NUMERIC}
    for prop in schema.properties.values():
        property_fields[prop.name] = dict(
            TYPE_MAPPINGS.get(prop.type, KEYWORD), copy_to=["text"]
        )
        if prop.type in NUMERIC_TYPES:
            numeric_fields[prop.name] = NUMERIC

    # Top-level fields that are plain keywords, in their original order.
    fields = dict.fromkeys(
        (
            "caption",
            "schema",
            "schemata",
            registry.entity.group,
            registry.language.group,
            registry.country.group,
            registry.checksum.group,
            registry.ip.group,
            registry.url.group,
            registry.iban.group,
            registry.email.group,
            registry.phone.group,
            registry.mimetype.group,
            registry.identifier.group,
        ),
        KEYWORD,
    )
    # Remaining fields carry their own, more specific configuration.
    fields.update(
        {
            registry.date.group: PARTIAL_DATE,
            registry.address.group: KEYWORD,
            registry.name.group: KEYWORD,
            "fingerprints": {
                "type": "keyword",
                "normalizer": "latin_index",
                "copy_to": "text",
                "fields": {"text": LATIN_TEXT},
            },
            "text": {
                "type": "text",
                "analyzer": "latin_index",
                "search_analyzer": "latin_query",
                "search_quote_analyzer": "latin_index",
                "term_vector": "with_positions_offsets",
            },
            "properties": {"type": "object", "properties": property_fields},
            "numeric": {"type": "object", "properties": numeric_fields},
            "role_id": KEYWORD,
            "collection_id": KEYWORD,
            "origin": KEYWORD,
            "created_at": {"type": "date"},
            "updated_at": {"type": "date"},
        }
    )

    index_mapping = {
        "date_detection": False,
        "dynamic": False,
        # "text" and "fingerprints" are left out of the stored _source.
        "_source": {"excludes": ["text", "fingerprints"]},
        "properties": fields,
    }
    return configure_index(
        schema_index(model.get(schema), version),
        index_mapping,
        index_settings(shards=get_shard_weight(schema)),
    )
Esempio n. 2
0
def configure_schema(schema, version):
    """Build the index mapping for ``schema`` and pass it to ``configure_index``.

    Args:
        schema: Schema object. Its ``properties`` mapping is iterated here,
            and the object is also handed to ``model.get`` and
            ``get_shard_weight``.
        version: Forwarded to ``schema_index`` together with the schema.

    Returns:
        The result of ``configure_index(index, mapping, settings)``.
    """
    # One mapping entry per schema property, so that each property can be
    # searched according to its type. Every property is additionally copied
    # into the shared "text" field.
    property_fields = {}
    for prop in schema.properties.values():
        property_fields[prop.name] = dict(
            TYPE_MAPPINGS.get(prop.type, KEYWORD), copy_to=["text"]
        )

    # Field table, assembled in the same key order as the flat literal form.
    fields = {
        "name": {
            "type": "text",
            "analyzer": "icu_latin",
            "fields": {"kw": KEYWORD},
            "boost": 3.0,
            "copy_to": "text",
        },
    }
    # Plain keyword fields.
    fields.update(
        dict.fromkeys(
            (
                "schema",
                "schemata",
                "foreign_id",
                "document_id",
                "collection_id",
                "uploader_id",
                "entities",
                "languages",
                "countries",
                "checksums",
                "keywords",
                "ips",
                "urls",
                "ibans",
                "emails",
                "phones",
                "mimetypes",
                "identifiers",
            ),
            KEYWORD,
        )
    )
    # Fields with their own, more specific configuration.
    fields.update(
        {
            "dates": PARTIAL_DATE,
            "addresses": {
                "type": "keyword",
                "fields": {"text": LATIN_TEXT},
            },
            "names": {
                "type": "keyword",
                "fields": {"text": LATIN_TEXT},
                "copy_to": "text",
            },
            "fingerprints": {
                "type": "keyword",
                "normalizer": "icu_latin",
                "copy_to": "text",
                "fields": {"text": LATIN_TEXT},
            },
            "text": {
                "type": "text",
                "analyzer": "icu_latin",
                "term_vector": "with_positions_offsets",
                "store": True,
            },
            "properties": {"type": "object", "properties": property_fields},
            "updated_at": {"type": "date"},
        }
    )

    index_mapping = {
        "date_detection": False,
        "dynamic": False,
        # "text" and "fingerprints" are left out of the stored _source.
        "_source": {"excludes": ["text", "fingerprints"]},
        "properties": fields,
    }
    return configure_index(
        schema_index(model.get(schema), version),
        index_mapping,
        index_settings(shards=get_shard_weight(schema)),
    )
Esempio n. 3
0
def configure_schema(schema, version):
    """Build the index mapping for ``schema`` and pass it to ``configure_index``.

    Args:
        schema: Schema object. Its ``properties`` mapping is iterated here,
            and the object is also handed to ``model.get`` and
            ``get_shard_weight``.
        version: Forwarded to ``schema_index`` together with the schema.

    Returns:
        The result of ``configure_index(index, mapping, settings)``.
    """
    # One mapping entry per schema property, so that each property can be
    # searched according to its type. Every property is additionally copied
    # into the shared "text" field.
    property_fields = {}
    for prop in schema.properties.values():
        property_fields[prop.name] = dict(
            TYPE_MAPPINGS.get(prop.type, KEYWORD), copy_to=["text"]
        )

    # The field table is assembled in stages; the stages follow the key order
    # of the flat literal form, so the resulting dict is identical.
    fields = {
        "name": {
            "type": "text",
            "analyzer": "icu_latin",
            "fields": {"kw": KEYWORD},
            "boost": 3.0,
            "copy_to": "text",
        },
    }
    fields.update(dict.fromkeys(("schema", "schemata"), KEYWORD))
    fields["bulk"] = {"type": "boolean"}
    fields["status"] = KEYWORD
    # Not indexed under its own name ("index": False), but still copied
    # into the "text" field.
    fields["error_message"] = {
        "type": "text",
        "copy_to": "text",
        "index": False,
    }
    fields.update(
        dict.fromkeys(
            ("foreign_id", "document_id", "collection_id", "uploader_id"),
            KEYWORD,
        )
    )
    fields["fingerprints"] = {
        "type": "keyword",
        "normalizer": "icu_latin",
        "copy_to": "text",
        "fields": {"text": LATIN_TEXT},
    }
    fields.update(
        dict.fromkeys(
            (
                "entities",
                "languages",
                "countries",
                "checksums",
                "keywords",
                "ips",
                "urls",
                "ibans",
                "emails",
                "phones",
                "mimetypes",
                "identifiers",
            ),
            KEYWORD,
        )
    )
    fields.update(
        {
            "addresses": {
                "type": "keyword",
                "fields": {"text": LATIN_TEXT},
            },
            "dates": PARTIAL_DATE,
            "names": {
                "type": "keyword",
                "fields": {"text": LATIN_TEXT},
                "copy_to": "text",
            },
            "created_at": {"type": "date"},
            "updated_at": {"type": "date"},
            "text": {
                "type": "text",
                "analyzer": "icu_latin",
                "term_vector": "with_positions_offsets",
                "store": True,
            },
            "properties": {"type": "object", "properties": property_fields},
        }
    )

    index_mapping = {
        "date_detection": False,
        "dynamic": False,
        # "text" and "fingerprints" are left out of the stored _source.
        "_source": {"excludes": ["text", "fingerprints"]},
        "properties": fields,
    }
    index = schema_index(model.get(schema), version)
    settings = index_settings(shards=get_shard_weight(schema))
    return configure_index(index, index_mapping, settings)