Ejemplo n.º 1
0
    def test_bulk_add_documents(self):
        """
        Index ten documents with one bulk request and check that the document
        count of the "test" index grows by exactly ten

        """

        se = SearchEngineFactory().create()
        se.create_index(index="test")
        count_before = se.count(index="test")

        # One bulk item per document; the document's own "id" doubles as the
        # id handed to the search engine.
        documents = [
            se.create_bulk_item(
                op_type="index",
                index="test",
                id=i,
                data={"id": i, "type": "prefLabel", "value": "test pref label"},
            )
            for i in range(10)
        ]

        # refresh=True is passed so the count below is taken after the bulk
        # request; presumably it makes the new documents visible immediately.
        se.bulk_index(documents, refresh=True)

        self.assertEqual(se.count(index="test") - count_before, 10)
Ejemplo n.º 2
0
    def test_bulk_add_documents(self):
        """
        Bulk-index ten documents into the 'test' index and verify that the
        index's document count increases by ten

        """

        engine = SearchEngineFactory().create()
        engine.create_index(index='test')
        initial_count = engine.count(index='test')

        bulk_items = []
        for doc_id in range(10):
            payload = {'id': doc_id, 'type': 'prefLabel', 'value': 'test pref label'}
            bulk_items.append(
                engine.create_bulk_item(op_type='index', index='test', id=payload['id'], data=payload)
            )

        # The return value of bulk_index is not needed for the assertion.
        engine.bulk_index(bulk_items, refresh=True)

        final_count = engine.count(index='test')
        self.assertEqual(final_count - initial_count, 10)
Ejemplo n.º 3
0
	def setUpClass(cls):
		"""
		Recreate the 'concept_labels' and 'term' indexes from scratch, then
		import the archesv4 resource graph fixture through the 'packages'
		management command

		"""

		search_engine = SearchEngineFactory().create()
		index_names = ('concept_labels', 'term')

		# Both deletions run before either creation, as in the original order.
		for index_name in index_names:
			search_engine.delete_index(index=index_name)
		for index_name in index_names:
			search_engine.create_index(index=index_name)

		management.call_command(
			'packages',
			operation='import_json',
			source='tests/fixtures/resource_graphs/archesv4_resource.json'
		)
Ejemplo n.º 4
0
def prepare_concepts_index(create=False):
    """
    Build the Elasticsearch settings and mappings for the "concepts" index

    Keyword Arguments:
    create -- when True, also create the "concepts" index from these settings

    Returns the settings/mappings dict (a new dict on every call)

    """

    # Custom analyzer: standard tokenizer, lowercased, accents folded to ASCII.
    folding_analyzer = {"tokenizer": "standard", "filter": ["lowercase", "asciifolding"]}

    # "value" is full text, with an exact ("raw") and a folded sub-field.
    value_field = {
        "analyzer": "standard",
        "type": "text",
        "fields": {
            "raw": {"type": "keyword"},
            "folded": {"analyzer": "folding", "type": "text"},
        },
    }

    properties = {
        "top_concept": {"type": "keyword"},
        "conceptid": {"type": "keyword"},
        "language": {"type": "keyword"},
        "id": {"type": "keyword"},
        "category": {"type": "keyword"},
        "provisional": {"type": "boolean"},
        "type": {"type": "keyword"},
        "value": value_field,
    }

    index_settings = {
        "settings": {"analysis": {"analyzer": {"folding": folding_analyzer}}},
        "mappings": {"_doc": {"properties": properties}},
    }

    if not create:
        return index_settings

    SearchEngineFactory().create().create_index(index="concepts", body=index_settings)
    return index_settings
Ejemplo n.º 5
0
    def prepare_resource_relations_index(self, create=False):
        """
        Build the Elasticsearch mapping for the 'resource_relations' index

        Keyword Arguments:
        create -- when True, also create the 'resource_relations' index from
            this mapping (the call passes ignore=400 to the search engine)

        Returns the mapping dict

        """

        properties = {
            'resourcexid': {'type': 'long'},
            'notes': {'type': 'string'},
            'relationshiptype': {'type': 'string', 'index': 'not_analyzed'},
            'entityid2': {'type': 'string', 'index': 'not_analyzed'},
            'entityid1': {'type': 'string', 'index': 'not_analyzed'},
        }
        index_settings = {'mappings': {'all': {'properties': properties}}}

        if not create:
            return index_settings

        SearchEngineFactory().create().create_index(
            index='resource_relations', body=index_settings, ignore=400
        )
        return index_settings
Ejemplo n.º 6
0
    def prepare_search_index(self, resource_type_id, create=False):
        """
        Creates the settings and mappings in Elasticsearch to support resource search

        Starts from the settings built by the parent class and adds a
        'date_groups' mapping to the properties of resource_type_id.

        Arguments:
        resource_type_id -- key of the mapping to extend; also used as the
            doc_type when the mapping is added to an existing index

        Keyword Arguments:
        create -- when True, try to create the 'entity' index; if that raises,
            fall back to adding only the mapping to the index

        Returns the complete index settings dict

        """

        # The parent is always asked for settings only; index creation is
        # handled below so the extended mapping is what gets created.
        index_settings = super(Resource, self).prepare_search_index(resource_type_id, create=False)

        index_settings['mappings'][resource_type_id]['properties']['date_groups'] = {
            'properties': {
                'conceptid': {'type': 'string', 'index': 'not_analyzed'}
            }
        }

        if create:
            se = SearchEngineFactory().create()
            try:
                se.create_index(index='entity', body=index_settings)
            except Exception:
                # Presumably the index already exists; only register the
                # mapping for this resource type. Catching Exception rather
                # than using a bare except lets KeyboardInterrupt and
                # SystemExit propagate.
                se.create_mapping(index='entity', doc_type=resource_type_id, body=index_settings['mappings'])

        return index_settings
Ejemplo n.º 7
0
def prepare_resource_relations_index(create=False):
    """
    Build the Elasticsearch mapping for the "resource_relations" index

    Keyword Arguments:
    create -- when True, also create the "resource_relations" index from this
        mapping

    Returns the mapping dict

    """

    # (field name, Elasticsearch field type) in mapping order.
    field_types = (
        ("resourcexid", "keyword"),
        ("notes", "text"),
        ("relationshiptype", "keyword"),
        ("resourceinstanceidfrom", "keyword"),
        ("resourceinstanceidto", "keyword"),
        ("created", "keyword"),
        ("modified", "keyword"),
    )
    properties = {name: {"type": es_type} for name, es_type in field_types}
    index_settings = {"mappings": {"_doc": {"properties": properties}}}

    if not create:
        return index_settings

    SearchEngineFactory().create().create_index(index="resource_relations", body=index_settings)
    return index_settings
Ejemplo n.º 8
0
def prepare_resource_relations_index(create=False):
    """
    Build the Elasticsearch mapping for the resource relations index

    Keyword Arguments:
    create -- when True, also create the index named by
        RESOURCE_RELATIONS_INDEX from this mapping

    Returns the mapping dict

    """

    # (field name, Elasticsearch field type) in mapping order.
    field_types = (
        ("resourcexid", "keyword"),
        ("notes", "text"),
        ("relationshiptype", "keyword"),
        ("inverserelationshiptype", "keyword"),
        ("resourceinstanceidfrom", "keyword"),
        ("resourceinstancefrom_graphid", "keyword"),
        ("resourceinstanceidto", "keyword"),
        ("resourceinstanceto_graphid", "keyword"),
        ("created", "keyword"),
        ("modified", "keyword"),
        ("datestarted", "date"),
        ("dateended", "date"),
        ("tileid", "keyword"),
        ("nodeid", "keyword"),
    )
    properties = {name: {"type": es_type} for name, es_type in field_types}
    index_settings = {"mappings": {"_doc": {"properties": properties}}}

    if not create:
        return index_settings

    SearchEngineFactory().create().create_index(index=RESOURCE_RELATIONS_INDEX, body=index_settings)
    return index_settings
Ejemplo n.º 9
0
def prepare_terms_index(create=False):
    """
    Build the Elasticsearch settings and mappings to support term search

    Keyword Arguments:
    create -- when True, also create the index named by TERMS_INDEX from
        these settings

    Returns the settings/mappings dict

    """

    # Custom analyzer: standard tokenizer, lowercased, accents folded to ASCII.
    folding_analyzer = {"tokenizer": "standard", "filter": ["lowercase", "asciifolding"]}

    # "value" is full text, with an exact ("raw") and a folded sub-field.
    value_field = {
        "analyzer": "standard",
        "type": "text",
        "fields": {
            "raw": {"type": "keyword"},
            "folded": {"analyzer": "folding", "type": "text"},
        },
    }

    properties = {
        "nodegroupid": {"type": "keyword"},
        "tileid": {"type": "keyword"},
        "nodeid": {"type": "keyword"},
        "resourceinstanceid": {"type": "keyword"},
        "provisional": {"type": "boolean"},
        "value": value_field,
    }

    index_settings = {
        "settings": {"analysis": {"analyzer": {"folding": folding_analyzer}}},
        "mappings": {"_doc": {"properties": properties}},
    }

    if not create:
        return index_settings

    SearchEngineFactory().create().create_index(index=TERMS_INDEX, body=index_settings)
    return index_settings
Ejemplo n.º 10
0
def prepare_resource_relations_index(create=False):
    """
    Build the Elasticsearch mapping for the 'resource_relations' index

    Keyword Arguments:
    create -- when True, also create the 'resource_relations' index from this
        mapping (the call passes ignore=400 to the search engine)

    Returns the mapping dict

    """

    # (field name, Elasticsearch field type) in mapping order.
    field_types = (
        ('resourcexid', 'keyword'),
        ('notes', 'text'),
        ('relationshiptype', 'keyword'),
        ('resourceinstanceidfrom', 'keyword'),
        ('resourceinstanceidto', 'keyword'),
        ('created', 'keyword'),
        ('modified', 'keyword'),
    )
    properties = {name: {'type': es_type} for name, es_type in field_types}
    index_settings = {'mappings': {'all': {'properties': properties}}}

    if not create:
        return index_settings

    SearchEngineFactory().create().create_index(
        index='resource_relations', body=index_settings, ignore=400
    )
    return index_settings
Ejemplo n.º 11
0
def prepare_resource_relations_index(create=False):
    """
    Build the mapping used by the 'resource_relations' Elasticsearch index

    Keyword Arguments:
    create -- when True, the 'resource_relations' index is also created from
        this mapping, with ignore=400 passed to the search engine

    Returns the mapping dict

    """

    keyword_fields_before_notes = ['resourcexid']
    keyword_fields_after_notes = [
        'relationshiptype',
        'resourceinstanceidfrom',
        'resourceinstanceidto',
        'created',
        'modified',
    ]

    # Insert in the original field order: 'notes' is the only text field.
    properties = {}
    for field_name in keyword_fields_before_notes:
        properties[field_name] = {'type': 'keyword'}
    properties['notes'] = {'type': 'text'}
    for field_name in keyword_fields_after_notes:
        properties[field_name] = {'type': 'keyword'}

    index_settings = {'mappings': {'all': {'properties': properties}}}

    if create:
        search_engine = SearchEngineFactory().create()
        search_engine.create_index(index='resource_relations', body=index_settings, ignore=400)

    return index_settings
    def reverse_func(apps, schema_editor):
        """
        Rebuild the prefixed resource_relations index so that its mapping and
        documents use the 'all' type instead of '_doc'

        Does nothing when the prefixed resource_relations index does not exist.
        Otherwise the documents are copied into a temporary index, the
        original index is deleted and recreated with the 'all' mapping, and
        the documents are copied back.

        Arguments:
        apps -- not used by this function
        schema_editor -- not used by this function

        """

        se = SearchEngineFactory().create()
        prefix = settings.ELASTICSEARCH_PREFIX
        live_index = "%s_resource_relations" % prefix
        temp_index = "%s_resource_relations_temp" % prefix

        if not se.es.indices.exists(index=live_index):
            return

        # Move the mapping from the '_doc' type to the 'all' type.
        index_settings = prepare_resource_relations_index(create=False)
        mappings = index_settings['mappings']
        mappings['all'] = mappings['_doc']
        mappings.pop('_doc', None)

        # NOTE(review): the se.* helpers get unprefixed index names while the
        # raw se.es calls get prefixed ones -- presumably the helpers add the
        # prefix themselves; confirm against the search engine wrapper.
        se.create_index(index='resource_relations_temp', body=index_settings)
        copy_to_temp = {
            "source": {"index": live_index, "type": "_doc"},
            "dest": {"index": temp_index, "type": "all"},
        }
        se.es.reindex(body=copy_to_temp, refresh=True, wait_for_completion=True)
        se.delete_index(index='resource_relations')

        se.create_index(index='resource_relations', body=index_settings)
        copy_back = {
            "source": {"index": temp_index},
            "dest": {"index": live_index, "type": "all"},
        }
        se.es.reindex(body=copy_back, refresh=True, wait_for_completion=True)
Ejemplo n.º 13
0
    def prepare_search_index(self, resource_type_id, create=False):
        """
        Creates the settings and mappings in Elasticsearch to support resource search

        The settings come from the parent class; this override adds a
        'date_groups' mapping to the properties of resource_type_id.

        Arguments:
        resource_type_id -- key of the mapping to extend; also used as the
            doc_type when the mapping is added to an existing index

        Keyword Arguments:
        create -- when True, try to create the 'entity' index; if that raises,
            fall back to adding only the mapping to the index

        Returns the complete index settings dict

        """

        # create=False: the parent only builds the settings; the index itself
        # is created below, after the mapping has been extended.
        index_settings = super(Resource,
                               self).prepare_search_index(resource_type_id,
                                                          create=False)

        properties = index_settings['mappings'][resource_type_id]['properties']
        properties['date_groups'] = {
            'properties': {
                'conceptid': {
                    'type': 'string',
                    'index': 'not_analyzed'
                }
            }
        }

        if create:
            se = SearchEngineFactory().create()
            try:
                se.create_index(index='entity', body=index_settings)
            except Exception:
                # Presumably the index already exists, so only the mapping is
                # registered. A bare except would also swallow
                # KeyboardInterrupt and SystemExit.
                se.create_mapping(index='entity',
                                  doc_type=resource_type_id,
                                  body=index_settings['mappings'])

        return index_settings
Ejemplo n.º 14
0
 def setUpClass(cls):
     """
     Drop and recreate the "concept_labels" and "term" indexes, then import
     the archesv4 resource graph fixture via the "packages" management command

     """

     search_engine = SearchEngineFactory().create()
     index_names = ("concept_labels", "term")

     # Both deletions run before either creation, as in the original order.
     for index_name in index_names:
         search_engine.delete_index(index=index_name)
     for index_name in index_names:
         search_engine.create_index(index=index_name)

     fixture_path = "tests/fixtures/resource_graphs/archesv4_resource.json"
     management.call_command("packages", operation="import_json", source=fixture_path)
Ejemplo n.º 15
0
    def test_bulk_indexer(self):
        """
        Add 1001 documents through a BulkIndexer with a batch size of 500 and
        check that the "bulk" index then reports 1001 documents

        """

        total_docs = 1001

        se = SearchEngineFactory().create()
        se.create_index(index="bulk")

        # The count is read only after the context manager has exited.
        with se.BulkIndexer(batch_size=500, refresh=True) as bulk_indexer:
            for doc_id in range(total_docs):
                bulk_indexer.add(
                    index="bulk",
                    id=doc_id,
                    data={"id": doc_id, "type": "prefLabel", "value": "test pref label"},
                )

        self.assertEqual(se.count(index="bulk"), total_docs)
Ejemplo n.º 16
0
    def test_bulk_indexer(self):
        """
        Feed 1001 documents to a BulkIndexer (batch size 500) and verify that
        the 'bulk' index ends up with a document count of 1001

        """

        engine = SearchEngineFactory().create()
        engine.create_index(index='bulk')

        with engine.BulkIndexer(batch_size=500, refresh=True) as indexer:
            for number in range(1001):
                payload = {'id': number, 'type': 'prefLabel', 'value': 'test pref label'}
                indexer.add(index='bulk', id=payload['id'], data=payload)

        # Counted after the indexer's context has been closed.
        self.assertEqual(engine.count(index='bulk'), 1001)
Ejemplo n.º 17
0
def prepare_concepts_index(create=False):
    """
    Build the Elasticsearch settings and mappings for the 'concepts' index

    Keyword Arguments:
    create -- when True, also create the 'concepts' index from these settings

    Returns the settings/mappings dict

    """

    # Custom analyzer: standard tokenizer, lowercased, accents folded to ASCII.
    analysis = {
        'analyzer': {
            'folding': {'tokenizer': 'standard', 'filter': ['lowercase', 'asciifolding']}
        }
    }

    properties = {
        'top_concept': {'type': 'keyword'},
        'conceptid': {'type': 'keyword'},
        'language': {'type': 'keyword'},
        'id': {'type': 'keyword'},
        'category': {'type': 'keyword'},
        'provisional': {'type': 'boolean'},
        'type': {'type': 'keyword'},
        # Full text, with an exact ('raw') and a folded sub-field.
        'value': {
            'analyzer': 'standard',
            'type': 'text',
            'fields': {
                'raw': {'type': 'keyword'},
                'folded': {'analyzer': 'folding', 'type': 'text'},
            },
        },
    }

    index_settings = {
        'settings': {'analysis': analysis},
        'mappings': {'_doc': {'properties': properties}},
    }

    if not create:
        return index_settings

    SearchEngineFactory().create().create_index(index='concepts', body=index_settings)
    return index_settings
Ejemplo n.º 18
0
def prepare_term_index(create=False):
    """
    Build the Elasticsearch settings and mappings to support term search

    Keyword Arguments:
    create -- when True, also create the 'term' index from these settings
        (the call passes ignore=400 to the search engine)

    Returns the settings/mappings dict

    """

    # Custom analyzer: standard tokenizer, lowercased, accents folded to ASCII.
    analysis = {
        'analyzer': {
            'folding': {'tokenizer': 'standard', 'filter': ['lowercase', 'asciifolding']}
        }
    }

    properties = {
        'ids': {'type': 'string', 'index': 'not_analyzed'},
        'context': {'type': 'string', 'index': 'not_analyzed'},
        # Analyzed text with an additional accent-folded sub-field.
        'term': {
            'type': 'string',
            'analyzer': 'standard',
            'fields': {
                'folded': {'type': 'string', 'analyzer': 'folding'}
            },
        },
    }

    index_settings = {
        'settings': {'analysis': analysis},
        'mappings': {'value': {'properties': properties}},
    }

    if not create:
        return index_settings

    SearchEngineFactory().create().create_index(index='term', body=index_settings, ignore=400)
    return index_settings
Ejemplo n.º 19
0
    def prepare_term_index(self, create=False):
        """
        Build the Elasticsearch settings and mappings to support term search

        Keyword Arguments:
        create -- when True, also create the 'term' index from these settings
            (the call passes ignore=400 to the search engine)

        Returns the settings/mappings dict

        """

        # Custom analyzer: standard tokenizer, lowercased, accents folded.
        folding_analyzer = {
            'tokenizer': 'standard',
            'filter': ['lowercase', 'asciifolding'],
        }

        # Analyzed text with an additional accent-folded sub-field.
        term_field = {
            'type': 'string',
            'analyzer': 'standard',
            'fields': {
                'folded': {'type': 'string', 'analyzer': 'folding'},
            },
        }

        index_settings = {
            'settings': {'analysis': {'analyzer': {'folding': folding_analyzer}}},
            'mappings': {
                'value': {
                    'properties': {
                        'ids': {'type': 'string', 'index': 'not_analyzed'},
                        'context': {'type': 'string', 'index': 'not_analyzed'},
                        'term': term_field,
                    },
                },
            },
        }

        if not create:
            return index_settings

        SearchEngineFactory().create().create_index(index='term', body=index_settings, ignore=400)
        return index_settings
Ejemplo n.º 20
0
    def prepare_resource_relations_index(self, create=False):
        """
        Build the mapping used by the 'resource_relations' Elasticsearch index

        Keyword Arguments:
        create -- when True, the 'resource_relations' index is also created
            from this mapping, with ignore=400 passed to the search engine

        Returns the mapping dict

        """

        properties = {
            'resourcexid': {'type': 'long'},
            'notes': {'type': 'string'},
        }
        # The remaining fields are strings indexed without analysis; each one
        # gets its own dict so the entries are never shared objects.
        for field_name in ('relationshiptype', 'entityid2', 'entityid1'):
            properties[field_name] = {'type': 'string', 'index': 'not_analyzed'}

        index_settings = {'mappings': {'all': {'properties': properties}}}

        if create:
            search_engine = SearchEngineFactory().create()
            search_engine.create_index(index='resource_relations', body=index_settings, ignore=400)

        return index_settings
Ejemplo n.º 21
0
def prepare_search_index(resource_model_id, create=False):
    """
    Creates the settings and mappings in Elasticsearch to support resource search

    Arguments:
    resource_model_id -- key under which the mapping is stored in
        index_settings['mappings']; also used as the doc_type when the mapping
        is added to an existing index

    Keyword Arguments:
    create -- when True, try to create the 'resource' index from these
        settings; if that raises, fall back to adding only the mapping

    Returns the full settings dict. When the fallback path ran, only the
    'mappings' part of it is returned (unchanged from the original behavior).

    """

    index_settings = {
        'settings': {
            'analysis': {
                'analyzer': {
                    # Standard tokenizer, lowercased, accents folded to ASCII.
                    'folding': {
                        'tokenizer': 'standard',
                        'filter': ['lowercase', 'asciifolding']
                    }
                }
            }
        },
        'mappings': {
            resource_model_id: {
                'properties': {
                    'graphid': {'type': 'keyword'},
                    'resourceinstanceid': {'type': 'keyword'},
                    'root_ontology_class': {'type': 'keyword'},
                    'displayname': {'type': 'keyword'},
                    'displaydescription': {'type': 'keyword'},
                    'map_popup': {'type': 'keyword'},
                    'provisional': {'type': 'keyword'},
                    'tiles': {
                        'type': 'nested',
                        'properties': {
                            "tiles": {'enabled': False},
                            'tileid': {'type': 'keyword'},
                            'nodegroup_id': {'type': 'keyword'},
                            'parenttile_id': {'type': 'keyword'},
                            'resourceinstanceid_id': {'type': 'keyword'}
                        }
                    },
                    'strings': {
                        'type': 'nested',
                        'properties': {
                            'string': {
                                'type': 'text',
                                'index': 'analyzed',
                                'fields': {
                                    'raw': {'type': 'keyword'},
                                    'folded': {'type': 'text', 'analyzer': 'folding'}
                                }
                            },
                            'nodegroup_id': {'type': 'keyword'},
                            'provisional': {'type': 'keyword'}
                        }
                    },
                    'domains': {
                        'type': 'nested',
                        'properties': {
                            'value': {
                                'type': 'text',
                                'index': 'analyzed',
                                'fields': {
                                    'raw': {'type': 'keyword'}
                                }
                            },
                            'conceptid': {'type': 'keyword'},
                            'valueid': {'type': 'keyword'},
                            'nodegroup_id': {'type': 'keyword'},
                            'provisional': {'type': 'keyword'}
                        }
                    },
                    'geometries': {
                        'type': 'nested',
                        'properties': {
                            'geom': {
                                'properties': {
                                    'features': {
                                        'properties': {
                                            'geometry': {'type': 'geo_shape'},
                                            'id': {'type': 'keyword'},
                                            'type': {'type': 'keyword'},
                                            'properties': {'enabled': False}
                                        }
                                    },
                                    'type': {'type': 'keyword'}
                                }
                            },
                            'nodegroup_id': {'type': 'keyword'},
                            'provisional': {'type': 'keyword'}
                        }
                    },
                    'points': {
                        'type': 'nested',
                        'properties': {
                            'point': {'type': 'geo_point'},
                            'nodegroup_id': {'type': 'keyword'},
                            'provisional': {'type': 'keyword'}
                        }
                    },
                    'dates': {
                        'type': 'nested',
                        'properties': {
                            'date': {'type': 'float'},
                            'nodegroup_id': {'type': 'keyword'},
                            'nodeid': {'type': 'keyword'},
                            'provisional': {'type': 'keyword'}
                        }
                    },
                    'numbers': {
                        'type': 'nested',
                        'properties': {
                            'number': {'type': 'double'},
                            'nodegroup_id': {'type': 'keyword'},
                            'provisional': {'type': 'keyword'}
                        }
                    },
                    'date_ranges': {
                        'type': 'nested',
                        'properties': {
                            'date_range': {'type': 'float_range'},
                            'nodegroup_id': {'type': 'keyword'},
                            'provisional': {'type': 'keyword'}
                        }
                    }
                }
            }
        }
    }

    if create:
        se = SearchEngineFactory().create()
        try:
            se.create_index(index='resource', body=index_settings)
        except Exception:
            # Presumably the index already exists, so only the mapping for
            # this resource model is registered. Catching Exception instead of
            # using a bare except lets KeyboardInterrupt and SystemExit
            # propagate.
            index_settings = index_settings['mappings']
            se.create_mapping(index='resource',
                              doc_type=resource_model_id,
                              body=index_settings)

    return index_settings
Ejemplo n.º 22
0
def prepare_search_index(resource_model_id, create=False):
    """
    Creates the settings and mappings in Elasticsearch to support resource search

    Arguments:
    resource_model_id -- key under which the mapping is stored in
        index_settings['mappings']; also used as the doc_type when the mapping
        is added to an existing index

    Keyword Arguments:
    create -- when True, try to create the 'resource' index from these
        settings; if that raises, fall back to adding only the mapping

    Returns the full settings dict. When the fallback path ran, only the
    'mappings' part of it is returned (unchanged from the original behavior).

    """

    index_settings = {
        'settings': {
            'analysis': {
                'analyzer': {
                    # Standard tokenizer, lowercased, accents folded to ASCII.
                    'folding': {
                        'tokenizer': 'standard',
                        'filter': ['lowercase', 'asciifolding']
                    }
                }
            }
        },
        'mappings': {
            resource_model_id: {
                'properties': {
                    'graphid': {'type': 'string', 'index': 'not_analyzed'},
                    'resourceinstanceid': {'type': 'string', 'index': 'not_analyzed'},
                    'primaryname': {'type': 'string', 'index': 'not_analyzed'},
                    'tiles': {
                        'type': 'nested',
                        'properties': {
                            "tiles": {'enabled': False},
                            'tileid': {'type': 'string', 'index': 'not_analyzed'},
                            'nodegroup_id': {'type': 'string', 'index': 'not_analyzed'},
                            'parenttile_id': {'type': 'string', 'index': 'not_analyzed'},
                            'resourceinstanceid_id': {'type': 'string', 'index': 'not_analyzed'}
                        }
                    },
                    'strings': {
                        'type': 'string',
                        'index': 'analyzed',
                        'fields': {
                            'raw': {'type': 'string', 'index': 'not_analyzed'},
                            'folded': {'type': 'string', 'analyzer': 'folding'}
                        }
                    },
                    'domains': {
                        'properties': {
                            'value': {
                                'type': 'string',
                                'index': 'analyzed',
                                'fields': {
                                    'raw': {'type': 'string', 'index': 'not_analyzed'}
                                }
                            },
                            'conceptid': {'type': 'string', 'index': 'not_analyzed'},
                            'valueid': {'type': 'string', 'index': 'not_analyzed'},
                        }
                    },
                    'geometries': {
                        "properties": {
                            "features": {
                                "properties": {
                                    "geometry": {"type": "geo_shape"},
                                    "id": {'type': 'string', 'index': 'not_analyzed'},
                                    "type": {'type': 'string', 'index': 'not_analyzed'},
                                    "properties": {
                                        "enabled": False
                                    }
                                }
                            },
                            "type": {'type': 'string', 'index': 'not_analyzed'}
                        }
                    },
                    'dates': {
                        "type": "date"
                    },
                    'numbers': {
                        "type": "double"
                    }
                }
            }
        }
    }

    if create:
        se = SearchEngineFactory().create()
        try:
            se.create_index(index='resource', body=index_settings)
        except Exception:
            # Presumably the index already exists, so only the mapping for
            # this resource model is registered. Catching Exception instead of
            # using a bare except lets KeyboardInterrupt and SystemExit
            # propagate.
            index_settings = index_settings['mappings']
            se.create_mapping(index='resource', doc_type=resource_model_id, body=index_settings)

    return index_settings
Ejemplo n.º 23
0
	def tearDownClass(cls):
		"""
		Reset the 'strings' index by deleting it and creating it again

		"""

		index_name = 'strings'
		search_engine = SearchEngineFactory().create()
		search_engine.delete_index(index=index_name)
		search_engine.create_index(index=index_name)
Ejemplo n.º 24
0
def prepare_search_index(resource_model_id, create=False):
    """
    Builds the Elasticsearch settings and mappings that support resource search

    Arguments:
    resource_model_id -- name of the mapping (doc type) the field definitions are registered under

    Keyword Arguments:
    create -- True to also send the definition to the 'resource' index; when create_index raises,
        only the mappings are sent through create_mapping instead

    Return: the full settings/mappings dictionary, or just its 'mappings' portion when the
        create_mapping fallback was taken
    """

    def keyword():
        # built fresh on every call so that no two fields share one mutable dict
        return {'type': 'keyword'}

    def nested(properties):
        return {'type': 'nested', 'properties': properties}

    def analyzed_text(fields):
        return {'type': 'text', 'index': 'analyzed', 'fields': fields}

    properties = {
        'graphid': keyword(),
        'resourceinstanceid': keyword(),
        'root_ontology_class': keyword(),
        'displayname': keyword(),
        'displaydescription': keyword(),
        'map_popup': keyword(),
        'provisional': keyword(),
        'tiles': nested({
            'tiles': {'enabled': False},
            'tileid': keyword(),
            'nodegroup_id': keyword(),
            'parenttile_id': keyword(),
            'resourceinstanceid_id': keyword()
        }),
        'strings': nested({
            'string': analyzed_text({
                'raw': keyword(),
                'folded': {'type': 'text', 'analyzer': 'folding'}
            }),
            'nodegroup_id': keyword(),
            'provisional': keyword()
        }),
        'domains': nested({
            'value': analyzed_text({'raw': keyword()}),
            'conceptid': keyword(),
            'valueid': keyword(),
            'nodegroup_id': keyword(),
            'provisional': keyword()
        }),
        'geometries': nested({
            'geom': {
                'properties': {
                    'features': {
                        'properties': {
                            'geometry': {'type': 'geo_shape'},
                            'id': keyword(),
                            'type': keyword(),
                            'properties': {'enabled': False}
                        }
                    },
                    'type': keyword()
                }
            },
            'nodegroup_id': keyword(),
            'provisional': keyword()
        }),
        'points': nested({
            'point': {'type': 'geo_point'},
            'nodegroup_id': keyword(),
            'provisional': keyword()
        }),
        'dates': nested({
            'date': {'type': 'float'},
            'nodegroup_id': keyword(),
            'nodeid': keyword(),
            'provisional': keyword()
        }),
        'numbers': nested({
            'number': {'type': 'double'},
            'nodegroup_id': keyword(),
            'provisional': keyword()
        }),
        'date_ranges': nested({
            'date_range': {'type': 'float_range'},
            'nodegroup_id': keyword(),
            'provisional': keyword()
        })
    }

    index_settings = {
        'settings': {
            'analysis': {
                'analyzer': {
                    'folding': {
                        'tokenizer': 'standard',
                        'filter': ['lowercase', 'asciifolding']
                    }
                }
            }
        },
        'mappings': {
            resource_model_id: {'properties': properties}
        }
    }

    if create:
        se = SearchEngineFactory().create()
        try:
            se.create_index(index='resource', body=index_settings)
        except Exception:
            # Narrowed from a bare except so KeyboardInterrupt/SystemExit are not swallowed.
            # NOTE(review): presumably this is hit when the 'resource' index already exists - confirm.
            index_settings = index_settings['mappings']
            se.create_mapping(index='resource', doc_type=resource_model_id, body=index_settings)

    return index_settings
Ejemplo n.º 25
0
    def base_prepare_search_index(self, resource_type_id, create=False):
        """
        Builds the Elasticsearch settings and mappings that support resource search

        Arguments:
        resource_type_id -- name of the mapping (doc type) the field definitions are registered under

        Keyword Arguments:
        create -- True to also send the definition to the 'entity' index; when create_index raises,
            only the mappings are sent through create_mapping instead

        Return: the full settings/mappings dictionary, or just its 'mappings' portion when the
            create_mapping fallback was taken
        """

        def not_analyzed():
            # built fresh on every call so that no two fields share one mutable dict
            return {'type': 'string', 'index': 'not_analyzed'}

        def nested_entities(label, value, **extra):
            # each nested section repeats the same entity bookkeeping fields ahead of its own label/value
            properties = {
                'entityid': not_analyzed(),
                'parentid': not_analyzed(),
                'property': not_analyzed(),
                'entitytypeid': not_analyzed(),
                'businesstablename': not_analyzed(),
                'label': label,
                'value': value
            }
            properties.update(extra)
            return {'type': 'nested', 'index': 'analyzed', 'properties': properties}

        index_settings = {
            'settings': {
                'analysis': {
                    'filter': {
                        'desc_ngram': {
                            'type': 'ngram',
                            'min_gram': 2,
                            'max_gram': 30
                        }
                    },
                    'analyzer': {
                        'folding': {
                            'tokenizer': 'standard',
                            'filter': ['lowercase', 'asciifolding']
                        },
                        'ducet_sort': {
                            'tokenizer': 'keyword',
                            'filter': ['icu_collation']
                        },
                        'index_ngram': {
                            'type': 'custom',
                            'tokenizer': 'keyword',
                            'filter': ['desc_ngram', 'lowercase']
                        },
                        'search_ngram': {
                            'type': 'custom',
                            'tokenizer': 'keyword',
                            'filter': ['standard', 'lowercase']
                        }
                    }
                }
            },
            'mappings': {
                resource_type_id: {
                    'properties': {
                        'entityid': not_analyzed(),
                        'parentid': not_analyzed(),
                        'property': not_analyzed(),
                        'entitytypeid': not_analyzed(),
                        'businesstablename': not_analyzed(),
                        'value': not_analyzed(),
                        'label': not_analyzed(),
                        'primaryname': not_analyzed(),
                        'child_entities': nested_entities(
                            label={'type': 'string', 'analyzer': 'ducet_sort'},
                            value={
                                'type': 'string',
                                'index_analyzer': 'index_ngram',
                                'search_analyzer': 'search_ngram',
                                'fields': {
                                    'raw': not_analyzed(),
                                    'folded': {'type': 'string', 'analyzer': 'folding'}
                                }
                            }
                        ),
                        'domains': nested_entities(
                            label={'type': 'string', 'analyzer': 'ducet_sort'},
                            value={
                                'type': 'string',
                                'index': 'analyzed',
                                'fields': {
                                    'raw': not_analyzed()
                                }
                            },
                            conceptid=not_analyzed()
                        ),
                        'geometries': nested_entities(
                            label=not_analyzed(),
                            value={'type': 'geo_shape'}
                        ),
                        'dates': nested_entities(
                            label=not_analyzed(),
                            value={'type': 'date'}
                        )
                    }
                }
            }
        }

        if create:
            se = SearchEngineFactory().create()
            try:
                se.create_index(index='entity', body=index_settings)
            except Exception:
                # Narrowed from a bare except so KeyboardInterrupt/SystemExit are not swallowed.
                # NOTE(review): presumably this is hit when the 'entity' index already exists - confirm.
                index_settings = index_settings['mappings']
                se.create_mapping(index='entity', doc_type=resource_type_id, body=index_settings)

        return index_settings
Ejemplo n.º 26
0
def prepare_term_index(create=False):
    """
    Builds the Elasticsearch settings and mappings that support term search

    Keyword Arguments:
    create -- True to also create the 'strings' index from the definition

    Return: the settings/mappings dictionary
    """

    def keyword_fields(*names):
        # one independent keyword definition per field name, in the order given
        return {name: {'type': 'keyword'} for name in names}

    def searchable_value():
        # 'term' and 'concept' documents describe their 'value' field identically
        return {
            'analyzer': 'standard',
            'type': 'text',
            'fields': {
                'raw': {'type': 'keyword'},
                'folded': {
                    'analyzer': 'folding',
                    'type': 'text'
                }
            }
        }

    term_properties = keyword_fields(
        'nodegroupid', 'tileid', 'nodeid', 'resourceinstanceid', 'provisional'
    )
    term_properties['value'] = searchable_value()

    concept_properties = keyword_fields(
        'top_concept', 'conceptid', 'language', 'id', 'category', 'provisional', 'type'
    )
    concept_properties['value'] = searchable_value()

    folding_analyzer = {
        'tokenizer': 'standard',
        'filter': ['lowercase', 'asciifolding']
    }

    index_settings = {
        'settings': {'analysis': {'analyzer': {'folding': folding_analyzer}}},
        'mappings': {
            'term': {'properties': term_properties},
            'concept': {'properties': concept_properties}
        }
    }

    if not create:
        return index_settings

    SearchEngineFactory().create().create_index(index='strings', body=index_settings)
    return index_settings
Ejemplo n.º 27
0
class BaseIndex(object):
    """
    Base class for a custom Elastic Search index

    index_metadata starts out as None and prepare_index raises until something assigns it;
    get_documents_to_index raises NotImplementedError here and has to be overridden.
    """

    def __init__(self, index_name=None):
        """
        Arguments:
        None

        Keyword Arguments:
        index_name -- name of the index to operate on, required even though it has a default

        Raises SearchIndexError when index_name is None or an empty string
        """

        if index_name is None or index_name == "":
            raise SearchIndexError("Index name is not defined")

        self.se = SearchEngineFactory().create()
        self.index_metadata = None
        self.index_name = index_name

    def prepare_index(self):
        """
        Creates the index in Elastic Search using index_metadata as the request body

        Arguments:
        None

        Keyword Arguments:
        None

        Return: None

        Raises SearchIndexError when index_metadata has not been set
        """

        if self.index_metadata is None:
            raise SearchIndexError("No index metadata defined.")

        self.se.create_index(index=self.index_name, body=self.index_metadata)

    def get_documents_to_index(self, resourceinstance, tiles):
        """
        Gets a document to index into Elastic Search

        Arguments:
        resourceinstance -- resource instance object
        tiles -- list of tiles that make up the resource instance

        Keyword Arguments:
        None

        Return: tuple of (document, document id)
        """

        raise NotImplementedError

    def index_document(self, document=None, id=None):
        """
        Indexes a document into Elastic Search, does nothing unless both values are supplied

        Arguments:
        None

        Keyword Arguments:
        document -- the document to index
        id -- the id of the document

        Return: None
        """

        if document is not None and id is not None:
            self.se.index_data(index=self.index_name, body=document, id=id)

    def index_resources(self, resources=None, batch_size=settings.BULK_IMPORT_BATCH_SIZE, quiet=False):
        """
        Indexes a list of resources in bulk to Elastic Search and prints a summary that compares
        the number of resources passed in with the growth of the index document count

        Keyword Arguments:
        resources -- the list of resource instances to index, None is treated as an empty list
        batch_size -- the number of records to index as a group, the larger the number to more memory required
        quiet -- Silences the status bar output during certain operations, use in celery operations for example

        Return: None
        """

        if resources is None:
            # the default would otherwise fail on len(None) below
            resources = []

        start = datetime.now()
        q = Query(se=self.se)
        self.se.refresh(index=self.index_name)
        count_before = self.se.count(index=self.index_name, body=q.dsl)
        result_summary = {"database": len(resources), "indexed": 0}

        # always bind bar so the loop below never depends on which branch ran
        bar = None
        if quiet is False and len(resources) > 1:
            bar = pyprind.ProgBar(len(resources), bar_char="█")

        with self.se.BulkIndexer(batch_size=batch_size, refresh=True) as indexer:
            for resource in resources:
                if bar is not None:
                    bar.update(item_id=resource)
                tiles = list(models.TileModel.objects.filter(resourceinstance=resource))
                document, doc_id = self.get_documents_to_index(resource, tiles)
                # check doc_id, the builtin id() function is never None
                if document is not None and doc_id is not None:
                    indexer.add(index=self.index_name, id=doc_id, data=document)

        self.se.refresh(index=self.index_name)
        result_summary["indexed"] = self.se.count(index=self.index_name, body=q.dsl) - count_before
        status = "Passed" if result_summary["database"] == result_summary["indexed"] else "Failed"
        print(f"Custom Index - {settings.ELASTICSEARCH_PREFIX}_{self.index_name}")
        print(
            f"    Status: {status}, In Database: {result_summary['database']}, Indexed: {result_summary['indexed']}, Took: {(datetime.now() - start).seconds} seconds"
        )

    def delete_resources(self, resources=None):
        """
        Deletes documents from an index based on the passed in list of resources
        Delete by query, so this is a single operation

        Keyword Arguments:
        resources -- a single resource instance or a list of resource instances
        """

        q = Query(se=self.se)
        # anything that is not a list is treated as one single resource
        resourcelist = resources if isinstance(resources, list) else [resources]
        list_of_ids_to_delete = [resource.pk for resource in resourcelist]
        q.add_query(Ids(ids=list_of_ids_to_delete))
        q.delete(index=self.index_name)

    def delete_index(self):
        """
        Deletes this index from Elastic Search

        Arguments:
        None

        Keyword Arguments:
        None

        Return: None
        """

        self.se.delete_index(index=self.index_name)

    def reindex(self, graphids=None, clear_index=True, batch_size=settings.BULK_IMPORT_BATCH_SIZE, quiet=False):
        """
        Reindexes the index.  Without graphids this raises NotImplementedError, so a subclass has to
        override it and supply them.
        When graphids are passed in, this loops through all resource instances of each graph type.

            Example subclass command:
            def reindex(self, clear_index=True):
                PARCEL_GRAPHID = "e3c35dca-5e72-11ea-a2d3-dca90488358a"
                super(CustomIndexName, self).reindex(graphids=[PARCEL_GRAPHID], clear_index=clear_index)

        Keyword Arguments:
        graphids -- list of graphs ids to trigger the reindex on, will get all resource instances of each graph id supplied
        clear_index -- True(default) to delete the index and create it again before reindexing begins
        batch_size -- the number of records to index as a group, the larger the number to more memory required
        quiet -- passed through to index_resources to silence the status bar output

        Return: None
        """

        if graphids is None:
            raise NotImplementedError

        if clear_index:
            self.delete_index()
            self.prepare_index()

        for graphid in graphids:
            resources = Resource.objects.filter(graph_id=graphid)
            self.index_resources(resources=resources, batch_size=batch_size, quiet=quiet)
Ejemplo n.º 28
0
class BaseIndex(object):
    """
    Base class for a custom Elastic Search index

    index_metadata starts out as None and prepare_index raises until something assigns it;
    get_documents_to_index raises NotImplementedError here and has to be overridden.
    """

    def __init__(self, index_name=None):
        """
        Arguments:
        None

        Keyword Arguments:
        index_name -- name of the index to operate on, required even though it has a default

        Raises SearchIndexError when index_name is None or an empty string
        """

        # compare by value, "is" against a string literal tests object identity
        if index_name is None or index_name == "":
            raise SearchIndexError("Index name is not defined")

        self.se = SearchEngineFactory().create()
        self.index_metadata = None
        self.index_name = index_name

    def prepare_index(self):
        """
        Creates the index in Elastic Search using index_metadata as the request body

        Arguments:
        None

        Keyword Arguments:
        None

        Return: None

        Raises SearchIndexError when index_metadata has not been set
        """

        if self.index_metadata is None:
            raise SearchIndexError("No index metadata defined.")

        self.se.create_index(index=self.index_name, body=self.index_metadata)

    def get_documents_to_index(self, resourceinstance, tiles):
        """
        Gets a document to index into Elastic Search

        Arguments:
        resourceinstance -- resource instance object
        tiles -- list of tiles that make up the resource instance

        Keyword Arguments:
        None

        Return: tuple of (document, document id)
        """

        raise NotImplementedError

    def index_document(self, document=None, id=None):
        """
        Indexes a document into Elastic Search, does nothing unless both values are supplied

        Arguments:
        None

        Keyword Arguments:
        document -- the document to index
        id -- the id of the document

        Return: None
        """

        if document is not None and id is not None:
            self.se.index_data(index=self.index_name, body=document, id=id)

    def bulk_index(self,
                   resources=None,
                   resource_type=None,
                   graph_name=None,
                   clear_index=True):
        """
        Indexes a list of documents in bulk to Elastic Search and prints a summary that compares
        the number of resources passed in with the growth of the index document count

        Arguments:
        None

        Keyword Arguments:
        resources -- the list of resource instances to index, None is treated as an empty list
        resource_type -- the type of resources being indexed
        graph_name -- the name of the graph model that represents the resources being indexed
        clear_index -- True(default) to remove all index records of type "resource_type" before indexing,
            assumes that a field called "graph_id" exists on the indexed documents

        Return: None
        """

        if resources is None:
            # the default would otherwise fail on len(None) below
            resources = []

        start = datetime.now()
        q = Query(se=self.se)
        if clear_index:
            term = Term(field="graph_id", term=str(resource_type))
            q.add_query(term)
            q.delete(index=self.index_name, refresh=True)

        # fresh query without the graph_id term, used for both document counts
        q = Query(se=self.se)
        count_before = self.se.count(index=self.index_name, body=q.dsl)

        result_summary = {"database": len(resources), "indexed": 0}
        with self.se.BulkIndexer(batch_size=settings.BULK_IMPORT_BATCH_SIZE,
                                 refresh=True) as indexer:
            for resource in resources:
                tiles = list(
                    models.TileModel.objects.filter(resourceinstance=resource))
                document, doc_id = self.get_documents_to_index(resource, tiles)
                # check doc_id, the builtin id() function is never None
                if document is not None and doc_id is not None:
                    indexer.add(index=self.index_name,
                                id=doc_id,
                                data=document)

        result_summary["indexed"] = self.se.count(index=self.index_name,
                                                  body=q.dsl) - count_before
        status = "Passed" if result_summary["database"] == result_summary[
            "indexed"] else "Failed"
        print("Custom Index - %s:" % self.index_name)
        print(
            "    Status: {0}, Resource Type: {1}, In Database: {2}, Indexed: {3}, Took: {4} seconds"
            .format(status, graph_name, result_summary["database"],
                    result_summary["indexed"],
                    (datetime.now() - start).seconds))

    def delete_index(self):
        """
        Deletes this index from Elastic Search

        Arguments:
        None

        Keyword Arguments:
        None

        Return: None
        """

        self.se.delete_index(index=self.index_name)
Ejemplo n.º 29
0
 def setUpClass(cls):
     """
     Resets the 'terms,concepts' index target by deleting it and then creating it again

     """

     search_engine = SearchEngineFactory().create()
     # order matters: the delete call runs first, then the create call
     for reset_step in (search_engine.delete_index, search_engine.create_index):
         reset_step(index='terms,concepts')
Ejemplo n.º 30
0
    def prepare_search_index(self, resource_type_id, create=False):
        """
        Builds the Elasticsearch settings and mappings that support resource search

        Arguments:
        resource_type_id -- name of the mapping (doc type) the field definitions are registered under

        Keyword Arguments:
        create -- True to also send the definition to the 'entity' index; when create_index raises,
            only the mappings are sent through create_mapping instead

        Return: the full settings/mappings dictionary, or just its 'mappings' portion when the
            create_mapping fallback was taken
        """

        def not_analyzed():
            # built fresh on every call so that no two fields share one mutable dict
            return {'type': 'string', 'index': 'not_analyzed'}

        def nested_entities(value, **extra):
            # each nested section repeats the same entity bookkeeping fields ahead of its own value
            properties = {
                'entityid': not_analyzed(),
                'parentid': not_analyzed(),
                'property': not_analyzed(),
                'entitytypeid': not_analyzed(),
                'businesstablename': not_analyzed(),
                'label': not_analyzed(),
                'value': value
            }
            properties.update(extra)
            return {'type': 'nested', 'index': 'analyzed', 'properties': properties}

        index_settings = {
            'settings': {
                'analysis': {
                    'analyzer': {
                        'folding': {
                            'tokenizer': 'standard',
                            'filter': ['lowercase', 'asciifolding']
                        }
                    }
                }
            },
            'mappings': {
                resource_type_id: {
                    'properties': {
                        'entityid': not_analyzed(),
                        'parentid': not_analyzed(),
                        'property': not_analyzed(),
                        'entitytypeid': not_analyzed(),
                        'businesstablename': not_analyzed(),
                        'value': not_analyzed(),
                        'label': not_analyzed(),
                        'primaryname': not_analyzed(),
                        'child_entities': nested_entities({
                            'type': 'string',
                            'index': 'analyzed',
                            'fields': {
                                'raw': not_analyzed(),
                                'folded': {'type': 'string', 'analyzer': 'folding'}
                            }
                        }),
                        'domains': nested_entities(
                            {
                                'type': 'string',
                                'index': 'analyzed',
                                'fields': {
                                    'raw': not_analyzed()
                                }
                            },
                            conceptid=not_analyzed()
                        ),
                        'geometries': nested_entities({'type': 'geo_shape'}),
                        'dates': nested_entities({'type': 'date'}),
                        'extendeddates': nested_entities({'type': 'integer'}),
                        'extendeddategroups': {
                            'properties': {
                                'conceptid': not_analyzed(),
                                'value': {'type': 'integer', 'index': 'not_analyzed'}
                            }
                        }
                    }
                }
            }
        }

        if create:
            se = SearchEngineFactory().create()
            try:
                se.create_index(index='entity', body=index_settings)
            except Exception:
                # Narrowed from a bare except so KeyboardInterrupt/SystemExit are not swallowed.
                # NOTE(review): presumably this is hit when the 'entity' index already exists - confirm.
                index_settings = index_settings['mappings']
                se.create_mapping(index='entity', doc_type=resource_type_id, body=index_settings)

        return index_settings
Ejemplo n.º 31
0
    def prepare_search_index(self, resource_type_id, create=False):
        """
        Creates the settings and mappings in Elasticsearch to support resource search

        Arguments:
        resource_type_id -- key under which the mapping is stored in 'mappings'; also
                            passed as doc_type when the mapping is put onto an existing index

        Keyword arguments:
        create -- when True, try to create the 'entity' index from these settings; if that
                  raises, put only the mapping onto the 'entity' index instead

        Returns the complete settings dictionary, except in the fallback case described
        above, where only its 'mappings' portion is returned.

        """

        def exact_string():
            # Built fresh on every call so that no two fields share one dictionary object.
            return {'type' : 'string', 'index' : 'not_analyzed'}

        def nested_entities(value_mapping, extra_fields=()):
            # Mapping for one nested group: the shared entity fields, then 'value',
            # then any additional not_analyzed string fields.
            group_properties = {}
            for field in ('entityid', 'parentid', 'property', 'entitytypeid', 'businesstablename', 'label'):
                group_properties[field] = exact_string()
            group_properties['value'] = value_mapping
            for field in extra_fields:
                group_properties[field] = exact_string()
            return {
                'type' : 'nested',
                'index' : 'analyzed',
                'properties' : group_properties
            }

        properties = {}
        for field in ('entityid', 'parentid', 'property', 'entitytypeid',
                      'businesstablename', 'value', 'label', 'primaryname'):
            properties[field] = exact_string()

        properties['child_entities'] = nested_entities({
            'type' : 'string',
            'index' : 'analyzed',
            'fields' : {
                'raw' : exact_string(),
                'folded': { 'type': 'string', 'analyzer': 'folding'}
            }
        })
        properties['domains'] = nested_entities({
            'type' : 'string',
            'index' : 'analyzed',
            'fields' : {
                'raw' : exact_string()
            }
        }, extra_fields=('conceptid',))
        properties['geometries'] = nested_entities({"type": "geo_shape"})
        properties['dates'] = nested_entities({"type" : "date"})

        index_settings = {
            'settings':{
                'analysis': {
                    'analyzer': {
                        # referenced by the 'folded' sub-field of child_entities.value
                        'folding': {
                            'tokenizer': 'standard',
                            'filter':  [ 'lowercase', 'asciifolding' ]
                        }
                    }
                }
            },
            'mappings': {
                resource_type_id : {
                    'properties' : properties
                }
            }
        }

        if create:
            se = SearchEngineFactory().create()
            try:
                se.create_index(index='entity', body=index_settings)
            except Exception:
                # Catch Exception rather than everything, so KeyboardInterrupt and
                # SystemExit still propagate. Presumably the usual failure is that the
                # 'entity' index already exists -- TODO confirm; in that case only the
                # mapping for this resource type is added.
                index_settings = index_settings['mappings']
                se.create_mapping(index='entity', doc_type=resource_type_id, body=index_settings)

        return index_settings
Ejemplo n.º 32
0
	def tearDownClass(cls):
		"""
		Delete and then recreate the 'concept_labels' and 'term' search indexes

		"""

		engine = SearchEngineFactory().create()
		index_names = ('concept_labels', 'term')
		# All deletions happen before any index is created again.
		for index_name in index_names:
			engine.delete_index(index=index_name)
		for index_name in index_names:
			engine.create_index(index=index_name)
Ejemplo n.º 33
0
def prepare_search_index(create=False):
    """
    Build the Elasticsearch index definition (settings plus mappings) used for resource search

    Keyword arguments:
    create -- when True, also create the "resources" index from the definition

    Returns the index definition dictionary.

    """

    # Small constructors; each call returns a new dictionary so that no two
    # fields of the mapping share one object.
    def keyword():
        return {"type": "keyword"}

    def flag():
        return {"type": "boolean"}

    def disabled():
        return {"enabled": False}

    def text(subfields):
        return {"type": "text", "fields": subfields}

    def nested(properties):
        return {"type": "nested", "properties": properties}

    tile_fields = {
        "tiles": disabled(),
        "tileid": keyword(),
        "nodegroup_id": keyword(),
        "parenttile_id": keyword(),
        "resourceinstanceid_id": keyword(),
        "provisionaledits": disabled(),
    }

    string_fields = {
        "string": text({
            "raw": keyword(),
            "folded": {"type": "text", "analyzer": "folding"},
        }),
        "nodegroup_id": keyword(),
        "provisional": flag(),
    }

    id_fields = {
        "id": keyword(),
        "nodegroup_id": keyword(),
        "provisional": flag(),
    }

    domain_fields = {
        "value": text({"raw": keyword()}),
        "conceptid": keyword(),
        "valueid": keyword(),
        "nodegroup_id": keyword(),
        "provisional": flag(),
    }

    feature_fields = {
        "geometry": {"type": "geo_shape"},
        "id": keyword(),
        "type": keyword(),
        "properties": disabled(),
    }
    geometry_fields = {
        "geom": {
            "properties": {
                "features": {"properties": feature_fields},
                "type": keyword(),
            }
        },
        "nodegroup_id": keyword(),
        "provisional": flag(),
    }

    point_fields = {
        "point": {"type": "geo_point"},
        "nodegroup_id": keyword(),
        "provisional": flag(),
    }

    date_fields = {
        "date": {"type": "float"},
        "nodegroup_id": keyword(),
        "nodeid": keyword(),
        "provisional": flag(),
    }

    number_fields = {
        "number": {"type": "double"},
        "nodegroup_id": keyword(),
        "provisional": flag(),
    }

    date_range_fields = {
        "date_range": {"type": "float_range"},
        "nodegroup_id": keyword(),
        "provisional": flag(),
    }

    document_fields = {
        "graph_id": keyword(),
        "legacyid": text({"keyword": {"ignore_above": 256, "type": "keyword"}}),
        "resourceinstanceid": keyword(),
        "root_ontology_class": keyword(),
        "displayname": keyword(),
        "displaydescription": keyword(),
        "map_popup": keyword(),
        "provisional_resource": keyword(),
        "tiles": nested(tile_fields),
        "strings": nested(string_fields),
        "ids": nested(id_fields),
        "domains": nested(domain_fields),
        "geometries": nested(geometry_fields),
        "points": nested(point_fields),
        "dates": nested(date_fields),
        "numbers": nested(number_fields),
        "date_ranges": nested(date_range_fields),
    }

    index_settings = {
        "settings": {
            "analysis": {
                "analyzer": {
                    # used by the "folded" sub-field of strings.string
                    "folding": {
                        "tokenizer": "standard",
                        "filter": ["lowercase", "asciifolding"],
                    }
                }
            }
        },
        "mappings": {"_doc": {"properties": document_fields}},
    }

    if create:
        SearchEngineFactory().create().create_index(index="resources", body=index_settings)

    return index_settings
Ejemplo n.º 34
0
 def tearDownClass(cls):
     """
     Delete and then recreate the "concept_labels" and "term" search indexes

     """

     engine = SearchEngineFactory().create()
     index_names = ("concept_labels", "term")
     # All deletions happen before any index is created again.
     for index_name in index_names:
         engine.delete_index(index=index_name)
     for index_name in index_names:
         engine.create_index(index=index_name)
Ejemplo n.º 35
0
 def tearDownClass(cls):
     """
     Delete and then recreate the 'strings' search index

     """

     index_name = 'strings'
     engine = SearchEngineFactory().create()
     engine.delete_index(index=index_name)
     engine.create_index(index=index_name)
Ejemplo n.º 36
0
 def tearDownClass(cls):
     """
     Delete and then recreate the indexes named by "terms,concepts"

     The comma-separated value is handed to the search engine as a single string.

     """

     index_names = "terms,concepts"
     engine = SearchEngineFactory().create()
     engine.delete_index(index=index_names)
     engine.create_index(index=index_names)
Ejemplo n.º 37
0
def prepare_search_index(create=False):
    """
    Build the Elasticsearch index definition (settings plus mappings) used for resource search

    Besides the fixed mapping, every node whose datatype supplies a default
    Elasticsearch mapping and has a default widget gets an entry, keyed by its
    node id, under the "data" properties of the "tiles" mapping.

    Keyword arguments:
    create -- when True, also create the "resources" index from the definition

    Returns the index definition dictionary.

    """

    # Small constructors; each call returns a new dictionary so that no two
    # fields of the mapping share one object.
    def keyword():
        return {"type": "keyword"}

    def flag():
        return {"type": "boolean"}

    def integer():
        return {"type": "integer"}

    def disabled():
        return {"enabled": False}

    def text(subfields):
        return {"type": "text", "fields": subfields}

    def nested(properties):
        return {"type": "nested", "properties": properties}

    # Filled in per node further down; kept as its own name because it is the
    # very dictionary embedded under tiles -> data -> properties.
    tile_data_fields = {}

    tile_fields = {
        "tiles": disabled(),
        "tileid": keyword(),
        "nodegroup_id": keyword(),
        "parenttile_id": keyword(),
        "resourceinstanceid_id": keyword(),
        "provisionaledits": disabled(),
        "data": {"properties": tile_data_fields},
    }

    permission_fields = {
        "users_without_read_perm": integer(),
        "users_without_edit_perm": integer(),
        "users_without_delete_perm": integer(),
        "users_with_no_access": integer(),
    }

    string_fields = {
        "string": text({
            "raw": keyword(),
            "folded": {"type": "text", "analyzer": "folding"},
        }),
        "nodegroup_id": keyword(),
        "provisional": flag(),
    }

    id_fields = {
        "id": keyword(),
        "nodegroup_id": keyword(),
        "provisional": flag(),
    }

    domain_fields = {
        "value": text({"raw": keyword()}),
        "conceptid": keyword(),
        "valueid": keyword(),
        "nodegroup_id": keyword(),
        "provisional": flag(),
    }

    feature_fields = {
        "geometry": {"type": "geo_shape"},
        "id": keyword(),
        "type": keyword(),
        "properties": disabled(),
    }
    geometry_fields = {
        "geom": {
            "properties": {
                "features": {"properties": feature_fields},
                "type": keyword(),
            }
        },
        "nodegroup_id": keyword(),
        "provisional": flag(),
    }

    point_fields = {
        "point": {"type": "geo_point"},
        "nodegroup_id": keyword(),
        "provisional": flag(),
    }

    date_fields = {
        "date": integer(),
        "nodegroup_id": keyword(),
        "nodeid": keyword(),
        "provisional": flag(),
    }

    number_fields = {
        "number": {"type": "double"},
        "nodegroup_id": keyword(),
        "provisional": flag(),
    }

    date_range_fields = {
        "date_range": {"type": "integer_range"},
        "nodegroup_id": keyword(),
        "provisional": flag(),
    }

    document_fields = {
        "graph_id": keyword(),
        "legacyid": text({"keyword": {"ignore_above": 256, "type": "keyword"}}),
        "resourceinstanceid": keyword(),
        "root_ontology_class": keyword(),
        "displayname": keyword(),
        "displaydescription": keyword(),
        "map_popup": keyword(),
        "provisional_resource": keyword(),
        "tiles": nested(tile_fields),
        "permissions": nested(permission_fields),
        "strings": nested(string_fields),
        "ids": nested(id_fields),
        "domains": nested(domain_fields),
        "geometries": nested(geometry_fields),
        "points": nested(point_fields),
        "dates": nested(date_fields),
        "numbers": nested(number_fields),
        "date_ranges": nested(date_range_fields),
    }

    index_settings = {
        "settings": {
            "analysis": {
                "analyzer": {
                    # used by the "folded" sub-field of strings.string
                    "folding": {
                        "tokenizer": "standard",
                        "filter": ["lowercase", "asciifolding"],
                    }
                }
            },
            "index.mapping.total_fields.limit": 20000,
        },
        "mappings": {"_doc": {"properties": document_fields}},
    }

    try:
        factory = DataTypeFactory()
        for node in models.Node.objects.all():
            node_mapping = factory.get_instance(node.datatype).default_es_mapping()
            if not node_mapping:
                continue
            if not factory.datatypes[node.datatype].defaultwidget:
                continue
            tile_data_fields[str(node.nodeid)] = node_mapping
    except ProgrammingError:
        # Per-node mappings are skipped; entries added before the error are kept.
        print("Skipping datatype mappings because the datatypes table is not yet available")

    if create:
        SearchEngineFactory().create().create_index(index="resources", body=index_settings)

    return index_settings